常用的hive命令
切换数据库 | use test; |
查询表的建表信息 | show create table 数据库名称.表名; |
查看表的类型信息 | desc formatted 数据库名称.表名; |
删除内部表 |
drop table 数据库名称.表名; |
先依次启动 hdfs、mysql、hiveserver2,再启动 beeline
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT COL_COMMENT],.....)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment],....)]
[CLUSTERED BY (col_name,col_name,....)
[SORTED BY (col_name [ASC|DESC],...)] INTO num_buckets BUCKETS]
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ',']
[STORED AS file_format]
[LOCATION hdfs_path]
字段解释
1 CREATE TABLE创建一个指定名字的表,如果名字相同抛出异常,用户可以使用IF NOT EXISTS来忽略异常
2 EXTERNAL关键字可以创建一个外部表,在建表的同时指定一个实际数据的路径(LOCATION)
,hive在删除表的时候,内部表的元数据和数据会被一起删除,而外部表只删除元数据,不删除数据
3 COMMENT是为表和列添加注释
4 PARTITIONED BY是分区表
5 CLUSTERED BY 是建分桶(不常用)
6 SORTED BY 是指定字段进行排序(不常用)
7 ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 是指每行数据中列分隔符为","
默认分隔符为"\001"(即Ctrl+A)
8 STORED AS 指定存储文件类型,数据文件是纯文本,可以使用STORED AS TEXTFILE
9 LOCATION 指定表在HDFS上的存储位置,内部表不要指定,
但是如果定义的是外部表,则需要直接指定一个路径。
-- Create the database `test`; IF NOT EXISTS avoids an error when it is already there.
CREATE DATABASE IF NOT EXISTS test;

-- Create table emp1 inside database `test`.
-- There are two ways to do this.
-- Option 1: switch to the database first, then create the table.
USE test;

CREATE TABLE IF NOT EXISTS emp1 (
    emp_id        INT,
    emp_name      STRING,
    department_id INT
)
STORED AS TEXTFILE;

-- Save the statements above into a .sql file, then run that file with `source`.
source /opt/sql/create_emp.sql;

-- Show the DDL recorded for the table.
-- Syntax: show create table <database_name>.<table_name>;
SHOW CREATE TABLE test.emp1;
+----------------------------------------------------+
| createtab_stmt |
+----------------------------------------------------+
| CREATE TABLE `test.emp1`( |
| `emp_id` int, |
| `emp_name` string, |
| `department_id` int) |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://bigdata004:8020/user/hive/warehouse/test.db/emp1' |
| TBLPROPERTIES ( |
| 'bucketing_version'='2', |
| 'transient_lastDdlTime'='1719865642') |
+----------------------------------------------------+
-- Option 2: qualify the table name with its database directly in the CREATE statement.
CREATE TABLE IF NOT EXISTS test.emp1 (
    emp_id        INT,
    emp_name      STRING,
    department_id INT
)
STORED AS TEXTFILE;

-- Inspect the table's detailed metadata, including its table type.
-- Syntax: desc formatted <database_name>.<table_name>;
DESCRIBE FORMATTED test.emp1;
+-------------------------------+----------------------------------------------------+----------------------------------------------------+
| col_name | data_type | comment |
+-------------------------------+----------------------------------------------------+----------------------------------------------------+
| # col_name | data_type | comment |
| emp_id | int | |
| emp_name | string | |
| department_id | int | |
| | NULL | NULL |
| # Detailed Table Information | NULL | NULL |
| Database: | test | NULL |
| OwnerType: | USER | NULL |
| Owner: | root | NULL |
| CreateTime: | Tue Jul 02 04:27:22 CST 2024 | NULL |
| LastAccessTime: | UNKNOWN | NULL |
| Retention: | 0 | NULL |
| Location: | hdfs://bigdata004:8020/user/hive/warehouse/test.db/emp1 | NULL |
| Table Type: | MANAGED_TABLE | NULL |
| Table Parameters: | NULL | NULL |
| | COLUMN_STATS_ACCURATE | {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"department_id\":\"true\",\"emp_id\":\"true\",\"emp_name\":\"true\"}} |
| | bucketing_version | 2 |
| | numFiles | 0 |
| | numRows | 0 |
| | rawDataSize | 0 |
| | totalSize | 0 |
| | transient_lastDdlTime | 1719865642 |
| | NULL | NULL |
| # Storage Information | NULL | NULL |
| SerDe Library: | org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | NULL |
| InputFormat: | org.apache.hadoop.mapred.TextInputFormat | NULL |
| OutputFormat: | org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | NULL |
| Compressed: | No | NULL |
| Num Buckets: | -1 | NULL |
| Bucket Columns: | [] | NULL |
| Sort Columns: | [] | NULL |
| Storage Desc Params: | NULL | NULL |
| | serialization.format | 1 |
+-------------------------------+----------------------------------------------------+----------------------------------------------------+
33 rows selected (0.153 seconds)
-- Create the managed (internal) table emp2 with column-level and table-level comments.
CREATE TABLE IF NOT EXISTS test.emp2 (
    emp_id        INT    COMMENT "员工id",
    emp_name      STRING COMMENT "员工姓名",
    department_id INT    COMMENT "部门id"
)
COMMENT "员工内部表"
STORED AS TEXTFILE;

-- Save the statement above into a .sql file, then execute that file
-- with `source` from the beeline command line.
此时查看建表信息
+----------------------------------------------------+
| createtab_stmt |
+----------------------------------------------------+
| CREATE TABLE `test.emp2`( |
| `emp_id` int COMMENT '??id', |
| `emp_name` string COMMENT '????', |
| `department_id` int COMMENT '??id') |
| COMMENT '?????' |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://bigdata004:8020/user/hive/warehouse/test.db/emp2' |
| TBLPROPERTIES ( |
| 'bucketing_version'='2', |
| 'transient_lastDdlTime'='1719868467') |
+----------------------------------------------------+
16 rows selected (0.104 seconds)
问号部分就是注释里的中文,要解决下其乱码问题
解决hive中文乱码
1.登录mysql 执行如下命令 然后重启mysql
得先退出hive 并杀死进程 kill -9 进程号
# Run against the MySQL database `hive` that backs the Hive metastore.
# NOTE(review): check each column's current definition with SHOW CREATE TABLE first;
# if a metastore schema already declares one of these columns with a larger type,
# the varchar length given here would narrow it -- confirm before running.

# Column comments: store as utf8.
ALTER TABLE hive.COLUMNS_V2
    MODIFY COLUMN COMMENT VARCHAR(256) CHARACTER SET utf8;

# Table parameters (the table-level comment is kept here): store as utf8.
ALTER TABLE hive.TABLE_PARAMS
    MODIFY COLUMN PARAM_VALUE VARCHAR(20000) CHARACTER SET utf8;

# Partition parameters and partition-key comments: store as utf8 so that
# partition keys can be described in Chinese.
ALTER TABLE hive.PARTITION_PARAMS
    MODIFY COLUMN PARAM_VALUE VARCHAR(20000) CHARACTER SET utf8;
ALTER TABLE hive.PARTITION_KEYS
    MODIFY COLUMN PKEY_COMMENT VARCHAR(20000) CHARACTER SET utf8;
打开navicat
将命令输入 ,点击运行
重启mysql
systemctl stop mysqld
systemctl start mysqld
##2. 在hive-site.xml配置文件中修改如下配置
<property>
  <!-- JDBC URL of the MySQL database backing the Hive metastore.
       useUnicode=true and characterEncoding=UTF-8 make the connection use UTF-8
       so that non-ASCII (e.g. Chinese) comments are stored correctly.
       Inside XML every '&' separating URL parameters must be written as '&amp;';
       a raw '&' makes hive-site.xml malformed and the configuration fails to load. -->
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://bigdata004:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;useUnicode=true&amp;characterEncoding=UTF-8</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>
对于之前创建的带注释的表emp2不起效果,应该重新创建一个带注释的表
+----------------------------------------------------+
| createtab_stmt |
+----------------------------------------------------+
| CREATE TABLE `test.emp2`( |
| `emp_id` int COMMENT '员工id', |
| `emp_name` string COMMENT '员工姓名', |
| `department_id` int COMMENT '部门id') |
| COMMENT '?????' |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://bigdata004:8020/user/hive/warehouse/test.db/emp2' |
| TBLPROPERTIES ( |
| 'bucketing_version'='2', |
| 'transient_lastDdlTime'='1719869823') |
+----------------------------------------------------+
成功了(字段注释已正常显示;但上面输出中的表注释仍显示为'?????',可再用 desc formatted test.emp2 确认表注释是否正常)