一、Kerberos赋权
klist -kt */hive.keytab
kinit -kt */hive.keytab {principal}
二、不进入命令行执行SQL
1. 执行SQL语句
hive -e "example sql"
2. 执行SQL文件
hive -f example.sql
3. 临时hive配置
hive --hiveconf hive.execution.engine=mr -e "example sql"
三、进入命令行执行SQL
配置好环境变量后
1. hive-cli
hive
2. Beeline
beeline -u jdbc:hive2://$HS2_HOST:$HS2_PORT
3. 临时hive配置
hive> set hive.execution.engine=mr;
四、数据定义语言
1. 数据库操作
-- 查看包含的数据库
SHOW DATABASES;
-- 生成数据库
-- IF NOT EXISTS避免数据库存在时抛出错误信息
-- COMMENT为数据库的描述信息
-- LOCATION为数据库创建的位置
-- DBPROPERTIES为键值对属性信息
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
-- 删除数据库
-- HIVE不允许删除一个包含有表的数据库
-- CASCADE可以使HIVE自行先删除数据库中的表
-- RESTRICT为默认情况
DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE];
-- 更改数据库
ALTER DATABASE database_name SET DBPROPERTIES (property_name=property_value, ...);
ALTER DATABASE database_name SET LOCATION hdfs_path;
-- 使用数据库
USE database_name;
-- 转换到默认数据库
USE DEFAULT;
2. 表操作
-- 查看当前数据库包含的表
SHOW TABLES;
SHOW TABLES IN database_name;
-- 生成表
-- EXTERNAL使Hive生成外部表
-- TBLPROPERTIES为键值对属性信息
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database_name.]table_name (
col_name data_type
)
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)]
-- 拷贝一张已经存在的表模式(无需拷贝数据)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [database_name.]table_name
LIKE existing_table_name
[LOCATION hdfs_path]
-- 查看表信息
DESCRIBE FORMATTED [database_name.]table_name;
-- 删除表
DROP TABLE [IF EXISTS] table_name;
-- 清空表
TRUNCATE [TABLE] table_name;
-- 表重命名
ALTER TABLE table_name RENAME TO new_table_name;
-- 修改表属性
ALTER TABLE table_name SET TBLPROPERTIES (property_name = property_value, property_name = property_value, ... );
-- 修改表描述信息
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
3. 分区和列操作
-- 查看表分区
show partitions [database_name.]table_name;
-- 增加表分区
-- partition_spec: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'];
-- 修改表分区名称
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;
-- 移动位置修改分区的路径
ALTER TABLE table_name PARTITION partition_spec SET LOCATION 'location';
-- 清空表分区
TRUNCATE [TABLE] table_name PARTITION partition_spec;
-- 删除表分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec;
-- 修改列的名字/类型/位置/描述信息
-- FIRST|AFTER告诉在表中的哪个位置创建列。如果未指定此参数,则新列将添加到表的末尾。
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
-- 增加或替换列
ALTER TABLE table_name
[PARTITION partition_spec]
ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE|RESTRICT]
五、数据操作语言
-- 从文件导入数据到表中
-- LOCAL为从本地导入数据
-- OVERWRITE为覆盖导入
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
-- 从SQL导入数据到表中
-- values_row:( value [, value ...] )
INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...]
-- 从queries导入数据到表中
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
FROM from_statement
-- 多重导入
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
FROM from_statement
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;
-- 动态分区插入
INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
-- 通过queries从表中导出数据
-- ROW FORMAT DELIMITED 为列分隔符
-- COLLECTION ITEMS TERMINATED BY 为集合中元素分隔符(Array|Struct中的元素之间)
-- FIELDS TERMINATED BY 为字段分隔符
-- MAP KEYS TERMINATED BY 为Map中key与value的分隔符
-- LINES TERMINATED BY 为行之间的分隔符
-- row_format: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char][MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char][NULL DEFINED AS char]
INSERT OVERWRITE [LOCAL] DIRECTORY directory1
[ROW FORMAT row_format] [STORED AS file_format]
SELECT ... FROM ...
FROM from_statement
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1
[INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ...
六、SQL查询操作
-- SELECT语句可以是联合查询的一部分,也可以是另一个查询的子查询。
-- table_reference为查询的输入,可以是常规表、视图、联接构造或子查询。
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY col_list]
[CLUSTER BY col_list|[DISTRIBUTE BY col_list] [SORT BY col_list]]
[LIMIT rows]
-- 从表中检索所有列和所有行
SELECT * FROM [database_name.]table_name;