searchusermenu
  • 发布文章
  • 消息中心
点赞
收藏
评论
分享
原创

Hive的基本使用手册

2023-07-19 08:45:46
23
0

一、Kerberos赋权

klist -kt */hive.keytab
kinit -kt */hive.keytab {principal}

二、不进入命令行执行SQL

1.     执行SQL语句

hive -e "example sql"

2.     执行SQL文件

hive -f example.sql

3.     临时hive配置

hive --hiveconf hive.execution.engine=mr -e "example sql"

三、进入命令行执行SQL

配置好环境变量后

1.     hive-cli

hive

2.     Beeline

beeline -u jdbc:hive2://$HS2_HOST:$HS2_PORT

3.     临时hive配置

hive> set hive.execution.engine=mr;

四、数据定义语言

1.     数据库操作

-- 查看包含的数据库
SHOW DATABASES;

-- 生成数据库
-- IF NOT EXISTS避免数据库存在时抛出错误信息
-- COMMENT为数据库的描述信息
-- LOCATION为数据库创建的位置
-- DBPROPERTIES为键值对属性信息
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];

-- 删除数据库
-- HIVE不允许删除一个包含有表的数据库
-- CASCADE可以使HIVE自行先删除数据库中的表
-- RESTRICT为默认情况
DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE];

-- 更改数据库
ALTER DATABASE database_name SET DBPROPERTIES (property_name=property_value, ...);
ALTER DATABASE database_name SET LOCATION hdfs_path;

-- 使用数据库
USE database_name;

-- 转换到默认数据库
USE DEFAULT;

 

2.     表操作

-- 查看当前数据库包含的表
SHOW TABLES;
SHOW TABLES IN database_name;

-- 生成表
-- EXTERNAL使Hive生成外部表
-- TBLPROPERTIES为键值对属性信息
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database_name.]table_name (
       col_name data_type
) 
 [COMMENT table_comment]
 [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
 [LOCATION hdfs_path]
 [TBLPROPERTIES (property_name=property_value, ...)]  

-- 拷贝一张已经存在的表模式(无需拷贝数据)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [database_name.]table_name
  LIKE existing_table_name
  [LOCATION hdfs_path]

-- 查看表信息
DESCRIBE FORMATTED [database_name.]table_name;

-- 删除表
DROP TABLE [IF EXISTS] table_name;

-- 清空表
TRUNCATE [TABLE] table_name;

-- 表重命名
ALTER TABLE table_name RENAME TO new_table_name;

-- 修改表属性
ALTER TABLE table_name SET TBLPROPERTIES (property_name = property_value, property_name = property_value, ... );

-- 修改表描述信息
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);


3.     分区和列操作

-- 查看表分区
show partitions [database_name.]table_name;

-- 增加表分区
-- partition_spec: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'];

-- 修改表分区名称
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;

-- 移动位置修改分区的路径
ALTER TABLE table_name PARTITION partition_spec SET LOCATION 'location';

-- 清空表分区
TRUNCATE [TABLE] table_name PARTITION partition_spec;

-- 删除表分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec;

-- 修改列的名字/类型/位置/描述信息
-- FIRST|AFTER告诉在表中的哪个位置创建列。如果未指定此参数,则新列将添加到表的末尾。
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
  [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];

-- 增加或替换列
ALTER TABLE table_name
  [PARTITION partition_spec]               
  ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
  [CASCADE|RESTRICT]                    

五、数据操作语言

-- 从文件导入数据到表中
-- LOCAL为从本地导入数据
-- OVERWRITE为覆盖导入
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]

-- 从SQL导入数据到表中
-- values_row:( value [, value ...] )
INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...]
 
-- 从queries导入数据到表中
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
FROM from_statement

-- 多重导入
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
FROM from_statement
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;

-- 动态分区插入
INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;

-- 通过queries从表中导出数据
-- ROW FORMAT DELIMITED 为列分隔符
-- COLLECTION ITEMS TERMINATED BY 为集合中元素分隔符(Array|Struct中的元素之间)
-- FIELDS TERMINATED BY 为字段分隔符
-- MAP KEYS TERMINATED BY 为Map中key与value的分隔符
-- LINES TERMINATED BY 为行之间的分隔符
-- row_format: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char][MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char][NULL DEFINED AS char]
INSERT OVERWRITE [LOCAL] DIRECTORY directory1
  [ROW FORMAT row_format] [STORED AS file_format]
  SELECT ... FROM ...

FROM from_statement
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1
[INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ...

六、SQL查询操作

-- SELECT语句可以是联合查询的一部分,也可以是另一个查询的子查询。
-- table_reference为查询的输入,可以是常规表、视图、联接构造或子查询。
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [ORDER BY col_list]
  [CLUSTER BY col_list|[DISTRIBUTE BY col_list] [SORT BY col_list]]
 [LIMIT rows]

-- 从表中检索所有列和所有行
SELECT * FROM [database_name.]table_name;

 

七、参考文章

官方网址:Home - Apache Hive - Apache Software Foundation

0条评论
0 / 1000
cosmos
4文章数
0粉丝数
cosmos
4 文章 | 0 粉丝
cosmos
4文章数
0粉丝数
cosmos
4 文章 | 0 粉丝
原创

Hive的基本使用手册

2023-07-19 08:45:46
23
0

一、Kerberos赋权

klist -kt */hive.keytab
kinit -kt */hive.keytab {principal}

二、不进入命令行执行SQL

1.     执行SQL语句

hive -e "example sql"

2.     执行SQL文件

hive -f example.sql

3.     临时hive配置

hive --hiveconf hive.execution.engine=mr -e "example sql"

三、进入命令行执行SQL

配置好环境变量后

1.     hive-cli

hive

2.     Beeline

beeline -u jdbc:hive2://$HS2_HOST:$HS2_PORT

3.     临时hive配置

hive> set hive.execution.engine=mr;

四、数据定义语言

1.     数据库操作

-- 查看包含的数据库
SHOW DATABASES;

-- 生成数据库
-- IF NOT EXISTS避免数据库存在时抛出错误信息
-- COMMENT为数据库的描述信息
-- LOCATION为数据库创建的位置
-- DBPROPERTIES为键值对属性信息
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];

-- 删除数据库
-- HIVE不允许删除一个包含有表的数据库
-- CASCADE可以使HIVE自行先删除数据库中的表
-- RESTRICT为默认情况
DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE];

-- 更改数据库
ALTER DATABASE database_name SET DBPROPERTIES (property_name=property_value, ...);
ALTER DATABASE database_name SET LOCATION hdfs_path;

-- 使用数据库
USE database_name;

-- 转换到默认数据库
USE DEFAULT;

 

2.     表操作

-- 查看当前数据库包含的表
SHOW TABLES;
SHOW TABLES IN database_name;

-- 生成表
-- EXTERNAL使Hive生成外部表
-- TBLPROPERTIES为键值对属性信息
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database_name.]table_name (
       col_name data_type
) 
 [COMMENT table_comment]
 [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
 [LOCATION hdfs_path]
 [TBLPROPERTIES (property_name=property_value, ...)]  

-- 拷贝一张已经存在的表模式(无需拷贝数据)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [database_name.]table_name
  LIKE existing_table_name
  [LOCATION hdfs_path]

-- 查看表信息
DESCRIBE FORMATTED [database_name.]table_name;

-- 删除表
DROP TABLE [IF EXISTS] table_name;

-- 清空表
TRUNCATE [TABLE] table_name;

-- 表重命名
ALTER TABLE table_name RENAME TO new_table_name;

-- 修改表属性
ALTER TABLE table_name SET TBLPROPERTIES (property_name = property_value, property_name = property_value, ... );

-- 修改表描述信息
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);


3.     分区和列操作

-- 查看表分区
show partitions [database_name.]table_name;

-- 增加表分区
-- partition_spec: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'];

-- 修改表分区名称
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;

-- 移动位置修改分区的路径
ALTER TABLE table_name PARTITION partition_spec SET LOCATION 'location';

-- 清空表分区
TRUNCATE [TABLE] table_name PARTITION partition_spec;

-- 删除表分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec;

-- 修改列的名字/类型/位置/描述信息
-- FIRST|AFTER告诉在表中的哪个位置创建列。如果未指定此参数,则新列将添加到表的末尾。
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
  [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];

-- 增加或替换列
ALTER TABLE table_name
  [PARTITION partition_spec]               
  ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
  [CASCADE|RESTRICT]                    

五、数据操作语言

-- 从文件导入数据到表中
-- LOCAL为从本地导入数据
-- OVERWRITE为覆盖导入
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]

-- 从SQL导入数据到表中
-- values_row:( value [, value ...] )
INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...]
 
-- 从queries导入数据到表中
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
FROM from_statement

-- 多重导入
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
FROM from_statement
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
[INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
[INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;

-- 动态分区插入
INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;

-- 通过queries从表中导出数据
-- ROW FORMAT DELIMITED 为列分隔符
-- COLLECTION ITEMS TERMINATED BY 为集合中元素分隔符(Array|Struct中的元素之间)
-- FIELDS TERMINATED BY 为字段分隔符
-- MAP KEYS TERMINATED BY 为Map中key与value的分隔符
-- LINES TERMINATED BY 为行之间的分隔符
-- row_format: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char][MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char][NULL DEFINED AS char]
INSERT OVERWRITE [LOCAL] DIRECTORY directory1
  [ROW FORMAT row_format] [STORED AS file_format]
  SELECT ... FROM ...

FROM from_statement
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1
[INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ...

六、SQL查询操作

-- SELECT语句可以是联合查询的一部分,也可以是另一个查询的子查询。
-- table_reference为查询的输入,可以是常规表、视图、联接构造或子查询。
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [ORDER BY col_list]
  [CLUSTER BY col_list|[DISTRIBUTE BY col_list] [SORT BY col_list]]
 [LIMIT rows]

-- 从表中检索所有列和所有行
SELECT * FROM [database_name.]table_name;

 

七、参考文章

官方网址:Home - Apache Hive - Apache Software Foundation

文章来自个人专栏
Hive
1 文章 | 1 订阅
0条评论
0 / 1000
请输入你的评论
0
0