import pyhive as hive
import pandas as pd
"""
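sql: the SQL statement to execute in Hive
file_columns: column names of the queried file (assigned to the result columns)
group_columns: grouping column names; every other column is converted to numeric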
"""
def get_frame_from_pyhive(sql, file_columns, group_columns, *,
                          host="127.0.0.1", port=10000,
                          database="database_name", conn=None):
    """Run ``sql`` and return the result as a DataFrame with numeric value columns.

    The result columns are renamed to ``file_columns``. Every column that is
    not listed in ``group_columns`` is converted with ``pd.to_numeric``;
    values that cannot be parsed become ``NaN``.

    Args:
        sql: SQL statement to execute.
        file_columns: Column names to assign to the query result, in order.
        group_columns: Names of the grouping columns; these are left as-is.
        host: Hive server host, used only when ``conn`` is not given.
        port: Hive server port, used only when ``conn`` is not given.
        database: Hive database name, used only when ``conn`` is not given.
        conn: Optional already-open DB-API connection. When supplied it is
            used for the query and left open (the caller owns it); otherwise
            a Hive connection is opened and always closed before returning.

    Returns:
        The query result as a DataFrame. If anything fails (connecting,
        querying, renaming or converting), the error is printed and an empty
        DataFrame whose columns are ``file_columns`` is returned instead.
    """
    owns_conn = conn is None
    try:
        if owns_conn:
            # ``connect`` lives in the ``pyhive.hive`` submodule, not on the
            # top-level ``pyhive`` package, so import the submodule here.
            from pyhive import hive as hive_dbapi
            conn = hive_dbapi.connect(host=host, port=port, database=database)
        try:
            df = pd.read_sql(sql, conn)
        finally:
            # Release the connection even when the query raises.
            if owns_conn:
                conn.close()
        df.columns = file_columns
        group_set = set(group_columns)
        # dict.fromkeys de-duplicates while keeping the caller's column order.
        num_cols = [c for c in dict.fromkeys(file_columns) if c not in group_set]
        if num_cols:
            df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty
        # frame with the expected columns so callers can continue.
        print(e)
        df = pd.DataFrame(columns=file_columns)
    return df