导出的oracle的数据表结构csv文件
"Name","Virtual","Type","Nullable","Default/Expr.","Storage","Comments"
"DWH","N","VARCHAR2(100)","Y","","","单位号"
"BH","N","VARCHAR2(10)","N","","","班号"
"BJ","N","VARCHAR2(200)","Y","","","班级"
"JBNY","N","VARCHAR2(10)","Y","","","建班年月"
"BZRGH","N","VARCHAR2(10)","Y","","","班主任工号"
"BZXH","N","VARCHAR2(10)","Y","","","班长学号"
"FDYH","N","VARCHAR2(20)","Y","","","辅导员号"
"SSZY","N","VARCHAR2(200)","Y","","","所属专业"
"SSNJ","N","VARCHAR2(20)","Y","","","所属年级"
"XSLB","N","VARCHAR2(20)","Y","","","学生类别"
"QYBZ","N","VARCHAR2(20)","Y","","","启用标志"
"SFDDB","N","VARCHAR2(1)","Y","","","是否订单班(本数据项只适用于高
职院校)"
"LSH","N","VARCHAR2(100)","N","","","流水号"
"SJTBSJ","N","DATE","Y","","","数据同步时间"
"NJDM","N","VARCHAR2(20)","Y","","","年级代码"
"ZXRS","N","VARCHAR2(20)","Y","","","在校人数"
"XQH_ID","N","VARCHAR2(20)","Y","","","校区号"
python处理代码
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 6 14:07:21 2019
@author: liuyunsheng
"""
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
def mapType(otype):
if otype.startswith('VARCHAR'):
return 'STRING'
if 'NUMBER' in otype :
if ',' in otype:
return 'FLOAT'
else:
return 'INT'
if 'DATE' in otype:
return 'TIMESTAMP'
if 'BLOB' in otype:
return 'BINARY'
raise Exception('未经映射的类型: %s'% otype)
def csv2DDL(fname):
ddl = pd.read_csv(fname, encoding='GB2312', usecols=['Name', 'Type', 'Nullable', 'Comments'])
table_name = fname.split('.')[0][5:]
ddl.head()
#ddl.rename(columns = {'名称':'name', '类型':'type', '可为空':'nullable', '注释': 'comments'}, inplace=True)
ddl['hive_type'] = ddl.Type.apply(mapType)
ddl.head()
hive_cols = []
for row in ddl[['Name', 'hive_type', 'Comments']].set_index('Name').iterrows():
hive_cols.append(" " + str(row[0]) + " " + str(row[1][0]) + " COMMENT '%s'" % row[1][1] )
columns = ",\n".join(hive_cols)
DDL = """
CREATE TABLE %s (
%s
)
""" % (table_name, columns)
# print(DDL)
# print(columns)
return table_name, DDL
import glob
for fname in glob.glob("path/*.csv"):
table_name, DDL = csv2DDL(fname)
print(DDL)