基本代码如下:
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier
from lightgbm import early_stopping
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
# Build a synthetic 100x5 frame of random integers in [0, 100) as features.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 5)))
# Balanced binary labels: rows 0-49 are class 1, rows 50-99 are class 0.
# `.loc` slices include both endpoints, so the two ranges are written without
# overlap instead of relying on the second assignment to overwrite row 50.
df.loc[:49, 'label'] = 1
df.loc[50:, 'label'] = 0
# Separate the feature columns from the target column.
x, y = df.drop(['label'], axis=1), df['label']
# Hold out 20% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# Train the model.
gbm = LGBMClassifier(num_leaves=31, learning_rate=0.05, n_estimators=20)
# Early stopping is configured through a callback: the `early_stopping_rounds`
# keyword of `fit` was deprecated in LightGBM 3.3 and removed in 4.0, while the
# callback form works on both old and new releases.
# NOTE(review): 'l1' (mean absolute error) is a regression metric; for a binary
# classifier 'binary_logloss' or 'auc' is usually more meaningful. The held-out
# split is also reused as the early-stopping validation set. Both are kept
# as-is to preserve the original behaviour.
gbm.fit(
    x_train,
    y_train,
    eval_set=[(x_test, y_test)],
    eval_metric='l1',
    callbacks=[early_stopping(stopping_rounds=5)],
)
# Predict on the held-out rows using the best iteration found by early stopping.
y_pred = gbm.predict(x_test, num_iteration=gbm.best_iteration_)