# 实现基本的knn模型
# 基础结构.py
import numpy as np
# 数据集操作
from sklearn import neighbors, datasets, preprocessing
# 数据切分
from sklearn.model_selection import train_test_split
# 指标评估
from sklearn.metrics import accuracy_score
# Model selection: cross-validation scoring
from sklearn.model_selection import cross_val_score
# Fix the random seed so the train/test split, and therefore every score
# printed below, is reproducible from run to run. Constructing
# np.random.RandomState(0) and discarding the object seeds nothing, so the
# global generator is seeded here and the split also gets an explicit seed.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)
# Load the built-in iris dataset
iris = datasets.load_iris()
# Split features and labels into training and test sets (30% held out)
x, y = iris.data, iris.target
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=RANDOM_SEED
)
# Preprocessing: the scaler is fitted on the training set only
scaler = preprocessing.StandardScaler().fit(x_train)
# Standardize both sets using the scaler fitted on the training set
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# Create the model
knn = neighbors.KNeighborsClassifier(n_neighbors=12)
# Fit the model on the training set
knn.fit(x_train, y_train)
# Cross-validation: cv is the number of folds, scoring is set to accuracy.
# NOTE(review): x_train was scaled with a scaler fitted on the whole training
# set, so each validation fold has already influenced the scaling statistics.
# Wrapping scaler + classifier in a pipeline would avoid that; confirm whether
# it matters for this demo.
scores = cross_val_score(knn, x_train, y_train, cv=5, scoring='accuracy')
print(scores)  # score of each fold
print(scores.mean())
# Predict on the held-out test set
y_pred = knn.predict(x_test)
# Evaluate test-set accuracy
print(accuracy_score(y_test, y_pred))