import numpy as np
from os import listdir
from sklearn.neighbors import KNeighborsClassifier as kNN
def img2vector(filename, size=32):
    """
    Convert a square text image of digit characters into a flat row vector.

    The first ``size`` lines of the file are read, and the first ``size``
    characters of each line are converted with ``int``. Any extra lines or
    trailing characters (including the newline) are ignored.

    :param filename: path of the text file to convert
    :param size: side length of the image in characters; the default of 32
        yields the 1x1024 vector
    :return: a ``(1, size * size)`` float array, or None if the file is
        missing, too short, or contains a non-digit character (a message is
        printed in each of those cases)
    """
    try:
        returnVect = np.zeros((1, size * size))
        with open(filename) as fr:
            for i in range(size):
                lineStr = fr.readline()
                # Index character by character (rather than slicing) so a
                # short or missing line raises IndexError instead of silently
                # producing a truncated row.
                row = [int(lineStr[j]) for j in range(size)]
                returnVect[0, size * i:size * (i + 1)] = row
        return returnVect
    except FileNotFoundError:
        print(f"错误:文件 {filename} 未找到。")
        return None
    except Exception as e:
        # Deliberate best-effort: callers treat None as "sample unusable".
        print(f"错误:处理文件 {filename} 时发生未知错误:{e}")
        return None
def load_training_data():
    """
    Load the training samples from the ``trainingDigits`` directory.

    Every directory entry is expected to be named ``<label>_<rest>``, where
    ``<label>`` parses as an integer class. Each file is converted with
    ``img2vector``. A file that ``img2vector`` cannot convert (it returns
    None) is skipped entirely, so the matrix never contains an all-zero
    placeholder row paired with a real label.

    :return: ``(trainingMat, hwLabels)`` where ``trainingMat`` has one
        1024-wide row per successfully loaded file and ``hwLabels`` holds the
        matching integer labels; ``(None, None)`` if the directory is missing
        or any other error occurs (for example a file name whose prefix is
        not an integer)
    """
    hwLabels = []
    vectors = []
    try:
        for fileNameStr in listdir('trainingDigits'):
            classNumber = int(fileNameStr.split('_')[0])
            vector = img2vector(f'trainingDigits/{fileNameStr}')
            if vector is None:
                # img2vector already reported the problem; drop the sample so
                # rows and labels stay aligned.
                continue
            vectors.append(vector)
            hwLabels.append(classNumber)
        # Keep the (0, 1024) shape for an empty directory, as vstack rejects
        # an empty sequence.
        trainingMat = np.vstack(vectors) if vectors else np.zeros((0, 1024))
        return trainingMat, hwLabels
    except FileNotFoundError:
        print("错误:训练数据文件夹未找到。")
        return None, None
    except Exception as e:
        print(f"错误:加载训练数据时发生未知错误:{e}")
        return None, None
def load_test_data():
    """
    Load the test samples from the ``testDigits`` directory.

    Every directory entry is expected to be named ``<label>_<rest>``, where
    ``<label>`` parses as an integer class. Each file is converted with
    ``img2vector``. A file that ``img2vector`` cannot convert (it returns
    None) is skipped entirely, so no all-zero placeholder row is scored
    against a real label.

    :return: ``(testMat, testLabels)`` where ``testMat`` has one 1024-wide
        row per successfully loaded file and ``testLabels`` holds the
        matching integer labels; ``(None, None)`` if the directory is missing
        or any other error occurs (for example a file name whose prefix is
        not an integer)
    """
    testLabels = []
    vectors = []
    try:
        for fileNameStr in listdir('testDigits'):
            classNumber = int(fileNameStr.split('_')[0])
            vector = img2vector(f'testDigits/{fileNameStr}')
            if vector is None:
                # img2vector already reported the problem; drop the sample so
                # rows and labels stay aligned.
                continue
            vectors.append(vector)
            testLabels.append(classNumber)
        # Keep the (0, 1024) shape for an empty directory, as vstack rejects
        # an empty sequence.
        testMat = np.vstack(vectors) if vectors else np.zeros((0, 1024))
        return testMat, testLabels
    except FileNotFoundError:
        print("错误:测试数据文件夹未找到。")
        return None, None
    except Exception as e:
        print(f"错误:加载测试数据时发生未知错误:{e}")
        return None, None
def handwritingClassTest():
    """
    Evaluate a 3-nearest-neighbour classifier on the handwritten digit data.

    Loads the training set, fits the classifier, loads the test set, prints
    the predicted and true label of every test sample, and finally prints
    the number of misclassified samples and the error rate as a percentage.
    Returns early, without evaluating, if either data set fails to load or
    is empty.

    :return: None
    """
    trainingMat, hwLabels = load_training_data()
    if trainingMat is None or hwLabels is None:
        return
    if len(hwLabels) == 0:
        # Fitting on zero samples would raise inside the classifier.
        print("错误:训练数据为空。")
        return
    neigh = kNN(n_neighbors=3, algorithm='auto')
    neigh.fit(trainingMat, hwLabels)

    testMat, testLabels = load_test_data()
    if testMat is None or testLabels is None:
        return
    mTest = len(testLabels)
    if mTest == 0:
        # Without this guard the error-rate division below would raise
        # ZeroDivisionError.
        print("错误:测试数据为空。")
        return

    # One batched call gives the same predictions as predicting row by row.
    predictions = neigh.predict(testMat)
    errorCount = 0
    for predicted, actual in zip(predictions, testLabels):
        print(f"分类返回结果为 {predicted}\t真实结果为 {actual}")
        if predicted != actual:
            errorCount += 1
    print(f"总共错了 {errorCount} 个数据\n错误率为 {errorCount / mTest * 100:.2f}%")
# Run the digit-recognition evaluation only when this file is executed as a script.
if __name__ == "__main__":
    handwritingClassTest()
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

# Load the iris data set and separate the feature matrix from the labels.
iris = load_iris()
x, y = iris.data, iris.target

# Hold out part of the data for validation. The split is not seeded, so the
# two accuracies printed below differ from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y)

# Fit a 6-nearest-neighbour classifier and report accuracy on both splits.
knn_clf = KNeighborsClassifier(n_neighbors=6)
knn_clf.fit(x_train, y_train)
print('训练集准确率:%.2f' % knn_clf.score(x_train, y_train))
print('验证集准确率:%.2f' % knn_clf.score(x_test, y_test))

# Search n_neighbors = 1..10 with 5-fold cross-validation on the full data.
n_neighbors = tuple(range(1, 11))
cv = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid={'n_neighbors': n_neighbors},
    cv=5,
)
cv.fit(x, y)
# A bare `cv.best_params_` expression is only displayed by a notebook; print
# it so the result is also visible when the file runs as a script.
print(cv.best_params_)
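# Recorded output from one run of the iris example above (the two accuracy
# values vary between runs because the train/test split is not seeded):
#   训练集准确率:0.96
#   验证集准确率:0.95
#   {'n_neighbors': 6}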