import numpy as np
import operator
def classify0(test_data: np.ndarray, training_data: np.ndarray, labels: list, k: int) -> str:
    """
    Classify one sample with the k-nearest-neighbours (kNN) algorithm.

    :param test_data: feature vector of the sample to classify
    :param training_data: training feature matrix, one sample per row
    :param labels: class label of each training row, in row order
    :param k: number of nearest neighbours that vote; a value larger than the
        number of training rows is clamped to that number
    :return: the label occurring most often among the k nearest rows; on a
        tie, the tied label whose first vote came from the closest neighbour
    :raises ValueError: if k is smaller than 1 or training_data has no rows
    """
    sample_count = training_data.shape[0]
    if k < 1 or sample_count == 0:
        raise ValueError("k must be >= 1 and training_data must contain at least one row")
    # Asking for more neighbours than exist would index past the end of the
    # sorted distances, so let every available row vote instead.
    k = min(k, sample_count)

    # Broadcasting subtracts test_data from every training row, which makes an
    # explicit np.tile copy of the test vector unnecessary.
    distances = np.sqrt(np.sum((training_data - test_data) ** 2, axis=1))
    nearest_indices = distances.argsort()[:k]

    class_count = {}
    for index in nearest_indices:
        label = labels[index]
        class_count[label] = class_count.get(label, 0) + 1

    # max() returns the first entry with the highest count, the same winner a
    # stable descending sort by count would put in front.
    return max(class_count.items(), key=operator.itemgetter(1))[0]
def file2matrix(filename: str) -> (np.ndarray, list):
    """
    Parse a tab-separated data file into a feature matrix and a label vector.

    Each data line holds three numeric features followed by a text label.
    Labels are encoded as 1 for 'didntLike' (dislike), 2 for 'smallDoses'
    (moderately charming) and 3 for 'largeDoses' (very charming).

    Blank lines are ignored. A line whose last field is not one of the known
    labels is skipped entirely (with a printed warning) so that the feature
    rows and the label vector always stay aligned.

    :param filename: path of the file to read
    :return: (feature matrix with 3 columns, list of integer labels); when the
        file does not exist an error is printed and (empty array, []) is returned
    :raises ValueError: if a feature field of a labelled line is not a number
    """
    label_codes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    feature_rows = []
    label_vector = []
    try:
        with open(filename, 'r') as file:
            for line_number, line in enumerate(file, start=1):
                line = line.strip()
                if not line:
                    continue
                data = line.split('\t')
                label = label_codes.get(data[-1])
                if label is None:
                    # Keeping the features without a label would shift every
                    # following label by one row.
                    print(f"警告:第 {line_number} 行的标签 {data[-1]!r} 无法识别, 已跳过该行")
                    continue
                feature_rows.append([float(value) for value in data[0:3]])
                label_vector.append(label)
    except FileNotFoundError:
        print(f"错误:未找到文件 {filename}")
        return np.array([]), []
    # reshape keeps the 3-column layout even when no valid line was found.
    feature_matrix = np.array(feature_rows, dtype=float).reshape(-1, 3)
    return feature_matrix, label_vector
def autoNorm(data_set: np.ndarray) -> (np.ndarray, np.ndarray, np.ndarray):
    """
    Min-max normalise every feature column.

    :param data_set: feature matrix, one sample per row
    :return: (normalised feature matrix, per-column range, per-column minimum).
        A column whose values are all equal has its range reported as 1
        instead of 0, so the returned range can be reused as a divisor when
        normalising further samples; such a column normalises to 0.
    """
    min_vals = data_set.min(0)
    max_vals = data_set.max(0)
    ranges = max_vals - min_vals
    # A constant column has range 0; dividing by it would produce NaN / inf.
    ranges = np.where(ranges == 0, 1, ranges)
    norm_data_set = (data_set - min_vals) / ranges
    return norm_data_set, ranges, min_vals
def classifyPerson(filename: str = "datingTestSet.txt", k: int = 3):
    """
    Ask for one person's three features on the console and print the predicted class.

    :param filename: training data file handed to file2matrix
    :param k: number of nearest neighbours handed to classify0
    """
    # Index i holds the wording for label code i + 1.
    result_list = ['讨厌', '有些喜欢', '非常喜欢']

    # Only the console input is guarded by this handler, so a broken data
    # file is no longer reported as an invalid number typed by the user.
    try:
        percent_tats = float(input("玩视频游戏所耗时间百分比:"))
        ff_miles = float(input("每年获得的飞行常客里程数:"))
        ice_cream = float(input("每周消费的冰激淋公升数:"))
    except ValueError:
        print("错误:输入必须为有效的数字")
        return

    try:
        dating_data_matrix, dating_labels = file2matrix(filename)
    except ValueError:
        print(f"错误:数据文件 {filename} 格式无效")
        return
    if dating_data_matrix.size == 0:
        return

    norm_matrix, ranges, min_vals = autoNorm(dating_data_matrix)
    test_array = np.array([percent_tats, ff_miles, ice_cream])
    # The new sample must be scaled with the training set's minimum and range.
    norm_test_array = (test_array - min_vals) / ranges
    classifier_result = classify0(norm_test_array, norm_matrix, dating_labels, k)
    print(f"你可能{result_list[classifier_result - 1]}这个人")


if __name__ == '__main__':
    classifyPerson()
# Output:
# 你可能有些喜欢这个人
import numpy as np
# Sample table with one record per row and five columns. Mixing numbers and
# booleans yields a float array, so False / True are stored as 0.0 / 1.0.
# np.array is used because np.mat was removed in NumPy 2.0.
data = np.array([[1, 200, 105, 3, False],
                 [2, 165, 80, 2, False],
                 [3, 184.5, 120, 2, False],
                 [4, 116, 70, 8, False],
                 [5, 270, 150, 4, True]])
# The number of records is the length of the first axis.
row = data.shape[0]
print(row)
# Total number of cells (rows * columns).
print(data.size)
# Output:
# 5
# 25
import numpy as np
# Sample table with one record per row and five columns. Mixing numbers and
# booleans yields a float array, so False / True are stored as 0.0 / 1.0.
# np.array is used because np.mat was removed in NumPy 2.0.
data = np.array([[1, 200, 105, 3, False],
                 [2, 165, 80, 2, False],
                 [3, 184.5, 120, 2, False],
                 [4, 116, 70, 8, False],
                 [5, 270, 150, 4, True]])
# NOTE(review): the inner print shows the cell value; the outer print then
# shows "None", the return value of print(). The nesting is kept so the
# console output stays as recorded; drop the outer call if only the value
# is wanted.
print(print(data[0, 3]))
print(print(data[0, 4]))
# Output:
# 3.0
# None
# 0.0
# None
import numpy as np
# Sample table with one record per row and five columns.
# np.array is used because np.mat was removed in NumPy 2.0.
data = np.array([[1, 200, 105, 3, False],
                 [2, 165, 80, 2, False],
                 [3, 184.5, 120, 2, False],
                 [4, 116, 70, 8, False],
                 [5, 270, 150, 4, True]])
# Values of the second column (index 1), one per record.
coll = list(data[:, 1])
# Sum, mean, population standard deviation and population variance.
print(np.sum(coll))
print(np.mean(coll))
print(np.std(coll))
print(np.var(coll))
# Output:
# 935.5
# 187.1
# 50.17808286493218
# 2517.84
import numpy as np
import pylab
import scipy.stats as stats
# Sample table with one record per row and five columns.
# np.array is used because np.mat was removed in NumPy 2.0.
data = np.array([[1, 200, 105, 3, False],
                 [2, 165, 80, 2, False],
                 [3, 184.5, 120, 2, False],
                 [4, 116, 70, 8, False],
                 [5, 270, 150, 4, True]])
# Values of the second column (index 1), one per record.
coll = list(data[:, 1])
# Draw a probability plot of the column on the pylab figure and display it.
stats.probplot(coll, plot=pylab)
pylab.show()
import pandas as pd
import matplotlib.pyplot as plot
# Sample table with one record per row and five columns.
rocksVmines = pd.DataFrame([[1, 200, 105, 3, False],
                            [2, 165, 80, 2, False],
                            [3, 184.5, 120, 2, False],
                            [4, 116, 70, 8, False],
                            [5, 270, 150, 4, True]])

# First three values of the records at positions 1 and 2, plotted against
# each other.
datarow1 = rocksVmines.iloc[1, 0:3]
datarow2 = rocksVmines.iloc[2, 0:3]
plot.scatter(datarow1, datarow2)
plot.xlabel("Attribute1")
plot.ylabel("Attribute2")
plot.show()

# Same comparison for the records at positions 2 and 3.
datarow3 = rocksVmines.iloc[3, 0:3]
plot.scatter(datarow2, datarow3)
plot.xlabel("Attribute2")
plot.ylabel("Attribute3")
plot.show()

# NOTE(review): this repeats the previous figure. The typographic quotes the
# labels were pasted with are a syntax error in Python and have been replaced
# by plain double quotes.
plot.scatter(datarow2, datarow3)
plot.xlabel("Attribute2")
plot.ylabel("Attribute3")
plot.show()
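# [Figure missing: image was not transferred from the original article (img-BJOeAcI2-1743117156398)]
# [Figure missing: image was not transferred from the original article (img-pvuHG2dQ-1743117156398)]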