# Load the two point classes and their labels, then build a kNN model on
# the stacked points.
# NOTE(review): pickle.load can execute arbitrary code; only open data files
# that come from a trusted source.
with open('points_normal.pkl', 'rb') as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)
model = KnnClassifier(labels, np.vstack((class_1, class_2)))

# Alternative data set and a quick sanity check (left disabled):
# with open('points_ring.pkl', 'rb') as f:
#     class_1 = pickle.load(f)
#     class_2 = pickle.load(f)
#     labels = pickle.load(f)
# print(model.classify(class_1[0]))
from pylab import *


def plot_2D_boundary(plot_range, points, decisionfcn, labels, values=None):
    """Plot decision contours and the per-class points on the current axes.

    plot_range is (xmin, xmax, ymin, ymax), points is a list of class
    points (one 2-column array per class), decisionfcn is a function to
    evaluate as decisionfcn(x_values, y_values), labels is a list of the
    labels that decisionfcn returns for each class, and values is a list
    of decision contours to show (defaults to [0]).

    Points whose decision equals the class label are drawn with '*', the
    others with 'o'.
    """
    if values is None:
        values = [0]
    clist = ['b', 'r', 'g', 'k', 'm', 'y']  # colors for the classes

    # evaluate on a grid (step 0.1) and plot contour of decision function
    x = arange(plot_range[0], plot_range[1], .1)
    y = arange(plot_range[2], plot_range[3], .1)
    xx, yy = meshgrid(x, y)
    xxx, yyy = xx.flatten(), yy.flatten()  # lists of x,y in grid
    zz = array(decisionfcn(xxx, yyy))
    zz = zz.reshape(xx.shape)

    # plot contour(s) at values
    contour(xx, yy, zz, values)

    # for each class, plot the points with '*' for correct, 'o' for incorrect
    for i, class_points in enumerate(points):
        d = decisionfcn(class_points[:, 0], class_points[:, 1])
        correct_ndx = labels[i] == d
        incorrect_ndx = labels[i] != d
        color = clist[i % len(clist)]  # wrap around if there are > 6 classes
        plot(class_points[correct_ndx, 0], class_points[correct_ndx, 1],
             '*', color=color)
        plot(class_points[incorrect_ndx, 0], class_points[incorrect_ndx, 1],
             'o', color=color)

    axis('equal')
def get_imlist(path, endIdentifier):
    """Return the paths of all entries in `path` whose name ends with
    `endIdentifier`.

    os.listdir gives no ordering guarantee, so the names are sorted to make
    the result deterministic across runs and platforms.
    """
    return [os.path.join(path, f)
            for f in sorted(os.listdir(path))
            if f.endswith(endIdentifier)]
imlist = get_imlist(r'.\train', '.ppm')
imtestlist = get_imlist(r'.\test', '.ppm')

# Write a '.dsift' feature file next to every training and test image
# (training images first, then test images). The last three characters of
# the image name (the 'ppm' extension) are replaced by 'dsift'.
for filename in imlist + imtestlist:
    featfile = filename[:-3] + 'dsift'
    process_image_dsift(filename, featfile, 10, 5, resize=(50, 50))
def read_gesture_features_labels(path):
    """Read every '.dsift' feature file in `path`.

    Returns (features, labels): `features` is an array with one flattened
    descriptor array per file, and `labels` is an array holding the first
    character of each file name.
    """
    # create list of all files ending in .dsift
    names = [f for f in os.listdir(path) if f.endswith('.dsift')]

    # read the features
    features = []
    for name in names:
        l, d = read_features_from_file(os.path.join(path, name))
        features.append(d.flatten())
    features = array(features)

    # create labels from the bare file name; splitting the joined path on
    # '/' breaks on Windows, where os.path.join uses backslashes
    labels = [name[0] for name in names]

    return features, array(labels)
# Classify every test sample with its k nearest neighbours and report the
# fraction of predictions that equal the test label.
k = 1
knn_classifier = KnnClassifier(labels, features)
res = array([knn_classifier.classify(test_features[i], k)
             for i in range(len(test_labels))])
acc = sum(1.0 * (res == test_labels)) / len(test_labels)
print('Accuracy:', acc)
def print_confusion(res, labels, classnames):
    """Print the confusion matrix of predictions `res` against `labels`.

    Entry [i, j] counts the samples predicted as classnames[i] whose true
    label is classnames[j]. Every value in `res` and `labels` must appear
    in `classnames`; otherwise a KeyError is raised.
    """
    n = len(classnames)

    # map each class name to its row/column index
    class_ind = {name: i for i, name in enumerate(classnames)}

    confuse = zeros((n, n))
    # iterate over the `labels` argument, not a module-level variable
    for i in range(len(labels)):
        confuse[class_ind[res[i]], class_ind[labels[i]]] += 1

    print('Confusion matrix for')
    print(classnames)
    print(confuse)
# 8.2 Bayes classifier
import numpy as np
import pickle
from pylab import*
class BayesClassifier(object):
    """Bayes classifier that models each class with a per-dimension mean
    and variance and scores points with `gauss`."""

    def __init__(self):
        """Create an untrained classifier; call train() before classify()."""
        self.labels = []  # class labels
        self.mean = []  # class mean
        self.var = []  # class variances
        self.n = 0  # nbr of classes

    def train(self, data, labels=None):
        """Train on data (list of arrays n*dim).

        Labels are optional, default is 0...n-1. Statistics from any
        earlier call are discarded so that mean/var stay aligned with the
        labels when the classifier is retrained.
        """
        # `is None` rather than `== None`: comparing an array of labels
        # with == yields an array whose truth value is ambiguous
        if labels is None:
            labels = range(len(data))
        self.labels = labels
        self.n = len(labels)

        self.mean = [np.mean(c, axis=0) for c in data]
        self.var = [np.var(c, axis=0) for c in data]

    def classify(self, points):
        """Classify the points by computing probabilities for each class
        and return most probable label.

        Returns (est_labels, est_prob), where est_prob holds one row of
        gauss() values per class.
        """
        # compute probabilities for each class
        est_prob = np.array([gauss(m, v, points)
                             for m, v in zip(self.mean, self.var)])
        # diagnostic output kept from the original implementation
        print('est prob', est_prob.shape, self.labels)

        # get index of highest probability, this gives class label
        ndx = est_prob.argmax(axis=0)
        est_labels = np.array([self.labels[n] for n in ndx])

        return est_labels, est_prob
def gauss(m, v, x):
    """Evaluate Gaussian in d-dimensions with independent mean m and
    variance v at the points in (the rows of) x.

    x may be a single point of shape (d,) or an array of shape (n, d);
    the result is an array with one density value per point.
    http://en.wikipedia.org/wiki/Multivariate_normal_distribution
    """
    # treat a single point as one row so both input shapes share one path
    x = np.atleast_2d(x)
    d = x.shape[1]

    # subtract mean
    centered = x - m

    # product of probabilities: with a diagonal covariance the quadratic
    # form reduces to a per-row weighted sum of squares, which avoids
    # building an n*n matrix just to read its diagonal
    y = np.exp(-0.5 * np.sum(centered * centered / v, axis=1))

    # normalize and return (1e-6 guards against a zero variance product)
    return y * (2 * np.pi) ** (-d / 2.0) / (np.sqrt(np.prod(v)) + 1e-6)
from PIL import Image
from numpy import *


def pca(X):
    """Principal Component Analysis.

    input: X, matrix with training data stored as flattened arrays in rows
    return: projection matrix (with important dimensions first), the
    values S computed alongside it (square roots of the eigenvalues of
    X*X.T in the compact branch, singular values in the SVD branch) and
    the mean.
    """
    # get dimensions
    num_data, dim = X.shape

    # center data
    mean_X = X.mean(axis=0)
    X = X - mean_X

    if dim > num_data:
        # PCA - compact trick used
        M = dot(X, X.T)  # covariance matrix
        e, EV = linalg.eigh(M)  # eigenvalues and eigenvectors
        tmp = dot(X.T, EV).T  # this is the compact trick
        V = tmp[::-1]  # reverse since last eigenvectors are the ones we want
        S = sqrt(e)[::-1]  # reverse since eigenvalues are in increasing order
        # NOTE(review): eigenvalues at (or rounding just below) zero give
        # zero/nan entries in S, so the matching trailing rows of V are not
        # meaningful - confirm callers only use the leading rows.
        V /= S[:, newaxis]  # scale row i of V by 1/S[i]
    else:
        # PCA - SVD used; the reduced form skips the num_data*num_data U,
        # which is never used here
        U, S, V = linalg.svd(X, full_matrices=False)
        V = V[:num_data]  # only makes sense to return the first num_data

    # return the projection matrix, the variance and the mean
    return V, S, mean_X
def center(X):
    """Center the square matrix X (subtract col and row means).

    Returns Y with Y[i, j] = X[i, j] - rowmean[i] - colmean[j] + totalmean.
    Raises ValueError (an Exception subclass) if X is not square.
    """
    n, m = X.shape
    if n != m:
        raise ValueError('Matrix is not square.')

    row_means = X.mean(axis=1, keepdims=True)  # shape (n, 1)
    col_means = X.mean(axis=0, keepdims=True)  # shape (1, n)

    # Broadcasting keeps entry (i, j) at position (i, j); the nested list
    # comprehension used before swapped the loop order and so produced the
    # transpose for non-symmetric input.
    return X - row_means - col_means + X.mean()
# Project the features onto the first 50 principal components and train a
# Bayes classifier with one group of projected features per class name.
V, S, m = pca(features)
V = V[:50]

# Subtract the mean before projecting. np.dot's third positional argument
# is the output buffer, so np.dot(V, f, -m) did not subtract anything.
features = np.array([np.dot(V, f - m) for f in features])
# test_features = np.array([np.dot(V, f - m) for f in test_features])

bc = BayesClassifier()
blist = [features[where(labels == c)[0]] for c in classnames]
bc.train(blist, classnames)
# res = bc.classify(test_features)[0]
def load_ocr_data(path):
    """Return (features, labels) for every '.jpg' image in `path`.

    The label of an image is the integer value of the first character of
    its file name (ValueError if that character is not a digit). Each
    image is opened, converted to grayscale ('L') and passed to
    compute_feature; the results are stacked into the features array.
    """
    names = [f for f in os.listdir(path) if f.endswith('.jpg')]

    # take the label from the bare file name; splitting the joined path on
    # '/' breaks on Windows, where os.path.join uses backslashes
    labels = [int(name[0]) for name in names]

    features = []
    for name in names:
        im = array(Image.open(os.path.join(path, name)).convert('L'))
        features.append(compute_feature(im))

    return array(features), labels
from scipy.ndimage import measurements
def find_sudoku_edges(im, axis=0):
    """Find the positions of the grid lines of a sudoku image along `axis`.

    Pixels darker than 128 are counted along `axis`; runs where the count
    exceeds half its maximum are treated as lines and located by their
    center of mass. If exactly 4 lines are found, two evenly spaced
    positions are inserted between each neighbouring pair. Returns the
    list of 10 positions, or raises RuntimeError if 10 cannot be obtained.
    """
    # local import: use the ndimage functions directly instead of the
    # deprecated scipy.ndimage.measurements namespace
    from scipy import ndimage

    # threshold and sum rows or columns
    trim = 1 * (im < 128)
    s = trim.sum(axis=axis)

    # find center of strongest lines
    s_labels, s_nbr = ndimage.label(s > (0.5 * s.max()))
    m = ndimage.center_of_mass(s, s_labels, list(range(1, s_nbr + 1)))
    x = [int(c[0]) for c in m]

    # if only the 4 strong lines are detected, add lines in between
    # NOTE(review): '/' is true division, so the inserted positions are
    # floats - confirm callers accept non-integer edges.
    if len(x) == 4:
        dx = np.diff(x)
        x = [x[0], x[0] + dx[0] / 3, x[0] + 2 * dx[0] / 3,
             x[1], x[1] + dx[1] / 3, x[1] + 2 * dx[1] / 3,
             x[2], x[2] + dx[2] / 3, x[2] + 2 * dx[2] / 3, x[3]]

    if len(x) == 10:
        return x
    raise RuntimeError('Edges not detected')