分类问题-机器学习

发布于:2025-07-14 ⋅ 阅读:(25) ⋅ 点赞:(0)
  1. 分类:感知机

    简单判断图片是纵向还是横向

    训练数据:images1.csv

    x1,x2,y
    153,432,-1
    220,262,-1
    118,214,-1
    474,384,1
    485,411,1
    233,430,-1
    396,361,1
    484,349,1
    429,259,1
    286,220,1
    399,433,-1
    403,340,1
    252,34,1
    497,472,1
    379,416,-1
    76,163,-1
    263,112,1
    26,193,-1
    61,473,-1
    420,253,1
    
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Load the training samples; skiprows=1 skips the "x1,x2,y" header row.
    train = np.loadtxt('images1.csv', skiprows=1, delimiter=',')

    # Columns 0-1 hold the two features (x1, x2); column 2 holds the label,
    # which is 1 or -1 in the sample data.
    train_x, train_y = train[:, :2], train[:, 2]

    # Optional scatter plot of the raw data (left disabled):
    #plt.plot(train_x[train_y == 1,0],train_x[train_y == 1,1],'o')
    #plt.plot(train_x[train_y==-1,0],train_x[train_y==-1,1],'x')
    #plt.axis('scaled')
    #plt.show()

    # Initial weight vector: two random components from np.random.rand.
    # The decision boundary is the line w·x = w1*x1 + w2*x2 = 0.
    w = np.random.rand(2)
    
    # Discriminant function
    def f(x):
        """Classify x by the sign of w·x: return 1 when w·x >= 0, else -1.

        Reads the module-level weight vector ``w``.
        """
        return 1 if np.dot(w, x) >= 0 else -1
    
    # Number of passes over the training set
    epoch = 10
    # Running total of weight updates performed so far
    count = 0

    # Learn the weights: only misclassified samples change w.
    for _ in range(epoch):
        for x, y in zip(train_x, train_y):
            if f(x) == y:
                # Prediction already matches the label; leave w alone.
                continue
            # Move w toward x for label 1 and away from x for label -1.
            w = w + y * x
            count += 1
            # Log every update
            print('第{}次:w={}'.format(count, w))
    
    # Boundary line: w·x = w1*x1 + w2*x2 = 0  =>  x2 = -(w1/w2) * x1
    x1 = np.arange(500)
    is_pos = train_y == 1
    is_neg = train_y == -1
    plt.plot(train_x[is_pos, 0], train_x[is_pos, 1], 'o')
    plt.plot(train_x[is_neg, 0], train_x[is_neg, 1], 'x')
    plt.plot(x1, -w[0] / w[1] * x1, linestyle='dashed')
    plt.show()

    # Predictions for two unseen sizes:
    #   200x100 is a landscape image, 100x200 is a portrait image.
    for size in ([200, 100], [100, 200]):
        print(f(size))
    
    

    在这里插入图片描述

  2. 分类:逻辑回归

    训练数据:images2.csv

    x1,x2,y
    153,432,0
    220,262,0
    118,214,0
    474,384,1
    485,411,1
    233,430,0
    396,361,1
    484,349,1
    429,259,1
    286,220,1
    399,433,0
    403,340,1
    252,34,1
    497,472,1
    379,416,0
    76,163,0
    263,112,1
    26,193,0
    61,473,0
    420,253,1
    
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Read the training data; skiprows=1 skips the "x1,x2,y" header row.
    train = np.loadtxt('images2.csv', skiprows=1, delimiter=',')
    # Columns 0-1 are the features, column 2 is the label (0 or 1 in the sample data).
    train_x, train_y = train[:, :2], train[:, 2]

    # Initial parameters: three random values, one each for x0, x1 and x2.
    theta = np.random.rand(3)
    
    # Standardization
    # axis=0 computes the mean and standard deviation of each column.
    mu = np.mean(train_x, axis=0)
    sigma = np.std(train_x, axis=0)


    def standardize(x):
        """Rescale x using the training set's per-column mean and std."""
        centered = x - mu
        return centered / sigma


    train_z = standardize(train_x)
    
    # Add the constant feature x0
    def to_matrix(x):
        """Return x with a column of ones prepended.

        x is indexed as a 2-D array; the result has one more column than x.
        """
        n_rows = x.shape[0]
        # One row of x0 = 1 per sample, shaped (n_rows, 1)
        bias = np.ones((n_rows, 1))
        # Join the columns side by side into a single matrix
        return np.concatenate((bias, x), axis=1)
    
    # Design matrix: standardized features with the x0 column prepended.
    X = to_matrix(train_z)

    # Visualization of the standardized data (disabled by wrapping it in a string)
    '''
    plt.plot(train_z[train_y==1,0],train_z[train_y==1,1],'o')
    plt.plot(train_z[train_y==0,0],train_z[train_y==0,1],'x')
    plt.show()
    '''

    # Sigmoid function
    def f(x):
        """Return sigmoid(x·theta), using the module-level ``theta``."""
        score = np.dot(x, theta)
        return 1 / (1 + np.exp(-score))
    
    # Learning rate
    ETA = 0.001

    # Number of iterations
    epoch = 5000

    # Repeated learning: one full-batch update of theta per iteration.
    for _ in range(epoch):
        # Difference between predicted probability and label, per sample
        residual = f(X) - train_y
        # Sum the residual-weighted rows of X to get the update direction
        step = np.dot(residual, X)
        theta = theta - ETA * step
    
    # Decision boundary: theta^T x = theta0*x0 + theta1*x1 + theta2*x2 = 0 with x0 = 1
    #   =>  x2 = -(theta0 + theta1*x1) / theta2
    # NOTE: despite its name, x0 below is the grid of standardized x1 values
    # used for the horizontal axis, not the constant bias feature.
    x0 = np.linspace(-2,2,100)
    # Scatter each class as (x1, x2). The original used column 0 for both
    # coordinates, which collapsed every point onto the diagonal.
    plt.plot(train_z[train_y==1,0],train_z[train_y==1,1],'o')
    plt.plot(train_z[train_y==0,0],train_z[train_y==0,1],'x')
    plt.plot(x0,-(theta[0]+theta[1]*x0)/theta[2],linestyle='dashed')
    plt.show()
    
    # Prediction
    def classify1(x):
        """Return 1 where f(x) >= 0.5 and 0 elsewhere.

        astype(np.int_) converts the boolean mask to integers (True -> 1, False -> 0).
        """
        is_positive = f(x) >= 0.5
        return is_positive.astype(np.int_)


    # Two unseen sizes, standardized and expanded the same way as the training data.
    new_sizes = [[200, 100], [100, 200]]
    array = classify1(to_matrix(standardize(new_sizes)))

    print(array)
    
    
  3. 分类:线性不可分分类问题

    训练数据:data3.csv

    x1,x2,y
    0.54508775,2.34541183,0
    0.32769134,13.43066561,0
    4.42748117,14.74150395,0
    2.98189041,-1.81818172,1
    4.02286274,8.90695686,1
    2.26722613,-6.61287392,1
    -2.66447221,5.05453871,1
    -1.03482441,-1.95643469,1
    4.06331548,1.70892541,1
    2.89053966,6.07174283,0
    2.26929206,10.59789814,0
    4.68096051,13.01153161,1
    1.27884366,-9.83826738,1
    -0.1485496,12.99605136,0
    -0.65113893,10.59417745,0
    3.69145079,3.25209182,1
    -0.63429623,11.6135625,0
    0.17589959,5.84139826,0
    0.98204409,-9.41271559,1
    -0.11094911,6.27900499,0
    
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Read the training data; skiprows=1 skips the "x1,x2,y" header row.
    train = np.loadtxt('data3.csv', skiprows=1, delimiter=',')
    # Columns 0-1 are the features, column 2 is the label (0 or 1 in the sample data).
    train_x, train_y = train[:, :2], train[:, 2]

    # Scatter plot of the raw data (disabled by wrapping it in a string)
    '''
    plt.plot(train_x[train_y==1,0],train_x[train_y==1,1],'o')
    plt.plot(train_x[train_y==0,0],train_x[train_y==0,1],'x')
    plt.show()
    '''

    # Initial parameters: four random values, for x0, x1, x2 and the x1^2 term.
    theta = np.random.rand(4)

    # Accuracy history; one value is appended after every training iteration.
    accuracies = []
    
    # Standardization: per-column mean and standard deviation of the training set
    mu = np.mean(train_x, axis=0)
    sigma = np.std(train_x, axis=0)


    def standardize(x):
        """Rescale x using the training set's per-column mean and std."""
        centered = x - mu
        return centered / sigma


    train_z = standardize(train_x)
    
    # Add x0 (constant 1) and x3 (the square of the first feature)
    def to_matrix(x):
        """Return the columns [1, x, x[:, 0]**2] joined side by side.

        x is indexed as a 2-D array; the result has two more columns than x.
        """
        n_rows = x.shape[0]
        bias = np.ones((n_rows, 1))
        # x[:, :1] keeps the first column two-dimensional, shape (n_rows, 1)
        first_squared = x[:, :1] ** 2
        return np.concatenate((bias, x, first_squared), axis=1)
    
    # Design matrix: standardized features plus the x0 and x1^2 columns.
    X = to_matrix(train_z)

    # Sigmoid function
    def f(x):
        """Return sigmoid(x·theta), using the module-level ``theta``."""
        score = np.dot(x, theta)
        return 1 / (1 + np.exp(-score))
    
    # Learning rate
    ETA = 0.001

    # Number of iterations
    epoch = 5000


    def classify1(x):
        """Return 1 where f(x) >= 0.5 and 0 elsewhere, as an integer array."""
        is_positive = f(x) >= 0.5
        return is_positive.astype(np.int_)


    # Repeated learning: one full-batch update of theta per iteration.
    for _ in range(epoch):
        residual = f(X) - train_y
        theta = theta - ETA * np.dot(residual, X)
        # Accuracy with the updated theta: fraction of training samples
        # whose predicted class equals the label.
        result = classify1(X) == train_y
        accuracy = np.count_nonzero(result) / len(result)
        accuracies.append(accuracy)
    
    # Decision boundary, with x0 = 1 and x3 = x1^2:
    #   theta^T x = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0
    #   =>  x2 = -(theta0 + theta1*x1 + theta3*x1^2) / theta2
    x1 = np.linspace(-2, 2, 100)
    boundary_numerator = theta[0] + theta[1] * x1 + theta[3] * x1 ** 2
    x2 = -boundary_numerator / theta[2]
    # Label 1 is drawn with 'o' markers, label 0 with 'x' markers.
    for label, marker in ((1, 'o'), (0, 'x')):
        members = train_z[train_y == label]
        plt.plot(members[:, 0], members[:, 1], marker)
    plt.plot(x1, x2, linestyle='dashed')
    plt.show()

    # Plot the accuracy curve (disabled)
    # x = np.arange(len(accuracies))
    # plt.plot(x,accuracies)
    # plt.show()
    

    在这里插入图片描述

    因为训练数据过少(只有 20 个),精度值只能是 0.05 的整数倍,所以 acc 曲线有棱有角:

    在这里插入图片描述

  4. 分类:线性不可分分类问题 随机梯度下降法的实现

    训练数据:同上

    import numpy as np
    import matplotlib.pyplot as plt
    
    # Read the training data; skiprows=1 skips the header row.
    train = np.loadtxt('data3.csv', skiprows=1, delimiter=',')
    # Columns 0-1 are the features, column 2 is the label.
    train_x, train_y = train[:, :2], train[:, 2]

    # Scatter plot of the raw data (disabled by wrapping it in a string)
    '''
    plt.plot(train_x[train_y==1,0],train_x[train_y==1,1],'o')
    plt.plot(train_x[train_y==0,0],train_x[train_y==0,1],'x')
    plt.show()
    '''

    # Initial parameters: four random values, for x0, x1, x2 and the x1^2 term.
    theta = np.random.rand(4)

    # Accuracy history.
    # NOTE(review): nothing in the visible part of this script appends to or reads it.
    accuracies = []
    
    # Standardization: per-column mean and standard deviation of the training set
    mu = np.mean(train_x, axis=0)
    sigma = np.std(train_x, axis=0)


    def standardize(x):
        """Rescale x using the training set's per-column mean and std."""
        centered = x - mu
        return centered / sigma


    train_z = standardize(train_x)
    
    # Add x0 (constant 1) and x3 (the square of the first feature)
    def to_matrix(x):
        """Return the columns [1, x, x[:, 0]**2] joined side by side.

        x is indexed as a 2-D array; the result has two more columns than x.
        """
        n_rows = x.shape[0]
        bias = np.ones((n_rows, 1))
        # x[:, :1] keeps the first column two-dimensional, shape (n_rows, 1)
        first_squared = x[:, :1] ** 2
        return np.concatenate((bias, x, first_squared), axis=1)
    
    # Design matrix: standardized features plus the x0 and x1^2 columns.
    X = to_matrix(train_z)

    # Sigmoid function
    def f(x):
        """Return sigmoid(x·theta), using the module-level ``theta``."""
        score = np.dot(x, theta)
        return 1 / (1 + np.exp(-score))
    
    # Learning rate
    ETA = 0.001

    # Number of passes over the training set
    epoch = 5000


    def classify1(x):
        """Return 1 where f(x) >= 0.5 and 0 elsewhere, as an integer array."""
        is_positive = f(x) >= 0.5
        return is_positive.astype(np.int_)


    # Repeated learning
    for _ in range(epoch):
        # Stochastic gradient descent: visit the samples in a fresh random
        # order each pass and update theta after every single sample.
        p = np.random.permutation(len(X))
        for x, y in zip(X[p], train_y[p]):
            error = f(x) - y
            theta = theta - ETA * error * x
    
    # Decision boundary, with x0 = 1 and x3 = x1^2:
    #   theta^T x = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 = 0
    #   =>  x2 = -(theta0 + theta1*x1 + theta3*x1^2) / theta2
    x1 = np.linspace(-2, 2, 100)
    boundary_numerator = theta[0] + theta[1] * x1 + theta[3] * x1 ** 2
    x2 = -boundary_numerator / theta[2]
    # Label 1 is drawn with 'o' markers, label 0 with 'x' markers.
    for label, marker in ((1, 'o'), (0, 'x')):
        members = train_z[train_y == label]
        plt.plot(members[:, 0], members[:, 1], marker)
    plt.plot(x1, x2, linestyle='dashed')
    plt.show()
    

网站公告

今日签到

点亮在社区的每一天
去签到