编程语言
首页 > 编程语言 > k-近邻算法(手写识别系统)

k-近邻算法(手写识别系统)

作者:互联网

这个例子和改进约会网站配对效果的那个例子差不多:事先把所有手写数字处理成32*32像素大小的黑白图,再转换成字符图(用0、1表示),然后把每张图的1024个像素点保存为一个1×1024的一维向量,这样就可以通过kNN计算欧几里得距离,由距离最近的k个训练样本投票,得到最接近的答案。

import os
import operator
from numpy import *

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest training rows.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``; ``labels[i]`` is the class of row ``i``.  When several
    labels collect the same number of votes, the one encountered first
    while walking the neighbours from nearest to farthest is returned.

    Raises IndexError if ``k`` exceeds the number of rows or if no votes
    are cast (``k == 0``).
    """
    n_rows = dataSet.shape[0]
    # Repeat the query once per training row so the subtraction is element-wise.
    repeated_query = tile(inX, (n_rows, 1))
    squared_offsets = (repeated_query - dataSet) ** 2
    # axis=1 sums across each row (axis=0 would sum down each column).
    row_distances = squared_offsets.sum(axis=1) ** 0.5
    # argsort returns the original row indices ordered by ascending distance.
    order = row_distances.argsort()

    votes = {}
    for rank in range(k):
        neighbour_label = labels[order[rank]]
        if neighbour_label in votes:
            votes[neighbour_label] += 1
        else:
            votes[neighbour_label] = 1

    # The sort is stable, so equal counts keep their first-seen order.
    ranking = list(votes.items())
    ranking.sort(key=operator.itemgetter(1), reverse=True)
    winner = ranking[0][0]
    return winner


def img2vector(filename):
    """Load a 32x32 character image from a text file into a 1x1024 vector.

    The first 32 characters of each of the first 32 lines are converted
    with ``int`` and stored row by row, so the pixel at line ``i``,
    column ``j`` lands at index ``i*32 + j``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A numpy array of shape (1, 1024).

    Raises:
        IndexError: If a line has fewer than 32 characters (or the file
            has fewer than 32 lines).
        ValueError: If a character is not a decimal digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even if parsing fails; this function
    # is called once per image, so an unclosed handle would leak on every call.
    with open(filename) as f:
        for i in range(32):
            line = f.readline()
            for j in range(32):
                returnVect[0, i*32 + j] = int(line[j])
    return returnVect


def handwritingClassTest():
    """Evaluate the kNN digit classifier and print its error rate.

    Every file in ``trainingDigits`` becomes one row of the training
    matrix; its label is the integer before the first ``_`` in the file
    name.  Each file in ``testDigits`` is then classified with k=3, the
    prediction is printed next to the true digit, and finally the
    fraction of wrong predictions is printed.
    """
    train_names = os.listdir('trainingDigits')
    train_matrix = zeros((len(train_names), 1024))
    train_labels = []
    for row, name in enumerate(train_names):
        # File names look like "<digit>_<index>.<ext>"; keep the leading digit.
        stem = name.split('.')[0]
        train_labels.append(int(stem.split('_')[0]))
        train_matrix[row, :] = img2vector('trainingDigits/%s' % name)

    test_names = os.listdir('testDigits')
    mistakes = 0.0
    for name in test_names:
        stem = name.split('.')[0]
        expected = int(stem.split('_')[0])
        sample = img2vector('testDigits/%s' % name)
        predicted = classify0(sample, train_matrix, train_labels, 3)
        if predicted != expected:
            mistakes += 1
        print('The classifier came back with: %d, the real answer is: %d' % (predicted, expected))
    print('错误率为%f' % (mistakes / float(len(test_names))))

# Run the handwriting-recognition evaluation only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == '__main__':
    handwritingClassTest()

标签:__,classCount,32,近邻,识别系统,prefix,range,testFileList,手写
来源: https://blog.csdn.net/lcl497049972/article/details/97946642