mooc机器学习第六天-K近邻,决策树,朴素贝叶斯分类器简单尝试
作者:互联网
1.下面的代码是上一篇理论中的小例子
import numpy as np  # numerical arrays
from sklearn.datasets import load_iris  # iris flower data set
from sklearn.model_selection import cross_val_score  # cross-validation scoring helper
from sklearn.naive_bayes import GaussianNB  # Gaussian naive Bayes classifier
from sklearn.neighbors import KNeighborsClassifier  # k-nearest-neighbours classifier
from sklearn.tree import DecisionTreeClassifier  # decision-tree classifier

# The demos run in this order: k-nearest neighbours, cross-validated
# decision tree, decision tree on the toy data, Gaussian naive Bayes.

# Four one-dimensional toy samples with two classes.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

# --- k-nearest neighbours: vote among the 3 closest samples ---
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X, y)
print("+++++K近邻+++++")
print(knn_model.predict([[1.2]]))

# --- decision tree: 10-fold cross-validation on the iris data ---
tree_model = DecisionTreeClassifier()
iris = load_iris()
cv_scores = cross_val_score(tree_model, iris.data, iris.target, cv=10)
print("+++++交叉验证+++++")
print(cv_scores)

# --- decision tree: fit the same estimator on the toy samples ---
print("+++++决策树+++++")
tree_model.fit(X, y)
print(tree_model.predict([[2.2]]))

# --- Gaussian naive Bayes on six two-dimensional points ---
points = np.array([[-1, -1], [-2, -1], [-3, -2], [2, 1], [1, 1], [3, 2]])
point_classes = np.array([1, 1, 1, 2, 2, 2])
bayes_model = GaussianNB()  # priors left at the default (None)
bayes_model.fit(points, point_classes)
bayes_prediction = bayes_model.predict([[-0.8, -1]])
print("+++++朴素毕贝叶斯+++++")
print(bayes_prediction)
2.结果
+++++K近邻+++++ [0] +++++交叉验证+++++ [ 1. 0.93333333 1. 0.93333333 0.93333333 0.86666667 0.93333333 0.93333333 1. 1. ] +++++决策树+++++ [1] +++++朴素毕贝叶斯+++++ [1]
3.利用mooc给的feature数据实践
"""Train KNN, decision-tree and Gaussian naive-Bayes classifiers on feature files.

Feature files are comma-separated with "?" marking missing values; label
files hold one label per row.  Data sets A-D are used for training and
data set E for testing.
"""
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer  # replaces the removed sklearn.preprocessing.Imputer
from sklearn.metrics import classification_report  # precision / recall / F1 report
from sklearn.model_selection import train_test_split  # was sklearn.cross_validation; retained import
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle

# Number of columns every feature file is expected to have.
N_FEATURES = 41


def load_datasets(feature_path, lable_path):
    """Load and stack feature and label files.

    Args:
        feature_path: iterable of paths to comma-separated feature files.
            "?" entries are read as missing and replaced by the mean of
            their column, computed separately for each file.
        lable_path: iterable of paths to label files (one label per row).

    Returns:
        A ``(feature, lable)`` tuple: ``feature`` is a 2-D float array with
        ``N_FEATURES`` columns holding the rows of all feature files in
        order; ``lable`` is the flattened 1-D array of all labels.

    Raises:
        ValueError: from ``np.concatenate`` if a feature file does not
            yield ``N_FEATURES`` columns.
    """
    # Seeding with an empty (0, N_FEATURES) array keeps the result shape for
    # an empty path list and makes concatenate reject files of another width.
    feature_blocks = [np.empty((0, N_FEATURES))]
    for path in feature_path:
        frame = pd.read_csv(path, delimiter=',', na_values='?', header=None)
        # Fill missing values with the per-column mean of this file.
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        feature_blocks.append(imputer.fit_transform(frame))
    # Concatenate once instead of re-copying the growing array per file.
    feature = np.concatenate(feature_blocks)

    label_blocks = [np.empty((0, 1))]
    for path in lable_path:
        label_blocks.append(pd.read_table(path, header=None).values)
    lable = np.ravel(np.concatenate(label_blocks))

    return feature, lable


def main():
    """Train the three classifiers on sets A-D and report metrics on set E."""
    # Concrete data locations.
    feature_paths = ['/A/A.feature',
                     '/B/B.feature',
                     '/C/C.feature',
                     '/D/D.feature',
                     '/E/E.feature']
    label_paths = ['/A/A.label',
                   '/B/B.label',
                   '/C/C.label',
                   '/D/D.label',
                   '/E/E.label']

    # Read the data: the first four sets train, the last one tests.
    x_train, y_train = load_datasets(feature_paths[:4], label_paths[:4])
    x_test, y_test = load_datasets(feature_paths[4:], label_paths[4:])

    # Shuffle the training rows.  train_test_split(test_size=0.0) is rejected
    # by current scikit-learn, so shuffle features and labels in unison here.
    x_train, y_train = shuffle(x_train, y_train)

    # Create the three classifiers, then train and predict with each in turn.
    classifiers = [
        ('knn', KNeighborsClassifier()),
        ('DT', DecisionTreeClassifier()),
        ('Bayes', GaussianNB()),
    ]
    predictions = []
    for name, estimator in classifiers:
        print('Start training {}'.format(name))
        estimator.fit(x_train, y_train)
        print('Training done')
        predictions.append((name, estimator.predict(x_test)))
        print('Prediction done')

    # Show the results: a text report of precision, recall and F1 per class.
    for name, answer in predictions:
        print('\n\nThe classification report for {}:'.format(name))
        print(classification_report(y_test, answer))


if __name__ == '__main__':
    main()
标签:mooc,feature,分类器,test,+++++,train,print,import,决策树 来源: https://www.cnblogs.com/cheflone/p/13205215.html