机器学习 集成学习篇——python实现Bagging和AdaBOOST算法
作者:互联网
机器学习 集成学习篇——python实现Bagging和AdaBOOST算法
摘要
本文使用 Python 实现了集成学习中的 Bagging 和 AdaBoost 算法,并将代码封装成类,方便读者直接调用。
Bagging算法
import numpy as np
import pandas as pd
class Cyrus_bagging(object):
    """Bagging (bootstrap aggregating) classifier around any fit/predict estimator.

    Each round draws a bootstrap sample (same size as the training set, with
    replacement) and fits an independent copy of ``estimator`` on it.
    Prediction is a plain majority vote over the fitted copies.

    Parameters
    ----------
    estimator : object
        Prototype base learner exposing ``fit(x, y)`` and ``predict(x)``.
        It is never fitted itself; every round works on a deep copy.
    n_estimators : int, default 20
        Number of bootstrap rounds / fitted copies.

    Notes
    -----
    Votes are collected in a float array and cast to ``int`` on return, so
    class labels must be integer-valued.
    """

    def __init__(self, estimator, n_estimators=20):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.models = None  # list of fitted copies, populated by fit()

    def fit(self, x, y):
        """Fit ``n_estimators`` independent copies on bootstrap resamples.

        Parameters
        ----------
        x : array-like, indexed by sample along the first axis
        y : array-like, flattened to one label per sample

        Returns
        -------
        self
        """
        # Local import keeps the class usable without touching the file's
        # top-level import block.
        import copy

        x = np.array(x)
        y = np.array(y).reshape((-1,))
        indices = np.arange(x.shape[0])
        self.models = []
        for _ in range(self.n_estimators):
            index = np.random.choice(indices, x.shape[0])
            # A fresh copy per round: sklearn-style fit() returns the
            # estimator itself, so refitting the shared prototype would leave
            # every list entry pointing at the same (last) fit.
            learner = copy.deepcopy(self.estimator)
            fitted = learner.fit(x[index], y[index])
            # Tolerate estimators whose fit() returns None.
            self.models.append(learner if fitted is None else fitted)
        return self

    def predict(self, x):
        """Return the majority-vote label for every row of ``x``.

        Ties are resolved in favour of the smallest label.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if not self.models:
            raise RuntimeError("fit() must be called before predict()")
        x = np.asarray(x)
        res = np.zeros([x.shape[0], len(self.models)])
        for i, fitted in enumerate(self.models):
            res[:, i] = fitted.predict(x)
        result = []
        for row in res:
            # np.unique yields the distinct labels (sorted) with their counts;
            # take the label itself, not the position of the largest count.
            labels, counts = np.unique(row, return_counts=True)
            result.append(int(labels[counts.argmax()]))
        return np.array(result, dtype=int)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
# Usage example: bag k-nearest-neighbour classifiers (Cyrus_bagging defaults
# to n_estimators=20) and report per-class metrics for the x_test predictions.
# NOTE(review): x_train, y_train, x_test and y_test are not defined in this
# excerpt — presumably a train/test split prepared earlier; confirm.
knn = KNeighborsClassifier()
model = Cyrus_bagging(knn)
model.fit(x_train,y_train)
y_pre = model.predict(x_test)
print(classification_report(y_test,y_pre))
为了突出集成算法与单一模型在准确率上的差别,示例特意使用了特征较少的数据,因此准确率不算特别高;不过与未使用集成算法的模型相比,准确率已经高出不少。
precision recall f1-score support
0 1.00 1.00 1.00 11
1 0.67 0.67 0.67 9
2 0.70 0.70 0.70 10
avg / total 0.80 0.80 0.80 30
AdaBoost算法
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
class CyrusAdaBoost(object):
    """Boosting-by-resampling classifier around any fit/predict estimator.

    Each round draws a training sample according to the current sample
    weights, fits an independent copy of ``estimator`` on it, measures that
    copy's weighted error on the full training set and re-weights the samples
    so that misclassified ones are more likely to be drawn next round.

    This is a simplified weighting scheme, not the canonical AdaBoost
    formulas: sample weights are scaled by ``exp(-error)`` (correct) or
    ``exp(+error)`` (wrong), and each learner votes with weight
    ``1 - error``.

    Parameters
    ----------
    estimator : object
        Prototype base learner exposing ``fit(x, y)`` and ``predict(x)``.
        It is never fitted itself; every round works on a deep copy.
    n_estimators : int, default 20
        Number of boosting rounds.

    Notes
    -----
    Predictions are collected in a float array and cast to ``int`` on
    return, so class labels must be integer-valued (negative labels are
    supported).
    """

    def __init__(self, estimator, n_estimators=20):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.error_rate = None  # per-round weighted training error, set by fit()
        self.model = None  # list of fitted copies, set by fit()

    @staticmethod
    def _weighted_error(y, pre_y, w):
        """Return the share of total weight carried by misclassified samples."""
        return float(np.sum(w[y != pre_y]) / np.sum(w))

    def update_w(self, y, pre_y, w):
        """Return re-normalised sample weights after one boosting round.

        ``y``, ``pre_y`` and ``w`` must be aligned element by element (entry
        ``i`` of each refers to the same sample). The input ``w`` is not
        modified.

        Parameters
        ----------
        y : array-like of true labels
        pre_y : array-like of predicted labels
        w : array-like of current sample weights

        Returns
        -------
        numpy.ndarray
            New weights summing to 1.
        """
        y = np.asarray(y).reshape((-1,))
        pre_y = np.asarray(pre_y).reshape((-1,))
        w = np.asarray(w, dtype=float)
        error_rate = self._weighted_error(y, pre_y, w)
        # Shrink the weight of correctly classified samples, grow the rest.
        factor = np.where(y == pre_y, np.exp(-error_rate), np.exp(error_rate))
        new_w = w * factor
        return new_w / new_w.sum()

    def cal_label(self, result, alpha):
        """Pick, per row, the label with the largest summed vote weight.

        Parameters
        ----------
        result : array-like, shape (n_samples, n_learners)
            Label predicted by each learner for each sample.
        alpha : array-like, shape (n_learners,)
            Vote weight of each learner.

        Returns
        -------
        numpy.ndarray of int
            Winning label per sample; ties go to the smallest label.
        """
        result = np.asarray(result)
        alpha = np.asarray(alpha, dtype=float)
        if result.size == 0:
            return np.array([], dtype=int)
        # Tally over the distinct label values rather than indexing an array
        # by the label itself, which would wrap around for negative labels.
        labels = np.unique(result)
        label_idx = np.searchsorted(labels, result)
        rows = np.arange(result.shape[0])
        scores = np.zeros((result.shape[0], labels.shape[0]))
        for j in range(result.shape[1]):
            scores[rows, label_idx[:, j]] += alpha[j]
        return labels[scores.argmax(axis=1)].astype(int)

    def fit(self, x, y):
        """Run ``n_estimators`` boosting rounds on the training data.

        Parameters
        ----------
        x : array-like, indexed by sample along the first axis
        y : array-like, flattened to one label per sample

        Returns
        -------
        self
        """
        # Local import keeps the class usable without touching the file's
        # top-level import block.
        import copy

        x = np.array(x)
        y = np.array(y).reshape((-1,))
        self.error_rate = []
        self.model = []
        w0 = np.ones(x.shape[0])
        w0 = w0 / w0.sum()
        indices = np.arange(x.shape[0])
        for _ in range(self.n_estimators):
            index = np.random.choice(indices, size=x.shape[0], p=w0)
            # A fresh copy per round: sklearn-style fit() returns the
            # estimator itself, so refitting the shared prototype would leave
            # every list entry pointing at the same (last) fit.
            learner = copy.deepcopy(self.estimator)
            fitted = learner.fit(x[index], y[index])
            model0 = learner if fitted is None else fitted
            # Score on the full training set so predictions line up with w0,
            # which is indexed by original sample, not by resampled row.
            pre_y = np.asarray(model0.predict(x)).reshape((-1,))
            self.error_rate.append(self._weighted_error(y, pre_y, w0))
            self.model.append(model0)
            w0 = self.update_w(y, pre_y, w0)
        return self

    def predict(self, x):
        """Return the weighted-vote label for every row of ``x``.

        Each fitted learner votes with weight ``1 - error_rate``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if not self.model:
            raise RuntimeError("fit() must be called before predict()")
        x = np.asarray(x)
        res = np.zeros([x.shape[0], len(self.model)])
        for i, fitted in enumerate(self.model):
            res[:, i] = fitted.predict(x)
        alpha = 1 - np.array(self.error_rate)
        return self.cal_label(res, alpha)
from sklearn.tree import DecisionTreeClassifier
# Usage example: boost 50 decision trees and print the accuracy of the
# predictions for x_test.
# NOTE(review): x_train, y_train, x_test and y_test are not defined in this
# excerpt — presumably a train/test split prepared earlier; confirm.
model = CyrusAdaBoost(estimator=DecisionTreeClassifier(),n_estimators=50)
model.fit(x_train,y_train)
y_pre = model.predict(x_test)
# Predictions are passed first here; the conventional argument order is
# (y_true, y_pred).
print(accuracy_score(y_pre,y_test))
0.932
by CyrusMay 2020 06 12
这世界全部的漂亮
不过你的可爱模样
——————五月天(爱情的模样)——————
标签:Bagging,pre,python,self,shape,AdaBOOST,estimators,np,model 来源: https://blog.csdn.net/Cyrus_May/article/details/106714283