机器学习之线性回归_覃秉丰——源码
作者:互联网
一元线性回归
import numpy as np

# Learning rate used for every gradient-descent step.
lr = 0.0001
# Initial slope of the fitted line.
k = -2
# Initial intercept of the fitted line.
b = -2
# Number of gradient-descent iterations.
epochs = 500


def compute_loss(x_data, y_data, k, b):
    """Return the halved mean squared error of the line ``y = k * x + b``.

    Args:
        x_data: 1-D sequence of feature values.
        y_data: 1-D sequence of target values, same length as ``x_data``.
        k: Slope of the line.
        b: Intercept of the line.

    Returns:
        ``sum((y - (k * x + b)) ** 2) / (2 * n)`` as a float.

    Raises:
        ZeroDivisionError: If ``x_data`` is empty.
    """
    x = np.asarray(x_data, dtype=float)
    y = np.asarray(y_data, dtype=float)
    residuals = y - (k * x + b)
    # Dividing a plain Python float keeps the ZeroDivisionError on empty
    # input instead of silently producing NaN.
    return float(np.sum(residuals ** 2)) / (2.0 * len(x))


def gradient(x_data, y_data, k, b, lr, epochs):
    """Fit ``y = k * x + b`` with batch gradient descent.

    Args:
        x_data: 1-D sequence of feature values.
        y_data: 1-D sequence of target values, same length as ``x_data``.
        k: Starting slope.
        b: Starting intercept.
        lr: Learning rate applied to both parameters.
        epochs: Number of full passes over the data.

    Returns:
        The tuple ``(k, b)`` after ``epochs`` updates. Empty input leaves
        the starting values unchanged.
    """
    x = np.asarray(x_data, dtype=float)
    y = np.asarray(y_data, dtype=float)
    m = float(len(x))
    if m == 0:
        return k, b

    for _ in range(epochs):
        # Both gradients are evaluated at the same (k, b) before either
        # parameter is updated.
        residuals = (k * x + b) - y
        b_gradient = residuals.sum() / m
        k_gradient = (residuals * x).sum() / m
        k -= lr * k_gradient
        b -= lr * b_gradient
    return k, b


def main():
    """Fit ``data.csv`` with gradient descent, print the loss and plot the fit."""
    # Imported here so the numeric helpers above stay importable on
    # machines without a plotting backend.
    from matplotlib import pyplot as plt

    # Column 0 is used as x, column 1 as y.
    data = np.genfromtxt('data.csv', delimiter=',')
    x_data = data[:, 0]
    y_data = data[:, 1]

    print('starting k = {0} ,b = {1} ,error = {2} '.format(
        k, b, compute_loss(x_data, y_data, k, b)))
    k_fit, b_fit = gradient(x_data, y_data, k, b, lr, epochs)

    plt.plot(x_data, k_fit * x_data + b_fit, 'r')
    plt.plot(x_data, y_data, 'b.')
    print('loss =:', compute_loss(x_data, y_data, k_fit, b_fit),
          'b =:', b_fit, 'k =:', k_fit)
    plt.show()


if __name__ == "__main__":
    main()
使用sklearn的一元线性回归
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression

# Load the CSV; column 0 is used as the feature and column 1 as the target.
data = np.genfromtxt(r'data.csv', delimiter=',')
print(data[:, 0])

# fit() below is given 2-D arrays, so keep each column as an (n, 1) slice
# rather than a flat vector.
x_data = data[:, 0:1]
y_data = data[:, 1:2]

# Create the model and fit it in one step (fit() returns the estimator).
model = LinearRegression().fit(x_data, y_data)
predicted = model.predict(x_data)

# Blue dots: observations; red line: the fitted predictions.
plt.plot(x_data, y_data, 'b.')
plt.plot(x_data, predicted, 'r')
plt.show()
多元线性回归
import numpy as np
from numpy import genfromtxt

# Learning rate used for every gradient-descent step.
lr = 0.0001
# Initial parameters: intercept and the two feature weights.
theta0 = 0
theta1 = 0
theta2 = 0
# Number of gradient-descent iterations.
epochs = 1000


def compute_error(theta0, theta1, theta2, x_data, y_data):
    """Return the mean squared error of a two-feature linear model.

    The model is ``theta0 + theta1 * x[:, 0] + theta2 * x[:, 1]``.

    Args:
        theta0: Intercept.
        theta1: Weight of the first feature column.
        theta2: Weight of the second feature column.
        x_data: 2-D array-like with at least two columns.
        y_data: 1-D array-like of targets, one per row of ``x_data``.

    Returns:
        ``sum((y - prediction) ** 2) / n`` as a float.

    Raises:
        ZeroDivisionError: If ``x_data`` is empty.
    """
    x = np.asarray(x_data, dtype=float)
    y = np.asarray(y_data, dtype=float)
    if len(x) == 0:
        # Same exception the per-sample loop produced for empty input.
        raise ZeroDivisionError("compute_error() needs at least one sample")
    residuals = y - (theta0 + theta1 * x[:, 0] + theta2 * x[:, 1])
    return float(np.sum(residuals ** 2)) / float(len(x))


def gradient_descent_runner(x_data, y_data, theta0, theta1, theta2, lr, epochs):
    """Fit the two-feature linear model with batch gradient descent.

    Args:
        x_data: 2-D array-like with at least two columns.
        y_data: 1-D array-like of targets, one per row of ``x_data``.
        theta0: Starting intercept.
        theta1: Starting weight of the first feature column.
        theta2: Starting weight of the second feature column.
        lr: Learning rate applied to all three parameters.
        epochs: Number of full passes over the data.

    Returns:
        The tuple ``(theta0, theta1, theta2)`` after ``epochs`` updates.
        Empty input leaves the starting values unchanged.
    """
    x = np.asarray(x_data, dtype=float)
    y = np.asarray(y_data, dtype=float)
    m = float(len(x))
    if m == 0:
        return theta0, theta1, theta2

    x0 = x[:, 0]
    x1 = x[:, 1]
    for _ in range(epochs):
        # All three gradients are averaged over the data at the current
        # parameters before any parameter is updated.
        residuals = (theta1 * x0 + theta2 * x1 + theta0) - y
        theta0_grad = residuals.sum() / m
        theta1_grad = (x0 * residuals).sum() / m
        theta2_grad = (x1 * residuals).sum() / m
        theta0 -= lr * theta0_grad
        theta1 -= lr * theta1_grad
        theta2 -= lr * theta2_grad
    return theta0, theta1, theta2


def main():
    """Fit ``Delivery.csv``, print the result and draw the fitted plane in 3-D."""
    # Imported here so the numeric helpers above stay importable on
    # machines without a plotting backend.
    import matplotlib.pyplot as plt
    # Kept from the original script; older matplotlib releases need this
    # import for the '3d' projection to be available.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    data = genfromtxt(r"Delivery.csv", delimiter=',')
    print(data)

    # Every column but the last is a feature; the last column is the target.
    x_data = data[:, :-1]
    y_data = data[:, -1]
    print(x_data)
    print(y_data)

    print("Starting theta0 = {0}, theta1 = {1}, theta2 = {2}, error = {3}".format(
        theta0, theta1, theta2,
        compute_error(theta0, theta1, theta2, x_data, y_data)))
    print("Running...")
    fit0, fit1, fit2 = gradient_descent_runner(
        x_data, y_data, theta0, theta1, theta2, lr, epochs)
    print("After {0} iterations theta0 = {1}, theta1 = {2}, theta2 = {3}, error = {4}".format(
        epochs, fit0, fit1, fit2,
        compute_error(fit0, fit1, fit2, x_data, y_data)))

    ax = plt.figure().add_subplot(111, projection='3d')
    # Observations are drawn as red circles.
    ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)

    # Build a grid from the observed feature values and evaluate the
    # fitted plane on it.
    x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
    z = fit0 + x0 * fit1 + x1 * fit2
    ax.plot_surface(x0, x1, z)

    ax.set_xlabel('Miles')
    ax.set_ylabel('Num of Deliveries')
    ax.set_zlabel('Time')
    plt.show()


if __name__ == "__main__":
    main()
使用sklearn的多元线性回归
import numpy as np
from numpy import genfromtxt
from sklearn import linear_model
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# LinearRegression fits ordinary least squares with a direct solver (it takes
# no learning rate or epoch count), so its coefficients need not match the
# result of a gradient-descent fit that has not fully converged.

# Load the data.
# NOTE(review): hard-coded absolute path from the author's machine; adjust
# before running elsewhere.
data = genfromtxt(r"D:\2019年工作资料\光伏相关文档\源码资料\回归分类数据\数据\Delivery.csv", delimiter=',')
print(data)

# Every column but the last is a feature; the last column is the target.
x_data, y_data = data[:, :-1], data[:, -1]
print(x_data)
print(y_data)

# Create and fit the model (fit() returns the estimator).
model = linear_model.LinearRegression().fit(x_data, y_data)
print("coefficients:", model.coef_)
print("intercept:", model.intercept_)

# Predict a single hand-written sample.
x_test = [[102, 4]]
predict = model.predict(x_test)
print("predict:", predict)

ax = plt.figure().add_subplot(111, projection='3d')
# Observations are drawn as red circles.
ax.scatter(x_data[:, 0], x_data[:, 1], y_data, c='r', marker='o', s=100)

# Build a grid from the observed feature values.
x0, x1 = np.meshgrid(x_data[:, 0], x_data[:, 1])
# plot_surface is given 2-D arrays, so the plane is evaluated on the grid
# from the fitted intercept and coefficients; model.predict(x_data) would
# only give a 1-D result.
z = model.intercept_ + x0 * model.coef_[0] + x1 * model.coef_[1]
ax.plot_surface(x0, x1, z)

# Axis labels.
for set_label, text in (
    (ax.set_xlabel, 'Miles'),
    (ax.set_ylabel, 'Num of Deliveries'),
    (ax.set_zlabel, 'Time'),
):
    set_label(text)

plt.show()
最后:多项式回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# Load the data. Row 0 is skipped (presumably a header row -- confirm against
# job.csv); column 1 is used as x and column 2 as y.
data = np.genfromtxt(r"job.csv", delimiter=",")
x_data = data[1:, 1]
y_data = data[1:, 2]
plt.scatter(x_data, y_data)
plt.show()

# The estimators below are given 2-D arrays, so add a trailing axis.
x_data = x_data[:, np.newaxis]
y_data = y_data[:, np.newaxis]

# Baseline: plain linear fit.
model = LinearRegression()
model.fit(x_data, y_data)

plt.plot(x_data, y_data, 'b.')
plt.plot(x_data, model.predict(x_data), 'r')
plt.show()

# Polynomial regression; `degree` controls how many polynomial features are
# generated.
poly_reg = PolynomialFeatures(degree=5)
# Fit the feature transformer once, on the training inputs.
x_poly = poly_reg.fit_transform(x_data)
lin_reg = LinearRegression()
lin_reg.fit(x_poly, y_data)

# Fitted curve evaluated at the training points; the already-transformed
# features are reused instead of re-fitting the transformer.
plt.plot(x_data, y_data, 'b.')
plt.plot(x_data, lin_reg.predict(x_poly), c='r')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Same curve on 100 evenly spaced points from 1 to 10 for a smoother line.
# New inputs go through transform(), not fit_transform(), so the transformer
# fitted on the training data is not re-fitted at prediction time.
plt.plot(x_data, y_data, 'b.')
x_test = np.linspace(1, 10, 100)[:, np.newaxis]
plt.plot(x_test, lin_reg.predict(poly_reg.transform(x_test)), c='r')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
标签:theta2,theta0,plt,theta1,覃秉丰,线性,model,data,源码 来源: https://www.cnblogs.com/ray-blog/p/12173498.html