目录
(1)机器学习库(sklearn.linear_model)
1、概述
(1)人工智能学习

(2)机器学习


(3)有监督学习


(4)线性回归

2、线性回归
(1)实现步骤
1:根据随机初始化的 w x b 和 y 来计算 loss
2:根据当前的 w x b 和 y 的值来计算梯度
3:更新梯度,循环将新的 w′ 和 b′ 复赋给 w 和 b ,最终得到一个最优的 w′ 和 b′ 作为方程最终的
(2)数学表达式

3、代码实现(Python)
(1)机器学习库(sklearn.linear_model)

代码:
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt#用于作图
from pylab import *
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
import numpy as np#用于创建向量
reg=linear_model.LinearRegression(fit_intercept=True,normalize=False)
x=[[32.50235],[53.4268],[61.53036],[47.47564],[59.81321],[55.14219],[52.14219],[39.29957],
[48.10504],[52.55001],[45.41873],[54.35163],[44.16405],[58.16847],[56.72721]]
y=[31.70701,68.7776,62.56238,71.54663,87.23093,78.21152,79.64197,59.17149,75.33124,71.30088,55.16568,82.47885,62.00892
,75.39287,81.43619]
reg.fit(x,y)
k=reg.coef_#获取斜率w1,w2,w3,...,wn
b=reg.intercept_#获取截距w0
x0=np.arange(30,60,0.2)
y0=k*x0+b
print("k={0},b={1}".format(k,b))
plt.scatter(x,y)
plt.plot(x0,y0,label='LinearRegression')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
结果:
k=[1.36695374],b=0.13079331831460195
(2)Python详细实现(方法1)

代码:
#方法1
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
#数据生成
data = []
for i in range(100):
x = np.random.uniform(3., 12.)
# mean=0, std=1
eps = np.random.normal(0., 1)
y = 1.677 * x + 0.039 + eps
data.append([x, y])
data = np.array(data)
#统计误差
# y = wx + b
def compute_error_for_line_given_points(b, w, points):
totalError = 0
for i in range(0, len(points)):
x = points[i, 0]
y = points[i, 1]
# computer mean-squared-error
totalError += (y - (w * x + b)) ** 2
# average loss for each point
return totalError / float(len(points))
#计算梯度
def step_gradient(b_current, w_current, points, learningRate):
b_gradient = 0
w_gradient = 0
N = float(len(points))
for i in range(0, len(points)):
x = points[i, 0]
y = points[i, 1]
# grad_b = 2(wx+b-y)
b_gradient += (2/N) * ((w_current * x + b_current) - y)
# grad_w = 2(wx+b-y)*x
w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
# update w'
new_b = b_current - (learningRate * b_gradient)
new_w = w_current - (learningRate * w_gradient)
return [new_b, new_w]
#迭代更新
def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
b = starting_b
w = starting_w
# update for several times
for i in range(num_iterations):
b, w = step_gradient(b, w, np.array(points), learning_rate)
return [b, w]
def main():
learning_rate = 0.0001
initial_b = 0 # initial y-intercept guess
initial_w = 0 # initial slope guess
num_iterations = 1000
print("迭代前 b = {0}, w = {1}, error = {2}"
.format(initial_b, initial_w,
compute_error_for_line_given_points(initial_b, initial_w, data))
)
print("Running...")
[b, w] = gradient_descent_runner(data, initial_b, initial_w, learning_rate, num_iterations)
print("第 {0} 次迭代结果 b = {1}, w = {2}, error = {3}".
format(num_iterations, b, w,
compute_error_for_line_given_points(b, w, data))
)
plt.plot(data[:,0],data[:,1], color='b', marker='+', linestyle='--',label='true')
plt.plot(data[:,0],w*data[:,0]+b,color='r',label='predict')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
if __name__ == '__main__':
main()
结果:

迭代前 b = 0, w = 0, error = 186.61000821356697
Running...
第 1000 次迭代结果 b = 0.20558501549252192, w = 1.6589067569038516, error = 0.9963685680112963
(3)Python详细实现(方法2)
代码:
#方法2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams["font.sans-serif"]=["SimHei"]
mpl.rcParams["axes.unicode_minus"]=False
# y = wx + b
#Import data
file=pd.read_csv("data.csv")
def compute_error_for_line_given(b, w):
totalError = np.sum((file['y']-(w*file['x']+b))**2)
return np.mean(totalError)
def step_gradient(b_current, w_current, learningRate):
b_gradient = 0
w_gradient = 0
N = float(len(file['x']))
for i in range (0,len(file['x'])):
# grad_b = 2(wx+b-y)
b_gradient += (2 / N) * ((w_current * file['x'] + b_current) - file['y'])
# grad_w = 2(wx+b-y)*x
w_gradient += (2 / N) * file['x'] * ((w_current * file['x'] + b_current) - file['x'])
# update w'
new_b = b_current - (learningRate * b_gradient)
new_w = w_current - (learningRate * w_gradient)
return [new_b, new_w]
def gradient_descent_runner( starting_b, starting_w, learning_rate, num_iterations):
b = starting_b
w = starting_w
# update for several times
for i in range(num_iterations):
b, w = step_gradient(b, w, learning_rate)
return [b, w]
def main():
learning_rate = 0.0001
initial_b = 0 # initial y-intercept guess
initial_w = 0 # initial slope guess
num_iterations = 100
print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
.format(initial_b, initial_w,
compute_error_for_line_given(initial_b, initial_w))
)
print("Running...")
[b, w] = gradient_descent_runner(initial_b, initial_w, learning_rate, num_iterations)
print("After {0} iterations b = {1}, w = {2}, error = {3}".
format(num_iterations, b, w,
compute_error_for_line_given(b, w))
)
plt.plot(file['x'],file['y'],'ro',label='线性回归')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
if __name__ == '__main__':
main()结果:

Starting gradient descent at b = 0, w = 0, error = 75104.71822821398
Running...
After 100 iterations b = 0 0.014845
1 0.325621
2 0.036883
3 0.502265
4 0.564917
5 0.479366
6 0.568968
7 0.422619
8 0.565073
9 0.393907
10 0.216854
11 0.580750
12 0.379350
13 0.361574
14 0.511651
dtype: float64, w = 0 0.999520
1 0.994006
2 0.999405
3 0.989645
4 0.990683
5 0.991444
6 0.989282
7 0.989573
8 0.988498
9 0.992633
10 0.995329
11 0.989490
12 0.991617
13 0.993872
14 0.991116
dtype: float64, error = 6451.5510231710905
数据:

(4)Python详细实现(方法3)
#方法3
import numpy as np
points = np.genfromtxt("data.csv", delimiter=",")
#从数据读入到返回需要两个迭代循环,第一个迭代将文件中每一行转化为一个字符串序列,
#第二个循环迭代对每个字符串序列指定合适的数据类型:
# y = wx + b
def compute_error_for_line_given_points(b, w, points):
totalError = 0
for i in range(0, len(points)):
x = points[i, 0]
y = points[i, 1]
# computer mean-squared-error
totalError += (y - (w * x + b)) ** 2
# average loss for each point
return totalError / float(len(points))
def step_gradient(b_current, w_current, points, learningRate):
b_gradient = 0
w_gradient = 0
N = float(len(points))
for i in range(0, len(points)):
x = points[i, 0]
y = points[i, 1]
# grad_b = 2(wx+b-y)
b_gradient += (2 / N) * ((w_current * x + b_current) - y)
# grad_w = 2(wx+b-y)*x
w_gradient += (2 / N) * x * ((w_current * x + b_current) - y)
# update w'
new_b = b_current - (learningRate * b_gradient)
new_w = w_current - (learningRate * w_gradient)
return [new_b, new_w]
def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
b = starting_b
w = starting_w
# update for several times
for i in range(num_iterations):
b, w = step_gradient(b, w, np.array(points), learning_rate)
return [b, w]
def main():
learning_rate = 0.0001
initial_b = 0 # initial y-intercept guess
initial_w = 0 # initial slope guess
num_iterations = 1000
print("Starting gradient descent at b = {0}, w = {1}, error = {2}"
.format(initial_b, initial_w,
compute_error_for_line_given_points(initial_b, initial_w, points))
)
print("Running...")
[b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
print("After {0} iterations b = {1}, w = {2}, error = {3}".
format(num_iterations, b, w,
compute_error_for_line_given_points(b, w, points))
)
if __name__ == '__main__':
main()
4、案例——房屋与价格、尺寸
(1)代码
#1.导入包
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
#2.加载训练数据,建立回归方程
# 取数据集(1)
datasets_X = [] #存放房屋面积
datasets_Y = [] #存放交易价格
fr = open('房价与房屋尺寸.csv','r') #读取文件,r: 以只读方式打开文件,w: 打开一个文件只用于写入。
lines = fr.readlines() #一次读取整个文件。
for line in lines: #逐行进行操作,循环遍历所有数据
items = line.strip().split(',') #去除数据文件中的逗号,strip()用于移除字符串头尾指定的字符(默认为空格或换行符)或字符序列。
#split(‘ ’): 通过指定分隔符对字符串进行切片,如果参数 num 有指定值,则分隔 num+1 个子字符串。
datasets_X.append(int(items[0])) #将读取的数据转换为int型,并分别写入
datasets_Y.append(int(items[1]))
length = len(datasets_X) #求得datasets_X的长度,即为数据的总数
datasets_X = np.array(datasets_X).reshape([length,1]) #将datasets_X转化为数组,并变为1维,以符合线性回归拟合函数输入参数要求
datasets_Y = np.array(datasets_Y) #将datasets_Y转化为数组
#取数据集(2)
'''fr = pd.read_csv('房价与房屋尺寸.csv',encoding='utf-8')
datasets_X=fr['房屋面积']
datasets_Y=fr['交易价格']'''
minX = min(datasets_X)
maxX = max(datasets_X)
X = np.arange(minX,maxX).reshape([-1,1]) #以数据datasets_X的最大值和最小值为范围,建立等差数列,方便后续画图。
#reshape([-1,1]),转换成1列,reshape([2,-1]):转换成两行
linear = linear_model.LinearRegression() #调用线性回归模块,建立回归方程,拟合数据
linear.fit(datasets_X, datasets_Y)
#3.斜率及截距
print('Coefficients:', linear.coef_) #查看回归方程系数(k)
print('intercept:', linear.intercept_) ##查看回归方程截距(b)
print('y={0}x+{1}'.format(linear.coef_,linear.intercept_)) #拟合线
# 4.图像中显示
plt.scatter(datasets_X, datasets_Y, color = 'red')
plt.plot(X, linear.predict(X), color = 'blue')
plt.xlabel('Area')
plt.ylabel('Price')
plt.show()(2)结果
Coefficients: [0.14198749]
intercept: 53.43633899175563
y=[0.14198749]x+53.43633899175563 
(3)数据
第一列是房屋面积,第二列是交易价格:
1000 168 792 184 1260 197 1262 220 1240 228 1170 248 1230 305 1255 256 1194 240 1450 230 1481 202 1475 220 1482 232 1484 460 1512 320 1680 340 1620 240 1720 368 1800 280 4400 710 4212 552 3920 580 3212 585 3151 590 3100 560 2700 285 2612 292 2705 482 2570 462 2442 352 2387 440 2292 462 2308 325 2252 298 2202 352 2157 403 2140 308 4000 795 4200 765 3900 705 3544 420 2980 402 4355 762 3150 392 3025 320 3450 350 4402 820 3454 425 890 272
5、致谢
————————————————
https://blog.csdn.net/m0_55740904/article/details/114363666
版权声明:本文为weixin_46039719原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。


