%matplotlib inline
import random
import torch
from d2l import torch as d2l
1. Generate the dataset
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise."""
    x = torch.normal(0, 1, (num_examples, len(w)))  # mean 0, std 1, shape (1000, 2)
    y = torch.matmul(x, w) + b  # matrix product; for higher-dimensional inputs, matmul multiplies the last two dims and broadcasts the leading dims
    y += torch.normal(0, 0.01, y.shape)  # add Gaussian noise with std 0.01
    return x, y.reshape((-1, 1))
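The comment on torch.matmul above can be made concrete with a quick shape check (a small sketch of my own, not part of the original notebook):
x_demo = torch.normal(0, 1, (1000, 2))
w_demo = torch.tensor([2.0, -3.4])
print(torch.matmul(x_demo, w_demo).shape)  # torch.Size([1000]): (1000, 2) @ (2,) gives one value per example
a_demo = torch.ones(4, 3, 2)
b_demo = torch.ones(2, 5)
print(torch.matmul(a_demo, b_demo).shape)  # torch.Size([4, 3, 5]): last two dims are matrix-multiplied, leading dims broadcast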
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
features.shape, labels.shape
(torch.Size([1000, 2]), torch.Size([1000, 1]))
# Plot the data
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
(Output: a scatter plot of the second feature against the labels, showing a roughly linear, negative-slope relationship.)
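As an extra sanity check (my own addition, assuming PyTorch >= 1.9 for torch.linalg.lstsq), linear regression has a closed-form least-squares solution, so we can recover true_w and true_b directly from the synthetic data:
X_aug = torch.cat([features, torch.ones(len(features), 1)], dim=1)  # append a column of ones for the bias
sol = torch.linalg.lstsq(X_aug, labels).solution  # shape (3, 1)
print(sol[:2].flatten(), sol[2])  # close to true_w = [2, -3.4] and true_b = 4.2
The gradient-based training below should end up with essentially the same parameters.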
2. Read the dataset
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle the indices so examples are read in random order
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i:min(i + batch_size, num_examples)])  # min: i + batch_size may exceed the dataset size
        yield features[batch_indices], labels[batch_indices]
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
tensor([[ 1.5946, 0.6724],
[-0.1018, -0.2454],
[-0.7724, -1.8857],
[ 0.5850, -2.3932],
[-0.9178, 2.3138],
[ 0.1072, 2.1181],
[ 0.3176, -2.1640],
[ 1.1418, 0.3118],
[ 1.3866, -1.2852],
[-0.1227, -0.4315]])
tensor([[ 5.0937],
[ 4.8374],
[ 9.0904],
[13.5061],
[-5.4900],
[-2.7859],
[12.1854],
[ 5.4120],
[11.3396],
[ 5.4209]])
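For comparison, the same shuffled minibatching can be obtained from PyTorch's built-in data utilities (a sketch of my own, not part of the original post):
from torch.utils import data
dataset = data.TensorDataset(features, labels)
loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
X, y = next(iter(loader))
print(X.shape, y.shape)  # torch.Size([10, 2]) torch.Size([10, 1])
The hand-written data_iter is kept here because the point of this section is to implement everything from scratch.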
3. Initialize model parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # requires_grad: track gradients automatically
b = torch.zeros(1, requires_grad=True)
4. Define the model
The model: $\hat{y} = \mathbf{X}\mathbf{w} + b$
def linreg(X, w, b):  #@save
    """The linear regression model."""
    # matmul(X, w) has shape (batch_size, 1); adding b (shape (1,)) broadcasts it across the rows
    return torch.matmul(X, w) + b
5. Loss function
def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2  # the 1/2 factor simplifies the derivative
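As a quick cross-check (my own sketch, not in the original), torch.nn.MSELoss(reduction='mean') computes the mean of (y_hat - y)**2, so it should equal twice the mean of squared_loss up to floating-point error:
mse = torch.nn.MSELoss(reduction='mean')
y_hat_demo = torch.tensor([[1.0], [2.0]])
y_demo = torch.tensor([0.5, 2.5])
print(mse(y_hat_demo, y_demo.reshape(y_hat_demo.shape)))  # tensor(0.2500)
print(2 * squared_loss(y_hat_demo, y_demo).mean())        # tensor(0.2500)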
6. Optimization algorithm
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size  # divide by batch_size because the loss is summed over the batch
            param.grad.zero_()  # reset the gradient
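For reference (my own sketch, not part of the original), the same update rule can be obtained from torch.optim.SGD, provided the minibatch loss is averaged instead of summed, which is exactly why sgd above divides by batch_size:
w_ref = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b_ref = torch.zeros(1, requires_grad=True)
trainer = torch.optim.SGD([w_ref, b_ref], lr=0.03)
for X, y in data_iter(10, features, labels):
    l = squared_loss(linreg(X, w_ref, b_ref), y).mean()  # mean over the batch, so no /batch_size in the step
    trainer.zero_grad()
    l.backward()
    trainer.step()
    break  # one step is enough to illustrate the update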
7. Training
lr = 0.03
num_epochs = 5
net = linreg
loss = squared_loss
def train(net, loss, sgd, lr, batch_size, num_epochs):
    # note: the parameters w, b and the dataset (features, labels) are used as globals
    for epoch in range(num_epochs):
        for x, y in data_iter(batch_size, features, labels):
            l = loss(net(x, w, b), y)  # minibatch loss on x and y
            # l has shape (batch_size, 1) rather than being a scalar, so sum its
            # elements before computing the gradients with respect to [w, b]
            l.sum().backward()  # backpropagation
            sgd([w, b], lr, batch_size)  # update the parameters using their gradients
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)
            print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
train(net,loss,sgd,lr,batch_size,num_epochs)
print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
epoch 1, loss 0.039063
epoch 2, loss 0.000137
epoch 3, loss 0.000052
epoch 4, loss 0.000052
epoch 5, loss 0.000052
estimation error of w: tensor([ 0.0003, -0.0003], grad_fn=<SubBackward0>)
estimation error of b: tensor([-0.0003], grad_fn=<RsubBackward1>)
Note that the loss plateaus around 0.00005: with label noise of std 0.01, the irreducible expected squared loss is 0.01**2 / 2 = 0.00005.
Exercises
1. What happens if we initialize the weights to zero? Does the algorithm still work?
w = torch.zeros((2,1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
w,b
(tensor([[0.],
[0.]], requires_grad=True),
tensor([0.], requires_grad=True))
lr = 0.03
num_epochs=5
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    for x, y in data_iter(batch_size, features, labels):
        l = loss(net(x, w, b), y)  # minibatch loss on x and y
        # l has shape (batch_size, 1) rather than being a scalar, so sum its
        # elements before computing the gradients with respect to [w, b]
        l.sum().backward()  # backpropagation
        sgd([w, b], lr, batch_size)  # update the parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
epoch 1, loss 0.039145
epoch 2, loss 0.000148
epoch 3, loss 0.000052
epoch 4, loss 0.000052
epoch 5, loss 0.000052
estimation error of w: tensor([ 0.0002, -0.0001], grad_fn=<SubBackward0>)
estimation error of b: tensor([-0.0003], grad_fn=<RsubBackward1>)
Yes, the algorithm still works: the squared loss of linear regression is convex in (w, b), so a zero initialization converges just as well as a small random one.
2. Try different learning rates and observe how quickly the loss decreases.
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # re-initialize the parameters
b = torch.zeros(1, requires_grad=True)
lr = 0.1
train(net,loss,sgd,lr,batch_size,num_epochs)
epoch 1, loss 0.000052
epoch 2, loss 0.000053
epoch 3, loss 0.000053
epoch 4, loss 0.000052
epoch 5, loss 0.000052
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # re-initialize the parameters
b = torch.zeros(1, requires_grad=True)
lr = 0.001
train(net,loss,sgd,lr,batch_size,num_epochs)
epoch 1, loss 13.501316
epoch 2, loss 11.077716
epoch 3, loss 9.089215
epoch 4, loss 7.457697
epoch 5, loss 6.119079
With lr = 0.1 the loss reaches the noise floor within the first epoch, whereas with lr = 0.001 it decreases slowly and is still far from converged after 5 epochs.
3. If the number of examples is not divisible by the batch size, how does the behavior of data_iter change?
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1002)
features.shape, labels.shape
(torch.Size([1002, 2]), torch.Size([1002, 1]))
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
tensor([[ 1.0339, -0.3519],
[ 0.0263, -1.9477],
[-0.2656, 0.0769],
[-0.8365, 0.9819],
[ 0.0466, -1.2629],
[ 0.3026, 1.2143],
[ 0.0703, -1.3212],
[-0.5305, -0.8711],
[ 1.4451, -0.9024],
[-1.3351, -1.4617]])
tensor([[ 7.4491],
[10.8596],
[ 3.4007],
[-0.8099],
[ 8.5943],
[ 0.6945],
[ 8.8334],
[ 6.0887],
[10.1421],
[ 6.4951]])
... (intermediate minibatches omitted) ...
tensor([[ 1.8936, 0.1687],
[ 0.1139, -1.8070]])
tensor([[ 7.4165],
[10.5828]])
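The final minibatch simply contains the leftover examples (here 2), because the index slice is clipped with min(i + batch_size, num_examples). If a full-size final batch is required, the incomplete one can be dropped instead (a sketch of my own, mirroring DataLoader's drop_last=True option):
def data_iter_drop_last(batch_size, features, labels):
    """Like data_iter, but skip a final batch smaller than batch_size."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples - batch_size + 1, batch_size):
        batch_indices = torch.tensor(indices[i:i + batch_size])
        yield features[batch_indices], labels[batch_indices]
With the 1002-example dataset above this yields 100 batches of 10 and silently drops the remaining 2 examples in each epoch.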