%matplotlib inline
import random
import torch
from d2l import torch as d2l
1. Generate the dataset
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise."""
    x = torch.normal(0, 1, (num_examples, len(w)))  # mean 0, std 1, shape (1000, 2)
    y = torch.matmul(x, w) + b  # matrix product; for higher-dimensional inputs, matmul multiplies the last two dims and broadcasts the leading dims
    y += torch.normal(0, 0.01, y.shape)  # add Gaussian noise with std 0.01
    return x, y.reshape((-1, 1))
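The comment on torch.matmul above can be made concrete with a quick shape check (a small sketch of my own, not part of the original notebook):
x_demo = torch.normal(0, 1, (1000, 2))
w_demo = torch.tensor([2.0, -3.4])
print(torch.matmul(x_demo, w_demo).shape)  # torch.Size([1000]): (1000, 2) @ (2,) gives one value per example
a_demo = torch.ones(4, 3, 2)
b_demo = torch.ones(2, 5)
print(torch.matmul(a_demo, b_demo).shape)  # torch.Size([4, 3, 5]): last two dims are matrix-multiplied, leading dims broadcast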
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
features.shape, labels.shape
(torch.Size([1000, 2]), torch.Size([1000, 1]))
# Plot the data
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1)
(Output: a scatter plot of the second feature against the labels, showing a roughly linear, negative-slope relationship.)
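As an extra sanity check (my own addition, assuming PyTorch >= 1.9 for torch.linalg.lstsq), linear regression has a closed-form least-squares solution, so we can recover true_w and true_b directly from the synthetic data:
X_aug = torch.cat([features, torch.ones(len(features), 1)], dim=1)  # append a column of ones for the bias
sol = torch.linalg.lstsq(X_aug, labels).solution  # shape (3, 1)
print(sol[:2].flatten(), sol[2])  # close to true_w = [2, -3.4] and true_b = 4.2
The gradient-based training below should end up with essentially the same parameters.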
2. Read the dataset
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle the indices so examples are read in random order
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i:min(i + batch_size, num_examples)])  # min: i + batch_size may exceed the dataset size
        yield features[batch_indices], labels[batch_indices]
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
tensor([[ 1.5946, 0.6724],
[-0.1018, -0.2454],
[-0.7724, -1.8857],
[ 0.5850, -2.3932],
[-0.9178, 2.3138],
[ 0.1072, 2.1181],
[ 0.3176, -2.1640],
[ 1.1418, 0.3118],
[ 1.3866, -1.2852],
[-0.1227, -0.4315]])
tensor([[ 5.0937],
[ 4.8374],
[ 9.0904],
[13.5061],
[-5.4900],
[-2.7859],
[12.1854],
[ 5.4120],
[11.3396],
[ 5.4209]])
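For comparison, the same shuffled minibatching can be obtained from PyTorch's built-in data utilities (a sketch of my own, not part of the original post):
from torch.utils import data
dataset = data.TensorDataset(features, labels)
loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
X, y = next(iter(loader))
print(X.shape, y.shape)  # torch.Size([10, 2]) torch.Size([10, 1])
The hand-written data_iter is kept here because the point of this section is to implement everything from scratch.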
3. Initialize model parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # requires_grad: track gradients automatically
b = torch.zeros(1, requires_grad=True)
4. Define the model
The model: $\hat{y} = \mathbf{X}\mathbf{w} + b$
def linreg(X, w, b):  #@save
    """The linear regression model."""
    # matmul(X, w) has shape (batch_size, 1); adding b (shape (1,)) broadcasts it across the rows
    return torch.matmul(X, w) + b
5. Loss function
def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2  # the 1/2 factor simplifies the derivative
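As a quick cross-check (my own sketch, not in the original), torch.nn.MSELoss(reduction='mean') computes the mean of (y_hat - y)**2, so it should equal twice the mean of squared_loss up to floating-point error:
mse = torch.nn.MSELoss(reduction='mean')
y_hat_demo = torch.tensor([[1.0], [2.0]])
y_demo = torch.tensor([0.5, 2.5])
print(mse(y_hat_demo, y_demo.reshape(y_hat_demo.shape)))  # tensor(0.2500)
print(2 * squared_loss(y_hat_demo, y_demo).mean())        # tensor(0.2500)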
6. Optimization algorithm
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size  # divide by batch_size because the loss is summed over the batch
            param.grad.zero_()  # reset the gradient
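For reference (my own sketch, not part of the original), the same update rule can be obtained from torch.optim.SGD, provided the minibatch loss is averaged instead of summed, which is exactly why sgd above divides by batch_size:
w_ref = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b_ref = torch.zeros(1, requires_grad=True)
trainer = torch.optim.SGD([w_ref, b_ref], lr=0.03)
for X, y in data_iter(10, features, labels):
    l = squared_loss(linreg(X, w_ref, b_ref), y).mean()  # mean over the batch, so no /batch_size in the step
    trainer.zero_grad()
    l.backward()
    trainer.step()
    break  # one step is enough to illustrate the update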
7. Training
lr = 0.03
num_epochs = 5
net = linreg
loss = squared_loss
def train(net, loss, sgd, lr, batch_size, num_epochs):
    # note: the parameters w, b and the dataset (features, labels) are used as globals
    for epoch in range(num_epochs):
        for x, y in data_iter(batch_size, features, labels):
            l = loss(net(x, w, b), y)  # minibatch loss on x and y
            # l has shape (batch_size, 1) rather than being a scalar, so sum its
            # elements before computing the gradients with respect to [w, b]
            l.sum().backward()  # backpropagation
            sgd([w, b], lr, batch_size)  # update the parameters using their gradients
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)
            print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
train(net,loss,sgd,lr,batch_size,num_epochs)
print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
epoch 1, loss 0.039063
epoch 2, loss 0.000137
epoch 3, loss 0.000052
epoch 4, loss 0.000052
epoch 5, loss 0.000052
estimation error of w: tensor([ 0.0003, -0.0003], grad_fn=<SubBackward0>)
estimation error of b: tensor([-0.0003], grad_fn=<RsubBackward1>)
Note that the loss plateaus around 0.00005: with label noise of std 0.01, the irreducible expected squared loss is 0.01**2 / 2 = 0.00005.
Exercises
1. What happens if we initialize the weights to zero? Does the algorithm still work?
w = torch.zeros((2,1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
w,b
(tensor([[0.],
[0.]], requires_grad=True),
tensor([0.], requires_grad=True))
lr = 0.03
num_epochs=5
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    for x, y in data_iter(batch_size, features, labels):
        l = loss(net(x, w, b), y)  # minibatch loss on x and y
        # l has shape (batch_size, 1) rather than being a scalar, so sum its
        # elements before computing the gradients with respect to [w, b]
        l.sum().backward()  # backpropagation
        sgd([w, b], lr, batch_size)  # update the parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
epoch 1, loss 0.039145
epoch 2, loss 0.000148
epoch 3, loss 0.000052
epoch 4, loss 0.000052
epoch 5, loss 0.000052
estimation error of w: tensor([ 0.0002, -0.0001], grad_fn=<SubBackward0>)
estimation error of b: tensor([-0.0003], grad_fn=<RsubBackward1>)
Yes, the algorithm still works: the squared loss of linear regression is convex in (w, b), so a zero initialization converges just as well as a small random one.
2. Try different learning rates and observe how quickly the loss decreases.
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # re-initialize the parameters
b = torch.zeros(1, requires_grad=True)
lr = 0.1
train(net,loss,sgd,lr,batch_size,num_epochs)
epoch 1, loss 0.000052
epoch 2, loss 0.000053
epoch 3, loss 0.000053
epoch 4, loss 0.000052
epoch 5, loss 0.000052
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # re-initialize the parameters
b = torch.zeros(1, requires_grad=True)
lr = 0.001
train(net,loss,sgd,lr,batch_size,num_epochs)
epoch 1, loss 13.501316
epoch 2, loss 11.077716
epoch 3, loss 9.089215
epoch 4, loss 7.457697
epoch 5, loss 6.119079
With lr = 0.1 the loss reaches the noise floor within the first epoch, whereas with lr = 0.001 it decreases slowly and is still far from converged after 5 epochs.
3. If the number of examples is not divisible by the batch size, how does the behavior of data_iter change?
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1002)
features.shape, labels.shape
(torch.Size([1002, 2]), torch.Size([1002, 1]))
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
tensor([[ 1.0339, -0.3519],
[ 0.0263, -1.9477],
[-0.2656, 0.0769],
[-0.8365, 0.9819],
[ 0.0466, -1.2629],
[ 0.3026, 1.2143],
[ 0.0703, -1.3212],
[-0.5305, -0.8711],
[ 1.4451, -0.9024],
[-1.3351, -1.4617]])
tensor([[ 7.4491],
[10.8596],
[ 3.4007],
[-0.8099],
[ 8.5943],
[ 0.6945],
[ 8.8334],
[ 6.0887],
[10.1421],
[ 6.4951]])
... (intermediate minibatches omitted) ...
tensor([[ 1.8936, 0.1687],
[ 0.1139, -1.8070]])
tensor([[ 7.4165],
[10.5828]])
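The final minibatch simply contains the leftover examples (here 2), because the index slice is clipped with min(i + batch_size, num_examples). If a full-size final batch is required, the incomplete one can be dropped instead (a sketch of my own, mirroring DataLoader's drop_last=True option):
def data_iter_drop_last(batch_size, features, labels):
    """Like data_iter, but skip a final batch smaller than batch_size."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples - batch_size + 1, batch_size):
        batch_indices = torch.tensor(indices[i:i + batch_size])
        yield features[batch_indices], labels[batch_indices]
With the 1002-example dataset above this yields 100 batches of 10 and silently drops the remaining 2 examples in each epoch.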