【问题标题】:Gradient is None in PyTorch(PyTorch 中梯度为 None)
【发布时间】:2020-02-21 02:32:02
【问题描述】:

对于下面的代码,我想通过 get_grads() 得到梯度,但是输出总是 None,这是什么原因?

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

# Synthetic regression data: 100 inputs in a single column, scaled by 10
# because raw randn samples cluster tightly around 0.
X = 10 * torch.randn(100, 1)
# Targets follow the inputs plus Gaussian noise with standard deviation 3.
noise = 3 * torch.randn(100, 1)
y = X + noise
# Scatter-plot the samples.
plt.plot(X.numpy(), y.numpy(), 'o')
plt.xlabel('x')
plt.ylabel('y')

class LR(nn.Module):
  """Linear-regression model consisting of a single fully connected layer."""

  def __init__(self, input_size, output_size):
    """Create the layer mapping `input_size` features to `output_size` outputs."""
    super(LR, self).__init__()
    self.linear = nn.Linear(in_features=input_size, out_features=output_size)

  def forward(self, x):
    """Return the linear layer's prediction for the input batch `x`."""
    return self.linear(x)

# Seed the RNG so the model's initial weight and bias are reproducible.
torch.manual_seed(1)
model = LR(input_size=1, output_size=1)

# The model's only parameters are the linear layer's weight and bias, in that order.
w, b = model.parameters()
def get_params():
  """Return the current weight and bias as a tuple of Python floats."""
  weight = w[0][0].item()
  bias = b[0].item()
  return (weight, bias)
def get_grads():
  """Return the `.grad` attributes of the indexed elements `w[0][0]` and `b[0]`."""
  # NOTE(review): w[0][0] and b[0] are new tensors produced by indexing, not
  # the parameters themselves; this appears to be why both values print as
  # None in the output below.
  return (w[0][0].grad, b[0].grad)

# Mean-squared-error loss, minimised with plain stochastic gradient descent.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)


epochs = 30
losses = []  # per-epoch loss values, stored as plain Python floats
for i in range(epochs):
  # Call the module itself rather than .forward() so registered hooks also run.
  y_pred = model(X)
  loss = criterion(y_pred, y)
  loss_value = loss.item()
  print("epoch:", i, "loss:", loss_value)

  # Record the float, not the tensor: a stored loss tensor keeps a reference
  # to that epoch's autograd graph for as long as the list lives.
  losses.append(loss_value)
  optimizer.zero_grad()  # clear the gradients left over from the previous epoch
  print("model=" + str(get_params())+str(get_grads()))
  loss.backward()  # compute the gradients of the loss w.r.t. the parameters
  print("model2=" + str(get_params())+str(get_grads()))
  optimizer.step()  # update the parameters using the computed gradients
  print("model3=" + str(get_params())+str(get_grads()))
  print()

输出

epoch: 0 loss: 31.3035831451416
model=(0.5152631998062134, -0.44137823581695557)(None, None)
model2=(0.5152631998062134, -0.44137823581695557)(None, None)
model3=(1.447475790977478, -0.4449453055858612)(None, None)

epoch: 1 loss: 31.142377853393555
model=(1.447475790977478, -0.4449453055858612)(None, None)
model2=(1.447475790977478, -0.4449453055858612)(None, None)
model3=(0.5188075304031372, -0.41897052526474)(None, None)

epoch: 2 loss: 30.982675552368164
model=(0.5188075304031372, -0.41897052526474)(None, None)
model2=(0.5188075304031372, -0.41897052526474)(None, None)
model3=(1.4446537494659424, -0.42287370562553406)(None, None)

epoch: 3 loss: 30.824453353881836
model=(1.4446537494659424, -0.42287370562553406)(None, None)
model2=(1.4446537494659424, -0.42287370562553406)(None, None)
model3=(0.5223162770271301, -0.39742958545684814)(None, None)

epoch: 4 loss: 30.667678833007812
model=(0.5223162770271301, -0.39742958545684814)(None, None)
model2=(0.5223162770271301, -0.39742958545684814)(None, None)
model3=(1.4418396949768066, -0.40165263414382935)(None, None)

epoch: 5 loss: 30.51233673095703
model=(1.4418396949768066, -0.40165263414382935)(None, None)
model2=(1.4418396949768066, -0.40165263414382935)(None, None)
model3=(0.5257899761199951, -0.37672188878059387)(None, None)

epoch: 6 loss: 30.358415603637695
model=(0.5257899761199951, -0.37672188878059387)(None, None)
model2=(0.5257899761199951, -0.37672188878059387)(None, None)
model3=(1.4390342235565186, -0.3812492787837982)(None, None)

epoch: 7 loss: 30.205856323242188
model=(1.4390342235565186, -0.3812492787837982)(None, None)
model2=(1.4390342235565186, -0.3812492787837982)(None, None)
model3=(0.5292295813560486, -0.35681530833244324)(None, None)

epoch: 8 loss: 30.054668426513672
model=(0.5292295813560486, -0.35681530833244324)(None, None)
model2=(0.5292295813560486, -0.35681530833244324)(None, None)
model3=(1.4362375736236572, -0.3616321086883545)(None, None)

epoch: 9 loss: 29.904821395874023
model=(1.4362375736236572, -0.3616321086883545)(None, None)
model2=(1.4362375736236572, -0.3616321086883545)(None, None)
model3=(0.532635509967804, -0.3376788794994354)(None, None)

epoch: 10 loss: 29.756284713745117
model=(0.532635509967804, -0.3376788794994354)(None, None)
model2=(0.532635509967804, -0.3376788794994354)(None, None)
model3=(1.4334499835968018, -0.3427707254886627)(None, None)

epoch: 11 loss: 29.609052658081055
model=(1.4334499835968018, -0.3427707254886627)(None, None)
model2=(1.4334499835968018, -0.3427707254886627)(None, None)
model3=(0.5360085964202881, -0.3192828297615051)(None, None)
...

【问题讨论】:

    标签: python pytorch


    【解决方案1】:

    您需要直接通过 w.grad 和 b.grad 获取梯度,而不是 w[0][0].grad,如下所示:

    def get_grads():
      """Return the gradients stored on the weight and bias parameters themselves."""
      weight_grad, bias_grad = w.grad, b.grad
      return (weight_grad, bias_grad)
    

    或者你也可以直接在训练循环中使用参数的名称来打印它的梯度:

    # Look the gradients up through the layer's named parameters, then print them.
    weight_grad = model.linear.weight.grad
    bias_grad = model.linear.bias.grad
    print(weight_grad)
    print(bias_grad)
    

    【讨论】:

      【解决方案2】:

      只需使用 .data 和 .grad 属性来获取您的值,但最好使用 clone() 和 detach() 安全地提取它们,如下所示:

      代码:

      # Unpack the model's two parameters.
      w, b = model.parameters()


      def get_params():
          """Return the current weight and bias as plain Python floats."""
          weight = w.data.clone().detach().item()
          bias = b.data.clone().detach().item()
          return (weight, bias)
      
      
      def get_grads():
          """Return the weight and bias gradients as floats, or as-is when unset.

          Returns:
              A `(weight_grad, bias_grad)` tuple of Python floats once both
              gradients exist; otherwise the raw `.grad` attributes, which
              may be None.
          """
          # Compare against None explicitly. Truth-testing the tensors
          # (`if w.grad and b.grad`) treats an all-zero gradient as missing,
          # so it is returned as a tensor instead of a float, and raises for
          # gradients with more than one element.
          if w.grad is not None and b.grad is not None:
              return (w.grad.clone().detach().item(), b.grad.clone().detach().item())
          return (w.grad, b.grad)
      

      输出:

      所以我们预期:在第一次 backward() 之前梯度为 None -> 在 backward() 之后得到计算出的梯度 -> 在 optimizer.step() 之后保持不变 -> 在 .zero_grad() 之后为零:

      epoch: 0 loss: 36.44832992553711
      model=(0.5152631998062134, -0.44137823581695557)(None, None)
      model2=(0.5152631998062134, -0.44137823581695557)(-108.91305541992188, 0.6554011106491089)
      model3=(1.604393720626831, -0.44793224334716797)(-108.91305541992188, 0.6554011106491089)
      
      epoch: 1 loss: 43.94552230834961
      model=(1.604393720626831, -0.44793224334716797)(tensor([[0.]]), tensor([0.]))
      model2=(1.604393720626831, -0.44793224334716797)(122.66950988769531, -2.455918073654175)
      model3=(0.3776986598968506, -0.4233730733394623)(122.66950988769531, -2.455918073654175)
      

      【讨论】:

        猜你喜欢
        • 2022-08-06
        • 2018-11-29
        • 1970-01-01
        • 2021-09-04
        • 2021-06-29
        • 2011-02-12
        • 2019-08-12
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多