【发布时间】:2021-01-14 22:46:58
【问题描述】:
我有一个输出长度为 2 的向量的网络。我的目标是 1 或 0 的形式,指的是两个可能的类别。获得损失的最佳方法是什么 - 即我应该将目标转换为例如 2 维向量,还是应该转换网络的输出,例如取最大数的位置作为输出?
我的网络如下所示:
class LSTMClassifier(nn.Module):
def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
super().__init__()
self.hidden_dim = hidden_dim
self.layer_dim = layer_dim
self.lstm1 = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, layer_dim, batch_first=True)
self.fc1 = nn.Linear(hidden_dim, 32)
self.fc2 = nn.Linear(32, 1)
self.dropout = nn.Dropout(p=0.2)
self.batch_normalisation1 = nn.BatchNorm1d(layer_dim)
self.batch_normalisation2 = nn.BatchNorm1d(2)
self.activation = nn.Softmax(dim=2)
def forward(self, x):
h0, c0 = self.init_hidden(x)
out, (hn1, cn1) = self.lstm1(x, (h0, c0))
out = self.dropout(out,)
out = self.batch_normalisation1(out)
h1, c1 = self.init_hidden(out)
out, (hn2, cn2) = self.lstm2(out, (h1, c1))
out = self.dropout(out)
out = self.batch_normalisation1(out)
h2, c2 = self.init_hidden(out)
out, (hn3, cn3) = self.lstm2(out, (h2, c2))
out = self.dropout(out)
out = self.batch_normalisation1(out)
out = self.fc1(out[:, -1, :])
out = self.dropout(out)
out = self.fc2(out)
return out
def init_hidden(self, x):
h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
return [t for t in (h0, c0)]
def pred(self, x):
out = self(x)
return out > 0
这个网络的输入示例是:
tensor([[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
[2.3597e-04, 1.1507e-02, 8.7719e-02, 6.1093e-02, 9.5556e-01],
[2.1474e-03, 5.3805e-03, 9.6491e-02, 2.2508e-01, 8.2222e-01]]])
形状为torch.Size([1, 3, 5])。目标当前是1 或0。但是,网络输出一个向量,例如:
tensor([[0.5293, 0.4707]], grad_fn=<SoftmaxBackward>)
在这些目标和网络输出之间设置损失的最佳方法是什么?
更新:
我现在可以按照答案中的建议训练模型:
model = LSTMClassifier(5, 128, 3, 1)
Epochs = 10
batch_size = 32
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-6)
for epoch in range(Epochs):
if epoch == 0:
accurate = 0
for X_instance, y_instance in zip(val_x, val_y):
if int(y_instance) == 1 and model.pred(X_instance.view(-1, 3, 5)).item():
accurate += 1
print(f"Untrained accuracy test set: {accurate/len(val_x)}")
print(f"Epoch {epoch + 1}")
for n, (X, y) in enumerate(train_batches):
model.train()
optimizer.zero_grad()
y_pred = model(X)
loss = criterion(y_pred, y)
loss.backward()
optimizer.step()
model.eval()
accurate = 0
for X_instance, y_instance in zip(val_x, val_y):
if int(y_instance) == 1 and model.pred(X_instance.view(-1, 3, 5)).item():
accurate += 1
print(f"Accuracy test set: {accurate/len(val_x)}")
【问题讨论】: