【发布时间】:2018-07-22 09:17:36
【问题描述】:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms, models
import time
import argparse
import os
batch_size = 64
train_dataset = datasets.CIFAR10(root='./data/cifar10/',
train=True,
transform=transforms.ToTensor(),
download=True)
test_dataset = datasets.CIFAR10(root='./data/cifar10/',
train=False,
transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
class Vgg16(nn.Module):
def __init__(self, num_classes=10):
super(Vgg16, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(64, 64, kernel_size=3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),
nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(128, 128, kernel_size=3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),
nn.Conv2d(128, 256, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),
nn.Conv2d(256, 512, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2, dilation=1),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(True),
nn.MaxPool2d(kernel_size=2, stride=2, dilation=1)
)
self.classifier = nn.Sequential(
nn.Linear(25088, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, num_classes)
)
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return F.softmax(x)
model = Vgg16()
# print(model)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss().cuda()
if torch.cuda.device_count() > 0:
# os.environ["CUDA_VISIBLE_DEVICES"]= '0'
print("USE", torch.cuda.device_count(), "GPUs!")
model = nn.DataParallel(model)
else:
print("USE ONLY CPU!")
if torch.cuda.is_available():
model.cuda()
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
if torch.cuda.is_available():
data, target = Variable(data.cuda()), Variable(target.cuda())
else:
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = model(data)
# loss = F.nll_loss(output, target)
loss = criterion(output, target)
loss.backward()
optimizer.step()
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.data[0]))
def test():
model.eval()
test_loss = 0
correct = 0
for data, target in test_loader:
if torch.cuda.is_available():
data, target = Variable(data.cuda(), volatile=True), Variable(target.cuda())
else:
data, target = Variable(data, volatile=True), Variable(target)
output = model(data)
test_loss += F.nll_loss(output, target, size_average=False).data[0]
pred = output.data.max(1, keepdim=True)[1] # [0] : value, [1]: index
correct += pred.eq(target.data.view_as(pred)).sum()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
for epoch in range(0, 200):
train(epoch)
test()
当我运行这段代码时..发生了这个错误。
-> RuntimeError: /pytorch/torch/lib/THC/generic/THCTensorMathBlas.cu:243 的大小不匹配
当我在 pytorch 的其他 vgg 代码中打印(模型)时,FC 层的输入大小为 25088... 于是,我尝试将这个参数设置为25088,出现大小不匹配错误。 当我将此 input_size 从 25088 更改为 512 时,没有错误,但训练效果不佳。(在训练过程中从未改变损失,在测试过程中始终有 10% 的准确率) 所以我认为这个 FC 层的输入大小是问题所在。在这种情况下我该怎么办? 提前致谢;
【问题讨论】:
-
您的数据预处理是如何完成的?我怀疑您正在向网络提供尺寸错误的图像
-
@ManuelLagunas 我只是在 pytorch 中使用 DataLoader。我上传了我的完整代码。
标签: pytorch