mihara

Cats vs Dogs based on VGG16

1.准备数据集

from google.colab import drive
drive.mount(\'/content/drive\')
%cd /content/drive/My Drive
!unzip cat_dog.zip

2.数据预处理

# Reorganize the flat training folder into the class-per-directory layout
# that torchvision's ImageFolder expects: train_/cat/*.jpg and train_/dog/*.jpg.
import os
import shutil

# Fix: os.mkdir('train_/cat/') fails because the parent 'train_' does not
# exist yet, and it also fails if the cell is re-run. makedirs with
# exist_ok handles both.
os.makedirs('train_/cat/', exist_ok=True)
os.makedirs('train_/dog/', exist_ok=True)

# Files are named like 'cat_123.jpg' / 'dog_456.jpg'; route each file into
# its class sub-directory based on the prefix before the first underscore.
for fname in os.listdir('cat_dog/train'):
    label = 'cat' if fname.split('_')[0] == 'cat' else 'dog'
    shutil.move(os.path.join('cat_dog/train', fname),
                os.path.join('train_', label, fname))

train_dir = 'train_'
# Data augmentation + normalization pipeline for the training set.
# Fix: `transforms`, `datasets` and `torch` were never imported anywhere in
# the original post — add the imports so the cell is self-contained.
import torch
from torchvision import datasets, transforms

train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomRotation(30),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    # ImageNet channel statistics — required because VGG16 below was
    # pre-trained with this normalization.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# ImageFolder assigns class indices alphabetically by sub-directory name:
# cat -> 0, dog -> 1.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)

train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)

查看图片

import numpy as np
import matplotlib.pyplot as plt


def imshow(image, ax=None, title=None, normalize=True):
    """Display a CHW image tensor on a matplotlib axis.

    Args:
        image: tensor of shape (C, H, W), as produced by ToTensor.
        ax: optional matplotlib axis; a fresh one is created when None.
        title: optional title for the axis.
        normalize: if True, undo the ImageNet normalization before display.

    Returns:
        The matplotlib axis the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots()
    # (C, H, W) tensor -> (H, W, C) numpy array, the layout matplotlib wants.
    image = image.numpy().transpose((1, 2, 0))

    if normalize:
        # Fix: `np` was used here but numpy was never imported in the post.
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        image = std * image + mean
        image = np.clip(image, 0, 1)

    ax.imshow(image)
    # Fix: the `title` parameter was accepted but silently ignored.
    if title is not None:
        ax.set_title(title)
    # Hide the frame and ticks so only the image is visible.
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', length=0)
    ax.set_xticklabels('')
    ax.set_yticklabels('')

    return ax



# Sanity-check one augmented batch from the loader.
images, labels = next(iter(train_dataloader))

# ImageFolder assigns classes alphabetically: cat -> 0, dog -> 1.
title = 'Dog' if labels[0].item() == 1 else 'Cat'
# Fix: the computed title was never passed to imshow in the original.
imshow(images[0], title=title)

3.训练模型

# 3. Build the model: start from VGG16 pre-trained on ImageNet.
# Fix: `models` (torchvision.models) was never imported in the original.
from torchvision import models

# pretrained=True downloads the ImageNet weights (deprecated in newer
# torchvision in favor of `weights=...`; kept for compatibility here).
model = models.vgg16(pretrained=True)
model  # bare expression: in a notebook this prints the architecture summary

冻结特征提取部分的参数

# Freeze the whole pre-trained network so only the replacement classifier
# head (installed below) receives gradient updates.
for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

修改自己的分类器

from collections import OrderedDict

# Fix: `nn` was used throughout the post but torch.nn was never imported.
from torch import nn

# Replace VGG16's stock 3-layer classifier with a small 2-class head:
# 25088 (= 512*7*7 flattened conv features) -> 500 -> 2.
classifier = nn.Sequential(OrderedDict([
    # Hidden layer, regularized with dropout.
    ('dropout1', nn.Dropout(0.3)),
    ('fc1', nn.Linear(25088, 500)),
    ('relu', nn.ReLU()),
    # Output layer: LogSoftmax pairs with the NLLLoss criterion used below.
    ('fc2', nn.Linear(500, 2)),
    ('output', nn.LogSoftmax(dim=1)),
]))

import torch
from torch import nn, optim

# Install the new head on the (otherwise frozen) VGG16 backbone.
model.classifier = classifier

# Loss: NLLLoss expects log-probabilities, which the classifier's
# LogSoftmax output layer provides.
criterion = nn.NLLLoss()

# Optimizer: Adam over the classifier parameters only — the frozen
# backbone parameters have requires_grad=False and get no updates.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# Fix: `device` was used throughout the original but never defined.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

迭代训练

from tqdm import tqdm

epochs = 5

for e in range(epochs):
    # Per-epoch accumulators: summed batch losses, sample count, hit count.
    running_loss, total, correct = 0.0, 0, 0

    model.train()

    for images, labels in tqdm(train_dataloader):
        # Move the batch to the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics.
        running_loss += loss.item()
        total += labels.size(0)

        # outputs are log-probabilities; exp() is monotonic, so the
        # original's torch.exp before the argmax was redundant.
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == labels).sum().item()

    # Log once per epoch. (The original used for/else: with no `break` in
    # the loop the `else` always runs, so a plain statement is equivalent.)
    print(f'Epoch {e} Training: Loss={running_loss:.5f} Acc={correct/total * 100:.2f}')

4.保存模型

# 4. Save the model: record the backbone name, the classifier head module,
# and all trained weights.
checkpoints = {
    'pre-trained': 'vgg16',
    # Fix: the original re-declared a second, randomly-initialized copy of
    # the whole nn.Sequential head here; reuse the actual trained head
    # instead (load_state_dict on load overwrites its weights anyway).
    'classifier': model.classifier,
    'state_dict': model.state_dict(),
}

torch.save(checkpoints, 'vgg16_catsVdogs.pth')
def load_saved_model(path):
    """Rebuild the fine-tuned VGG16 from a checkpoint file.

    Args:
        path: checkpoint produced by the torch.save call above, containing
            a 'classifier' nn.Module and a 'state_dict' entry.

    Returns:
        The reconstructed model, frozen and switched to eval mode.
    """
    checkpoint = torch.load(path)

    model = models.vgg16(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.classifier = checkpoint['classifier']
    # Bug fix: the original read from the *global* `checkpoints` dict
    # instead of the `checkpoint` just loaded from disk, so this function
    # only worked in the very session that created the file.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model


loaded_model = load_saved_model('vgg16_catsVdogs.pth')
loaded_model.to(device)

5.测试模型

# Fix: `Image` (PIL) was used below but never imported in the original.
from PIL import Image

# Same preprocessing as training, minus the random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

predictions = []

# Predict each of the 2000 test images, named 0.jpg ... 1999.jpg.
for i in tqdm(range(0, 2000)):
    # Bug fix: the archive was unzipped to 'cat_dog/' earlier in the post,
    # but the original read from 'catdog/test/'.
    path = 'cat_dog/test/' + str(i) + '.jpg'
    X = Image.open(path).convert('RGB')
    X = test_transform(X)[:3, :, :]
    # Add the batch dimension the model expects: (1, 3, 224, 224).
    X = X.unsqueeze(0)
    X = X.to(device)
    outputs = loaded_model(X)
    # argmax over log-probabilities: 0 = cat, 1 = dog.
    predictions.append(torch.argmax(outputs).item())

6.生成结果

# 6. Write the submission CSV: one row per test image id with its
# predicted label.
# Fix: `pd` was used but pandas was never imported in the original.
import pandas as pd

data = {'id': list(range(0, 2000)), 'label': predictions}
df = pd.DataFrame(data)
df.to_csv('cats-dogs-submission.csv', index=False)

7.总结与结果分析

  • 仅仅训练最后一层时,得到的准确率为 96.5%
  • 多训练一层参数时(Layer 1),得到的准确率为 97.45%
  • 加入数据扩增方法(对原图片进行水平竖直平移、翻转、缩放)、换用损失函数 NLLLoss、增加迭代 epoch 为 5 后,得到的准确率为 98.45%

分类:

技术点:

相关文章:

  • 2021-10-15
  • 2021-10-15
  • 2021-10-15
  • 2021-10-15
  • 2021-04-22
  • 2021-08-03
  • 2022-01-02
  • 2021-10-15
猜你喜欢
  • 2021-10-15
  • 2021-10-15
  • 2021-10-15
  • 2021-10-25
  • 2021-10-15
相关资源
相似解决方案