我想在使用 PyTorch 执行计算时使用 GPU 而不是 CPU答案

【问题标题】：I want to use the GPU instead of CPU while performing computations using PyTorch我想在使用 PyTorch 执行计算时使用 GPU 而不是 CPU
【发布时间】：2020-06-06 16:53:58
【问题描述】：

我正在尝试将压力从 CPU 转移到 GPU，因为我信赖的 RTX2070 可以比 CPU 做得更好，但我一直遇到这个问题，而且我对 AI 还很陌生，所以如果你愿意分享一些就任何潜在的解决方案向我提出见解，我们将不胜感激，谢谢。

**我正在使用 PyTorch

这是我正在使用的代码：

# to measure run-time

# for csv dataset
import os
# to shuffle data
import random
# to get the alphabet
import string

# import statements for iterating over csv file
import cv2
# for plotting
import matplotlib.pyplot as plt
import numpy as np
# pytorch stuff
import torch
import torch.nn as nn
from PIL import Image

# generate the targets 
# the targets are one hot encoding vectors
# print(torch.cuda.is_available())
nvcc_args = [
    '-gencode', 'arch=compute_30,code=sm_30',
    '-gencode', 'arch=compute_35,code=sm_35',
    '-gencode', 'arch=compute_37,code=sm_37',
    '-gencode', 'arch=compute_50,code=sm_50',
    '-gencode', 'arch=compute_52,code=sm_52',
    '-gencode', 'arch=compute_60,code=sm_60',
    '-gencode', 'arch=compute_61,code=sm_61',
    '-gencode', 'arch=compute_70,code=sm_70',
    '-gencode', 'arch=compute_75,code=sm_75'
]

alphabet = list(string.ascii_lowercase)
target = {}

# Initalize a target dict that has letters as its keys and empty one-hot encoding vectors of size 37 as its values
for letter in alphabet:
    target[letter] = [0] * 37

# Do the one-hot encoding for each letter now 
curr_pos = 0
for curr_letter in target.keys():
    target[curr_letter][curr_pos] = 1
    curr_pos += 1

# extra symbols 
symbols = ["space", "number", "period", "comma", "colon", "apostrophe", "hyphen", "semicolon", "question",
           "exclamation", "capitalize"]

# create vectors
for curr_symbol in symbols:
    target[curr_symbol] = [0] * 37

# create one-hot encoding vectors
for curr_symbol in symbols:
    target[curr_symbol][curr_pos] = 1
    curr_pos += 1

# collect all data from the csv file
data = []

for tgt in os.listdir("dataset"):
    if not tgt == ".DS_Store":
        for folder in os.listdir("dataset/" + tgt + "/Uploaded"):
            if not folder == ".DS_Store":
                for filename in os.listdir("dataset/" + tgt + "/Uploaded/" + folder):
                    if not filename == ".DS_Store":
                        # store the image and label
                        picture = []
                        curr_target = target[tgt]
                        image = Image.open("dataset/" + tgt + "/Uploaded/" + folder + "/" + filename)
                        image = image.convert('RGB')
                        # f.show()
                        image = np.array(image)
                        # resize image to 28x28x3
                        image = cv2.resize(image, (28, 28))
                        # normalize to 0-1
                        image = image.astype(np.float32) / 255.0
                        image = torch.from_numpy(image)
                        picture.append(image)
                        # convert the target to a long tensor
                        curr_target = torch.Tensor([curr_target])
                        picture.append(curr_target)
                        # append the current image & target
                        data.append(picture)

# create a dictionary of all the characters 
characters = alphabet + symbols

index2char = {}
number = 0
for char in characters:
    index2char[number] = char
    number += 1


# find the number of each character in a dataset
def num_chars(dataset, index2char):
    chars = {}
    for _, label in dataset:
        char = index2char[int(torch.argmax(label))]
        # update
        if char in chars:
            chars[char] += 1
        # initialize
        else:
            chars[char] = 1
    return chars


# Create dataloader objects

# shuffle all the data
random.shuffle(data)

# batch sizes for train, test, and validation
batch_size_train = 30
batch_size_test = 30
batch_size_validation = 30

# splitting data to get training, test, and validation sets
# change once get more data
# 1600 for train
train_dataset = data[:22000]
# test has 212
test_dataset = data[22000:24400]
# validation has 212
validation_dataset = data[24400:]

# create the dataloader objects
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size_test, shuffle=False)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=batch_size_validation,
                                                shuffle=True)

# to check if a dataset is missing a char
test_chars = num_chars(test_dataset, index2char)

num = 0
for char in characters:
    if char in test_chars:
        num += 1
    else:
        break
print(num)


class CNN(nn.Module):
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        super(CNN, self).__init__()
        self.block1 = nn.Sequential(
            # 3x28x28
            nn.Conv2d(in_channels=3,
                      out_channels=16,
                      kernel_size=5,
                      stride=1,
                      padding=2),
            # batch normalization
            # nn.BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True), 
            # 16x28x28
            nn.MaxPool2d(kernel_size=2),
            # 16x14x14
            nn.LeakyReLU()
        )
        # 16x14x14
        self.block2 = nn.Sequential(
            nn.Conv2d(in_channels=16,
                      out_channels=32,
                      kernel_size=5,
                      stride=1,
                      padding=2),
            # batch normalization
            # nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True), 
            # 32x14x14
            nn.MaxPool2d(kernel_size=2),
            # 32x7x7
            nn.LeakyReLU()
        )
        # linearly 
        self.block3 = nn.Sequential(
            nn.Linear(32 * 7 * 7, 100),
            # batch normalization
            # nn.BatchNorm1d(100),
            nn.LeakyReLU(),
            nn.Linear(100, 37)
        )
        # 1x37

    def forward(self, x):
        out = self.block1(x)
        out = self.block2(out)
        # flatten the dataset
        out = out.view(-1, 32 * 7 * 7)
        out = self.block3(out)
        return out


# convolutional neural network model
model = CNN()
model.cuda()


# print summary of the neural network model to check if everything is fine. 
print(model)
print("# parameter: ", sum([param.nelement() for param in model.parameters()]))

# setting the learning rate
learning_rate = 1e-4

# Using a variable to store the cross entropy method
criterion = nn.CrossEntropyLoss()

# Using a variable to store the optimizer 
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# list of all train_losses 
train_losses = []

# list of all validation losses 
validation_losses = []

# for loop that iterates over all the epochs
num_epochs = 20
for epoch in range(num_epochs):

    # variables to store/keep track of the loss and number of iterations
    train_loss = 0
    num_iter_train = 0

    # train the model
    model.train()

    # Iterate over train_loader
    for i, (images, labels) in enumerate(train_loader):
        # need to permute so that the images are of size 3x28x28 
        # essential to be able to feed images into the model
        images = images.permute(0, 3, 1, 2)

        # Zero the gradient buffer
        # resets the gradient after each epoch so that the gradients don't add up
        optimizer.zero_grad()

        # Forward, get output
        outputs = model(images)

        # convert the labels from one hot encoding vectors into integer values 
        labels = labels.view(-1, 37)
        y_true = torch.argmax(labels, 1)

        # calculate training loss
        loss = criterion(outputs, y_true)

        # Backward (computes all the gradients)
        loss.backward()

        # Optimize
        # loops through all parameters and updates weights by using the gradients 
        # takes steps backwards to optimize (to reach the minimum weight)
        optimizer.step()
        # update the training loss and number of iterations
        train_loss += loss.data
        num_iter_train += 1

    print('Epoch: {}'.format(epoch + 1))
    print('Training Loss: {:.4f}'.format(train_loss / num_iter_train))
    # append training loss over all the epochs
    train_losses.append(train_loss / num_iter_train)

    # evaluate the model
    model.eval()

    # variables to store/keep track of the loss and number of iterations
    validation_loss = 0
    num_iter_validation = 0

    # Iterate over validation_loader
    for i, (images, labels) in enumerate(validation_loader):
        # need to permute so that the images are of size 3x28x28 
        # essential to be able to feed images into the model
        images = images.permute(0, 3, 1, 2)

        # Forward, get output
        outputs = model(images)

        # convert the labels from one hot encoding vectors to integer values
        labels = labels.view(-1, 37)
        y_true = torch.argmax(labels, 1)

        # calculate the validation loss
        loss = criterion(outputs, y_true)

        # update the training loss and number of iterations
        validation_loss += loss.data
        num_iter_validation += 1

    print('Validation Loss: {:.4f}'.format(validation_loss / num_iter_validation))
    # append all validation_losses over all the epochs
    validation_losses.append(validation_loss / num_iter_validation)

    num_iter_test = 0
    correct = 0

    # Iterate over test_loader
    for images, labels in test_loader:
        # need to permute so that the images are of size 3x28x28 
        # essential to be able to feed images into the model
        images = images.permute(0, 3, 1, 2)

        # Forward
        outputs = model(images)

        # convert the labels from one hot encoding vectors into integer values 
        labels = labels.view(-1, 37)
        y_true = torch.argmax(labels, 1)

        # find the index of the prediction
        y_pred = torch.argmax(outputs, 1).type('torch.FloatTensor')

        # convert to FloatTensor
        y_true = y_true.type('torch.FloatTensor')

        # find the mean difference of the comparisons
        correct += torch.sum(torch.eq(y_true, y_pred).type('torch.FloatTensor'))

    print('Accuracy on the test set: {:.4f}%'.format(correct / len(test_dataset) * 100))
    print()


# learning curve function
def plot_learning_curve(train_losses, validation_losses):
    # plot the training and validation losses
    plt.ylabel('Loss')
    plt.xlabel('Number of Epochs')
    plt.plot(train_losses, label="training")
    plt.plot(validation_losses, label="validation")
    plt.legend(loc=1)


# plot the learning curve
plt.title("Learning Curve (Loss vs Number of Epochs)")
plot_learning_curve(train_losses, validation_losses)

torch.save(model.state_dict(), "model1.pth")

【问题讨论】：

您是否使用可靠的搜索引擎找到了一个库以在您的 GPU 上运行代码？
这能回答你的问题吗？ How to run python code with support of GPU
@JoshJ 感谢您的链接，但据我所知，PyTorch 通过使用 func .cuda() 支持 GPU 计算，您能更明确一点吗？

标签： python tensorflow artificial-intelligence pytorch

【解决方案1】：

我也在使用值得信赖的 RTX 2070，这就是我进行 GPU 加速的方式（对于 1 个 GPU）：

cuda_ = "cuda:0"
device = torch.device(cuda_ if torch.cuda.is_available() else "cpu")
model = CNN()
model.to(device)

这是进行 GPU 加速的最新且推荐的方法，因为它提供了更大的灵活性（即使 GPU 不可用，也无需修改代码）。您也可以通过images = images.to(device) 将图像传递到 GPU vram。

【讨论】：

您好，感谢您与我分享解决方案，只是为了确保我们在同一页面上，是否必须将图像也传输到 GPU，或者我可以只坚持模型，因为我已经尝试过您的解决方案，它产生了相同的输出，然后在尝试使用图像时也得到了以下答案： RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'target ' 调用 _thnn_nll_loss_forward
是的，在计算损失时你也应该将标签传递给设备（它期望输入和目标都在同一个设备上：cpu/gpu）。是的，加速是必要的，否则你需要使用pin_memory=True。这是一个相关的线程：discuss.pytorch.org/t/…
感谢您提供的额外信息，我调整了一些参数，现在训练速度更快，CPU 冷却了 25% 左右。有趣的是，GPU 没有我想象的那么多使用，只有 4% 的外部显示器连接到 GPU 也是如此……我认为这对 RTX 来说并不困难。再次感谢您花时间向我解释整个情况，祝您一切顺利！