【问题标题】:How to get time taken for each layer in Pytorch?如何在 Pytorch 中获取每一层所花费的时间?
【发布时间】:2022-08-08 00:26:17
【问题描述】:

我想知道 Alexnet 中某一层的推理时间。此代码测量随着批量大小变化时 Alexnet 第一个全连接层的推理时间。我对此有几个问题。

  1. 是否可以使用以下代码准确测量推理时间?
  2. 由于 CPU 和 GPU 是分开(异步)运行的,测得的时间是否会存在偏差?
  3. Pytorch 中是否有用于测量层推理时间的模块?

    给定以下代码:

    import torch
    import torch.optim as optim
    import torch.nn as nn
    import torch.nn.functional as F
    from torchvision import transforms
    import time
    from tqdm import tqdm
    
    
    class AlexNet(nn.Module):
        """AlexNet-style network whose ``time`` method clocks the ``fc1`` layer.

        The layers are stored as individual attributes (five convolutions,
        three fully connected layers, plus shared ReLU / pooling / dropout
        modules) so that a single layer can be wrapped with timing code.
        """

        def __init__(self):
            super().__init__()

            # Parameter-free modules reused by several stages.
            self.relu = nn.ReLU(inplace=True)
            self.maxpool2D = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
            self.adaptive_avg_polling = nn.AdaptiveAvgPool2d((6, 6))
            self.dropout = nn.Dropout(p=0.5)

            # Convolutional stages.
            self.conv1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
            self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
            self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
            self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
            self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

            # Classifier head; fc1 consumes the flattened 256 x 6 x 6 map.
            self.fc1 = nn.Linear(256 * 6 * 6, 4096)
            self.fc2 = nn.Linear(4096, 4096)
            self.fc3 = nn.Linear(4096, 1000)

        def time(self, x):
            """Run the whole network on ``x`` and return the seconds spent in ``fc1``.

            The duration is the difference of two ``time.time()`` readings
            taken immediately before and after the ``fc1`` call. The network
            output itself is discarded.

            NOTE(review): no device synchronization happens around the timed
            call, so on a CUDA device the reading may not cover the layer's
            actual GPU computation.
            """
            # (convolution, is it followed by max-pooling?) for each stage.
            conv_stages = (
                (self.conv1, True),
                (self.conv2, True),
                (self.conv3, False),
                (self.conv4, False),
                (self.conv5, True),
            )
            out = x
            for conv, pooled in conv_stages:
                out = self.relu(conv(out))
                if pooled:
                    out = self.maxpool2D(out)
            out = self.adaptive_avg_polling(out)

            # Flatten everything except the batch dimension.
            flat = out.view(out.size(0), -1)
            flat = self.dropout(flat)

            began = time.time()
            hidden = self.fc1(flat)
            elapsed = time.time() - began

            # The rest of the head still runs, but its result is not returned.
            hidden = self.relu(hidden)
            hidden = self.dropout(hidden)
            hidden = self.fc2(hidden)
            hidden = self.relu(hidden)
            hidden = self.fc3(hidden)

            return elapsed
    
    
    
    def layer_time():
        """Print the accumulated ``fc1`` time for ten doubling batch sizes.

        Starting at batch size 1 and doubling nine times, a random input of
        shape ``(batch_size, 3, 227, 227)`` is passed through
        ``AlexNet.time`` ``test_iter`` times; the sum of the returned
        durations (seconds) is printed once per batch size.
        """
        use_cuda = torch.cuda.is_available()
        print("use_cuda : ", use_cuda)

        FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
        device = torch.device("cuda:0" if use_cuda else "cpu")

        net = AlexNet().to(device)

        test_iter = 10000
        batch_size = 1
        for _ in range(10):
            # Cast to the (CPU or CUDA) float tensor type chosen above.
            X = torch.randn(size=(batch_size, 3, 227, 227)).type(FloatTensor)
            s = 0.0
            for _ in tqdm(range(test_iter)):
                s += net.time(X)
            print(s)
            batch_size *= 2


    layer_time()
    
    
  • This solution 应该有助于确保在您测量 finish - start 时,该层的 GPU 计算已经完成

标签: python pytorch


【解决方案1】:

通过查看AMP document,我找到了一种计时推理的方法。使用此方法,GPU 和 CPU 可以同步,并且可以准确测量推理时间。

import torch, time, gc

# Timing utilities
# Clock reading taken by start_timer(); None until the first call.
start_time = None
# Whether start_timer() found CUDA usable; tells end_timer() to synchronize
# without querying the driver inside the timed region.
_cuda_active = False


def start_timer():
    """Start the shared stopwatch.

    Runs a garbage collection and, when CUDA is available, empties the CUDA
    caching allocator, resets the peak-memory statistics and waits for all
    queued GPU work to finish, so that earlier work is not attributed to
    the region about to be timed. On a machine without CUDA the GPU steps
    are skipped instead of raising.
    """
    global start_time, _cuda_active
    gc.collect()
    _cuda_active = torch.cuda.is_available()
    if _cuda_active:
        torch.cuda.empty_cache()
        # Non-deprecated replacement for reset_max_memory_allocated().
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()


def end_timer():
    """Return the seconds elapsed since the last ``start_timer()`` call.

    When CUDA was available at ``start_timer()`` time, this first waits for
    all queued GPU work to finish so the timed operation is fully included.
    ``start_timer()`` must have been called beforehand.

    Returns:
        Elapsed time in seconds as a float.
    """
    if _cuda_active:
        torch.cuda.synchronize()
    return time.perf_counter() - start_time

所以我的代码更改如下:

import torch, time, gc
from tqdm import tqdm
import torch.nn as nn
import torch

# Timing utilities
# Clock reading taken by start_timer(); None until the first call.
start_time = None
# Whether start_timer() found CUDA usable; tells end_timer() to synchronize
# without querying the driver inside the timed region.
_cuda_active = False


def start_timer():
    """Start the shared stopwatch.

    Runs a garbage collection and, when CUDA is available, empties the CUDA
    caching allocator, resets the peak-memory statistics and waits for all
    queued GPU work to finish, so that earlier work is not attributed to
    the region about to be timed. On a machine without CUDA the GPU steps
    are skipped instead of raising.
    """
    global start_time, _cuda_active
    gc.collect()
    _cuda_active = torch.cuda.is_available()
    if _cuda_active:
        torch.cuda.empty_cache()
        # Non-deprecated replacement for reset_max_memory_allocated().
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()


def end_timer():
    """Return the seconds elapsed since the last ``start_timer()`` call.

    When CUDA was available at ``start_timer()`` time, this first waits for
    all queued GPU work to finish so the timed operation is fully included.
    ``start_timer()`` must have been called beforehand.

    Returns:
        Elapsed time in seconds as a float.
    """
    if _cuda_active:
        torch.cuda.synchronize()
    return time.perf_counter() - start_time


class AlexNet(nn.Module):
    """AlexNet-style network whose ``time`` method clocks the ``fc1`` layer.

    The layers are stored as individual attributes (five convolutions,
    three fully connected layers, plus shared ReLU / pooling / dropout
    modules) so that a single layer can be wrapped with timing code.
    """

    def __init__(self):
        super().__init__()

        # Parameter-free modules reused by several stages.
        self.relu = nn.ReLU(inplace=True)
        self.maxpool2D = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.adaptive_avg_polling = nn.AdaptiveAvgPool2d((6, 6))
        self.dropout = nn.Dropout(p=0.5)

        # Convolutional stages.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        # Classifier head; fc1 consumes the flattened 256 x 6 x 6 map.
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def time(self, x):
        """Run the whole network on ``x`` and return the time spent in ``fc1``.

        The ``fc1`` call is bracketed by the module-level ``start_timer()``
        and ``end_timer()`` helpers, and the value returned by
        ``end_timer()`` is handed back to the caller. The network output
        itself is discarded.
        """
        # (convolution, is it followed by max-pooling?) for each stage.
        conv_stages = (
            (self.conv1, True),
            (self.conv2, True),
            (self.conv3, False),
            (self.conv4, False),
            (self.conv5, True),
        )
        out = x
        for conv, pooled in conv_stages:
            out = self.relu(conv(out))
            if pooled:
                out = self.maxpool2D(out)
        out = self.adaptive_avg_polling(out)

        # Flatten everything except the batch dimension.
        flat = out.view(out.size(0), -1)
        flat = self.dropout(flat)

        # Time only the first linear layer.
        start_timer()
        hidden = self.fc1(flat)
        fc1_seconds = end_timer()

        # The rest of the head still runs, but its result is not returned.
        hidden = self.relu(hidden)
        hidden = self.dropout(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu(hidden)
        hidden = self.fc3(hidden)

        return fc1_seconds


def layer_time(test_iter=1000, num_batch_sizes=10):
    """Benchmark ``AlexNet.fc1`` over doubling batch sizes and print totals.

    Starting at batch size 1 and doubling each round, a random input of
    shape ``(batch_size, 3, 227, 227)`` is passed through ``AlexNet.time``
    ``test_iter`` times; the sum of the returned durations is printed once
    per batch size. The model runs in evaluation mode with gradient
    tracking disabled, so the numbers reflect inference rather than a
    training forward pass.

    Args:
        test_iter: Number of timed passes per batch size.
        num_batch_sizes: How many batch sizes to try (1, 2, 4, ...).
    """
    use_cuda = torch.cuda.is_available()
    print("use_cuda : ", use_cuda)

    device = torch.device("cuda:0" if use_cuda else "cpu")

    net = AlexNet().to(device)
    # Inference mode: turns the dropout layers into no-ops.
    net.eval()

    batch_size = 1
    # Autograd bookkeeping is not part of inference, so keep it out of the
    # measurement.
    with torch.no_grad():
        for _ in range(num_batch_sizes):
            # Create the float32 input directly on the target device instead
            # of casting through the legacy (cuda.)FloatTensor types.
            X = torch.randn(
                size=(batch_size, 3, 227, 227),
                dtype=torch.float32,
                device=device,
            )
            # One untimed pass: the first run at a new shape may include
            # one-off setup cost that would skew the total.
            net.time(X)

            s = 0.0
            for _ in tqdm(range(test_iter)):
                s += net.time(X)
            print(s)
            batch_size *= 2


if __name__ == "__main__":
    layer_time()

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 2011-11-26
    • 1970-01-01
    • 2013-07-13
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2021-12-13
    • 1970-01-01
    相关资源
    最近更新 更多