在caffe中训练的时候如果使用多GPU则直接在运行程序的时候指定GPU的index即可,但是在Pytorch中则需要在声明模型之后,对声明的模型进行初始化,如:
cnn = DataParallel(AlexNet())
之后直接运行PyTorch程序则默认使用所有的GPU。为了说明上述初始化的作用,我用了一组畸变图像的数据集,写了一个ResNet的模块,训练了50个epoch,对比一下实验耗时的差别,代码如下:
# -*- coding: utf-8 -*-
# Implementation of https://arxiv.org/pdf/1512.03385.pdf/
# See section 4.2 for model architecture on CIFAR-10.
# Some parts of the code were referenced from:
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

import os
import time

from PIL import Image

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.utils.data as data
from torch.nn import DataParallel


kwargs = {'num_workers': 1, 'pin_memory': True}


def default_loader(path):
    """Load the image at *path* and force 3-channel RGB."""
    return Image.open(path).convert('RGB')


class myImageFloder(data.Dataset):
    """Dataset of (image, float-target-vector) pairs described by a label file.

    The label file's first line lists space-separated class names; each
    following line is "<relative_path> <float> <float> ..." for one image
    found under *root*. Missing files are silently skipped.
    """

    def __init__(self, root, label, transform=None, target_transform=None,
                 loader=default_loader):
        imgs = []
        class_names = []
        # BUGFIX: the label file handle was opened and never closed.
        with open(label) as fh:
            for c, line in enumerate(fh):
                if c == 0:
                    # header line: the class names
                    class_names = [n.strip() for n in line.rstrip().split(' ')]
                else:
                    cls = line.split()
                    fn = cls.pop(0)  # first token is the image file name
                    if os.path.isfile(os.path.join(root, fn)):
                        # store (filename, tuple of float regression targets)
                        imgs.append((fn, tuple(float(v) for v in cls)))
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return (transformed image tensor, float target tensor)."""
        fn, label = self.imgs[index]
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        """Return the class names read from the label file's header."""
        return self.classes


mytransform = transforms.Compose([transforms.ToTensor()])  # tensor conversion only

train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"

train_loader = torch.utils.data.DataLoader(
    myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)


def conv3x3(in_channels, out_channels, stride=1):
    """3x3 convolution with padding 1 (spatial size preserved at stride 1)."""
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)


class ResidualBlock(nn.Module):
    """Basic two-conv residual block (conv-bn-relu-conv-bn + skip)."""

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample  # projection for the skip path, if any

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # BUGFIX: compare against None explicitly; a falsy (empty) module
        # would silently skip the projection under a bare truthiness test.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    """Five-stage ResNet regressor ending in a single linear output.

    With the default ``num_classes=1`` this is a scalar regressor (trained
    below with MSE loss), not a classifier.
    """

    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # NOTE(review): layers[2] is never used -- layer2 reuses layers[0]
        # and layers 3-5 reuse layers[1]. Kept as-is to preserve the trained
        # architecture; confirm whether per-stage counts were intended.
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.layer4 = self.make_layer(block, 128, layers[1], 2)
        self.layer5 = self.make_layer(block, 256, layers[1], 2)
        # assumes 256x256 input so the feature map is 16x16 here -> 2x2 out;
        # TODO confirm against the dataset's image size
        self.avg_pool = nn.AvgPool2d(kernel_size=8, stride=8)
        self.fc = nn.Linear(256 * 2 * 2, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Stack *blocks* residual blocks, projecting the skip when needed."""
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            # strided conv + BN projects the identity to the new shape,
            # e.g. 256x256x16 -> 128x128x32
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels  # subsequent blocks keep this width
        for _ in range(1, blocks):  # first block already appended above
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)  # flatten for the linear head
        out = self.fc(out)
        return out


# Wrapping in DataParallel makes forward/backward split each batch across
# all visible GPUs. Generalized with a CPU fallback so the script also runs
# on machines without CUDA.
use_cuda = torch.cuda.is_available()
resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
if use_cuda:
    resnet.cuda()

# Loss and optimizer
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training
num_epochs = 50
start = time.perf_counter()  # BUGFIX: time.clock() was removed in Python 3.8
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        # Forward + backward + optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            # BUGFIX: report the real totals (the original hard-coded
            # "80" epochs and "500" iterations, printing e.g. Iter [900/500]),
            # and use loss.item() -- loss.data[0] raises on PyTorch >= 0.5.
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f"
                  % (epoch + 1, num_epochs, i + 1, len(train_loader),
                     loss.item()))

    # Decaying learning rate every 20 epochs.
    # BUGFIX: update param_groups in place instead of constructing a fresh
    # Adam, which silently discarded the accumulated moment estimates.
    if (epoch + 1) % 20 == 0:
        lr /= 3
        for group in optimizer.param_groups:
            group['lr'] = lr

elapsed = time.perf_counter() - start
print("time used:", elapsed)

# Save the model.
# BUGFIX: save the wrapped module's weights; saving the DataParallel
# wrapper's state_dict prefixes every key with "module.", which breaks
# loading into a plain (non-parallel) ResNet later.
torch.save(resnet.module.state_dict(), 'resnet.pkl')
作为对比实验,我们同时把ResNet的声明方式修改为
1 resnet = ResNet(ResidualBlock, [3, 3, 3])
其余不变,在运行程序的时候不指定GPU,直接python resnet.py。在声明DataParallel时,运行耗时结果如下:
('time used:', 17124.861335999998),watch -n 1 nvidia-smi确实显示占用两块GPU
在不声明DataParallel时,实验运行结果耗时如下:
('time used:', 30318.149681000003),watch -n 1 nvidia-smi确实显示占用一块GPU
可以看出,在声明DataParallel时训练时间压缩了近一半,所以声明DataParallel是使用多GPU运行PyTorch的一种方法。
官方的doc也给出了多GPU使用的例子以及部分数据在GPU与部分数据在CPU上运行的例子
以下是两组实验结果的输出:
DataParallel初始化
1 Epoch [1/80], Iter [100/500] Loss: 916.5578 2 3 Epoch [1/80], Iter [200/500] Loss: 172.2591 4 5 Epoch [1/80], Iter [300/500] Loss: 179.8360 6 7 Epoch [1/80], Iter [400/500] Loss: 259.6867 8 9 Epoch [1/80], Iter [500/500] Loss: 244.0616 10 11 Epoch [1/80], Iter [600/500] Loss: 74.7015 12 13 Epoch [1/80], Iter [700/500] Loss: 63.1657 14 15 Epoch [1/80], Iter [800/500] Loss: 90.3517 16 17 Epoch [1/80], Iter [900/500] Loss: 70.4562 18 19 Epoch [2/80], Iter [100/500] Loss: 52.3249 20 21 Epoch [2/80], Iter [200/500] Loss: 129.1855 22 23 Epoch [2/80], Iter [300/500] Loss: 110.0157 24 25 Epoch [2/80], Iter [400/500] Loss: 64.9313 26 27 Epoch [2/80], Iter [500/500] Loss: 87.8385 28 29 Epoch [2/80], Iter [600/500] Loss: 118.5828 30 31 Epoch [2/80], Iter [700/500] Loss: 123.9575 32 33 Epoch [2/80], Iter [800/500] Loss: 79.1908 34 35 Epoch [2/80], Iter [900/500] Loss: 61.8099 36 37 Epoch [3/80], Iter [100/500] Loss: 50.4294 38 39 Epoch [3/80], Iter [200/500] Loss: 106.8135 40 41 Epoch [3/80], Iter [300/500] Loss: 83.2198 42 43 Epoch [3/80], Iter [400/500] Loss: 60.7116 44 45 Epoch [3/80], Iter [500/500] Loss: 101.9553 46 47 Epoch [3/80], Iter [600/500] Loss: 64.6967 48 49 Epoch [3/80], Iter [700/500] Loss: 66.2446 50 51 Epoch [3/80], Iter [800/500] Loss: 81.1825 52 53 Epoch [3/80], Iter [900/500] Loss: 53.9905 54 55 Epoch [4/80], Iter [100/500] Loss: 76.2977 56 57 Epoch [4/80], Iter [200/500] Loss: 18.4255 58 59 Epoch [4/80], Iter [300/500] Loss: 57.6188 60 61 Epoch [4/80], Iter [400/500] Loss: 45.6235 62 63 Epoch [4/80], Iter [500/500] Loss: 82.9265 64 65 Epoch [4/80], Iter [600/500] Loss: 119.6085 66 67 Epoch [4/80], Iter [700/500] Loss: 53.1355 68 69 Epoch [4/80], Iter [800/500] Loss: 29.5248 70 71 Epoch [4/80], Iter [900/500] Loss: 57.0401 72 73 Epoch [5/80], Iter [100/500] Loss: 47.2671 74 75 Epoch [5/80], Iter [200/500] Loss: 31.6928 76 77 Epoch [5/80], Iter [300/500] Loss: 38.0040 78 79 Epoch [5/80], Iter [400/500] Loss: 24.5184 80 81 Epoch [5/80], Iter [500/500] 
Loss: 33.8515 82 83 Epoch [5/80], Iter [600/500] Loss: 43.6560 84 85 Epoch [5/80], Iter [700/500] Loss: 68.2500 86 87 Epoch [5/80], Iter [800/500] Loss: 30.8259 88 89 Epoch [5/80], Iter [900/500] Loss: 43.9696 90 91 Epoch [6/80], Iter [100/500] Loss: 22.4120 92 93 Epoch [6/80], Iter [200/500] Loss: 45.5722 94 95 Epoch [6/80], Iter [300/500] Loss: 26.8331 96 97 Epoch [6/80], Iter [400/500] Loss: 58.1139 98 99 Epoch [6/80], Iter [500/500] Loss: 12.8767 100 101 Epoch [6/80], Iter [600/500] Loss: 26.6725 102 103 Epoch [6/80], Iter [700/500] Loss: 31.9800 104 105 Epoch [6/80], Iter [800/500] Loss: 91.2332 106 107 Epoch [6/80], Iter [900/500] Loss: 44.1361 108 109 Epoch [7/80], Iter [100/500] Loss: 13.1401 110 111 Epoch [7/80], Iter [200/500] Loss: 20.9435 112 113 Epoch [7/80], Iter [300/500] Loss: 28.0944 114 115 Epoch [7/80], Iter [400/500] Loss: 24.0240 116 117 Epoch [7/80], Iter [500/500] Loss: 43.3279 118 119 Epoch [7/80], Iter [600/500] Loss: 23.3077 120 121 Epoch [7/80], Iter [700/500] Loss: 32.9658 122 123 Epoch [7/80], Iter [800/500] Loss: 27.2044 124 125 Epoch [7/80], Iter [900/500] Loss: 25.5850 126 127 Epoch [8/80], Iter [100/500] Loss: 39.7642 128 129 Epoch [8/80], Iter [200/500] Loss: 17.7421 130 131 Epoch [8/80], Iter [300/500] Loss: 29.8965 132 133 Epoch [8/80], Iter [400/500] Loss: 20.6153 134 135 Epoch [8/80], Iter [500/500] Loss: 43.0224 136 137 Epoch [8/80], Iter [600/500] Loss: 58.1552 138 139 Epoch [8/80], Iter [700/500] Loss: 19.1967 140 141 Epoch [8/80], Iter [800/500] Loss: 34.9122 142 143 Epoch [8/80], Iter [900/500] Loss: 15.0651 144 145 Epoch [9/80], Iter [100/500] Loss: 18.5950 146 147 Epoch [9/80], Iter [200/500] Loss: 36.1891 148 149 Epoch [9/80], Iter [300/500] Loss: 22.4936 150 151 Epoch [9/80], Iter [400/500] Loss: 14.8044 152 153 Epoch [9/80], Iter [500/500] Loss: 16.6958 154 155 Epoch [9/80], Iter [600/500] Loss: 24.8461 156 157 Epoch [9/80], Iter [700/500] Loss: 13.7112 158 159 Epoch [9/80], Iter [800/500] Loss: 21.2906 160 161 Epoch 
[9/80], Iter [900/500] Loss: 31.6950 162 163 Epoch [10/80], Iter [100/500] Loss: 20.7707 164 165 Epoch [10/80], Iter [200/500] Loss: 15.6260 166 167 Epoch [10/80], Iter [300/500] Loss: 28.5737 168 169 Epoch [10/80], Iter [400/500] Loss: 36.6791 170 171 Epoch [10/80], Iter [500/500] Loss: 38.9839 172 173 Epoch [10/80], Iter [600/500] Loss: 14.4459 174 175 Epoch [10/80], Iter [700/500] Loss: 10.0907 176 177 Epoch [10/80], Iter [800/500] Loss: 17.9035 178 179 Epoch [10/80], Iter [900/500] Loss: 24.5759 180 181 Epoch [11/80], Iter [100/500] Loss: 19.8531 182 183 Epoch [11/80], Iter [200/500] Loss: 15.7126 184 185 Epoch [11/80], Iter [300/500] Loss: 18.0198 186 187 Epoch [11/80], Iter [400/500] Loss: 19.3038 188 189 Epoch [11/80], Iter [500/500] Loss: 27.4435 190 191 Epoch [11/80], Iter [600/500] Loss: 18.1086 192 193 Epoch [11/80], Iter [700/500] Loss: 10.8124 194 195 Epoch [11/80], Iter [800/500] Loss: 31.2389 196 197 Epoch [11/80], Iter [900/500] Loss: 14.4881 198 199 Epoch [12/80], Iter [100/500] Loss: 10.6320 200 201 Epoch [12/80], Iter [200/500] Loss: 26.8394 202 203 Epoch [12/80], Iter [300/500] Loss: 16.0246 204 205 Epoch [12/80], Iter [400/500] Loss: 16.3263 206 207 Epoch [12/80], Iter [500/500] Loss: 24.5880 208 209 Epoch [12/80], Iter [600/500] Loss: 15.7498 210 211 Epoch [12/80], Iter [700/500] Loss: 11.4933 212 213 Epoch [12/80], Iter [800/500] Loss: 9.7252 214 215 Epoch [12/80], Iter [900/500] Loss: 31.6774 216 217 Epoch [13/80], Iter [100/500] Loss: 21.1929 218 219 Epoch [13/80], Iter [200/500] Loss: 17.0953 220 221 Epoch [13/80], Iter [300/500] Loss: 21.1883 222 223 Epoch [13/80], Iter [400/500] Loss: 15.9005 224 225 Epoch [13/80], Iter [500/500] Loss: 14.7924 226 227 Epoch [13/80], Iter [600/500] Loss: 12.4324 228 229 Epoch [13/80], Iter [700/500] Loss: 12.0840 230 231 Epoch [13/80], Iter [800/500] Loss: 30.9664 232 233 Epoch [13/80], Iter [900/500] Loss: 14.9601 234 235 Epoch [14/80], Iter [100/500] Loss: 6.5126 236 237 Epoch [14/80], Iter [200/500] 
Loss: 11.3227 238 239 Epoch [14/80], Iter [300/500] Loss: 12.9980 240 241 Epoch [14/80], Iter [400/500] Loss: 13.8523 242 243 Epoch [14/80], Iter [500/500] Loss: 10.6771 244 245 Epoch [14/80], Iter [600/500] Loss: 7.3953 246 247 Epoch [14/80], Iter [700/500] Loss: 14.6829 248 249 Epoch [14/80], Iter [800/500] Loss: 15.6956 250 251 Epoch [14/80], Iter [900/500] Loss: 21.8876 252 253 Epoch [15/80], Iter [100/500] Loss: 5.1943 254 255 Epoch [15/80], Iter [200/500] Loss: 13.0731 256 257 Epoch [15/80], Iter [300/500] Loss: 6.8931 258 259 Epoch [15/80], Iter [400/500] Loss: 15.3212 260 261 Epoch [15/80], Iter [500/500] Loss: 8.1775 262 263 Epoch [15/80], Iter [600/500] Loss: 11.5664 264 265 Epoch [15/80], Iter [700/500] Loss: 5.5951 266 267 Epoch [15/80], Iter [800/500] Loss: 10.9075 268 269 Epoch [15/80], Iter [900/500] Loss: 14.8503 270 271 Epoch [16/80], Iter [100/500] Loss: 19.5184 272 273 Epoch [16/80], Iter [200/500] Loss: 10.3570 274 275 Epoch [16/80], Iter [300/500] Loss: 10.0997 276 277 Epoch [16/80], Iter [400/500] Loss: 9.7350 278 279 Epoch [16/80], Iter [500/500] Loss: 11.3000 280 281 Epoch [16/80], Iter [600/500] Loss: 21.6213 282 283 Epoch [16/80], Iter [700/500] Loss: 9.7907 284 285 Epoch [16/80], Iter [800/500] Loss: 10.0128 286 287 Epoch [16/80], Iter [900/500] Loss: 10.7869 288 289 Epoch [17/80], Iter [100/500] Loss: 9.2015 290 291 Epoch [17/80], Iter [200/500] Loss: 7.3021 292 293 Epoch [17/80], Iter [300/500] Loss: 5.9662 294 295 Epoch [17/80], Iter [400/500] Loss: 17.5215 296 297 Epoch [17/80], Iter [500/500] Loss: 7.3349 298 299 Epoch [17/80], Iter [600/500] Loss: 8.5626 300 301 Epoch [17/80], Iter [700/500] Loss: 12.7575 302 303 Epoch [17/80], Iter [800/500] Loss: 10.7792 304 305 Epoch [17/80], Iter [900/500] Loss: 7.0889 306 307 Epoch [18/80], Iter [100/500] Loss: 10.5613 308 309 Epoch [18/80], Iter [200/500] Loss: 3.0777 310 311 Epoch [18/80], Iter [300/500] Loss: 6.3598 312 313 Epoch [18/80], Iter [400/500] Loss: 7.9515 314 315 Epoch [18/80], 
Iter [500/500] Loss: 10.8023 316 317 Epoch [18/80], Iter [600/500] Loss: 7.3443 318 319 Epoch [18/80], Iter [700/500] Loss: 8.0862 320 321 Epoch [18/80], Iter [800/500] Loss: 15.2795 322 323 Epoch [18/80], Iter [900/500] Loss: 10.2788 324 325 Epoch [19/80], Iter [100/500] Loss: 5.0786 326 327 Epoch [19/80], Iter [200/500] Loss: 8.8248 328 329 Epoch [19/80], Iter [300/500] Loss: 4.9262 330 331 Epoch [19/80], Iter [400/500] Loss: 7.8992 332 333 Epoch [19/80], Iter [500/500] Loss: 13.1279 334 335 Epoch [19/80], Iter [600/500] Loss: 8.2703 336 337 Epoch [19/80], Iter [700/500] Loss: 4.1547 338 339 Epoch [19/80], Iter [800/500] Loss: 9.0542 340 341 Epoch [19/80], Iter [900/500] Loss: 6.7904 342 343 Epoch [20/80], Iter [100/500] Loss: 8.6150 344 345 Epoch [20/80], Iter [200/500] Loss: 3.7212 346 347 Epoch [20/80], Iter [300/500] Loss: 6.2832 348 349 Epoch [20/80], Iter [400/500] Loss: 10.1591 350 351 Epoch [20/80], Iter [500/500] Loss: 9.7668 352 353 Epoch [20/80], Iter [600/500] Loss: 4.7498 354 355 Epoch [20/80], Iter [700/500] Loss: 4.8831 356 357 Epoch [20/80], Iter [800/500] Loss: 7.7877 358 359 Epoch [20/80], Iter [900/500] Loss: 8.5114 360 361 Epoch [21/80], Iter [100/500] Loss: 2.1853 362 363 Epoch [21/80], Iter [200/500] Loss: 5.8741 364 365 Epoch [21/80], Iter [300/500] Loss: 5.3676 366 367 Epoch [21/80], Iter [400/500] Loss: 3.1155 368 369 Epoch [21/80], Iter [500/500] Loss: 4.2433 370 371 Epoch [21/80], Iter [600/500] Loss: 1.9783 372 373 Epoch [21/80], Iter [700/500] Loss: 2.7622 374 375 Epoch [21/80], Iter [800/500] Loss: 2.0112 376 377 Epoch [21/80], Iter [900/500] Loss: 2.2692 378 379 Epoch [22/80], Iter [100/500] Loss: 2.1882 380 381 Epoch [22/80], Iter [200/500] Loss: 4.2540 382 383 Epoch [22/80], Iter [300/500] Loss: 4.0126 384 385 Epoch [22/80], Iter [400/500] Loss: 2.2220 386 387 Epoch [22/80], Iter [500/500] Loss: 2.4755 388 389 Epoch [22/80], Iter [600/500] Loss: 3.0793 390 391 Epoch [22/80], Iter [700/500] Loss: 1.9128 392 393 Epoch [22/80], Iter 
[800/500] Loss: 4.8721 394 395 Epoch [22/80], Iter [900/500] Loss: 2.1349 396 397 Epoch [23/80], Iter [100/500] Loss: 1.8705 398 399 Epoch [23/80], Iter [200/500] Loss: 2.4326 400 401 Epoch [23/80], Iter [300/500] Loss: 1.5636 402 403 Epoch [23/80], Iter [400/500] Loss: 2.0465 404 405 Epoch [23/80], Iter [500/500] Loss: 1.5183 406 407 Epoch [23/80], Iter [600/500] Loss: 2.2711 408 409 Epoch [23/80], Iter [700/500] Loss: 2.8997 410 411 Epoch [23/80], Iter [800/500] Loss: 2.6150 412 413 Epoch [23/80], Iter [900/500] Loss: 2.8083 414 415 Epoch [24/80], Iter [100/500] Loss: 2.7177 416 417 Epoch [24/80], Iter [200/500] Loss: 3.2044 418 419 Epoch [24/80], Iter [300/500] Loss: 3.8137 420 421 Epoch [24/80], Iter [400/500] Loss: 1.9400 422 423 Epoch [24/80], Iter [500/500] Loss: 2.3550 424 425 Epoch [24/80], Iter [600/500] Loss: 1.6304 426 427 Epoch [24/80], Iter [700/500] Loss: 1.1287 428 429 Epoch [24/80], Iter [800/500] Loss: 2.1436 430 431 Epoch [24/80], Iter [900/500] Loss: 1.3761 432 433 Epoch [25/80], Iter [100/500] Loss: 1.9115 434 435 Epoch [25/80], Iter [200/500] Loss: 0.9423 436 437 Epoch [25/80], Iter [300/500] Loss: 1.1732 438 439 Epoch [25/80], Iter [400/500] Loss: 1.8946 440 441 Epoch [25/80], Iter [500/500] Loss: 1.4359 442 443 Epoch [25/80], Iter [600/500] Loss: 2.7499 444 445 Epoch [25/80], Iter [700/500] Loss: 3.2734 446 447 Epoch [25/80], Iter [800/500] Loss: 1.5863 448 449 Epoch [25/80], Iter [900/500] Loss: 2.8276 450 451 Epoch [26/80], Iter [100/500] Loss: 3.3783 452 453 Epoch [26/80], Iter [200/500] Loss: 1.6336 454 455 Epoch [26/80], Iter [300/500] Loss: 1.8298 456 457 Epoch [26/80], Iter [400/500] Loss: 1.1775 458 459 Epoch [26/80], Iter [500/500] Loss: 2.5811 460 461 Epoch [26/80], Iter [600/500] Loss: 1.2587 462 463 Epoch [26/80], Iter [700/500] Loss: 2.3547 464 465 Epoch [26/80], Iter [800/500] Loss: 3.2238 466 467 Epoch [26/80], Iter [900/500] Loss: 1.8571 468 469 Epoch [27/80], Iter [100/500] Loss: 1.9582 470 471 Epoch [27/80], Iter [200/500] 
Loss: 0.8752 472 473 Epoch [27/80], Iter [300/500] Loss: 1.5140 474 475 Epoch [27/80], Iter [400/500] Loss: 1.4624 476 477 Epoch [27/80], Iter [500/500] Loss: 3.6735 478 479 Epoch [27/80], Iter [600/500] Loss: 2.5618 480 481 Epoch [27/80], Iter [700/500] Loss: 1.3707 482 483 Epoch [27/80], Iter [800/500] Loss: 1.2286 484 485 Epoch [27/80], Iter [900/500] Loss: 2.4623 486 487 Epoch [28/80], Iter [100/500] Loss: 0.8966 488 489 Epoch [28/80], Iter [200/500] Loss: 1.4363 490 491 Epoch [28/80], Iter [300/500] Loss: 1.3229 492 493 Epoch [28/80], Iter [400/500] Loss: 1.4402 494 495 Epoch [28/80], Iter [500/500] Loss: 1.4920 496 497 Epoch [28/80], Iter [600/500] Loss: 1.9604 498 499 Epoch [28/80], Iter [700/500] Loss: 3.1165 500 501 Epoch [28/80], Iter [800/500] Loss: 1.0391 502 503 Epoch [28/80], Iter [900/500] Loss: 2.5201 504 505 Epoch [29/80], Iter [100/500] Loss: 1.8787 506 507 Epoch [29/80], Iter [200/500] Loss: 0.9840 508 509 Epoch [29/80], Iter [300/500] Loss: 1.4460 510 511 Epoch [29/80], Iter [400/500] Loss: 2.2886 512 513 Epoch [29/80], Iter [500/500] Loss: 1.4231 514 515 Epoch [29/80], Iter [600/500] Loss: 1.4980 516 517 Epoch [29/80], Iter [700/500] Loss: 2.3995 518 519 Epoch [29/80], Iter [800/500] Loss: 1.7662 520 521 Epoch [29/80], Iter [900/500] Loss: 2.3659 522 523 Epoch [30/80], Iter [100/500] Loss: 1.9505 524 525 Epoch [30/80], Iter [200/500] Loss: 1.1663 526 527 Epoch [30/80], Iter [300/500] Loss: 0.9471 528 529 Epoch [30/80], Iter [400/500] Loss: 0.9364 530 531 Epoch [30/80], Iter [500/500] Loss: 1.0124 532 533 Epoch [30/80], Iter [600/500] Loss: 1.2437 534 535 Epoch [30/80], Iter [700/500] Loss: 0.8796 536 537 Epoch [30/80], Iter [800/500] Loss: 1.2183 538 539 Epoch [30/80], Iter [900/500] Loss: 2.3959 540 541 Epoch [31/80], Iter [100/500] Loss: 1.4337 542 543 Epoch [31/80], Iter [200/500] Loss: 1.1861 544 545 Epoch [31/80], Iter [300/500] Loss: 1.2915 546 547 Epoch [31/80], Iter [400/500] Loss: 1.0188 548 549 Epoch [31/80], Iter [500/500] Loss: 
2.2067 550 551 Epoch [31/80], Iter [600/500] Loss: 2.6476 552 553 Epoch [31/80], Iter [700/500] Loss: 1.1402 554 555 Epoch [31/80], Iter [800/500] Loss: 1.4248 556 557 Epoch [31/80], Iter [900/500] Loss: 1.0669 558 559 Epoch [32/80], Iter [100/500] Loss: 1.5955 560 561 Epoch [32/80], Iter [200/500] Loss: 1.7216 562 563 Epoch [32/80], Iter [300/500] Loss: 1.2304 564 565 Epoch [32/80], Iter [400/500] Loss: 1.7058 566 567 Epoch [32/80], Iter [500/500] Loss: 1.2115 568 569 Epoch [32/80], Iter [600/500] Loss: 1.6176 570 571 Epoch [32/80], Iter [700/500] Loss: 1.3043 572 573 Epoch [32/80], Iter [800/500] Loss: 1.9501 574 575 Epoch [32/80], Iter [900/500] Loss: 1.9035 576 577 Epoch [33/80], Iter [100/500] Loss: 1.9505 578 579 Epoch [33/80], Iter [200/500] Loss: 1.5603 580 581 Epoch [33/80], Iter [300/500] Loss: 1.5528 582 583 Epoch [33/80], Iter [400/500] Loss: 1.4192 584 585 Epoch [33/80], Iter [500/500] Loss: 1.2211 586 587 Epoch [33/80], Iter [600/500] Loss: 1.3927 588 589 Epoch [33/80], Iter [700/500] Loss: 2.3885 590 591 Epoch [33/80], Iter [800/500] Loss: 1.0948 592 593 Epoch [33/80], Iter [900/500] Loss: 1.6951 594 595 Epoch [34/80], Iter [100/500] Loss: 0.9534 596 597 Epoch [34/80], Iter [200/500] Loss: 0.7364 598 599 Epoch [34/80], Iter [300/500] Loss: 1.2372 600 601 Epoch [34/80], Iter [400/500] Loss: 1.6718 602 603 Epoch [34/80], Iter [500/500] Loss: 0.7804 604 605 Epoch [34/80], Iter [600/500] Loss: 2.1848 606 607 Epoch [34/80], Iter [700/500] Loss: 0.6333 608 609 Epoch [34/80], Iter [800/500] Loss: 1.6399 610 611 Epoch [34/80], Iter [900/500] Loss: 0.9555 612 613 Epoch [35/80], Iter [100/500] Loss: 1.5851 614 615 Epoch [35/80], Iter [200/500] Loss: 3.7824 616 617 Epoch [35/80], Iter [300/500] Loss: 2.5642 618 619 Epoch [35/80], Iter [400/500] Loss: 0.8965 620 621 Epoch [35/80], Iter [500/500] Loss: 1.9092 622 623 Epoch [35/80], Iter [600/500] Loss: 1.3729 624 625 Epoch [35/80], Iter [700/500] Loss: 2.2079 626 627 Epoch [35/80], Iter [800/500] Loss: 0.9051 628 
629 Epoch [35/80], Iter [900/500] Loss: 1.1845 630 631 Epoch [36/80], Iter [100/500] Loss: 0.8240 632 633 Epoch [36/80], Iter [200/500] Loss: 1.1929 634 635 Epoch [36/80], Iter [300/500] Loss: 1.7051 636 637 Epoch [36/80], Iter [400/500] Loss: 0.7341 638 639 Epoch [36/80], Iter [500/500] Loss: 0.8078 640 641 Epoch [36/80], Iter [600/500] Loss: 0.7525 642 643 Epoch [36/80], Iter [700/500] Loss: 1.5739 644 645 Epoch [36/80], Iter [800/500] Loss: 1.3938 646 647 Epoch [36/80], Iter [900/500] Loss: 0.7145 648 649 Epoch [37/80], Iter [100/500] Loss: 0.9577 650 651 Epoch [37/80], Iter [200/500] Loss: 0.9464 652 653 Epoch [37/80], Iter [300/500] Loss: 1.0931 654 655 Epoch [37/80], Iter [400/500] Loss: 1.0390 656 657 Epoch [37/80], Iter [500/500] Loss: 1.3472 658 659 Epoch [37/80], Iter [600/500] Loss: 0.6312 660 661 Epoch [37/80], Iter [700/500] Loss: 0.6754 662 663 Epoch [37/80], Iter [800/500] Loss: 0.5888 664 665 Epoch [37/80], Iter [900/500] Loss: 3.1377 666 667 Epoch [38/80], Iter [100/500] Loss: 0.8339 668 669 Epoch [38/80], Iter [200/500] Loss: 0.9345 670 671 Epoch [38/80], Iter [300/500] Loss: 0.6615 672 673 Epoch [38/80], Iter [400/500] Loss: 1.6327 674 675 Epoch [38/80], Iter [500/500] Loss: 0.4701 676 677 Epoch [38/80], Iter [600/500] Loss: 1.1513 678 679 Epoch [38/80], Iter [700/500] Loss: 0.9013 680 681 Epoch [38/80], Iter [800/500] Loss: 2.7680 682 683 Epoch [38/80], Iter [900/500] Loss: 1.2733 684 685 Epoch [39/80], Iter [100/500] Loss: 3.0368 686 687 Epoch [39/80], Iter [200/500] Loss: 1.5569 688 689 Epoch [39/80], Iter [300/500] Loss: 0.5049 690 691 Epoch [39/80], Iter [400/500] Loss: 0.4075 692 693 Epoch [39/80], Iter [500/500] Loss: 0.9771 694 695 Epoch [39/80], Iter [600/500] Loss: 0.9003 696 697 Epoch [39/80], Iter [700/500] Loss: 1.6323 698 699 Epoch [39/80], Iter [800/500] Loss: 0.4881 700 701 Epoch [39/80], Iter [900/500] Loss: 2.1344 702 703 Epoch [40/80], Iter [100/500] Loss: 1.2439 704 705 Epoch [40/80], Iter [200/500] Loss: 1.3419 706 707 Epoch 
[40/80], Iter [300/500] Loss: 0.9575 708 709 Epoch [40/80], Iter [400/500] Loss: 1.4438 710 711 Epoch [40/80], Iter [500/500] Loss: 0.8559 712 713 Epoch [40/80], Iter [600/500] Loss: 1.0400 714 715 Epoch [40/80], Iter [700/500] Loss: 0.9063 716 717 Epoch [40/80], Iter [800/500] Loss: 1.0714 718 719 Epoch [40/80], Iter [900/500] Loss: 0.5098 720 721 Epoch [41/80], Iter [100/500] Loss: 0.5906 722 723 Epoch [41/80], Iter [200/500] Loss: 0.6610 724 725 Epoch [41/80], Iter [300/500] Loss: 0.4230 726 727 Epoch [41/80], Iter [400/500] Loss: 0.6014 728 729 Epoch [41/80], Iter [500/500] Loss: 0.3004 730 731 Epoch [41/80], Iter [600/500] Loss: 0.5606 732 733 Epoch [41/80], Iter [700/500] Loss: 0.4994 734 735 Epoch [41/80], Iter [800/500] Loss: 0.8664 736 737 Epoch [41/80], Iter [900/500] Loss: 0.5302 738 739 Epoch [42/80], Iter [100/500] Loss: 0.2961 740 741 Epoch [42/80], Iter [200/500] Loss: 0.2826 742 743 Epoch [42/80], Iter [300/500] Loss: 0.3575 744 745 Epoch [42/80], Iter [400/500] Loss: 0.3224 746 747 Epoch [42/80], Iter [500/500] Loss: 0.6851 748 749 Epoch [42/80], Iter [600/500] Loss: 0.2997 750 751 Epoch [42/80], Iter [700/500] Loss: 0.3907 752 753 Epoch [42/80], Iter [800/500] Loss: 0.4437 754 755 Epoch [42/80], Iter [900/500] Loss: 0.4847 756 757 Epoch [43/80], Iter [100/500] Loss: 0.5418 758 759 Epoch [43/80], Iter [200/500] Loss: 0.4099 760 761 Epoch [43/80], Iter [300/500] Loss: 0.3339 762 763 Epoch [43/80], Iter [400/500] Loss: 0.5546 764 765 Epoch [43/80], Iter [500/500] Loss: 0.5867 766 767 Epoch [43/80], Iter [600/500] Loss: 0.3540 768 769 Epoch [43/80], Iter [700/500] Loss: 0.4656 770 771 Epoch [43/80], Iter [800/500] Loss: 0.2922 772 773 Epoch [43/80], Iter [900/500] Loss: 0.3042 774 775 Epoch [44/80], Iter [100/500] Loss: 0.6309 776 777 Epoch [44/80], Iter [200/500] Loss: 0.2412 778 779 Epoch [44/80], Iter [300/500] Loss: 0.5505 780 781 Epoch [44/80], Iter [400/500] Loss: 0.4133 782 783 Epoch [44/80], Iter [500/500] Loss: 0.4317 784 785 Epoch [44/80], 
Iter [600/500] Loss: 0.4152 786 787 Epoch [44/80], Iter [700/500] Loss: 0.6375 788 789 Epoch [44/80], Iter [800/500] Loss: 0.3283 790 791 Epoch [44/80], Iter [900/500] Loss: 0.4399 792 793 Epoch [45/80], Iter [100/500] Loss: 0.2777 794 795 Epoch [45/80], Iter [200/500] Loss: 0.3131 796 797 Epoch [45/80], Iter [300/500] Loss: 0.2451 798 799 Epoch [45/80], Iter [400/500] Loss: 0.5350 800 801 Epoch [45/80], Iter [500/500] Loss: 0.2501 802 803 Epoch [45/80], Iter [600/500] Loss: 0.2076 804 805 Epoch [45/80], Iter [700/500] Loss: 0.2317 806 807 Epoch [45/80], Iter [800/500] Loss: 0.8772 808 809 Epoch [45/80], Iter [900/500] Loss: 0.4162 810 811 Epoch [46/80], Iter [100/500] Loss: 0.3190 812 813 Epoch [46/80], Iter [200/500] Loss: 0.2458 814 815 Epoch [46/80], Iter [300/500] Loss: 0.2976 816 817 Epoch [46/80], Iter [400/500] Loss: 0.3712 818 819 Epoch [46/80], Iter [500/500] Loss: 0.4305 820 821 Epoch [46/80], Iter [600/500] Loss: 0.5143 822 823 Epoch [46/80], Iter [700/500] Loss: 0.2622 824 825 Epoch [46/80], Iter [800/500] Loss: 0.5331 826 827 Epoch [46/80], Iter [900/500] Loss: 0.3598 828 829 Epoch [47/80], Iter [100/500] Loss: 0.2180 830 831 Epoch [47/80], Iter [200/500] Loss: 0.2275 832 833 Epoch [47/80], Iter [300/500] Loss: 0.5302 834 835 Epoch [47/80], Iter [400/500] Loss: 0.3535 836 837 Epoch [47/80], Iter [500/500] Loss: 0.5790 838 839 Epoch [47/80], Iter [600/500] Loss: 0.3741 840 841 Epoch [47/80], Iter [700/500] Loss: 0.5120 842 843 Epoch [47/80], Iter [800/500] Loss: 0.6204 844 845 Epoch [47/80], Iter [900/500] Loss: 0.4902 846 847 Epoch [48/80], Iter [100/500] Loss: 0.2668 848 849 Epoch [48/80], Iter [200/500] Loss: 0.5693 850 851 Epoch [48/80], Iter [300/500] Loss: 0.3328 852 853 Epoch [48/80], Iter [400/500] Loss: 0.2399 854 855 Epoch [48/80], Iter [500/500] Loss: 0.3160 856 857 Epoch [48/80], Iter [600/500] Loss: 0.2944 858 859 Epoch [48/80], Iter [700/500] Loss: 0.2742 860 861 Epoch [48/80], Iter [800/500] Loss: 0.5297 862 863 Epoch [48/80], Iter 
[900/500] Loss: 0.3755 864 865 Epoch [49/80], Iter [100/500] Loss: 0.2658 866 867 Epoch [49/80], Iter [200/500] Loss: 0.2223 868 869 Epoch [49/80], Iter [300/500] Loss: 0.4348 870 871 Epoch [49/80], Iter [400/500] Loss: 0.2313 872 873 Epoch [49/80], Iter [500/500] Loss: 0.2838 874 875 Epoch [49/80], Iter [600/500] Loss: 0.3415 876 877 Epoch [49/80], Iter [700/500] Loss: 0.3633 878 879 Epoch [49/80], Iter [800/500] Loss: 0.3768 880 881 Epoch [49/80], Iter [900/500] Loss: 0.5177 882 883 Epoch [50/80], Iter [100/500] Loss: 0.3538 884 885 Epoch [50/80], Iter [200/500] Loss: 0.2759 886 887 Epoch [50/80], Iter [300/500] Loss: 0.2255 888 889 Epoch [50/80], Iter [400/500] Loss: 0.3148 890 891 Epoch [50/80], Iter [500/500] Loss: 0.4502 892 893 Epoch [50/80], Iter [600/500] Loss: 0.3382 894 895 Epoch [50/80], Iter [700/500] Loss: 0.8207 896 897 Epoch [50/80], Iter [800/500] Loss: 0.3541 898 899 Epoch [50/80], Iter [900/500] Loss: 0.4090 900 901 ('time used:', 17124.861335999998)