PyTorch CRNN with Multiple GPUs on a Single Machine

【Environment】:

      Python 3.6; PyTorch 1.2.0; 2 × GTX 1080 Ti GPUs

【Tutorial】:

    1. Set which GPU IDs to use:

import os
import torch

## Select which GPUs are visible to this process
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
ids = [0, 1]
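
    Note that CUDA_VISIBLE_DEVICES must be set before the first CUDA call in the process; PyTorch then renumbers the visible GPUs from 0, so device_ids always refers to the remapped numbering. A quick sanity check:

# Both GPUs should now be visible under the remapped IDs 0 and 1.
print(torch.cuda.device_count())  # expected: 2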

    2. Wrap the net with DataParallel

import torch

net.cuda()  # move the parameters to the default GPU before wrapping
net = torch.nn.DataParallel(net, device_ids=ids)
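
    DataParallel splits the input batch along dim 0, runs a replica of the module on each GPU, and gathers the outputs back along dim 0 on the default device. A self-contained sketch of the pattern (the Conv2d here is a stand-in module, not the CRNN):

import torch

toy = torch.nn.Conv2d(3, 8, kernel_size=3)  # stand-in module
toy.cuda()                                  # parameters to GPU 0 first
toy = torch.nn.DataParallel(toy, device_ids=[0, 1])

x = torch.randn(4, 3, 32, 32).cuda()  # batch of 4 is split 2/2 across the GPUs
y = toy(x)                            # gathered back on GPU 0: (4, 8, 30, 30)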

    3. Modify the CRNN

  1. The RNN part

    class BLSTM2(torch.nn.Module):
        def __init__(self, lstminput_size, class_num, hidden_unit=256):
            super(BLSTM2, self).__init__()
            # The input is sequence-first, (seq_len, batch, input_size), so do NOT
            # pass batch_first=True here.
            self.Bidirectional_LSTM1 = torch.nn.LSTM(lstminput_size, lstminput_size // 2,
                                                     bidirectional=True)
            output1_size = lstminput_size
            self.embedding1 = torch.nn.Linear((lstminput_size // 2) * 2, output1_size)

            self.Bidirectional_LSTM2 = torch.nn.LSTM(output1_size, hidden_unit,
                                                     bidirectional=True)
            self.embedding2 = torch.nn.Linear(hidden_unit * 2, class_num)

        def forward(self, x):
            # Re-compact the weights after DataParallel replication;
            # see the UserWarning discussed below.
            self.Bidirectional_LSTM1.flatten_parameters()
            x, _ = self.Bidirectional_LSTM1(x)
            # LSTM returns (output, (h_n, c_n));
            # output: (seq_len, batch, 2 * hidden_size) = torch.Size([20, 100, 512])

            T, b, h = x.size()
            x = self.embedding1(x.contiguous().view(T * b, h))
            # Linear maps [T * b, 2 * (lstminput_size // 2)] -> [T * b, output1_size]
            x = x.view(T, b, -1)  # back to [20, 100, 512]

            self.Bidirectional_LSTM2.flatten_parameters()
            x, _ = self.Bidirectional_LSTM2(x)  # (seq_len, batch, 2 * hidden_unit)
            T, b, h = x.size()
            x = self.embedding2(x.contiguous().view(T * b, h))
            # Linear maps [T * b, 2 * hidden_unit] -> [T * b, class_num]
            x = x.view(T, b, -1)
            return x  # [seq_len, batch, class_num] = [20, 100, class_num]

    Fix: calling flatten_parameters() at the start of forward(), as shown above, resolves: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters(). Under DataParallel this warning appears because each replica's RNN weights are copies that no longer sit in the single contiguous buffer cuDNN expects.
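
    As a quick shape check, a minimal sketch matching the sizes in the comments above (class_num=37 is an assumed placeholder, e.g. 36 characters plus the CTC blank):

import torch

rnn = BLSTM2(lstminput_size=512, class_num=37)
x = torch.randn(20, 100, 512)   # (seq_len, batch, input_size)
print(rnn(x).shape)             # torch.Size([20, 100, 37])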
     

     

  2. CRNN
    class CRNN(torch.nn.Module):
        def __init__(self, imgh, imgw, lstminput_size, class_num, hidden_unit=256):
            super(CRNN, self).__init__()
            self.cnn = torch.nn.Sequential()
            self.cnn.add_module('vgg_16', Vgg_16(imgh, imgw))
            self.rnn = torch.nn.Sequential()
            self.rnn.add_module('rnn', BLSTM2(lstminput_size, class_num, hidden_unit))

        def forward(self, x):
            x = self.cnn(x)  # (b, c, h, w) = torch.Size([100, 512, 1, 20])
            b, c, h, w = x.size()
            assert h == 1, "the height of the conv feature map must be 1"
            x = x.squeeze(2)        # drop the h dimension: (b, c, w) = torch.Size([100, 512, 20])
            x = x.permute(2, 0, 1)  # (w, b, c) = (seq_len, batch, input_size) = torch.Size([20, 100, 512])
            x = self.rnn(x)         # (seq_len, batch, class_num)
            # Return batch-first so DataParallel gathers the replicas along dim 0 (the batch):
            x = x.permute(1, 0, 2)  # (batch, seq_len, class_num) = (100, 20, class_num)
            return x
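
    Putting the pieces together, a minimal sketch (Vgg_16 is the author's CNN backbone defined elsewhere; imgh=32, imgw=320, class_num=37 and the 3-channel input are assumed placeholders):

net = CRNN(imgh=32, imgw=320, lstminput_size=512, class_num=37)
net.cuda()
net = torch.nn.DataParallel(net, device_ids=ids)

img = torch.randn(100, 3, 32, 320).cuda()  # (batch, channels, H, W)
pred = net(img)                            # batch-first: (batch, seq_len, class_num)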

    【Note】: forward() now returns a batch-first tensor so that DataParallel can gather the per-GPU outputs along dim 0; permute it back to (seq_len, batch, class_num) outside the net before computing the CTC loss: pred = net(img).permute(1, 0, 2)
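
    To make the note concrete, here is a hedged sketch of feeding the prediction into torch.nn.CTCLoss, which expects (T, N, C) log-probabilities; the random targets are stand-ins for the output of a real label encoder:

import torch

criterion = torch.nn.CTCLoss(blank=0)

pred = net(img).permute(1, 0, 2)   # back to (seq_len, batch, class_num)
log_probs = pred.log_softmax(2)
T, b, C = log_probs.size()
input_lengths = torch.full((b,), T, dtype=torch.long)
# Stand-in targets so the sketch runs; real ones come from the label encoding.
target_lengths = torch.randint(1, 10, (b,), dtype=torch.long)
targets = torch.randint(1, C, (int(target_lengths.sum().item()),), dtype=torch.long)
loss = criterion(log_probs, targets, input_lengths, target_lengths)
loss.backward()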

    4. If you have a pretrained model, loading it differs from the single-GPU case

from collections import OrderedDict
import torch

# A checkpoint saved from a single-GPU model lacks the 'module.' prefix
# that DataParallel adds to every parameter name, so add it before loading.
state_dict = torch.load(crnn_model_path)
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    if 'module' not in k:
        k = 'module.' + k
    else:
        k = k.replace('features.module.', 'module.features.')
    new_state_dict[k] = v
net.load_state_dict(new_state_dict)
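
    The reverse direction also comes up: loading a checkpoint saved from a DataParallel-wrapped model into a plain single-GPU model. A minimal sketch, assuming the same crnn_model_path:

from collections import OrderedDict
import torch

state_dict = torch.load(crnn_model_path, map_location='cpu')
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    # strip the leading 'module.' that DataParallel added when saving
    new_state_dict[k[7:] if k.startswith('module.') else k] = v
net.load_state_dict(new_state_dict)

    Saving net.module.state_dict() instead of net.state_dict() sidesteps the prefix mismatch entirely.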

 


Copyright notice: This is an original article by weixin_41632154, released under the CC 4.0 BY-SA license. Please include a link to the original source and this notice when reposting.