Intelligent digital image processing: reading model.py in the graph-convolutional SGN code (PyTorch)

Reading the SGN class

1. self.dim1 = 256
        self.dataset = dataset
        self.seg = seg
        num_joint = 25 → the number of joints is 25

2. if args.train:
            self.spa = self.one_hot(bs, num_joint, self.seg) → one-hot encoding
            self.spa = self.spa.permute(0, 3, 2, 1).cuda() → rearrange the dimensions and move to the GPU
            self.tem = self.one_hot(bs, self.seg, num_joint) → one-hot encoding
            self.tem = self.tem.permute(0, 3, 1, 2).cuda() → rearrange the dimensions and move to the GPU
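To make the shapes concrete, here is a minimal sketch (the values bs = 4 and seg = 20 are assumptions; torch.eye plus expand is used as a compact stand-in for the one_hot method discussed further down):

import torch

bs, num_joint, seg = 4, 25, 20   # assumed example values

# identity (one-hot) matrices tiled across the batch, as one_hot produces
spa = torch.eye(num_joint).expand(bs, seg, num_joint, num_joint)   # (bs, seg, 25, 25)
tem = torch.eye(seg).expand(bs, num_joint, seg, seg)               # (bs, 25, seg, seg)

spa = spa.permute(0, 3, 2, 1)   # (bs, 25, 25, seg): joint one-hot codes become channels
tem = tem.permute(0, 3, 1, 2)   # (bs, seg, 25, seg): frame one-hot codes become channels
print(spa.shape, tem.shape)     # torch.Size([4, 25, 25, 20]) torch.Size([4, 20, 25, 20])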

3. self.tem_embed = embed(self.seg, 64*4, norm=False, bias=bias) → build an embed module from the embed class
        self.spa_embed = embed(num_joint, 64, norm=False, bias=bias)
        self.joint_embed = embed(3, 64, norm=True, bias=bias)
        self.dif_embed = embed(3, 64, norm=True, bias=bias)

The embed module consists of: batch normalization (only when norm=True) → 1x1 convolution → ReLU activation → 1x1 convolution → ReLU activation.

4. self.maxpool = nn.AdaptiveMaxPool2d((1, 1)) → adaptive max pooling down to a single value per channel

5. self.cnn = local(self.dim1, self.dim1 * 2, bias=bias) → build a local module.

The local module runs: adaptive max pooling → 1x3 convolution → batch normalization → ReLU activation → dropout → 1x1 convolution → batch normalization → ReLU activation.

6. self.compute_g1 = compute_g_spa(self.dim1 // 2, self.dim1, bias=bias) → build a compute_g_spa module

compute_g_spa: 1x1 convolution → 1x1 convolution → matrix product → softmax. The softmax maps each row to real numbers between 0 and 1 and normalizes them so they sum to 1 (just like class probabilities in a classifier), so every joint gets a normalized set of edge weights over all joints — one adjacency matrix per frame.
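A minimal sketch of that adjacency computation (the sizes and the theta/phi names are assumptions for illustration; they mirror g1/g2 in the class listed further down):

import torch
import torch.nn as nn

bs, c_in, c_out, J, T = 2, 128, 256, 25, 20          # assumed example sizes
x1 = torch.randn(bs, c_in, J, T)

theta = nn.Conv2d(c_in, c_out, kernel_size=1)        # plays the role of self.g1
phi   = nn.Conv2d(c_in, c_out, kernel_size=1)        # plays the role of self.g2

g1 = theta(x1).permute(0, 3, 2, 1).contiguous()      # (bs, T, J, c_out)
g2 = phi(x1).permute(0, 3, 1, 2).contiguous()        # (bs, T, c_out, J)
g  = torch.softmax(g1.matmul(g2), dim=-1)            # (bs, T, J, J): one graph per frame

print(g.shape)            # torch.Size([2, 20, 25, 25])
print(g.sum(-1)[0, 0])    # every row sums to 1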

7. self.gcn1 = gcn_spa(self.dim1 // 2, self.dim1 // 2, bias=bias)
        self.gcn2 = gcn_spa(self.dim1 // 2, self.dim1, bias=bias)
        self.gcn3 = gcn_spa(self.dim1, self.dim1, bias=bias) → build three gcn_spa graph-convolution modules

8. self.fc = nn.Linear(self.dim1 * 2, num_classes) → fully connected layer for classification

9. for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels → kernel height x kernel width x number of output channels
                m.weight.data.normal_(0, math.sqrt(2. / n)) → He (fan-out) initialization: draw the weights from a normal distribution with mean 0 and standard deviation sqrt(2/n)

10. nn.init.constant_(self.gcn1.w.cnn.weight, 0)
        nn.init.constant_(self.gcn2.w.cnn.weight, 0)
        nn.init.constant_(self.gcn3.w.cnn.weight, 0) → initialize the w-branch weights of the three gcn_spa modules to zero
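A small standalone check of what the loop in item 9 does, for a single 1x3 convolution (the layer sizes here are assumptions):

import math
import torch.nn as nn

m = nn.Conv2d(256, 256, kernel_size=(1, 3), padding=(0, 1), bias=False)
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels   # 1 * 3 * 256 = 768
m.weight.data.normal_(0, math.sqrt(2. / n))                # He (fan-out) initialization

print(n, m.weight.std().item())   # empirical std is close to sqrt(2/768) ≈ 0.051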

Reading the one_hot method:

1. y = torch.arange(spa).unsqueeze(-1) → unsqueeze is the opposite of squeeze: it inserts a new dimension of size 1 at the given position (here the last one), turning the index vector of shape (spa,) into a column of shape (spa, 1)
2. y_onehot = torch.FloatTensor(spa, spa) → an uninitialized 32-bit float tensor of shape (spa, spa)

3. y_onehot.zero_() → fill the tensor with zeros in place
4. y_onehot.scatter_(1, y, 1) → along dimension 1, write a 1 at column y[i] of row i, producing an identity (one-hot) matrix

scatter(dim, index, src) takes 3 arguments:

  • dim: the dimension along which to index
  • index: the indices of the elements to scatter
  • src: the source to scatter from, either a scalar or a tensor

scatter can be understood as placing or overwriting elements at the indexed positions.

5. y_onehot = y_onehot.unsqueeze(0).unsqueeze(0) → add two leading dimensions of size 1, giving shape (1, 1, spa, spa)
6. y_onehot = y_onehot.repeat(bs, tem, 1, 1) → tile the one-hot matrix along the two new dimensions, giving shape (bs, tem, spa, spa)

7. return y_onehot → return the resulting tensor
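Running the same logic standalone (hypothetical values bs = 2, spa = 3, tem = 4; torch.zeros replaces the uninitialized FloatTensor plus zero_()):

import torch

def one_hot(bs, spa, tem):
    y = torch.arange(spa).unsqueeze(-1)              # (spa, 1) column of class indices
    y_onehot = torch.zeros(spa, spa)                 # (spa, spa) of zeros
    y_onehot.scatter_(1, y, 1)                       # row i gets a 1 at column i
    y_onehot = y_onehot.unsqueeze(0).unsqueeze(0)    # (1, 1, spa, spa)
    return y_onehot.repeat(bs, tem, 1, 1)            # (bs, tem, spa, spa)

out = one_hot(bs=2, spa=3, tem=4)
print(out.shape)     # torch.Size([2, 4, 3, 3])
print(out[0, 0])     # the 3x3 identity matrix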

Reading the embed class:

1. if norm:
            self.cnn = nn.Sequential(
                norm_data(dim), → build a norm_data module (reshape → batch normalization → reshape back)
                cnn1x1(dim, 64, bias=bias), → 1x1 convolution, dim input channels, 64 output channels (number of kernels)
                nn.ReLU(), → ReLU activation
                cnn1x1(64, dim1, bias=bias), → 1x1 convolution, 64 input channels, dim1 output channels
                nn.ReLU(), → ReLU activation
            )
        else:
            self.cnn = nn.Sequential(
                cnn1x1(dim, 64, bias=bias), → 1x1 convolution, dim input channels, 64 output channels
                nn.ReLU(), → ReLU activation
                cnn1x1(64, dim1, bias=bias), → 1x1 convolution, 64 input channels, dim1 output channels
                nn.ReLU(), → ReLU activation
            )
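Assuming the listing below is saved as model.py, a quick shape check of the joint embedding (the batch and frame counts are example assumptions):

import torch
from model import embed   # the class discussed above

x = torch.randn(2, 3, 25, 20)                    # (batch, coordinates, joints, frames)
joint_embed = embed(dim=3, dim1=64, norm=True)   # same dim/dim1/norm as self.joint_embed in SGN
print(joint_embed(x).shape)                      # torch.Size([2, 64, 25, 20])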

Reading the local class:

1. self.maxpool = nn.AdaptiveMaxPool2d((1, 20)) → adaptive max pooling to an output size of (1, 20)
        self.cnn1 = nn.Conv2d(dim1, dim1, kernel_size=(1, 3), padding=(0, 1), bias=bias) → convolution with dim1 input and output channels, 1x3 kernel, one column of zero padding on each side of the time axis
        self.bn1 = nn.BatchNorm2d(dim1) → batch normalization
        self.relu = nn.ReLU() → ReLU activation
        self.cnn2 = nn.Conv2d(dim1, dim2, kernel_size=1, bias=bias) → convolution with dim1 input channels, dim2 output channels, 1x1 kernel
        self.bn2 = nn.BatchNorm2d(dim2) → batch normalization
        self.dropout = nn.Dropout2d(0.2) → dropout (random deactivation)
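A shape trace through the block, assuming the listing below is saved as model.py and using example sizes that match the SGN forward pass:

import torch
from model import local   # the class discussed above

x = torch.randn(2, 256, 25, 20)       # (batch, channels, joints, frames), after the GCN stage
block = local(256, 512, bias=True)    # same sizes as self.cnn in SGN
y = block(x)                          # maxpool -> 1x3 conv -> bn -> relu -> dropout -> 1x1 conv -> bn -> relu
print(y.shape)                        # torch.Size([2, 512, 1, 20])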

Reading the compute_g_spa class:

1. g1 = self.g1(x1).permute(0, 3, 2, 1).contiguous() → 1x1 convolution, then rearrange the dimensions (frames become the second axis)
        g2 = self.g2(x1).permute(0, 3, 1, 2).contiguous() → 1x1 convolution, then rearrange the dimensions the other way round
        g3 = g1.matmul(g2) → matrix product, giving one joint-by-joint similarity matrix per frame
        g = self.softmax(g3) → softmax over the last dimension turns each row into normalized edge weights

Reading the gcn_spa class:

1. x = x1.permute(0, 3, 2, 1).contiguous() → rearrange the tensor's dimensions
        x = g.matmul(x) → multiply by the adjacency matrix g (aggregate features over the joints)
        x = x.permute(0, 3, 2, 1).contiguous() → rearrange the dimensions back
        x = self.w(x) + self.w1(x1) → sum of the transformed aggregate and a transformed skip of the input
        x = self.relu(self.bn(x)) → batch normalization followed by ReLU activation
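The aggregation step in isolation (the sizes and the random stand-in adjacency are assumptions; w and w1 mimic the two cnn1x1 branches):

import torch
import torch.nn as nn

bs, c_in, c_out, J, T = 2, 128, 128, 25, 20
x1 = torch.randn(bs, c_in, J, T)
g  = torch.softmax(torch.randn(bs, T, J, J), dim=-1)   # stand-in for the output of compute_g_spa

x = x1.permute(0, 3, 2, 1).contiguous()   # (bs, T, J, c_in): joints become matrix rows
x = g.matmul(x)                           # each joint becomes a weighted sum over all joints
x = x.permute(0, 3, 2, 1).contiguous()    # back to (bs, c_in, J, T)

w  = nn.Conv2d(c_in, c_out, kernel_size=1, bias=False)   # stands in for self.w
w1 = nn.Conv2d(c_in, c_out, kernel_size=1)                # stands in for self.w1
out = torch.relu(nn.BatchNorm2d(c_out)(w(x) + w1(x1)))
print(out.shape)                          # torch.Size([2, 128, 25, 20])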

Reading the norm_data class:

1. bs, c, num_joints, step = x.size() → read off the tensor's dimensions
        x = x.view(bs, -1, step) → view reshapes the tensor (like numpy's reshape)
        x = self.bn(x) → batch normalization

        x = x.view(bs, -1, num_joints, step).contiguous() → reshape back to the original layout
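The same reshape-normalize-reshape round trip as a standalone sketch (example sizes assumed, with dim = 3 as in joint_embed):

import torch
import torch.nn as nn

bs, c, J, T = 2, 3, 25, 20
x = torch.randn(bs, c, J, T)

bn = nn.BatchNorm1d(c * J)     # 75 channels, as in norm_data(dim=3)
y = x.view(bs, -1, T)          # (bs, 75, T): merge coordinate and joint axes
y = bn(y)                      # normalize each (coordinate, joint) channel over batch and time
y = y.view(bs, -1, J, T)       # back to (bs, 3, 25, T)
print(y.shape)                 # torch.Size([2, 3, 25, 20])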

 

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
from torch import nn
import torch
import math

class SGN(nn.Module):
    def __init__(self, num_classes, dataset, seg, args, bias = True):
        super(SGN, self).__init__()

        self.dim1 = 256
        self.dataset = dataset
        self.seg = seg
        num_joint = 25
        bs = args.batch_size
        if args.train:
            self.spa = self.one_hot(bs, num_joint, self.seg)
            self.spa = self.spa.permute(0, 3, 2, 1).cuda()
            self.tem = self.one_hot(bs, self.seg, num_joint)
            self.tem = self.tem.permute(0, 3, 1, 2).cuda()
        else:
            self.spa = self.one_hot(32 * 5, num_joint, self.seg)
            self.spa = self.spa.permute(0, 3, 2, 1).cuda()
            self.tem = self.one_hot(32 * 5, self.seg, num_joint)
            self.tem = self.tem.permute(0, 3, 1, 2).cuda()

        self.tem_embed = embed(self.seg, 64*4, norm=False, bias=bias)
        self.spa_embed = embed(num_joint, 64, norm=False, bias=bias)
        self.joint_embed = embed(3, 64, norm=True, bias=bias)
        self.dif_embed = embed(3, 64, norm=True, bias=bias)
        self.maxpool = nn.AdaptiveMaxPool2d((1, 1))
        self.cnn = local(self.dim1, self.dim1 * 2, bias=bias)
        self.compute_g1 = compute_g_spa(self.dim1 // 2, self.dim1, bias=bias)
        self.gcn1 = gcn_spa(self.dim1 // 2, self.dim1 // 2, bias=bias)
        self.gcn2 = gcn_spa(self.dim1 // 2, self.dim1, bias=bias)
        self.gcn3 = gcn_spa(self.dim1, self.dim1, bias=bias)
        self.fc = nn.Linear(self.dim1 * 2, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))

        nn.init.constant_(self.gcn1.w.cnn.weight, 0)
        nn.init.constant_(self.gcn2.w.cnn.weight, 0)
        nn.init.constant_(self.gcn3.w.cnn.weight, 0)


    def forward(self, input):
        
        # Dynamic Representation
        bs, step, dim = input.size()
        num_joints = dim //3
        input = input.view((bs, step, num_joints, 3))
        input = input.permute(0, 3, 2, 1).contiguous()
        dif = input[:, :, :, 1:] - input[:, :, :, 0:-1]
        dif = torch.cat([dif.new(bs, dif.size(1), num_joints, 1).zero_(), dif], dim=-1)
        pos = self.joint_embed(input)
        tem1 = self.tem_embed(self.tem)
        spa1 = self.spa_embed(self.spa)
        dif = self.dif_embed(dif)
        dy = pos + dif
        # Joint-level Module
        input= torch.cat([dy, spa1], 1)
        g = self.compute_g1(input)
        input = self.gcn1(input, g)
        input = self.gcn2(input, g)
        input = self.gcn3(input, g)
        # Frame-level Module
        input = input + tem1
        input = self.cnn(input)
        # Classification
        output = self.maxpool(input)
        output = torch.flatten(output, 1)
        output = self.fc(output)

        return output

    def one_hot(self, bs, spa, tem):

        y = torch.arange(spa).unsqueeze(-1)
        y_onehot = torch.FloatTensor(spa, spa)

        y_onehot.zero_()
        y_onehot.scatter_(1, y, 1)

        y_onehot = y_onehot.unsqueeze(0).unsqueeze(0)
        y_onehot = y_onehot.repeat(bs, tem, 1, 1)

        return y_onehot

class norm_data(nn.Module):
    def __init__(self, dim= 64):
        super(norm_data, self).__init__()

        self.bn = nn.BatchNorm1d(dim* 25)

    def forward(self, x):
        bs, c, num_joints, step = x.size()
        x = x.view(bs, -1, step)
        x = self.bn(x)
        x = x.view(bs, -1, num_joints, step).contiguous()
        return x

class embed(nn.Module):
    def __init__(self, dim = 3, dim1 = 128, norm = True, bias = False):
        super(embed, self).__init__()

        if norm:
            self.cnn = nn.Sequential(
                norm_data(dim),
                cnn1x1(dim, 64, bias=bias),
                nn.ReLU(),
                cnn1x1(64, dim1, bias=bias),
                nn.ReLU(),
            )
        else:
            self.cnn = nn.Sequential(
                cnn1x1(dim, 64, bias=bias),
                nn.ReLU(),
                cnn1x1(64, dim1, bias=bias),
                nn.ReLU(),
            )

    def forward(self, x):
        x = self.cnn(x)
        return x

class cnn1x1(nn.Module):
    def __init__(self, dim1 = 3, dim2 =3, bias = True):
        super(cnn1x1, self).__init__()
        self.cnn = nn.Conv2d(dim1, dim2, kernel_size=1, bias=bias)

    def forward(self, x):
        x = self.cnn(x)
        return x

class local(nn.Module):
    def __init__(self, dim1 = 3, dim2 = 3, bias = False):
        super(local, self).__init__()
        self.maxpool = nn.AdaptiveMaxPool2d((1, 20))
        self.cnn1 = nn.Conv2d(dim1, dim1, kernel_size=(1, 3), padding=(0, 1), bias=bias)
        self.bn1 = nn.BatchNorm2d(dim1)
        self.relu = nn.ReLU()
        self.cnn2 = nn.Conv2d(dim1, dim2, kernel_size=1, bias=bias)
        self.bn2 = nn.BatchNorm2d(dim2)
        self.dropout = nn.Dropout2d(0.2)

    def forward(self, x1):
        x1 = self.maxpool(x1)
        x = self.cnn1(x1)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.cnn2(x)
        x = self.bn2(x)
        x = self.relu(x)

        return x

class gcn_spa(nn.Module):
    def __init__(self, in_feature, out_feature, bias = False):
        super(gcn_spa, self).__init__()
        self.bn = nn.BatchNorm2d(out_feature)
        self.relu = nn.ReLU()
        self.w = cnn1x1(in_feature, out_feature, bias=False)
        self.w1 = cnn1x1(in_feature, out_feature, bias=bias)


    def forward(self, x1, g):
        x = x1.permute(0, 3, 2, 1).contiguous()
        x = g.matmul(x)
        x = x.permute(0, 3, 2, 1).contiguous()
        x = self.w(x) + self.w1(x1)
        x = self.relu(self.bn(x))
        return x

class compute_g_spa(nn.Module):
    def __init__(self, dim1 = 64 *3, dim2 = 64*3, bias = False):
        super(compute_g_spa, self).__init__()
        self.dim1 = dim1
        self.dim2 = dim2
        self.g1 = cnn1x1(self.dim1, self.dim2, bias=bias)
        self.g2 = cnn1x1(self.dim1, self.dim2, bias=bias)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x1):

        g1 = self.g1(x1).permute(0, 3, 2, 1).contiguous()
        g2 = self.g2(x1).permute(0, 3, 1, 2).contiguous()
        g3 = g1.matmul(g2)
        g = self.softmax(g3)
        return g
    

https://github.com/microsoft/SGN
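Finally, a minimal smoke test of the whole model (everything here is an assumption for illustration: a CUDA device is available, the listing above is saved as model.py, and args only needs the train and batch_size fields that __init__ reads):

import argparse
import torch
from model import SGN

args = argparse.Namespace(train=0, batch_size=64)    # train=0 -> the 32*5 inference-sized one-hot buffers
model = SGN(num_classes=60, dataset='NTU', seg=20, args=args, bias=True).cuda()
model.eval()

x = torch.randn(32 * 5, 20, 75).cuda()   # (clips, frames, 25 joints x 3 coordinates)
with torch.no_grad():
    logits = model(x)
print(logits.shape)                      # torch.Size([160, 60])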

