Walkthrough of the SGN class
1. self.dim1 = 256
self.dataset = dataset
self.seg = seg
num_joint = 25 -> the number of joints is 25
2. if args.train:
self.spa = self.one_hot(bs, num_joint, self.seg) -> one-hot encoding
self.spa = self.spa.permute(0, 3, 2, 1).cuda() -> permute the dimensions
self.tem = self.one_hot(bs, self.seg, num_joint) -> one-hot encoding
self.tem = self.tem.permute(0, 3, 1, 2).cuda() -> permute the dimensions
3. self.tem_embed = embed(self.seg, 64*4, norm=False, bias=bias) -> create an embedding module from the embed class
self.spa_embed = embed(num_joint, 64, norm=False, bias=bias)
self.joint_embed = embed(3, 64, norm=True, bias=bias)
self.dif_embed = embed(3, 64, norm=True, bias=bias)
embed structure: (optional) batch normalization -> 1x1 convolution -> ReLU -> 1x1 convolution -> ReLU
4. self.maxpool = nn.AdaptiveMaxPool2d((1, 1)) -> adaptive max pooling down to a 1x1 output
5. self.cnn = local(self.dim1, self.dim1 * 2, bias=bias) -> create a local module.
local structure: adaptive max pooling -> convolution -> batch normalization -> ReLU -> dropout -> convolution -> batch normalization -> ReLU
6. self.compute_g1 = compute_g_spa(self.dim1 // 2, self.dim1, bias=bias) -> create a compute_g_spa module
compute_g_spa: convolution -> convolution -> softmax, which maps each row to real numbers in (0, 1) that sum to 1, so the result is a normalized (soft) adjacency matrix over the joints.
7. self.gcn1 = gcn_spa(self.dim1 // 2, self.dim1 // 2, bias=bias)
self.gcn2 = gcn_spa(self.dim1 // 2, self.dim1, bias=bias)
self.gcn3 = gcn_spa(self.dim1, self.dim1, bias=bias) -> create three gcn_spa graph-convolution modules
8. self.fc = nn.Linear(self.dim1 * 2, num_classes) -> fully connected layer for classification
9. for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels -> kernel height x kernel width x number of output channels
m.weight.data.normal_(0, math.sqrt(2. / n)) -> He-style initialization of every Conv2d weight (see the sketch after this list)
10. nn.init.constant_(self.gcn1.w.cnn.weight, 0)
nn.init.constant_(self.gcn2.w.cnn.weight, 0)
nn.init.constant_(self.gcn3.w.cnn.weight, 0) -> initialize the w branch of each gcn_spa module to zero
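Step 9 is the standard He (Kaiming) initialization for convolutions: the weight is drawn from a zero-mean normal whose std is sqrt(2 / n), with n the fan-out of the kernel. A minimal sketch of the same computation on a hypothetical Conv2d layer (the layer shape here is illustrative, not taken from the model):

import math
from torch import nn

# Illustrative layer only: 64 -> 128 channels with a 1x3 kernel.
conv = nn.Conv2d(64, 128, kernel_size=(1, 3))
n = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels  # 1 * 3 * 128 = 384
conv.weight.data.normal_(0, math.sqrt(2. / n))                     # std ~ sqrt(2/384) ~ 0.072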
Walkthrough of the one_hot method:
1. y = torch.arange(spa).unsqueeze(-1) -> unsqueeze(-1) inserts a new dimension of size 1 at the last position, turning the shape from (spa,) into (spa, 1); each row holds one index
2. y_onehot = torch.FloatTensor(spa, spa) -> an uninitialized (spa, spa) 32-bit float tensor
3. y_onehot.zero_() -> fill it with zeros
4. y_onehot.scatter_(1, y, 1) -> write a 1 at the column given by each row's index
scatter(dim, index, src) takes 3 arguments:
- dim: the dimension along which to index
- index: the indices of the elements to scatter
- src: the source to scatter, which can be a scalar or a tensor
scatter can be understood as placing or overwriting elements; here it turns each row into a one-hot vector
5. y_onehot = y_onehot.unsqueeze(0).unsqueeze(0) -> add two leading dimensions of size 1, giving shape (1, 1, spa, spa)
6. y_onehot = y_onehot.repeat(bs, tem, 1, 1) -> tile the matrix along the batch and temporal dimensions, giving shape (bs, tem, spa, spa)
7. return y_onehot -> return the resulting tensor (a toy example follows this list)
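To make the shapes concrete, here is a toy run of the same steps with made-up sizes (spa=3, bs=2, tem=4); the real calls use 25 joints and seg frames:

import torch

spa, bs, tem = 3, 2, 4                          # toy sizes for illustration
y = torch.arange(spa).unsqueeze(-1)             # (3, 1): [[0], [1], [2]]
y_onehot = torch.FloatTensor(spa, spa).zero_()  # (3, 3) of zeros
y_onehot.scatter_(1, y, 1)                      # identity-like one-hot matrix
y_onehot = y_onehot.unsqueeze(0).unsqueeze(0)   # (1, 1, 3, 3)
y_onehot = y_onehot.repeat(bs, tem, 1, 1)       # (2, 4, 3, 3)
print(y_onehot.shape)                           # torch.Size([2, 4, 3, 3])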
Walkthrough of the embed class:
1. if norm:
self.cnn = nn.Sequential(
norm_data(dim), -> create a norm_data module (reshape -> batch normalization -> reshape back)
cnn1x1(dim, 64, bias=bias), -> 1x1 convolution, dim input channels, 64 output channels (number of kernels)
nn.ReLU(), -> ReLU activation
cnn1x1(64, dim1, bias=bias), -> 1x1 convolution, 64 input channels, dim1 output channels (number of kernels)
nn.ReLU(), -> ReLU activation
)
else:
self.cnn = nn.Sequential(
cnn1x1(dim, 64, bias=bias), -> 1x1 convolution, dim input channels, 64 output channels (number of kernels)
nn.ReLU(), -> ReLU activation
cnn1x1(64, dim1, bias=bias), -> 1x1 convolution, 64 input channels, dim1 output channels (number of kernels)
nn.ReLU(), -> ReLU activation
)
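As a quick sanity check (using the embed class defined in the source listing further down; the batch size and frame count here are made up), embed(3, 64, norm=True) maps a (batch, 3, 25, frames) joint tensor to (batch, 64, 25, frames):

import torch

emb = embed(dim=3, dim1=64, norm=True, bias=True)  # as used for joint_embed / dif_embed
x = torch.randn(2, 3, 25, 20)                      # toy (batch, xyz, joints, frames)
print(emb(x).shape)                                # torch.Size([2, 64, 25, 20])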
Walkthrough of the local class:
1. self.maxpool = nn.AdaptiveMaxPool2d((1, 20)) -> adaptive max pooling to a fixed (1, 20) output
self.cnn1 = nn.Conv2d(dim1, dim1, kernel_size=(1, 3), padding=(0, 1), bias=bias) -> convolution, dim1 input and output channels, 1x3 kernel, padded with one column of zeros on each side
self.bn1 = nn.BatchNorm2d(dim1) -> batch normalization
self.relu = nn.ReLU() -> ReLU activation
self.cnn2 = nn.Conv2d(dim1, dim2, kernel_size=1, bias=bias) -> convolution, dim1 input channels, dim2 output channels, 1x1 kernel
self.bn2 = nn.BatchNorm2d(dim2) -> batch normalization
self.dropout = nn.Dropout2d(0.2) -> dropout (randomly zeroes whole channels with probability 0.2)
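A shape check under the same made-up sizes (the local class comes from the listing below): the adaptive pooling first collapses the joint axis and resamples the frame axis to 20, so local(256, 512) turns (batch, 256, joints, frames) into (batch, 512, 1, 20):

import torch

loc = local(dim1=256, dim2=512, bias=True)  # as used for self.cnn in SGN
x = torch.randn(2, 256, 25, 20)             # toy (batch, channels, joints, frames)
print(loc(x).shape)                         # torch.Size([2, 512, 1, 20])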
Walkthrough of the compute_g_spa class:
1. g1 = self.g1(x1).permute(0, 3, 2, 1).contiguous() -> 1x1 convolution, then permute to (batch, frames, joints, channels)
g2 = self.g2(x1).permute(0, 3, 1, 2).contiguous() -> 1x1 convolution, then permute to (batch, frames, channels, joints)
g3 = g1.matmul(g2) -> matrix product, giving a (batch, frames, joints, joints) affinity matrix
g = self.softmax(g3) -> softmax over the last dimension, so every row sums to 1 (the learned adjacency)
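In other words, compute_g_spa produces one joint-to-joint affinity matrix per frame, and the softmax over the last dimension makes every row sum to 1. A small check with random features (only the shapes are meaningful here):

import torch

comp_g = compute_g_spa(dim1=128, dim2=256, bias=True)  # as self.compute_g1 in SGN
x = torch.randn(2, 128, 25, 20)                        # toy (batch, channels, joints, frames)
g = comp_g(x)
print(g.shape)                                         # torch.Size([2, 20, 25, 25])
print(g.sum(-1)[0, 0, :3])                             # ~tensor([1., 1., 1.])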
Walkthrough of the gcn_spa class:
1. x = x1.permute(0, 3, 2, 1).contiguous() -> permute the tensor to (batch, frames, joints, channels)
x = g.matmul(x) -> matrix product with the adjacency g, aggregating features across joints
x = x.permute(0, 3, 2, 1).contiguous() -> permute back to (batch, channels, joints, frames)
x = self.w(x) + self.w1(x1) -> sum of the aggregated branch and the residual branch
x = self.relu(self.bn(x)) -> batch normalization followed by ReLU activation
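gcn_spa therefore expects the feature map x1 in (batch, channels, joints, frames) layout and the adjacency g in (batch, frames, joints, joints) layout; a shape sketch with random inputs (sizes are illustrative only):

import torch

gcn = gcn_spa(in_feature=128, out_feature=256, bias=True)
x1 = torch.randn(2, 128, 25, 20)                       # toy feature map
g = torch.softmax(torch.randn(2, 20, 25, 25), dim=-1)  # stand-in for compute_g_spa output
print(gcn(x1, g).shape)                                # torch.Size([2, 256, 25, 20])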
Walkthrough of the norm_data class:
1. bs, c, num_joints, step = x.size() -> read the size of the tensor x
x = x.view(bs, -1, step) -> reshape (view behaves like numpy's reshape), flattening the channel and joint dimensions into one
x = self.bn(x) -> batch normalization (BatchNorm1d over the flattened channel-joint features)
x = x.view(bs, -1, num_joints, step).contiguous() -> reshape back to (bs, c, num_joints, step)
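So norm_data(3) flattens the 3 coordinate channels and 25 joints into 75 BatchNorm1d features, normalizes each of them over the batch and frame dimensions, and restores the original shape; a minimal check with a toy batch and frame count:

import torch

nd = norm_data(dim=3)          # BatchNorm1d over 3 * 25 = 75 features
x = torch.randn(2, 3, 25, 20)  # toy (batch, xyz, joints, frames)
print(nd(x).shape)             # torch.Size([2, 3, 25, 20])

The complete source of the model follows for reference.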
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
from torch import nn
import torch
import math


class SGN(nn.Module):
    def __init__(self, num_classes, dataset, seg, args, bias=True):
        super(SGN, self).__init__()

        self.dim1 = 256
        self.dataset = dataset
        self.seg = seg
        num_joint = 25
        bs = args.batch_size
        if args.train:
            self.spa = self.one_hot(bs, num_joint, self.seg)
            self.spa = self.spa.permute(0, 3, 2, 1).cuda()
            self.tem = self.one_hot(bs, self.seg, num_joint)
            self.tem = self.tem.permute(0, 3, 1, 2).cuda()
        else:
            self.spa = self.one_hot(32 * 5, num_joint, self.seg)
            self.spa = self.spa.permute(0, 3, 2, 1).cuda()
            self.tem = self.one_hot(32 * 5, self.seg, num_joint)
            self.tem = self.tem.permute(0, 3, 1, 2).cuda()

        self.tem_embed = embed(self.seg, 64*4, norm=False, bias=bias)
        self.spa_embed = embed(num_joint, 64, norm=False, bias=bias)
        self.joint_embed = embed(3, 64, norm=True, bias=bias)
        self.dif_embed = embed(3, 64, norm=True, bias=bias)
        self.maxpool = nn.AdaptiveMaxPool2d((1, 1))
        self.cnn = local(self.dim1, self.dim1 * 2, bias=bias)
        self.compute_g1 = compute_g_spa(self.dim1 // 2, self.dim1, bias=bias)
        self.gcn1 = gcn_spa(self.dim1 // 2, self.dim1 // 2, bias=bias)
        self.gcn2 = gcn_spa(self.dim1 // 2, self.dim1, bias=bias)
        self.gcn3 = gcn_spa(self.dim1, self.dim1, bias=bias)
        self.fc = nn.Linear(self.dim1 * 2, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))

        nn.init.constant_(self.gcn1.w.cnn.weight, 0)
        nn.init.constant_(self.gcn2.w.cnn.weight, 0)
        nn.init.constant_(self.gcn3.w.cnn.weight, 0)
    def forward(self, input):

        # Dynamic Representation
        bs, step, dim = input.size()
        num_joints = dim // 3
        input = input.view((bs, step, num_joints, 3))
        input = input.permute(0, 3, 2, 1).contiguous()
        dif = input[:, :, :, 1:] - input[:, :, :, 0:-1]
        dif = torch.cat([dif.new(bs, dif.size(1), num_joints, 1).zero_(), dif], dim=-1)
        pos = self.joint_embed(input)
        tem1 = self.tem_embed(self.tem)
        spa1 = self.spa_embed(self.spa)
        dif = self.dif_embed(dif)
        dy = pos + dif

        # Joint-level Module
        input = torch.cat([dy, spa1], 1)
        g = self.compute_g1(input)
        input = self.gcn1(input, g)
        input = self.gcn2(input, g)
        input = self.gcn3(input, g)

        # Frame-level Module
        input = input + tem1
        input = self.cnn(input)

        # Classification
        output = self.maxpool(input)
        output = torch.flatten(output, 1)
        output = self.fc(output)

        return output

    def one_hot(self, bs, spa, tem):
        y = torch.arange(spa).unsqueeze(-1)
        y_onehot = torch.FloatTensor(spa, spa)
        y_onehot.zero_()
        y_onehot.scatter_(1, y, 1)
        y_onehot = y_onehot.unsqueeze(0).unsqueeze(0)
        y_onehot = y_onehot.repeat(bs, tem, 1, 1)
        return y_onehot
class norm_data(nn.Module):
    def __init__(self, dim=64):
        super(norm_data, self).__init__()
        self.bn = nn.BatchNorm1d(dim * 25)

    def forward(self, x):
        bs, c, num_joints, step = x.size()
        x = x.view(bs, -1, step)
        x = self.bn(x)
        x = x.view(bs, -1, num_joints, step).contiguous()
        return x


class embed(nn.Module):
    def __init__(self, dim=3, dim1=128, norm=True, bias=False):
        super(embed, self).__init__()
        if norm:
            self.cnn = nn.Sequential(
                norm_data(dim),
                cnn1x1(dim, 64, bias=bias),
                nn.ReLU(),
                cnn1x1(64, dim1, bias=bias),
                nn.ReLU(),
            )
        else:
            self.cnn = nn.Sequential(
                cnn1x1(dim, 64, bias=bias),
                nn.ReLU(),
                cnn1x1(64, dim1, bias=bias),
                nn.ReLU(),
            )

    def forward(self, x):
        x = self.cnn(x)
        return x


class cnn1x1(nn.Module):
    def __init__(self, dim1=3, dim2=3, bias=True):
        super(cnn1x1, self).__init__()
        self.cnn = nn.Conv2d(dim1, dim2, kernel_size=1, bias=bias)

    def forward(self, x):
        x = self.cnn(x)
        return x
class local(nn.Module):
    def __init__(self, dim1=3, dim2=3, bias=False):
        super(local, self).__init__()
        self.maxpool = nn.AdaptiveMaxPool2d((1, 20))
        self.cnn1 = nn.Conv2d(dim1, dim1, kernel_size=(1, 3), padding=(0, 1), bias=bias)
        self.bn1 = nn.BatchNorm2d(dim1)
        self.relu = nn.ReLU()
        self.cnn2 = nn.Conv2d(dim1, dim2, kernel_size=1, bias=bias)
        self.bn2 = nn.BatchNorm2d(dim2)
        self.dropout = nn.Dropout2d(0.2)

    def forward(self, x1):
        x1 = self.maxpool(x1)
        x = self.cnn1(x1)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.cnn2(x)
        x = self.bn2(x)
        x = self.relu(x)
        return x


class gcn_spa(nn.Module):
    def __init__(self, in_feature, out_feature, bias=False):
        super(gcn_spa, self).__init__()
        self.bn = nn.BatchNorm2d(out_feature)
        self.relu = nn.ReLU()
        self.w = cnn1x1(in_feature, out_feature, bias=False)
        self.w1 = cnn1x1(in_feature, out_feature, bias=bias)

    def forward(self, x1, g):
        x = x1.permute(0, 3, 2, 1).contiguous()
        x = g.matmul(x)
        x = x.permute(0, 3, 2, 1).contiguous()
        x = self.w(x) + self.w1(x1)
        x = self.relu(self.bn(x))
        return x


class compute_g_spa(nn.Module):
    def __init__(self, dim1=64 * 3, dim2=64 * 3, bias=False):
        super(compute_g_spa, self).__init__()
        self.dim1 = dim1
        self.dim2 = dim2
        self.g1 = cnn1x1(self.dim1, self.dim2, bias=bias)
        self.g2 = cnn1x1(self.dim1, self.dim2, bias=bias)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x1):
        g1 = self.g1(x1).permute(0, 3, 2, 1).contiguous()
        g2 = self.g2(x1).permute(0, 3, 1, 2).contiguous()
        g3 = g1.matmul(g2)
        g = self.softmax(g3)
        return g
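Finally, a minimal end-to-end sketch of how the class above can be driven. This is an assumption-laden example, not part of the original code: it assumes a CUDA device is available (the constructor calls .cuda() on the one-hot buffers), that args only needs the train and batch_size fields read by __init__, that the input batch size matches args.batch_size, and that 60 classes, a 'NTU' dataset string, and 20-frame segments are placeholder values.

from types import SimpleNamespace
import torch

# Placeholder settings: 60 classes, 20-frame segments, batch of 8 (all assumed).
args = SimpleNamespace(train=1, batch_size=8)
model = SGN(num_classes=60, dataset='NTU', seg=20, args=args, bias=True).cuda()

# Dummy skeleton batch: (batch, frames, joints * xyz) = (8, 20, 75).
x = torch.randn(8, 20, 75).cuda()
logits = model(x)
print(logits.shape)  # torch.Size([8, 60])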