概念

生成网络
尺寸变大(1 → 图片尺寸)。
通道数先变大,再逐渐变小(in → max → …… → 3)。
输出用 Tanh 激活。
输出层不用 BN。
判别网络
尺寸变小(图片尺寸 → 1)。
通道数先逐渐变大,再变小(3 → …… → max → 1)。
输出用 Sigmoid 激活。
输入层不用 BN。
实验(生成卡通人脸)
数据集:96×96 的卡通人脸图片(5 万张)。
网络结构:
- 判别器:卷积 + 标准化(BN)+ 激活(LeakyReLU)+ Sigmoid。
- 生成器:转置卷积 + 标准化(BN)+ 激活(ReLU)+ Tanh。
优化器:Adam(lr=0.0002, betas=(0.5, 0.999))。
损失函数:二元交叉熵(BCELoss)。
输出:
- 判别网络:图片为真的概率。
- 生成网络:图片。
数据集
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os
class MyDataset(Dataset):
    """Image-folder dataset that yields normalised 3-channel tensors.

    Every regular file located directly inside ``path`` is treated as one
    image sample.
    """

    def __init__(self, path):
        """Index the image files found in ``path``.

        Args:
            path: Directory that contains the image files.
        """
        self.path = path
        # Keep regular files only (a sub-directory cannot be opened as an
        # image) and sort them so the index -> file mapping is reproducible.
        self.imgs = sorted(
            name for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))
        )
        # ToTensor scales pixel values to [0, 1]; Normalize with mean 0.5 and
        # std 0.5 then maps every channel to [-1, 1].
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Load image ``index`` and return it as a normalised tensor.

        The image is converted to RGB so that grayscale, palette or RGBA
        files still yield the three channels the transform is configured for.
        """
        # The context manager closes the underlying file once the pixel data
        # has been converted.
        with Image.open(os.path.join(self.path, self.imgs[index])) as img:
            return self.transform(img.convert("RGB"))
网络
import torch
from torch import nn
# Discriminator
class D_Net(nn.Module):
    """Convolutional discriminator ending in a Sigmoid.

    For a 3x96x96 input the spatial size shrinks 96 -> 32 -> 16 -> 8 -> 4 -> 1,
    so the output has shape (N, 1, 1, 1). The first layer has no BatchNorm.
    """

    def __init__(self):
        super().__init__()

        def down(c_in, c_out):
            """Conv (k=4, s=2, p=1) -> BatchNorm -> LeakyReLU(0.2)."""
            return [
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ]

        # Input stage: no normalisation on the raw image.
        layers = [
            nn.Conv2d(3, 64, kernel_size=5, stride=3, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        # Three down-sampling stages that widen the channels.
        for c_in, c_out in ((64, 128), (128, 256), (256, 512)):
            layers.extend(down(c_in, c_out))
        # Output stage: collapse the 4x4 map to a single value in (0, 1).
        layers.append(nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False))
        layers.append(nn.Sigmoid())

        # Layer order is kept so the state_dict keys (conv.0, conv.2, ...)
        # stay the same.
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the convolution stack and return the result."""
        scores = self.conv(x)
        return scores
# Generator
class G_Net(nn.Module):
    """Transposed-convolution generator ending in a Tanh.

    For a (N, 128, 1, 1) input the spatial size grows
    1 -> 4 -> 8 -> 16 -> 32 -> 96, so the output has shape (N, 3, 96, 96).
    The output layer has no BatchNorm.
    """

    def __init__(self):
        super().__init__()

        def up(c_in, c_out, stride, padding):
            """ConvTranspose (k=4) -> BatchNorm -> ReLU."""
            return [
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=stride, padding=padding, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        # (in_channels, out_channels, stride, padding) for each hidden stage.
        stages = (
            (128, 512, 1, 0),
            (512, 256, 2, 1),
            (256, 128, 2, 1),
            (128, 64, 2, 1),
        )
        layers = []
        for spec in stages:
            layers.extend(up(*spec))
        # Output stage: 3 image channels squashed to [-1, 1] by Tanh.
        layers.append(nn.ConvTranspose2d(64, 3, kernel_size=5, stride=3, padding=1, bias=False))
        layers.append(nn.Tanh())

        # Layer order is kept so the state_dict keys (convT.0, convT.1, ...)
        # stay the same.
        self.convT = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the transposed-convolution stack and return the result."""
        image = self.convT(x)
        return image
训练
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.utils import save_image
import os
from dataset import MyDataset
from net import D_Net, G_Net
batch_size = 100
data_path = r"D:\data\faces"
img_path = r"img"
net_path = r"modules"
d_net_path = r"modules/d_net.pth"
g_net_path = r"modules/g_net.pth"

# Create the checkpoint, data and sample-image folders if they are missing.
for folder in (net_path, data_path, img_path):
    os.makedirs(folder, exist_ok=True)

# Dataset
dataset = MyDataset(data_path)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if __name__ == '__main__':
    # Build the networks and resume from saved checkpoints when they exist.
    d_net = D_Net().to(device)
    g_net = G_Net().to(device)
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine can still be loaded on CPU.
    if os.path.isfile(d_net_path):
        d_net.load_state_dict(torch.load(d_net_path, map_location=device))
    if os.path.isfile(g_net_path):
        g_net.load_state_dict(torch.load(g_net_path, map_location=device))

    d_opt = torch.optim.Adam(d_net.parameters(), lr=0.0002, betas=(0.5, 0.999))
    g_opt = torch.optim.Adam(g_net.parameters(), lr=0.0002, betas=(0.5, 0.999))
    loss_fn = nn.BCELoss()

    d_net.train()
    g_net.train()

    # The loader uses drop_last=True, so every batch holds exactly batch_size
    # samples and the constant targets can be built once, outside the loop.
    real_label = torch.ones(batch_size, 1, 1, 1, device=device)
    fake_label = torch.zeros(batch_size, 1, 1, 1, device=device)

    # No stopping condition: training runs until the process is interrupted.
    while True:
        for i, x in enumerate(dataloader):
            # ---- Train the discriminator ----
            real_img = x.to(device)
            real_out = d_net(real_img)
            # Real loss: real images against the 1 label.
            real_loss = loss_fn(real_out, real_label)

            z = torch.randn(batch_size, 128, 1, 1).to(device)
            fake_img = g_net(z)
            # detach() stops this backward pass at the generator output; only
            # the discriminator is updated in this step, so generator
            # gradients would be discarded anyway.
            fake_out = d_net(fake_img.detach())
            # Fake loss: generated images against the 0 label.
            fake_loss = loss_fn(fake_out, fake_label)

            d_loss = real_loss + fake_loss
            d_opt.zero_grad()
            d_loss.backward()
            d_opt.step()

            # ---- Train the generator ----
            z = torch.randn(batch_size, 128, 1, 1).to(device)
            g_img = g_net(z)
            g_out = d_net(g_img)
            # Generator loss: generated images against the 1 label.
            g_loss = loss_fn(g_out, real_label)
            g_opt.zero_grad()
            g_loss.backward()
            g_opt.step()

            # NOTE(review): the original indentation was lost; checkpointing
            # and sample dumps are assumed to belong to this periodic branch.
            if i % 50 == 0:
                print("i:{},d_loss:{:.5},g_loss:{:.5}".format(i, d_loss.item(), g_loss.item()))
                torch.save(d_net.state_dict(), d_net_path)
                torch.save(g_net.state_dict(), g_net_path)
                # File names depend on the batch index only, so each pass over
                # the data overwrites the samples of the previous pass.
                save_image(real_img, "{}/{}_real.jpg".format(img_path, i), nrow=10, padding=2, normalize=True, scale_each=True)
                save_image(fake_img, "{}/{}_fake.jpg".format(img_path, i), nrow=10, padding=2, normalize=True, scale_each=True)
测试
import torch
from torchvision.utils import save_image
import os
from net import G_Net
batch_size = 100
net_path = r"modules/g_net.pth"
result_path = r"result"

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if __name__ == '__main__':
    # Make sure the output folder exists before images are written into it.
    os.makedirs(result_path, exist_ok=True)

    # Load the generator; map_location remaps the stored tensors onto the
    # current device so a GPU-saved checkpoint also loads on CPU.
    net = G_Net().to(device)
    if os.path.isfile(net_path):
        net.load_state_dict(torch.load(net_path, map_location=device))
    # eval() makes BatchNorm use its running statistics instead of batch
    # statistics.
    net.eval()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for i in range(10):
            # The latent batch must live on the same device as the network.
            z = torch.randn(batch_size, 128, 1, 1).to(device)
            img = net(z)
            save_image(img, "{}/{}.jpg".format(result_path, i), nrow=10, padding=2, normalize=True, scale_each=True)
版权声明:本文为afsya原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。