Concepts
High accuracy: the smallest detectable object is 12×12 pixels.
Low compute requirements: most laptops can train it, and phones can run it.
Algorithm ideas
- Replace one pass of a large convolution kernel with several passes of a small kernel.
- Treat the single-object detection network as a convolution kernel, scale the image with an image pyramid (down to a minimum side length of 12), and scan it repeatedly to detect multiple objects.
- IOU measures the similarity of two boxes: union IOU for overlapping boxes, min IOU for nested boxes (see the numeric sketch after this list).
- NMS (non-maximum suppression): sort the boxes by confidence in descending order and keep the highest-confidence box; compute the IOU between this box and the rest, keeping only boxes whose similarity falls below the threshold; then keep the highest-confidence box among the survivors and repeat the IOU comparison until no boxes remain.
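To make the two IOU variants concrete, here is a minimal numeric sketch (the boxes are made up for illustration):

import numpy as np

# One 10×10 box and one 4×4 box nested inside it, in [x1, y1, x2, y2] form
a = np.array([0, 0, 10, 10])
b = np.array([2, 2, 6, 6])

# Intersection rectangle and its area
ix1, iy1 = np.maximum(a[:2], b[:2])
ix2, iy2 = np.minimum(a[2:], b[2:])
inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)   # 16

area_a = (a[2] - a[0]) * (a[3] - a[1])          # 100
area_b = (b[2] - b[0]) * (b[3] - b[1])          # 16

print(inter / (area_a + area_b - inter))  # union IOU: 16/100 = 0.16
print(inter / min(area_a, area_b))        # min IOU: 16/16 = 1.0, flags the nested box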
Data processing
Use the CelebA dataset to generate positive, part, and negative samples. (positive : part : negative = 1 : 1 : 3, because most of the area in an image is background, i.e. negative.)
Label design
Label format: path class x1-offset y1-offset x2-offset y2-offset
positive/0.jpg 1 0.022935779816513763 0.4334862385321101 -0.013761467889908258 -0.0022935779816513763
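The offsets make the label independent of where the crop was taken, so the ground-truth box can be recovered from any proposal. A minimal sketch of the encode/decode round trip (the box values are made up):

import numpy as np

gt = np.array([95.0, 71.0, 226.0, 313.0])    # ground-truth box [x1, y1, x2, y2]
prop = np.array([90.0, 80.0, 240.0, 230.0])  # square proposal (crop) box, side 150
side = prop[2] - prop[0]

offsets = (gt - prop) / side                 # encode: (ground truth - proposal) / side
recovered = prop + offsets * side            # decode: proposal + offsets * side
print(np.allclose(recovered, gt))            # True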
Augmentation
The CelebA dataset consists mostly of European celebrity faces; to detect ordinary Asian faces, additional samples are needed.
import os
from PIL import Image
import numpy as np
import utils

img_path = r"D:\class\CelebA_test\img"
label_path = r"D:\class\CelebA_test\label.txt"
save_path = r"D:\class\CelebA_"

# 3 sizes (12, 24, 48), 3 sample types (positive, negative, part)
for img_size in [12, 24, 48]:
    img_path_positive = os.path.join(save_path, str(img_size), "positive")
    img_path_part = os.path.join(save_path, str(img_size), "part")
    img_path_negative = os.path.join(save_path, str(img_size), "negative")
    for each in [img_path_positive, img_path_part, img_path_negative]:
        if not os.path.exists(each):
            os.makedirs(each)
    # 3 label files (positive, negative, part)
    label_path_positive = os.path.join(save_path, str(img_size), "positive.txt")
    label_path_part = os.path.join(save_path, str(img_size), "part.txt")
    label_path_negative = os.path.join(save_path, str(img_size), "negative.txt")
    label_positive = open(label_path_positive, "w")
    label_part = open(label_path_part, "w")
    label_negative = open(label_path_negative, "w")
    positive_count = 0
    part_count = 0
    negative_count = 0
    # Read the labels
    for i, info in enumerate(open(label_path).readlines()):
        if i < 2:
            continue
        # 000001.jpg 95 71 226 313
        info = info.strip().split(" ")
        info = list(filter(bool, info))
        # Open the image
        with Image.open(os.path.join(img_path, info[0])) as img:
            x1 = float(info[1])
            y1 = float(info[2])
            w = float(info[3])
            h = float(info[4])
            # Skip degenerate samples
            if min(w, h) < 40 or x1 < 0 or y1 < 0:
                continue
            x2 = x1 + w
            y2 = y1 + h
            r_box = np.array([[x1, y1, x2, y2]])
            # Center of the ground-truth box
            cx = x1 + w / 2
            cy = y1 + h / 2
            # Generate 5 samples per size
            for _ in range(5):
                # Proposal center: ground-truth center shifted by up to 0.2 of the box size
                cx_ = cx + np.random.randint(int(-w * 0.2), int(w * 0.2))
                cy_ = cy + np.random.randint(int(-h * 0.2), int(h * 0.2))
                # Proposal side length: from 0.8 of the shorter side to 1.2 of the longer side
                side = np.random.randint(int(min(w, h) * 0.8), int(max(w, h) * 1.2))
                # Proposal coordinates; clamp the top-left corner inside the image
                x1_ = np.maximum(0, cx_ - side / 2)
                y1_ = np.maximum(0, cy_ - side / 2)
                x2_ = x1_ + side
                y2_ = y1_ + side
                # Crop the proposal and resize it to the target size
                s_box = np.array([x1_, y1_, x2_, y2_])
                s_img = img.crop(s_box)
                s_img = s_img.resize((img_size, img_size))
                # Label of the new sample: offset of the ground-truth box relative to the proposal,
                # i.e. (ground-truth coordinate - proposal coordinate) / proposal side length
                x1_offset = (x1 - x1_) / side
                y1_offset = (y1 - y1_) / side
                x2_offset = (x2 - x2_) / side
                y2_offset = (y2 - y2_) / side
                # IOU between the proposal and the ground-truth box
                iou = utils.iou(s_box, r_box)[0]
                if iou > 0.6:
                    # Positive sample
                    label_positive.write("positive/{}.jpg 1 {} {} {} {}\n".format(positive_count, x1_offset, y1_offset, x2_offset, y2_offset))
                    s_img.save(os.path.join(img_path_positive, "{}.jpg".format(positive_count)))
                    positive_count += 1
                elif iou > 0.4:
                    # Part sample
                    label_part.write("part/{}.jpg 2 {} {} {} {}\n".format(part_count, x1_offset, y1_offset, x2_offset, y2_offset))
                    s_img.save(os.path.join(img_path_part, "{}.jpg".format(part_count)))
                    part_count += 1
                elif iou < 0.3:
                    # Negative sample
                    label_negative.write("negative/{}.jpg 0 0 0 0 0\n".format(negative_count))
                    s_img.save(os.path.join(img_path_negative, "{}.jpg".format(negative_count)))
                    negative_count += 1
            # Samples generated by shifting the center are almost never negative,
            # so also crop regions outside the ground-truth box as negative samples
            img_w, img_h = img.size
            # Left
            if img_size < min(x1, y1):
                side = np.random.randint(img_size, int(min(x1, y1)))
                x1_ = np.random.randint(0, int(x1) - side)
                y1_ = np.random.randint(0, img_h - side)
                x2_ = x1_ + side
                y2_ = y1_ + side
                cut_box = np.array([x1_, y1_, x2_, y2_])
                crop_img = img.crop(cut_box)
                crop_img = crop_img.resize((img_size, img_size))
                label_negative.write("negative/{}.jpg 0 0 0 0 0\n".format(negative_count))
                label_negative.flush()
                crop_img.save(os.path.join(img_path_negative, "{}.jpg".format(negative_count)))
                negative_count += 1
            # Top
            if img_size < min(x1, y1):
                side = np.random.randint(img_size, int(min(x1, y1)))
                x1_ = np.random.randint(0, img_w - side)
                y1_ = np.random.randint(0, int(y1) - side)
                x2_ = x1_ + side
                y2_ = y1_ + side
                cut_box = np.array([x1_, y1_, x2_, y2_])
                crop_img = img.crop(cut_box)
                crop_img = crop_img.resize((img_size, img_size))
                label_negative.write("negative/{}.jpg 0 0 0 0 0\n".format(negative_count))
                label_negative.flush()
                crop_img.save(os.path.join(img_path_negative, "{}.jpg".format(negative_count)))
                negative_count += 1
            # Bottom
            if img_size < min(img_w - x2, img_h - y2):
                side = np.random.randint(img_size, int(min(img_w - x2, img_h - y2)))
                x1_ = np.random.randint(0, img_w - side)
                y1_ = np.random.randint(int(y2), img_h - side)
                x2_ = x1_ + side
                y2_ = y1_ + side
                cut_box = np.array([x1_, y1_, x2_, y2_])
                crop_img = img.crop(cut_box)
                crop_img = crop_img.resize((img_size, img_size))
                label_negative.write("negative/{}.jpg 0 0 0 0 0\n".format(negative_count))
                label_negative.flush()
                crop_img.save(os.path.join(img_path_negative, "{}.jpg".format(negative_count)))
                negative_count += 1
            # Right
            if img_size < min(img_w - x2, img_h - y2):
                side = np.random.randint(img_size, int(min(img_w - x2, img_h - y2)))
                x1_ = np.random.randint(int(x2), img_w - side)
                y1_ = np.random.randint(0, img_h - side)
                x2_ = x1_ + side
                y2_ = y1_ + side
                cut_box = np.array([x1_, y1_, x2_, y2_])
                crop_img = img.crop(cut_box)
                crop_img = crop_img.resize((img_size, img_size))
                label_negative.write("negative/{}.jpg 0 0 0 0 0\n".format(negative_count))
                label_negative.flush()
                crop_img.save(os.path.join(img_path_negative, "{}.jpg".format(negative_count)))
                negative_count += 1
    # Close the label files for this size
    label_positive.close()
    label_part.close()
    label_negative.close()
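A small sanity check of the generated data (a sketch, assuming the directory layout written by the script above):

import os

save_path = r"D:\class\CelebA_"
for img_size in [12, 24, 48]:
    counts = {}
    for name in ["positive", "part", "negative"]:
        with open(os.path.join(save_path, str(img_size), name + ".txt")) as f:
            counts[name] = len(f.readlines())
    # Compare against the target ratio positive : part : negative = 1 : 1 : 3
    print(img_size, counts)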
Dataset
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import os
from PIL import Image

class MyDataset(Dataset):
    def __init__(self, path):
        self.path = path
        self.transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
        self.data = []
        self.data.extend(open(os.path.join(path, "positive.txt")).readlines())
        self.data.extend(open(os.path.join(path, "part.txt")).readlines())
        self.data.extend(open(os.path.join(path, "negative.txt")).readlines())

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        info = self.data[index].strip().split(" ")
        img = Image.open(os.path.join(self.path, info[0]))
        img = self.transform(img)
        label_c = torch.Tensor([int(info[1])])
        label_offset = torch.Tensor([float(info[2]), float(info[3]), float(info[4]), float(info[5])])
        return img, label_c, label_offset
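A quick way to sanity-check the dataset (the path is an assumption matching the augmentation script above):

from torch.utils.data import DataLoader

dataset = MyDataset(r"D:\class\CelebA_\12")
loader = DataLoader(dataset, batch_size=4, shuffle=True)
img, label_c, label_offset = next(iter(loader))
print(img.shape, label_c.shape, label_offset.shape)  # [4, 3, 12, 12], [4, 1], [4, 4]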
Network
Treat the single-object detection network as one convolution kernel (several passes of a small kernel are equivalent to one pass of a large kernel).
A 12×12 kernel can only frame very small objects. Shrinking the original image with an image pyramid while keeping the kernel size fixed effectively enlarges the kernel, so it can frame larger objects.
Output: confidence [n, 1, 1, 1], offsets [n, 4, 1, 1]
P-Net (rough screening of candidate boxes)
Step 1: scan the image with P-Net, keep the boxes whose confidence exceeds the threshold, then map them back onto the original image using the offsets and the scale factor.
Step 2: shrink the image with the image pyramid and run step 1 again (see the pyramid sketch after these steps).
Step 3: repeat step 2 until the shorter side of the image is 12 or less.
Step 4: deduplicate with NMS (union IOU).
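Steps 2 and 3 amount to an image-pyramid loop. A minimal sketch (the 0.7 shrink factor matches pnet_detect in the detection code below):

from PIL import Image

def pyramid(img, factor=0.7, min_side=12):
    # Yield (scaled image, cumulative scale) until the shorter side is min_side or less
    scale = 1.0
    while min(img.size) > min_side:
        yield img, scale
        scale *= factor
        img = img.resize((int(img.size[0] * factor), int(img.size[1] * factor)))

# Usage: for scaled_img, scale in pyramid(Image.open("face.jpg")): ...  ("face.jpg" is a placeholder)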
R-Net (further filtering of candidate boxes)
Step 1: expand the boxes from P-Net into squares, crop them from the original image, and resize them to 24×24.
Step 2: run R-Net on the P-Net results, keep the boxes whose confidence exceeds the threshold, and map them back onto the original image using the offsets.
Step 3: deduplicate with NMS (union IOU).
O-Net (final boxes)
Step 1: expand the boxes from R-Net into squares, crop them from the original image, and resize them to 48×48.
Step 2: run O-Net on the R-Net results, keep the boxes whose confidence exceeds the threshold, and map them back onto the original image using the offsets.
Step 3: deduplicate with NMS (min IOU).
import torch
from torch import nn

# in : 12*12*3
# out: 1*1*1 (confidence), 1*1*4 (position)
class P_Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Feature extraction
        self.conv_feature = nn.Sequential(
            # (12-3+2)/1 + 1 = 12
            nn.Conv2d(3, 10, 3, 1, 1), nn.PReLU(),
            # (12-3)/2 + 1 = 5
            nn.MaxPool2d(3, 2),
            # (5-3)/1 + 1 = 3
            nn.Conv2d(10, 16, 3, 1), nn.PReLU(),
            # (3-3)/1 + 1 = 1
            nn.Conv2d(16, 32, 3, 1), nn.PReLU()
        )
        # Confidence output
        self.conv_c = nn.Conv2d(32, 1, 1, 1)
        # Position output
        self.conv_offset = nn.Conv2d(32, 4, 1, 1)

    def forward(self, x):
        feature = self.conv_feature(x)
        out_c = torch.sigmoid(self.conv_c(feature))
        out_offset = self.conv_offset(feature)
        return out_c, out_offset

# in : 24*24*3
# out: 1 (confidence), 4 (position)
class R_Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Feature extraction
        self.conv_feature = nn.Sequential(
            # (24-3+2)/1 + 1 = 24
            nn.Conv2d(3, 28, 3, 1, 1), nn.PReLU(),
            # (24-3)/2 + 1 = 11
            nn.MaxPool2d(3, 2),
            # (11-3)/1 + 1 = 9
            nn.Conv2d(28, 48, 3, 1), nn.PReLU(),
            # (9-3)/2 + 1 = 4
            nn.MaxPool2d(3, 2),
            # (4-2)/1 + 1 = 3
            nn.Conv2d(48, 64, 2, 1), nn.PReLU()
        )
        self.mlp_feature = nn.Sequential(
            nn.Linear(64 * 3 * 3, 128), nn.PReLU()
        )
        # Confidence output
        self.mlp_c = nn.Linear(128, 1)
        # Position output
        self.mlp_offset = nn.Linear(128, 4)

    def forward(self, x):
        feature = self.conv_feature(x)
        feature = feature.reshape(-1, 64 * 3 * 3)
        feature = self.mlp_feature(feature)
        out_c = torch.sigmoid(self.mlp_c(feature))
        out_offset = self.mlp_offset(feature)
        return out_c, out_offset

# in : 48*48*3
# out: 1 (confidence), 4 (position)
class O_Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Feature extraction
        self.conv_feature = nn.Sequential(
            # (48-3+2)/1 + 1 = 48
            nn.Conv2d(3, 32, 3, 1, 1), nn.PReLU(),
            # (48-3)/2 + 1 = 23
            nn.MaxPool2d(3, 2),
            # (23-3)/1 + 1 = 21
            nn.Conv2d(32, 64, 3, 1), nn.PReLU(),
            # (21-3)/2 + 1 = 10
            nn.MaxPool2d(3, 2),
            # (10-3)/1 + 1 = 8
            nn.Conv2d(64, 64, 3, 1), nn.PReLU(),
            # (8-2)/2 + 1 = 4
            nn.MaxPool2d(2),
            # (4-2)/1 + 1 = 3
            nn.Conv2d(64, 128, 2, 1), nn.PReLU()
        )
        self.mlp_feature = nn.Sequential(
            nn.Linear(128 * 3 * 3, 256), nn.PReLU()
        )
        # Confidence output
        self.mlp_c = nn.Linear(256, 1)
        # Position output
        self.mlp_offset = nn.Linear(256, 4)

    def forward(self, x):
        feature = self.conv_feature(x)
        feature = feature.reshape(-1, 128 * 3 * 3)
        feature = self.mlp_feature(feature)
        out_c = torch.sigmoid(self.mlp_c(feature))
        out_offset = self.mlp_offset(feature)
        return out_c, out_offset
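A quick check of the three networks' output shapes (a sketch; batch size 2 is arbitrary):

import torch

for model, size in [(P_Net(), 12), (R_Net(), 24), (O_Net(), 48)]:
    c, offset = model(torch.randn(2, 3, size, size))
    print(type(model).__name__, c.shape, offset.shape)
# P_Net: [2, 1, 1, 1] and [2, 4, 1, 1]; R_Net and O_Net: [2, 1] and [2, 4]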
Training
import torch
from torch import nn
from torch.utils.data import DataLoader
import os
from dataset import MyDataset
from net import P_Net, R_Net, O_Net

class Trainer():
    def __init__(self, net, net_path, data_path):
        self.net_path = net_path
        # Data
        dataset = MyDataset(data_path)
        self.dataloader = DataLoader(dataset, batch_size=512, shuffle=True, drop_last=True)
        # Network
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.net = net.to(self.device)
        if os.path.exists(net_path):
            self.net.load_state_dict(torch.load(net_path, map_location=self.device))
        self.opt = torch.optim.Adam(self.net.parameters())
        # Losses
        self.loss_fn_c = nn.BCELoss()
        self.loss_fn_offset = nn.MSELoss()

    def train(self):
        while True:
            for i, (img, label_c, label_offset) in enumerate(self.dataloader):
                img = img.to(self.device)
                label_c = label_c.to(self.device)
                label_offset = label_offset.to(self.device)
                out_c, out_offset = self.net(img)
                out_c = out_c.reshape(-1, 1)
                out_offset = out_offset.reshape(-1, 4)
                # Confidence loss: positive and negative samples only (class < 2)
                mask = torch.lt(label_c, 2)
                out_c_loss = torch.masked_select(out_c, mask)
                label_c_loss = torch.masked_select(label_c, mask)
                loss_c = self.loss_fn_c(out_c_loss, label_c_loss)
                # Offset loss: positive and part samples only (class > 0)
                mask = torch.gt(label_c, 0)
                index = torch.nonzero(mask)[:, 0]
                out_offset_loss = out_offset[index]
                label_offset_loss = label_offset[index]
                loss_offset = self.loss_fn_offset(out_offset_loss, label_offset_loss)
                loss = loss_c + loss_offset
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()
                print("i:{},loss:{:.5},loss_c:{:.5},loss_offset:{:.5}".format(i, loss.item(), loss_c.item(), loss_offset.item()))
                torch.save(self.net.state_dict(), self.net_path)

if __name__ == '__main__':
    net = P_Net()
    trainer = Trainer(net, r"modules/p_net.pth", r"D:\class\CelebA_\12")
    trainer.train()
    net = R_Net()
    trainer = Trainer(net, r"modules/r_net.pth", r"D:\class\CelebA_\24")
    trainer.train()
    net = O_Net()
    trainer = Trainer(net, r"modules/o_net.pth", r"D:\class\CelebA_\48")
    trainer.train()
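Note that train() as written loops forever, so only the first trainer.train() call in __main__ ever runs; in practice, train one network per run. One way to bound it, shown as a sketch (the epoch count is an arbitrary assumption; the inner body is unchanged):

def train(self, epochs=10):
    for epoch in range(epochs):
        for i, (img, label_c, label_offset) in enumerate(self.dataloader):
            ...  # same forward/loss/backward/step body as above
        # Save once per epoch instead of once per batch
        torch.save(self.net.state_dict(), self.net_path)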
Back-calculating coordinates
Step 1: select the points that may contain an object (from the confidence map [h, w]).
Step 2: find the proposal box:
- x = (column index * stride - 1) / scale
- y = (row index * stride - 1) / scale
Step 3: apply the offsets [4, h, w]:
- x' = x + w·offset (w is the proposal box width)
- y' = y + h·offset (h is the proposal box height)
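For example, with P-Net's values (stride 2, side length 12, as in the detection code below), a point at row 3, column 5 of a confidence map produced at scale 0.7 maps back as follows:

stride, side_len, scale = 2, 12, 0.7
row, col = 3, 5

x1 = (col * stride - 1) / scale             # (10 - 1) / 0.7 ≈ 12.86
y1 = (row * stride - 1) / scale             # (6 - 1) / 0.7 ≈ 7.14
x2 = (col * stride + side_len - 1) / scale  # (10 + 11) / 0.7 = 30.0
y2 = (row * stride + side_len - 1) / scale  # (6 + 11) / 0.7 ≈ 24.29
print(x1, y1, x2, y2)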
import numpy as np

# IOU: similarity between boxes
# box: a single box
# boxes: a set of boxes
# is_min: use min IOU (intersection over the smaller box) instead of union IOU
def iou(box, boxes, is_min=False):
    # Areas
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    # Intersection coordinates
    x1 = np.maximum(box[0], boxes[:, 0])
    y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2])
    y2 = np.minimum(box[3], boxes[:, 3])
    # Intersection area
    w = np.maximum(0, x2 - x1)
    h = np.maximum(0, y2 - y1)
    inter = w * h
    # Min IOU: intersection / smaller box
    if is_min:
        return np.true_divide(inter, np.minimum(box_area, boxes_area))
    # Union IOU: intersection / union
    return np.true_divide(inter, box_area + boxes_area - inter)

# NMS: deduplication
# boxes: a set of boxes
# thresh: a small threshold keeps fewer boxes (suits distant faces); a large threshold keeps more (suits close faces)
# is_min: use min IOU
def nms(boxes, thresh, is_min=False):
    # No boxes at all
    if boxes.shape[0] == 0:
        return np.array([])
    result = []
    # Sort the boxes by confidence, descending
    _boxes = boxes[(-boxes[:, 4]).argsort()]
    while _boxes.shape[0] > 1:
        # Keep the first (highest-confidence) box
        a_box = _boxes[0]
        result.append(a_box)
        # The remaining boxes
        b_box = _boxes[1:]
        # IOU between the kept box and the rest; keep boxes whose similarity is below the threshold
        index = np.where(iou(a_box, b_box, is_min) < thresh)
        # Continue with the surviving boxes
        _boxes = b_box[index]
    # Keep the last remaining box
    if _boxes.shape[0] > 0:
        result.append(_boxes[0])
    return np.stack(result)

# Expand boxes into squares
def convert_to_square(box):
    if box.shape[0] == 0:
        return np.array([])
    square_box = box.copy()
    w = box[:, 2] - box[:, 0]
    h = box[:, 3] - box[:, 1]
    # Use the longer side
    max_side = np.maximum(w, h)
    square_box[:, 0] = box[:, 0] + w * 0.5 - max_side * 0.5
    square_box[:, 1] = box[:, 1] + h * 0.5 - max_side * 0.5
    square_box[:, 2] = square_box[:, 0] + max_side
    square_box[:, 3] = square_box[:, 1] + max_side
    return square_box

# Quick sanity check
if __name__ == '__main__':
    boxes = np.array([[1, 1, 10, 10, 6], [1, 1, 11, 11, 9], [2, 4, 6, 8, 3], [3, 5, 7, 9, 5]])
    print(nms(boxes, 0.3))
    box = np.array([[3, 3, 5, 6]])
    print(convert_to_square(box))
Detection
import torch
from PIL import Image
from PIL import ImageDraw
import numpy as np
import utils
import net
from torchvision import transforms
import time
import os

# Detection pipeline: image → scaling → P-Net → R-Net → O-Net
# Network thresholds
p_cls = 0.6
p_nms = 0.5
r_cls = 0.6
r_nms = 0.5
o_cls = 0.3
o_nms = 0.5

class Detector:
    def __init__(self, pnet_param="modules/p_net.pth", rnet_param="modules/r_net.pth", onet_param="modules/o_net.pth"):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.pnet = net.P_Net().to(self.device)
        self.rnet = net.R_Net().to(self.device)
        self.onet = net.O_Net().to(self.device)
        self.pnet.load_state_dict(torch.load(pnet_param, map_location=self.device))
        self.rnet.load_state_dict(torch.load(rnet_param, map_location=self.device))
        self.onet.load_state_dict(torch.load(onet_param, map_location=self.device))
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()
        # Reshape (HWC → CHW), scale to [0, 1], convert to a tensor;
        # Normalize matches the training transform in dataset.py
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])

    def detect(self, image):
        # P-Net detection
        start_time = time.time()
        pnet_boxes = self.pnet_detect(image)
        if pnet_boxes.shape[0] == 0:
            return np.array([])
        end_time = time.time()
        t_pnet = end_time - start_time
        # R-Net detection
        start_time = time.time()
        rnet_boxes = self.rnet_detect(image, pnet_boxes)
        if rnet_boxes.shape[0] == 0:
            return np.array([])
        end_time = time.time()
        t_rnet = end_time - start_time
        # O-Net detection
        start_time = time.time()
        onet_boxes = self.onet_detect(image, rnet_boxes)
        if onet_boxes.shape[0] == 0:
            return np.array([])
        end_time = time.time()
        t_onet = end_time - start_time
        # Total detection time
        t_sum = t_pnet + t_rnet + t_onet
        print("total:{0},pnet:{1},rnet:{2},onet:{3}".format(t_sum, t_pnet, t_rnet, t_onet))
        return onet_boxes

    # P-Net detection (fully convolutional, so it accepts images of any shape)
    def pnet_detect(self, img):
        boxes = []
        w, h = img.size
        # Shorter side of the image (used as the scaling stop condition)
        min_side_len = min(w, h)
        # Scale factor
        scale = 1
        # Stop when the shorter side is 12 or less
        while min_side_len > 12:
            img_data = self.transform(img).to(self.device)
            # [3,H,W] → [1,3,H,W]
            img_data.unsqueeze_(0)
            _cls, _offest = self.pnet(img_data)
            # Confidence: [H,W]
            cls = _cls[0][0].cpu().data
            # Offsets: [4,H,W]
            offest = _offest[0].cpu().data
            # Indices where confidence exceeds the threshold (if no face is detected, lower the threshold)
            idxs = torch.nonzero(torch.gt(cls, p_cls))
            # idx[0]: row index, idx[1]: column index
            for idx in idxs:
                # Map the box back onto the original image
                boxes.append(self.__box(idx, offest, cls[idx[0], idx[1]], scale))
            # Scale the image down
            scale *= 0.7
            _w = int(w * scale)
            _h = int(h * scale)
            img = img.resize((_w, _h))
            # Recompute the shorter side
            min_side_len = min(_w, _h)
        return utils.nms(np.array(boxes), p_nms)

    # Feature back-calculation: map a feature-map position back to a box on the original image
    def __box(self, start_index, offset, cls, scale, stride=2, side_len=12):
        # Proposal box on the original image
        _x1 = (start_index[1].float() * stride - 1) / scale
        _y1 = (start_index[0].float() * stride - 1) / scale
        _x2 = (start_index[1].float() * stride + side_len - 1) / scale
        _y2 = (start_index[0].float() * stride + side_len - 1) / scale
        ow = _x2 - _x1
        oh = _y2 - _y1
        # Offsets: [x1,y1,x2,y2]
        _offset = offset[:, start_index[0], start_index[1]]
        x1 = _x1 + ow * _offset[0]
        y1 = _y1 + oh * _offset[1]
        x2 = _x2 + ow * _offset[2]
        y2 = _y2 + oh * _offset[3]
        return [x1, y1, x2, y2, cls]

    # R-Net detection (the P-Net results are R-Net's proposal boxes)
    def rnet_detect(self, image, pnet_boxes):
        _img_dataset = []
        # Expand the P-Net boxes into squares
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        # Crop each square from the original image and resize it to 24*24
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((24, 24))
            img_data = self.transform(img)
            _img_dataset.append(img_data)
        img_dataset = torch.stack(_img_dataset).to(self.device)
        # R-Net detection
        _cls, _offset = self.rnet(img_dataset)
        # Confidence: [n,1]
        cls = _cls.cpu().data.numpy()
        # Offsets: [n,4]
        offset = _offset.cpu().data.numpy()
        boxes = []
        # Indices where confidence exceeds the threshold
        idxs, _ = np.where(cls > r_cls)
        for idx in idxs:
            # The corresponding P-Net box
            _box = _pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            ow = _x2 - _x1
            oh = _y2 - _y1
            # Apply the offsets to the P-Net box
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]
            boxes.append([x1, y1, x2, y2, cls[idx][0]])
        return utils.nms(np.array(boxes), r_nms)

    # O-Net detection
    def onet_detect(self, image, rnet_boxes):
        _img_dataset = []
        # Expand the R-Net boxes into squares
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        # Crop each square from the original image and resize it to 48*48
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            img_data = self.transform(img)
            _img_dataset.append(img_data)
        img_dataset = torch.stack(_img_dataset).to(self.device)
        # O-Net detection
        _cls, _offset = self.onet(img_dataset)
        # Confidence: [n,1]
        cls = _cls.cpu().data.numpy()
        # Offsets: [n,4]
        offset = _offset.cpu().data.numpy()
        boxes = []
        # The final threshold can be raised, e.g. to 0.99998
        idxs, _ = np.where(cls > o_cls)
        for idx in idxs:
            # The corresponding R-Net box
            _box = _rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            ow = _x2 - _x1
            oh = _y2 - _y1
            # Apply the offsets to the R-Net box
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]
            boxes.append([x1, y1, x2, y2, cls[idx][0]])
        # Use min IOU here
        return utils.nms(np.array(boxes), o_nms, is_min=True)

if __name__ == '__main__':
    image_path = r"test_images"
    detector = Detector()
    for i in os.listdir(image_path):
        with Image.open(os.path.join(image_path, i)) as im:
            boxes = detector.detect(im)
            imDraw = ImageDraw.Draw(im)
            # Draw one box per detected face
            for box in boxes:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                imDraw.rectangle((x1, y1, x2, y2), outline='red')
            im.show()
            im.save(str(i) + '.jpg')