Implementing Face Recognition with PyTorch


This article discusses how to implement face recognition with PyTorch. As covered in an earlier article, an MTCNN network can detect faces and draw bounding boxes around them. Face recognition is the next step up: the network must not only know what a face looks like, it must also know whose face it is.

The obvious first idea is to treat face recognition as a classification task, i.e. a discriminative model in which every person is a class. But is it really that simple? Consider the access-control system of a residential compound: you upload a photo of yourself and the gate can recognize your face right away. Did a developer change the network's output layer and retrain the whole model the moment you uploaded your photo?

Of course not; that would be costly and cumbersome. If every person were a class, then with residents constantly moving in and out, the output layer would have to be modified and the network retrained over and over. So how is it actually done?

1. How Face Recognition Is Implemented

Let's briefly walk through how to complete a face recognition task with PyTorch.

Inference for face recognition proceeds in roughly the following steps:

1. Detect faces with a face-detection network such as MTCNN or YOLO, producing a bounding box per face.
2. Crop each face out of the original image and feed it into a face feature extractor, which encodes it into a feature vector.
3. Compare the encoded face one by one against the faces in a registration database and pick the most similar one.

This way the network needs no classification head: we encode each face and turn the multi-class problem into feature comparison, as sketched below.
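A minimal sketch of that lookup step, assuming embeddings are stored as NumPy vectors keyed by person name (the function names, the 0.95 threshold and the database layout are illustrative, not this project's exact API):

```python
import numpy as np

def cosine_similarity(a, b):
    # cosine of the angle between two embedding vectors
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def identify(query, database, threshold=0.95):
    """Return the registered name most similar to `query`, or None.

    `database` maps a person's name to a list of embedding vectors,
    one per registered photo of that person.
    """
    best_name, best_score = None, 0.0
    for name, embeddings in database.items():
        score = max(cosine_similarity(query, e) for e in embeddings)
        if score > best_score:
            best_name, best_score = name, score
    return best_name if best_score > threshold else None
```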

2. Implementation

(1) Preparing the dataset

Two neural networks are involved. The first is the MTCNN face detector, trained on the WIDER FACE dataset, which can be downloaded from the WIDER FACE website.

The face feature extraction network is trained on an open-source Baidu dataset of celebrity faces, containing over a thousand photos of more than 30 celebrities. Dataset link: 明星人脸识别数据集 - 飞桨AI Studio (baidu.com).

For how to build the MTCNN dataset, see my earlier notes. The data-processing code for the feature extraction network is given below.

It covers renaming files and folders, resizing images, cropping out faces detected by MTCNN, and face alignment.

Face alignment uses the tilt angle of the line between the two eyes to rotate the image so that the face sits upright.
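As a quick worked example (the eye coordinates here are made up): eyes detected at (120, 150) and (200, 170) give dy = 20 and dx = 80, so the tilt is atan2(20, 80) · 180/π ≈ 14.0°, and rotating the crop by that angle straightens the face:

```python
import math
from PIL import Image

left_eye, right_eye = (120, 150), (200, 170)   # hypothetical eye landmarks (x, y)
dy = right_eye[1] - left_eye[1]
dx = right_eye[0] - left_eye[0]
angle = math.atan2(dy, dx) * 180.0 / math.pi   # ~14.04 degrees
# In image coordinates y grows downward, so a positive angle means the right
# eye sits lower; PIL's counter-clockwise rotate() then cancels the tilt.
face = Image.open("face.jpg").rotate(angle)    # "face.jpg" is a placeholder
```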

```python
from infer_path import infer_image, draw_face
import argparse
import os
import numpy as np
from PIL import Image
import math

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str, default='./Pytorch-MTCNN-master/infer_models',
                    help='folder containing the PNet, RNet and ONet model files')
parser.add_argument('--database_source_path', type=str, default='dataset/stars/',
                    help='path of the source images')
parser.add_argument('--database_dst_path', type=str, default='dataset/stars_faces',
                    help='path where the cropped faces are saved')
args = parser.parse_args()

if not os.path.exists(args.database_dst_path):
    os.mkdir(args.database_dst_path)


# Pad an image to a square on a black background
def trans_square(image):
    img = image.convert("RGB")
    img = np.array(img, dtype=np.uint8)
    img_h, img_w, img_c = img.shape
    if img_h != img_w:
        long_side = max(img_w, img_h)
        short_side = min(img_w, img_h)
        loc = abs(img_w - img_h) // 2
        img = img.transpose((1, 0, 2)) if img_w < img_h else img
        background = np.zeros((long_side, long_side, img_c), dtype=np.uint8)
        background[loc:loc + short_side] = img[...]
        img = background.transpose((1, 0, 2)) if img_w < img_h else img
    return Image.fromarray(img, "RGB")


# Rename each person's folder (and the images inside) to numeric labels,
# writing a label.txt that maps label -> name
def rename_dir():
    star_list = os.listdir(args.database_source_path)
    label1_txt = open(f"./{args.database_source_path}/label.txt", "w", encoding="utf-8")
    label2_txt = open(f"./{args.database_dst_path}/label.txt", "w", encoding="utf-8")
    counter = 0
    for name in star_list:
        print(name)
        if name.endswith("txt"):
            continue
        strs = f"{counter} {name}\n"
        os.rename(os.path.join(args.database_source_path, name),
                  os.path.join(args.database_source_path, str(counter)))
        img_list = os.listdir(os.path.join(args.database_source_path, str(counter)))
        img_counter = 0
        for img_name in img_list:
            file_end = img_name[img_name.index("."):]
            old_name = os.path.join(args.database_source_path, str(counter), img_name)
            new_name = os.path.join(args.database_source_path, str(counter), str(img_counter) + file_end)
            img = Image.open(old_name)
            square_img = trans_square(img)
            square_img = square_img.resize((800, 800))
            square_img.save(new_name)
            try:
                os.remove(old_name)
            except PermissionError as e:
                print(e)
            img_counter += 1
        label1_txt.write(strs)
        label2_txt.write(strs)
        counter += 1
    label1_txt.close()
    label2_txt.close()


# Face alignment: return the angle (in degrees) between the line through
# the two eye landmarks and the horizontal
def align_face(left_eye, right_eye):
    left_eye = np.array([left_eye])
    right_eye = np.array([right_eye])
    left_eye_center = np.mean(left_eye, axis=0).astype("int")
    right_eye_center = np.mean(right_eye, axis=0).astype("int")
    dy = right_eye_center[1] - left_eye_center[1]
    dx = right_eye_center[0] - left_eye_center[0]
    angle = math.atan2(dy, dx) * 180. / math.pi
    return angle


# Crop the detected face out of every source image
def crop_face():
    stars_name_list = os.listdir(args.database_source_path)
    for i, dir_name in enumerate(stars_name_list):
        if dir_name.endswith("txt"):
            continue
        if not os.path.exists(os.path.join(args.database_dst_path, dir_name)):
            os.makedirs(os.path.join(args.database_dst_path, dir_name))
        img_list = os.listdir(os.path.join(args.database_source_path, dir_name))
        for j, img_name in enumerate(img_list):
            img_path = os.path.join(args.database_source_path, dir_name, img_name)
            print(f"processing: {img_path}")
            dst_img_path = os.path.join(args.database_dst_path, dir_name, img_name)
            boxes_c, landmarks = infer_image(img_path)
            if boxes_c is not None:        # skip photos with no face
                if boxes_c.shape[0] > 1:   # skip group photos
                    continue
                bbox = boxes_c[0, :4]
                corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]
                w = corpbbox[2] - corpbbox[0]
                h = corpbbox[3] - corpbbox[1]
                if 100 < w < 400 or 100 < h < 400:
                    img = Image.open(img_path)
                    crop_img = img.crop(corpbbox)
                    # align the cropped face using the eye landmarks
                    left_eye = [landmarks[0][0], landmarks[0][1]]
                    right_eye = [landmarks[0][2], landmarks[0][3]]
                    angle = align_face(left_eye, right_eye)
                    rota_img = crop_img.rotate(angle)
                    new_img = trans_square(rota_img)
                    new_img = new_img.resize((400, 400))
                    new_img.save(dst_img_path)


if __name__ == "__main__":
    # (1) rename the folders
    # rename_dir()
    # (2) crop the faces and save them into the destination folders
    crop_face()
```

Building the dataset:

```python
from torch.utils.data import Dataset
import os
from PIL import Image
from torchvision.transforms import ToTensor
import torch

Base_path = "./dataset/stars_faces"


class Face_dataset(Dataset):
    def __init__(self) -> None:
        super().__init__()
        self.dataset = []   # image paths
        self.target = []    # numeric labels (the folder names)
        dir_list = os.listdir(Base_path)
        for dir_name in dir_list:
            if dir_name.endswith("txt"):
                continue
            img_list = os.listdir(os.path.join(Base_path, dir_name))
            for img_name in img_list:
                img_path = os.path.join(Base_path, dir_name, img_name)
                self.dataset.append(img_path)
                self.target.append(int(dir_name))

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        img_path = self.dataset[index]
        target = self.target[index]
        img = Image.open(img_path)
        img = ToTensor()(img)
        return img, torch.tensor(target)


if __name__ == "__main__":
    dataset = Face_dataset()
    img, target = dataset[0]
    print(len(dataset))
    print(img.shape)
    print(target)
```

(2) Training the networks

- Tune MTCNN to speed up detection, for example by adjusting the image pyramid's scale factor and the minimum size to which the image is shrunk (a sketch of the pyramid-scale computation appears after the training code below).
- Collect face-detection data and train the face-detection model.
- For feature extraction, DenseNet serves as the backbone; its output layer feeds into an ArcFace layer, and NLLLoss is used as the loss function. ArcFace loss further strengthens the network's ability to extract discriminative features.
- Build a face registration database containing frontal views, profiles and other angles of each face. It can be a plain file or a real database; this article stores the face features in a JSON file.
- Recognition: run MTCNN, crop out the face image, align it, feed it through the model to obtain features, compare the features against the database, and output the result.

The face feature extraction network uses DenseNet as its backbone; every face is encoded into a vector of length 512, and NLLLoss is the loss function. The ArcFace layer below replaces the usual output layer and softmax (the objective it approximates is written out after the training code):

```python
import torch
from torch import nn
from torch.nn.functional import normalize

device = "cuda" if torch.cuda.is_available() else "cpu"


class Arc(nn.Module):
    """ArcFace loss layer: replaces the usual output layer plus softmax."""

    def __init__(self, feat_num, cls_num) -> None:
        super().__init__()
        self.w = nn.Parameter(torch.randn((feat_num, cls_num)))  # [feat_num, cls_num]

    def forward(self, x, m=1, s=10):
        x_norm = normalize(x, p=2, dim=1)        # [N, feat_num]
        w_norm = normalize(self.w, p=2, dim=0)   # [feat_num, cls_num]
        cosa = torch.matmul(x_norm, w_norm) / s  # divided by s to keep gradients stable [N, cls_num]
        a = torch.arccos(cosa)                   # 0-90°; because of the division by ten the full range is never reached [N, cls_num]
        top = torch.exp(s * torch.cos(a + m))    # numerator with the additive angular margin [N, cls_num]
        # top = torch.exp(s * (torch.cos(a) - m))  # alternative: additive cosine margin
        down = top + torch.sum(torch.exp(s * cosa), dim=1, keepdim=True) - torch.exp(s * cosa)
        arc_softmax = top / (down + 1e-10)       # note: the rows do not sum exactly to 1
        return arc_softmax
```

The training script:

```python
from torchvision.models import densenet121
from torchvision.models import DenseNet121_Weights
from dataset import Face_dataset
from torch.utils.data import DataLoader
from torch import optim, nn
import torch
from arcface_loss import Arc
import os

epochs = 200
batch_size = 2
device = "cuda" if torch.cuda.is_available() else "cpu"

model = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)
# replace the classifier so the model outputs a 512-dimensional embedding
# (this must happen before the optimizers are built, or the new layer is never trained)
model.classifier = nn.Sequential(nn.Linear(1024, 512))
if os.path.exists("./extract_face_feature/param/model.pt"):
    print("Loading parameters...")
    model.load_state_dict(torch.load("./extract_face_feature/param/model.pt"))
model.to(device)
model.train()

arc = Arc(512, 32).to(device)  # 512-dim features, 32 classes
optimizer = optim.Adam(model.parameters())
optimizer_arc = optim.Adam(arc.parameters())
loss_fn = nn.NLLLoss(reduction="sum")
train_dataloader = DataLoader(Face_dataset(), shuffle=True, batch_size=batch_size)


def accuracy(out, target):
    out = out.detach().cpu()
    target = target.detach().cpu()
    total_num = len(target)
    y = torch.argmax(out, dim=1)
    correct_num = torch.sum(y == target)
    return correct_num / total_num


print("TRAINING START!".center(80, "="))
for epoch in range(epochs):
    for i, (img, target) in enumerate(train_dataloader):
        img, target = img.to(device), target.to(device)
        y = model(img)
        out = arc(y)
        # NLLLoss expects log-probabilities, so take the log of the ArcFace output
        loss = loss_fn(torch.log(out + 1e-10), target)
        optimizer.zero_grad()
        optimizer_arc.zero_grad()
        loss.backward()
        optimizer_arc.step()
        optimizer.step()
        if i % 300 == 0:
            acc = accuracy(out, target)
            print(f"{device} epoch: {epoch}, batch: {i}, loss: {loss.item()} acc: {acc}")
    torch.save(model.state_dict(), "./extract_face_feature/param/model.pt")
```
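For reference, the standard ArcFace objective that the `Arc` module above approximates is, for a sample of true class $y$, angle $\theta_j$ between the embedding and the $j$-th class weight, margin $m$ and scale $s$:

$$
L = -\log \frac{e^{s\cos(\theta_y + m)}}{e^{s\cos(\theta_y + m)} + \sum_{j \neq y} e^{s\cos\theta_j}}
$$

Note that the module departs from this slightly: it divides the cosine by $s$ before taking arccos (compressing all angles toward 90° for numerical stability) and only then applies the margin and rescales, which is also why its output rows do not sum exactly to 1.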
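On the MTCNN speed-tuning point in the list above: PNet scans the image at a pyramid of scales, shrinking the image by a fixed factor until the smallest detectable face falls below PNet's 12×12 input. A minimal sketch of how those scales are usually computed (the default values here are common choices, not this project's exact settings):

```python
def pyramid_scales(width, height, min_face_size=20, factor=0.79):
    """Scales at which MTCNN's PNet scans the image.

    A factor closer to 1 or a smaller min_face_size yields more pyramid
    levels and therefore slower (but more thorough) detection.
    """
    scales = []
    m = 12.0 / min_face_size          # map the smallest face onto PNet's 12x12 input
    min_side = min(width, height) * m
    while min_side >= 12:
        scales.append(m * factor ** len(scales))
        min_side *= factor
    return scales

# e.g. a 640x480 frame with the defaults gives 14 pyramid levels
print(len(pyramid_scales(640, 480)))
```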

The MTCNN code can be found in this repository: 基于Pytorch实现的MTCNN模型,人脸检测,人脸关键点检测 (gitee.com).

Face registration: the trained face feature extractor encodes photos of each person we want to recognize, and the embeddings are stored in the face registration database. A JSON file serves as the database here; in production, a real database would more likely be used.

```python
# Face registration
from Pytorch_MTCNN_master.infer_path import infer_image, draw_face
from torchvision.models import densenet121
from torchvision.models import DenseNet121_Weights
from torchvision.transforms import ToTensor
import torch
from torch import nn
import os
import numpy as np
from PIL import Image
import math
import json

device = "cpu"  # or: "cuda" if torch.cuda.is_available() else "cpu"

model = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)
model.classifier = nn.Sequential(nn.Linear(1024, 512))
if os.path.exists("./extract_face_feature/param/model.pt"):
    print("Loading parameters...")
    model.load_state_dict(torch.load("./extract_face_feature/param/model.pt"))
model.to(device)
model.eval()


# trans_square() and align_face() are identical to the helpers in the
# data-preparation script above and are repeated in the actual file.


# Encode every photo under `path` and register the embeddings under `name`
def crop_face(path, name):
    img_list = os.listdir(path)
    register_dict = {}
    if not os.path.exists("./register.json") or not os.path.getsize("./register.json"):
        # missing or empty registry: initialise it
        with open("./register.json", "w") as f:
            f.write(json.dumps({name: []}))
    else:
        with open("./register.json", "r") as f:
            register_dict = json.load(f)
    register_dict[name] = []
    for img_name in img_list:
        img_path = os.path.join(path, img_name)
        boxes_c, landmarks = infer_image(img_path)
        if boxes_c is not None:        # skip photos with no face
            if boxes_c.shape[0] > 1:   # skip group photos
                continue
            bbox = boxes_c[0, :4]
            corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]
            w = corpbbox[2] - corpbbox[0]
            h = corpbbox[3] - corpbbox[1]
            if 100 < w < 400 or 100 < h < 400:
                img = Image.open(img_path)
                crop_img = img.crop(corpbbox)
                # align the cropped face using the eye landmarks
                left_eye = [landmarks[0][0], landmarks[0][1]]
                right_eye = [landmarks[0][2], landmarks[0][3]]
                angle = align_face(left_eye, right_eye)
                rota_img = crop_img.rotate(angle)
                new_img = trans_square(rota_img)
                new_img = new_img.resize((400, 400))
                inputs = ToTensor()(new_img).unsqueeze(0).to(device)
                with torch.no_grad():
                    encode = model(inputs)[0].tolist()
                register_dict[name].append(encode)
                if device == "cuda":
                    torch.cuda.empty_cache()
    with open("./register.json", "w") as f:
        json.dump(register_dict, f)
    print("Face registration finished!")


if __name__ == "__main__":
    # read the face photos from the given folder and register them
    crop_face(path="./register_img/WLL", name="WLL")
```

Face recognition inference flow:

(1) Open the camera and grab frames; to keep things smooth, run recognition only on every fifth frame.
(2) Detect the face with MTCNN.
(3) Crop the detected face, align it using the eye positions, and resize it.
(4) Feed the normalized image into the feature extractor to obtain a 512-dimensional feature vector, then read out all the feature vectors in the registration database for comparison.
(5) Compare vectors by cosine similarity and take the largest value, which must also exceed 0.95.

```python
# Real-time face recognition
import json
import torch
from torch import nn
from torchvision.models import densenet121
from torchvision.models import DenseNet121_Weights
from torchvision.transforms import ToTensor
import os
import numpy as np
from PIL import Image
import math
from Pytorch_MTCNN_master.infer_camera import infer_image, draw_face
import cv2

device = "cpu"  # or: "cuda" if torch.cuda.is_available() else "cpu"

model = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)
model.classifier = nn.Sequential(nn.Linear(1024, 512))
if os.path.exists("./extract_face_feature/param/model.pt"):
    print("Loading parameters...")
    model.load_state_dict(torch.load("./extract_face_feature/param/model.pt"))
model.to(device)
model.eval()

# load the face registration database
with open("./register.json", "r") as f:
    face_dataset = json.load(f)


# trans_square() and align_face() are again the same helpers as in the
# data-preparation script above and are repeated in the actual file.


def cos_sim(a, b):
    # cosine similarity between two embedding vectors
    a = np.array(a)
    b = np.array(b)
    a_norm = np.linalg.norm(a)
    b_norm = np.linalg.norm(b)
    return np.dot(a, b) / (a_norm * b_norm)


if __name__ == '__main__':
    cap = cv2.VideoCapture(0)
    counter = 0
    while True:
        ret, img = cap.read()
        if not ret:
            continue
        counter += 1
        # detect face boxes and landmarks in the frame
        boxes_c, landmarks = infer_image(img)
        if boxes_c is not None and counter % 5 == 0:
            draw_face(img=img, boxes_c=boxes_c, landmarks=landmarks)
            if boxes_c.shape[0] > 1:  # skip frames with more than one face
                continue
            bbox = boxes_c[0, :4]
            corpbbox = [int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])]
            w = corpbbox[2] - corpbbox[0]
            h = corpbbox[3] - corpbbox[1]
            if 100 < w < 400 or 100 < h < 400:
                pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                crop_img = pil_img.crop(corpbbox)
                # align the cropped face using the eye landmarks
                left_eye = [landmarks[0][0], landmarks[0][1]]
                right_eye = [landmarks[0][2], landmarks[0][3]]
                angle = align_face(left_eye, right_eye)
                rota_img = crop_img.rotate(angle)
                new_img = trans_square(rota_img)
                new_img = new_img.resize((400, 400))
                inputs = ToTensor()(new_img).unsqueeze(0).to(device)
                with torch.no_grad():
                    encode = model(inputs)[0].tolist()
                # compare against every registered embedding
                pred_name = ""
                pred_score = 0.0
                for key, value in face_dataset.items():
                    sim_list = [cos_sim(x, encode) for x in value]
                    if pred_score < max(sim_list):
                        pred_name = key
                        pred_score = max(sim_list)
                if pred_score > 0.95:
                    print(f"{counter} detect_face: {pred_name}, sim: {pred_score}")
        else:
            cv2.imshow('result', img)
            cv2.waitKey(1)
```
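To sanity-check what registration produced, the JSON file can be inspected directly; each name maps to one 512-dimensional embedding per registered photo. A small hypothetical check, assuming register.json already exists:

```python
import json

with open("./register.json", "r") as f:
    register = json.load(f)

# e.g. prints: WLL 8 photos, 512 dims per embedding
for name, embeddings in register.items():
    print(name, len(embeddings), "photos,", len(embeddings[0]), "dims per embedding")
```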

The project covered in this article is fairly involved, and the write-up does not explain every detail, but the code logic above should serve as a useful reference.


