Recently I have been busy practicing with and researching the InsightFace face recognition model. Last time I promised an update on face detection with mobilenet-ssd, and it has been delayed; I'm sorry. I will also soon write up my own experience of running InsightFace, and upload previous and future projects to my personal GitHub repository. Here are the links:
The VOC dataset contains 20 object categories plus one background class: aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, dining table, dog, horse, motorbike, person, potted plant, sheep, sofa, train, tvmonitor. Sometimes we want to train with the VOC dataset but don't need that many categories. However, the data processing tools provided by caffe-ssd handle all 20 categories by default. If we don't want to rewrite these tools, we can address the problem at its root: directly modify the annotation information in the dataset and delete the unneeded category information.
In fact, the principle of training multi-category detection (N+1) and face detection (1+1) is exactly the same. The only difference lies in generating the corresponding dataset. Here we first paste the code that converts the WIDER FACE dataset into a VOC-format dataset; modify the data path (`rootdir`) to match your own setup:
# -*- coding: utf-8 -*-
"""Convert the WIDER FACE detection dataset into a PASCAL VOC style layout.

For every annotated image that keeps at least one sufficiently large face,
the script writes a JPEG copy to ``<rootdir>/JPEGImages``, a VOC XML file to
``<rootdir>/Annotations`` and one entry to ``<rootdir>/ImageSets/Main``.
"""
import os
import shutil
import sys
from xml.dom.minidom import Document

try:
    import cv2
except ImportError:  # only convertimgset needs OpenCV; writexml does not
    cv2 = None


def _append_element(doc, parent, tag):
    """Create an empty ``<tag>`` element under *parent* and return it."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    return element


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` under *parent* and return the element."""
    element = _append_element(doc, parent, tag)
    element.appendChild(doc.createTextNode(str(text)))
    return element


def writexml(filename, saveimg, bboxes, xmlpath):
    """Write a PASCAL VOC style annotation file for one image.

    Args:
        filename: Image file name stored in the ``<filename>`` element.
        saveimg: Image object exposing ``shape`` as (height, width[, depth]).
            A two-dimensional shape is recorded with a depth of 1.
        bboxes: Iterable of ``(x, y, w, h)`` boxes; each one becomes an
            ``<object>`` named ``face`` with ``xmax = x + w`` and
            ``ymax = y + h``.
        xmlpath: Destination path of the XML file.
    """
    doc = Document()
    annotation = _append_element(doc, doc, 'annotation')

    _append_text_element(doc, annotation, 'folder', 'widerface')
    _append_text_element(doc, annotation, 'filename', filename)

    source = _append_element(doc, annotation, 'source')
    _append_text_element(doc, source, 'database', 'wider face Database')
    _append_text_element(doc, source, 'annotation', 'PASCAL VOC2007')
    _append_text_element(doc, source, 'image', 'flickr')
    _append_text_element(doc, source, 'flickrid', '-1')

    owner = _append_element(doc, annotation, 'owner')
    _append_text_element(doc, owner, 'flickrid', 'yanyu')
    _append_text_element(doc, owner, 'name', 'yanyu')

    shape = saveimg.shape
    # A 2-D shape has no channel axis; record it as a single channel instead
    # of failing on shape[2].
    depth = shape[2] if len(shape) > 2 else 1
    size = _append_element(doc, annotation, 'size')
    _append_text_element(doc, size, 'width', shape[1])
    _append_text_element(doc, size, 'height', shape[0])
    _append_text_element(doc, size, 'depth', depth)

    _append_text_element(doc, annotation, 'segmented', '0')

    for bbox in bboxes:
        x, y, w, h = bbox[:4]
        obj = _append_element(doc, annotation, 'object')
        _append_text_element(doc, obj, 'name', 'face')
        _append_text_element(doc, obj, 'pose', 'Unspecified')
        _append_text_element(doc, obj, 'truncated', '1')
        _append_text_element(doc, obj, 'difficult', '0')
        bndbox = _append_element(doc, obj, 'bndbox')
        _append_text_element(doc, bndbox, 'xmin', x)
        _append_text_element(doc, bndbox, 'ymin', y)
        _append_text_element(doc, bndbox, 'xmax', x + w)
        _append_text_element(doc, bndbox, 'ymax', y + h)

    with open(xmlpath, "w") as f:
        f.write(doc.toprettyxml(indent=''))


rootdir = "E:/public_data/wideface"


def _ensure_dir(path):
    """Create *path* (and missing parents) unless it already exists."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _iter_gt_records(gtfile):
    """Yield ``(filename, box_rows)`` for each record of a ground-truth file.

    A record is a file name line, a face-count line, then one row per face.
    Iteration stops at end of file or at the first blank file name line.
    """
    pending = None
    while True:
        line = pending if pending is not None else gtfile.readline()
        pending = None
        filename = line.strip()
        if not filename:
            return
        count = int(gtfile.readline())
        rows = [gtfile.readline() for _ in range(count)]
        if count == 0:
            # NOTE(review): WIDER FACE ground-truth files appear to carry one
            # all-zero placeholder row after a count of 0. Drop it if present;
            # otherwise the line already read is the next file name.
            lookahead = gtfile.readline()
            if len(lookahead.split()) < 4:
                pending = lookahead
        yield filename, rows


def convertimgset(img_set, min_face_size=40):
    """Convert one WIDER FACE split ("train" or "val") to VOC format.

    Args:
        img_set: Split name used to build the image directory, the
            ground-truth path and the image-set list file name.
        min_face_size: Boxes whose width or height is below this many
            pixels are dropped; images left without any box are skipped.

    Raises:
        ImportError: If OpenCV (``cv2``) is not installed.
    """
    if cv2 is None:
        raise ImportError("OpenCV (cv2) is required by convertimgset")

    imgdir = rootdir + "/WIDER_" + img_set + "/images"
    gtfilepath = rootdir + "/wider_face_split/wider_face_" + img_set + "_bbx_gt.txt"
    listdir = rootdir + "/ImageSets/Main"
    jpegdir = rootdir + "/JPEGImages"
    xmldir = rootdir + "/Annotations"
    # cv2.imwrite only returns False when the target directory is missing,
    # so make sure every output directory exists up front.
    for outdir in (listdir, jpegdir, xmldir):
        _ensure_dir(outdir)

    index = 0
    # Open the ground truth first so a missing annotation file does not
    # leave behind an empty image-set list.
    with open(gtfilepath, 'r') as gtfiles, \
            open(listdir + "/" + img_set + ".txt", 'w') as fwrite:
        for filename, rows in _iter_gt_records(gtfiles):
            imgpath = imgdir + "/" + filename
            print(imgpath)
            img = cv2.imread(imgpath)
            if img is None:
                # imread signals failure with None; the rows of this record
                # are already consumed, so skipping keeps the parser in sync.
                print("cannot read image, skipped")
                continue
            print(len(rows))

            bboxes = []
            for row in rows:
                x, y, w, h = [int(value) for value in row.split()[:4]]
                if w < min_face_size or h < min_face_size:
                    continue
                bboxes.append((x, y, w, h))
            if not bboxes:
                print("no face")
                continue

            # Flatten "<event>/<image>.jpg" into a single file name.
            flatname = filename.replace("/", "_")
            # splitext keeps dots inside the name, so the list entry always
            # matches the saved JPEG and XML file names.
            stem = os.path.splitext(flatname)[0]
            cv2.imwrite("{}/{}".format(jpegdir, flatname), img)
            fwrite.write(stem + "\n")
            writexml(flatname, img, bboxes, "{}/{}.xml".format(xmldir, stem))
            print("success number is ", index)
            index += 1


if __name__ == "__main__":
    img_sets = ["train", "val"]
    for img_set in img_sets:
        convertimgset(img_set)
    # The WIDER "train" split becomes VOC "trainval"; "val" becomes "test".
    shutil.move(rootdir + "/ImageSets/Main/" + "train.txt",
                rootdir + "/ImageSets/Main/" + "trainval.txt")
    shutil.move(rootdir + "/ImageSets/Main/" + "val.txt",
                rootdir + "/ImageSets/Main/" + "test.txt")
In this way, the WIDER FACE dataset is converted into the same dataset format we used previously to train VOC 20-class object detection. The rest of the training process is almost the same: following the usual procedure, create the symbolic links to the data, generate the network, and train the model.
The trained model can be tested with the mobilenet-ssd test code; just change the model path and the test images to your own. I trained on a 1080 Ti for about one night and tested the next day, and the results were good. The model trained with mobilenet-ssd is small and performs very well, which makes it convenient to deploy on mobile devices.