Source code for meerqat.image.face_detection

"""Usage: face_detection.py <dataset> [--save=<root_path> <model_config> --image_key=<image_key> --disable_caching --batch_size=<n>]

Options:
--image_key=<image_key>                 Used to index the dataset item [default: image]
--save=<root_path>                      Root path to save the detected face(s).
                                        The face will actually be saved with the same file stem as the original image.
--disable_caching                       Disables Dataset caching (useless when using save_to_disk), see datasets.set_caching_enabled()
--batch_size=<n>                        Batch size for Dataset.map. The actual batches processed by the model are first grouped by image size. [default: 64]
"""

from docopt import docopt
import json
from pathlib import Path

import numpy as np

from datasets import load_from_disk, set_caching_enabled

from facenet_pytorch import MTCNN as facenet_MTCNN

from ..models.utils import device
from ..data.loading import COMMONS_PATH as IMAGE_PATH, load_image_batch
from ..data.wiki import VALID_ENCODING


class MTCNN(facenet_MTCNN):
    """Simply overrides forward to allow returning bounding boxes and landmarks"""

    def forward(self, img, save_path=None, return_prob=False, return_box=False, return_landmarks=False):
        """Run MTCNN face detection on a PIL image or numpy array.
        This method performs both detection and extraction of faces.

        Arguments:
            img {PIL.Image, np.ndarray, or list} -- A PIL image, np.ndarray, torch.Tensor, or list.

        Keyword Arguments:
            save_path {str} -- An optional save path for the cropped image. Note that when
                self.post_process=True, although the returned tensor is post processed, the saved
                face image is not, so it is a true representation of the face in the input image.
                If `img` is a list of images, `save_path` should be a list of equal length.
                (default: {None})
            return_prob {bool} -- Whether or not to return the detection probability. (default: {False})
            return_box {bool} -- Whether or not to return the bounding box. (default: {False})
            return_landmarks {bool} -- Whether or not to return the facial landmarks. (default: {False})

        Returns:
            Union[torch.Tensor, tuple(torch.tensor, float)] -- If detected, cropped image of a face
                with dimensions 3 x image_size x image_size. Optionally, the probability that a face
                was detected, the bounding box coordinates and the associated facial landmarks.
                If self.keep_all is True, n detected faces are returned in an
                n x 3 x image_size x image_size tensor with an optional list of detection probabilities.
                If `img` is a list of images, the item(s) returned have an extra dimension (batch)
                as the first dimension.

        Example:
            >>> mtcnn = MTCNN()
            >>> face_tensor, prob, box, landmarks = mtcnn(img, save_path='face.png', return_prob=True, return_box=True, return_landmarks=True)
        """
        # Detect faces
        batch_boxes, batch_probs, batch_points = self.detect(img, landmarks=True)
        # Select faces
        if not self.keep_all:
            batch_boxes, batch_probs, batch_points = self.select_boxes(
                batch_boxes, batch_probs, batch_points, img, method=self.selection_method
            )
        # Extract faces
        faces = self.extract(img, batch_boxes, save_path)

        outputs = (faces, )
        if return_prob:
            outputs += (batch_probs, )
        if return_box:
            outputs += (batch_boxes, )
        if return_landmarks:
            outputs += (batch_points, )
        # unpack the tuple if only the face was asked for
        if len(outputs) == 1:
            return outputs[0]
        return outputs
def detect_face(file_names, model, save_root_path=None):
    images = load_image_batch(file_names)

    # group images by size to allow MTCNN batch-processing
    images_by_size = {}
    for i, (image, file_name) in enumerate(zip(images, file_names)):
        # trouble when loading the image
        if image is None:
            continue
        # if there are multiple faces, the actual save path will be
        # save_root_path/f'{file_name}-{face_index}.jpg'
        # https://github.com/timesler/facenet-pytorch/blob/54c869c51e0e3e12f7f92f551cdd2ecd164e2443/models/mtcnn.py#L488
        # HACK: make save_path str because of facenet_pytorch/models/mtcnn.py/#L468
        if save_root_path:
            save_path = str((save_root_path/file_name).with_suffix('.jpg'))
        else:
            save_path = None
        images_by_size.setdefault(image.size, dict(images=[], save_paths=[], indices=[]))
        images_by_size[image.size]['images'].append(image)
        images_by_size[image.size]['save_paths'].append(save_path)
        images_by_size[image.size]['indices'].append(i)

    prob_batch = [None for _ in range(len(file_names))]
    box_batch, landmarks_batch = prob_batch.copy(), prob_batch.copy()
    for size, batch in images_by_size.items():
        # avoid exception when image size is smaller than model.min_face_size (keep the None default value)
        # see https://github.com/timesler/facenet-pytorch/issues/176
        if min(size) < model.min_face_size:
            continue
        # extract the faces and save them using facenet_pytorch
        if save_root_path is not None:
            faces, probs, boxes, landmarks = model(batch['images'], save_path=batch['save_paths'],
                                                   return_prob=True, return_box=True, return_landmarks=True)
        # no need to extract the faces
        else:
            boxes, probs, landmarks = model.detect(batch['images'], landmarks=True)
            # Select faces
            if not model.keep_all:
                boxes, probs, landmarks = model.select_boxes(
                    boxes, probs, landmarks, batch['images'], method=model.selection_method
                )
        # save the results at the right index
        for prob, box, landmark, i in zip(probs, boxes, landmarks, batch['indices']):
            # HACK: convert to list to fix https://github.com/PaulLerner/ViQuAE/issues/1
            prob_batch[i] = prob.tolist() if isinstance(prob, np.ndarray) else prob
            box_batch[i] = box.tolist() if box is not None and isinstance(box, np.ndarray) else box
            landmarks_batch[i] = landmark.tolist() if landmark is not None and isinstance(landmark, np.ndarray) else landmark

    return prob_batch, box_batch, landmarks_batch
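
# Minimal sketch of calling detect_face directly, outside of Dataset.map (illustrative only:
# the file name is hypothetical and must be resolvable by load_image_batch; the MTCNN settings
# mirror the defaults used in __main__ below):
#
#   model = MTCNN(image_size=112, post_process=False, keep_all=True, device=device)
#   probs, boxes, landmarks = detect_face(['Example_portrait.jpg'], model)
#   # entries stay None when an image failed to load or was smaller than model.min_face_size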
def dataset_detect_face(item, image_key='image', **kwargs):
    prob, box, landmarks = detect_face(item[image_key], **kwargs)
    item['face_prob'] = prob
    item['face_box'] = box
    item['face_landmarks'] = landmarks
    return item
def dataset_detect_faces(dataset, batch_size=64, **kwargs):
    dataset = dataset.map(dataset_detect_face, batched=True, fn_kwargs=kwargs, batch_size=batch_size)
    return dataset
if __name__ == '__main__':
    args = docopt(__doc__)
    dataset_path = args['<dataset>']
    dataset = load_from_disk(dataset_path)
    model_config_path = args['<model_config>']
    set_caching_enabled(not args['--disable_caching'])
    batch_size = int(args['--batch_size'])

    # default config
    model_config = dict(
        image_size=112,  # for arcface
        post_process=False,  # normalization: will happen in arcface
        select_largest=False,  # select the most probable face rather than the largest
        keep_all=True,  # keep all faces
        device=device
    )
    # load specified config
    if model_config_path is not None:
        with open(model_config_path, 'r') as file:
            model_config.update(json.load(file))
    model = MTCNN(**model_config)

    image_key = args['--image_key']
    save_root_path = args['--save']
    if save_root_path is not None:
        save_root_path = Path(save_root_path)
        save_root_path.mkdir(exist_ok=True, parents=True)

    dataset = dataset_detect_faces(dataset, model=model, image_key=image_key,
                                   save_root_path=save_root_path, batch_size=batch_size)
    dataset.save_to_disk(dataset_path)