-rw-r--r-- | anime-face-detector/.gitignore | 110
-rw-r--r-- | anime-face-detector/LICENSE | 21
-rw-r--r-- | anime-face-detector/Makefile | 6
-rw-r--r-- | anime-face-detector/README.md | 89
-rw-r--r-- | anime-face-detector/asset/sample1.png | bin | 0 -> 1594270 bytes
-rw-r--r-- | anime-face-detector/asset/sample2.png | bin | 0 -> 1156897 bytes
-rw-r--r-- | anime-face-detector/asset/sample3.png | bin | 0 -> 1737489 bytes
-rw-r--r-- | anime-face-detector/faster_rcnn_wrapper.py | 204
-rw-r--r-- | anime-face-detector/main.py | 170
-rw-r--r-- | anime-face-detector/make.bat | 20
-rw-r--r-- | anime-face-detector/model/.gitignore | 5
-rw-r--r-- | anime-face-detector/nms/.gitignore | 2
-rw-r--r-- | anime-face-detector/nms/__init__.py | 0
-rw-r--r-- | anime-face-detector/nms/cpu_nms.pyx | 68
-rw-r--r-- | anime-face-detector/nms/gpu_nms.hpp | 2
-rw-r--r-- | anime-face-detector/nms/gpu_nms.pyx | 31
-rw-r--r-- | anime-face-detector/nms/nms_kernel.cu | 144
-rw-r--r-- | anime-face-detector/nms/py_cpu_nms.py | 38
-rw-r--r-- | anime-face-detector/nms_wrapper.py | 29
-rw-r--r-- | anime-face-detector/setup.py | 42
20 files changed, 981 insertions, 0 deletions
diff --git a/anime-face-detector/.gitignore b/anime-face-detector/.gitignore new file mode 100644 index 0000000..ff81ae6 --- /dev/null +++ b/anime-face-detector/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# idea pycharm data +.idea/ + +# cython build result +build/ diff --git a/anime-face-detector/LICENSE b/anime-face-detector/LICENSE new file mode 100644 index 0000000..3010384 --- /dev/null +++ b/anime-face-detector/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Zhou Xuebin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/anime-face-detector/Makefile b/anime-face-detector/Makefile new file mode 100644 index 0000000..1e9e686 --- /dev/null +++ b/anime-face-detector/Makefile @@ -0,0 +1,6 @@ +all: + python setup.py build_ext --inplace + rm -rf build +clean: + rm -rf */*.pyc + rm -rf */*.so diff --git a/anime-face-detector/README.md b/anime-face-detector/README.md new file mode 100644 index 0000000..72a242e --- /dev/null +++ b/anime-face-detector/README.md @@ -0,0 +1,89 @@ +# Anime-Face-Detector +A Faster-RCNN based anime face detector. 
+
+This detector is trained on 6000 training samples and evaluated on 641 test samples, randomly selected from a dataset crawled from the top 100 of the [pixiv daily ranking](https://www.pixiv.net/ranking.php?mode=daily).
+
+Thanks to the [OpenCV-based anime face detector](https://github.com/nagadomi/lbpcascade_animeface) written by nagadomi, which helped with labelling the data.
+
+The original implementation of Faster-RCNN using TensorFlow can be found [here](https://github.com/endernewton/tf-faster-rcnn).
+
+## Dependencies
+- Python 3.6 or 3.7
+- `tensorflow` < 2.0
+- `opencv-python`
+- `cython` (optional; can be skipped by passing the `-nms-type PY_NMS` argument)
+- Pre-trained ResNet-101 model
+
+## Usage
+1. Clone this repository
+   ```bash
+   git clone https://github.com/qhgz2013/anime-face-detector.git
+   ```
+2. Download the pre-trained model
+   Google Drive: [here](https://drive.google.com/open?id=1WjBgfOUqp4sdRd9BHs4TkdH2EcBtV5ri)
+   Baidu Netdisk: [here](https://pan.baidu.com/s/1bvpCp1sbD7t9qnta8IhpmA)
+3. Unzip the model file into the `model` directory
+4. Build the CPU NMS module (skip this step if using PY_NMS via `-nms-type PY_NMS`)
+   ```bash
+   make clean
+   make
+   ```
+   If using Windows PowerShell, run the build script with `cmd /C make.bat`.
+5. Run the demo
+   - Visualize the result (without an output path):
+     ```bash
+     python main.py -i /path/to/image.jpg
+     ```
+   - Save the results to a JSON file:
+     ```bash
+     python main.py -i /path/to/image.jpg -o /path/to/output.json
+     ```
+     Format: `{"image_path": [{"score": predicted_probability, "bbox": [min_x, min_y, max_x, max_y]}, ...], ...}`
+     Sample output file (a parsing sketch is included near the end of this README):
+     ```json
+     {"/path/to/image.jpg": [{"score": 0.9999708, "bbox": [551.3375, 314.50253, 729.2599, 485.25674]}]}
+     ```
+   - Detect a whole directory recursively:
+     ```bash
+     python main.py -i /path/to/dir -o /path/to/output.json
+     ```
+   - Customize the thresholds:
+     ```bash
+     python main.py -i /path/to/image.jpg -nms 0.3 -conf 0.8
+     ```
+   - Customize the model path:
+     ```bash
+     python main.py -i /path/to/image.jpg -model /path/to/model.ckpt
+     ```
+   - Customize the NMS type (CPU_NMS and PY_NMS are supported; GPU_NMS is not, because of the complicated build process on Windows):
+     ```bash
+     python main.py -i /path/to/image.jpg -nms-type PY_NMS
+     ```
+
+## Results
+**Mean AP for this model: 0.9086**
+
+![sample1](asset/sample1.png)
+Copyright info: [東方まとめ](https://www.pixiv.net/member_illust.php?mode=medium&illust_id=54275439) by [羽々斬](https://www.pixiv.net/member.php?id=2179695)
+
+![sample2](asset/sample2.png)
+Copyright info: [【C94】桜と刀](https://www.pixiv.net/member_illust.php?mode=medium&illust_id=69797346) by [幻像黒兎](https://www.pixiv.net/member.php?id=4462245)
+
+![sample3](asset/sample3.png)
+Copyright info: [アイドルマスター シンデレラガールズ](https://www.pixiv.net/member_illust.php?mode=medium&illust_id=69753772) by [我美蘭@1日目 東A-40a](https://www.pixiv.net/member.php?id=2003931)
+
+## About training
+
+This model was trained directly with [Faster-RCNN](https://github.com/endernewton/tf-faster-rcnn), using the following arguments:
+```bash
+python tools/trainval_net.py --weight data/imagenet_weights/res101.ckpt --imdb voc_2007_trainval --imdbval voc_2007_test --iters 60000 --cfg experiments/cfgs/res101.yml --net res101 --set ANCHOR_SCALES "[4,8,16,32]" ANCHOR_RATIOS "[1]" TRAIN.STEPSIZE "[50000]"
+```
+
+## Dataset
+
+We've uploaded the dataset to Google Drive [here](https://drive.google.com/open?id=1nDPimhiwbAWc2diok-6davhubNVe82pr); the dataset structure is similar to VOC2007 (as used in the original Faster-RCNN implementation).
+
+## Citation and declaration
+
+Feel free to cite this repo and dataset.
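As an aside on the output format above, here is a minimal sketch for consuming the JSON file written with `-o` (the `output.json` filename and the 0.8 score cutoff are illustrative assumptions; the key layout follows the sample in the Usage section):

```python
# Hypothetical consumer of the detector's JSON output (layout as shown in
# Usage: {"image_path": [{"score": ..., "bbox": [x1, y1, x2, y2]}, ...]}).
import json
import cv2

with open('output.json') as f:      # assumed path, as passed via -o
    results = json.load(f)

for image_path, faces in results.items():
    img = cv2.imread(image_path)
    for face in faces:
        if face['score'] < 0.8:     # illustrative cutoff, same as the -conf default
            continue
        x1, y1, x2, y2 = (int(v) for v in face['bbox'])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
    cv2.imwrite(image_path + '.boxes.png', img)  # save an annotated copy
```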
+This work is not related to my research team and lab, just my personal interest. diff --git a/anime-face-detector/asset/sample1.png b/anime-face-detector/asset/sample1.png Binary files differnew file mode 100644 index 0000000..857ee97 --- /dev/null +++ b/anime-face-detector/asset/sample1.png diff --git a/anime-face-detector/asset/sample2.png b/anime-face-detector/asset/sample2.png Binary files differnew file mode 100644 index 0000000..eda9ca0 --- /dev/null +++ b/anime-face-detector/asset/sample2.png diff --git a/anime-face-detector/asset/sample3.png b/anime-face-detector/asset/sample3.png Binary files differnew file mode 100644 index 0000000..583542d --- /dev/null +++ b/anime-face-detector/asset/sample3.png diff --git a/anime-face-detector/faster_rcnn_wrapper.py b/anime-face-detector/faster_rcnn_wrapper.py new file mode 100644 index 0000000..3ea09ff --- /dev/null +++ b/anime-face-detector/faster_rcnn_wrapper.py @@ -0,0 +1,204 @@ +import tensorflow as tf +from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block, resnet_v1 +import tensorflow.contrib.slim as slim +from tensorflow.contrib.slim.python.slim.nets.resnet_utils import arg_scope, conv2d_same +import numpy as np + + +class FasterRCNNSlim: + + def __init__(self): + self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), + resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), + resnet_v1_block('block3', base_depth=256, num_units=23, stride=1), + resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)] + self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3]) + self._im_info = tf.placeholder(tf.float32, shape=[3]) + + self._anchor_scales = [4, 8, 16, 32] + self._num_scales = len(self._anchor_scales) + + self._anchor_ratios = [1] + self._num_ratios = len(self._anchor_ratios) + + self._num_anchors = self._num_scales * self._num_ratios + self._scope = 'resnet_v1_101' + + with arg_scope([slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose, slim.separable_conv2d, + slim.fully_connected], + weights_regularizer=tf.contrib.layers.l2_regularizer(0.0001), + biases_regularizer=tf.no_regularizer, + biases_initializer=tf.constant_initializer(0.0)): + # in _build_network + initializer = tf.random_normal_initializer(stddev=0.01) + initializer_bbox = tf.random_normal_initializer(stddev=0.001) + # in _image_to_head + with slim.arg_scope(self._resnet_arg_scope()): + # in _build_base + with tf.variable_scope(self._scope, self._scope): + net_conv = conv2d_same(self._image, 64, 7, stride=2, scope='conv1') + net_conv = tf.pad(net_conv, [[0, 0], [1, 1], [1, 1], [0, 0]]) + net_conv = slim.max_pool2d(net_conv, [3, 3], stride=2, padding='VALID', scope='pool1') + net_conv, _ = resnet_v1(net_conv, self._blocks[:-1], global_pool=False, include_root_block=False, + scope=self._scope) + with tf.variable_scope(self._scope, self._scope): + # in _anchor_component + with tf.variable_scope('ANCHOR-default'): + height = tf.to_int32(tf.ceil(self._im_info[0] / 16.0)) + width = tf.to_int32(tf.ceil(self._im_info[1] / 16.0)) + + shift_x = tf.range(width) * 16 + shift_y = tf.range(height) * 16 + shift_x, shift_y = tf.meshgrid(shift_x, shift_y) + sx = tf.reshape(shift_x, [-1]) + sy = tf.reshape(shift_y, [-1]) + shifts = tf.transpose(tf.stack([sx, sy, sx, sy])) + k = width * height + shifts = tf.transpose(tf.reshape(shifts, [1, k, 4]), perm=[1, 0, 2]) + + anchors = np.array([[-24, -24, 39, 39], [-56, -56, 71, 71], + [-120, -120, 135, 135], [-248, -248, 263, 263]], dtype=np.int32) + + a = 
anchors.shape[0] + anchor_constant = tf.constant(anchors.reshape([1, a, 4]), dtype=tf.int32) + length = k * a + anchors_tf = tf.reshape(anchor_constant + shifts, shape=[length, 4]) + anchors = tf.cast(anchors_tf, dtype=tf.float32) + self._anchors = anchors + self._anchor_length = length + + # in _region_proposal + rpn = slim.conv2d(net_conv, 512, [3, 3], trainable=False, weights_initializer=initializer, + scope='rpn_conv/3x3') + rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=False, + weights_initializer=initializer, padding='VALID', activation_fn=None, + scope='rpn_cls_score') + rpn_cls_score_reshape = self._reshape(rpn_cls_score, 2, 'rpn_cls_score_reshape') + rpn_cls_prob_reshape = self._softmax(rpn_cls_score_reshape, 'rpn_cls_prob_reshape') + # rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name='rpn_cls_pred') + rpn_cls_prob = self._reshape(rpn_cls_prob_reshape, self._num_anchors * 2, 'rpn_cls_prob') + rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=False, + weights_initializer=initializer, padding='VALID', activation_fn=None, + scope='rpn_bbox_pred') + + # in _proposal_layer + with tf.variable_scope('rois'): + post_nms_topn = 300 + nms_thresh = 0.7 + scores = rpn_cls_prob[:, :, :, self._num_anchors:] + scores = tf.reshape(scores, [-1]) + rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4]) + + boxes = tf.cast(self._anchors, rpn_bbox_pred.dtype) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + widths * 0.5 + ctr_y = boxes[:, 1] + heights * 0.5 + + dx = rpn_bbox_pred[:, 0] + dy = rpn_bbox_pred[:, 1] + dw = rpn_bbox_pred[:, 2] + dh = rpn_bbox_pred[:, 3] + + pred_ctr_x = dx * widths + ctr_x + pred_ctr_y = dy * heights + ctr_y + pred_w = tf.exp(dw) * widths + pred_h = tf.exp(dh) * heights + + pred_boxes0 = pred_ctr_x - pred_w * 0.5 + pred_boxes1 = pred_ctr_y - pred_h * 0.5 + pred_boxes2 = pred_ctr_x + pred_w * 0.5 + pred_boxes3 = pred_ctr_y + pred_h * 0.5 + + b0 = tf.clip_by_value(pred_boxes0, 0, self._im_info[1] - 1) + b1 = tf.clip_by_value(pred_boxes1, 0, self._im_info[0] - 1) + b2 = tf.clip_by_value(pred_boxes2, 0, self._im_info[1] - 1) + b3 = tf.clip_by_value(pred_boxes3, 0, self._im_info[0] - 1) + + proposals = tf.stack([b0, b1, b2, b3], axis=1) + indices = tf.image.non_max_suppression(proposals, scores, max_output_size=post_nms_topn, + iou_threshold=nms_thresh) + boxes = tf.to_float(tf.gather(proposals, indices)) + # rpn_scores = tf.reshape(tf.gather(scores, indices), [-1, 1]) + + batch_inds = tf.zeros([tf.shape(indices)[0], 1], dtype=tf.float32) + rois = tf.concat([batch_inds, boxes], 1) + + # in _crop_pool_layer + with tf.variable_scope('pool5'): + batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name='bath_id'), [1]) + bottom_shape = tf.shape(net_conv) + height = (tf.to_float(bottom_shape[1]) - 1) * 16.0 + width = (tf.to_float(bottom_shape[2]) - 1) * 16.0 + x1 = tf.slice(rois, [0, 1], [-1, 1], name='x1') / width + y1 = tf.slice(rois, [0, 2], [-1, 1], name='y1') / height + x2 = tf.slice(rois, [0, 3], [-1, 1], name='x2') / width + y2 = tf.slice(rois, [0, 4], [-1, 1], name='y2') / height + bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1)) + pool5 = tf.image.crop_and_resize(net_conv, bboxes, tf.to_int32(batch_ids), [7, 7], name='crops') + # in _head_to_tail + with slim.arg_scope(self._resnet_arg_scope()): + fc7, _ = resnet_v1(pool5, self._blocks[-1:], global_pool=False, include_root_block=False, + scope=self._scope) + fc7 = 
tf.reduce_mean(fc7, axis=[1, 2]) + with tf.variable_scope(self._scope, self._scope): + # in _region_classification + cls_score = slim.fully_connected(fc7, 2, weights_initializer=initializer, trainable=False, + activation_fn=None, scope='cls_score') + cls_prob = self._softmax(cls_score, 'cls_prob') + # cls_pred = tf.argmax(cls_score, 'cls_pred') + bbox_pred = slim.fully_connected(fc7, 2*4, weights_initializer=initializer_bbox, trainable=False, + activation_fn=None, scope='bbox_pred') + self._cls_score = cls_score + self._cls_prob = cls_prob + self._bbox_pred = bbox_pred + self._rois = rois + + stds = np.tile(np.array([0.1, 0.1, 0.2, 0.2]), 2) + means = np.tile(np.array([0.0, 0.0, 0.0, 0.0]), 2) + self._bbox_pred *= stds + self._bbox_pred += means + + @staticmethod + def _resnet_arg_scope(): + batch_norm_params = { + 'is_training': False, + 'decay': 0.997, + 'epsilon': 1e-5, + 'scale': True, + 'trainable': False, + 'updates_collections': tf.GraphKeys.UPDATE_OPS + } + with arg_scope([slim.conv2d], + weights_regularizer=slim.l2_regularizer(0.0001), + weights_initializer=slim.variance_scaling_initializer(), + trainable=False, + activation_fn=tf.nn.relu, + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params): + with arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc: + return arg_sc + + @staticmethod + def _reshape(bottom, num_dim, name): + input_shape = tf.shape(bottom) + with tf.variable_scope(name): + to_caffe = tf.transpose(bottom, [0, 3, 1, 2]) + reshaped = tf.reshape(to_caffe, [1, num_dim, -1, input_shape[2]]) + to_tf = tf.transpose(reshaped, [0, 2, 3, 1]) + return to_tf + + @staticmethod + def _softmax(bottom, name): + if name.startswith('rpn_cls_prob_reshape'): + input_shape = tf.shape(bottom) + bottom_reshaped = tf.reshape(bottom, [-1, input_shape[-1]]) + reshaped_score = tf.nn.softmax(bottom_reshaped, name=name) + return tf.reshape(reshaped_score, input_shape) + return tf.nn.softmax(bottom, name=name) + + def test_image(self, sess, image, im_info): + return sess.run([self._cls_score, self._cls_prob, self._bbox_pred, self._rois], feed_dict={ + self._image: image, + self._im_info: im_info + }) diff --git a/anime-face-detector/main.py b/anime-face-detector/main.py new file mode 100644 index 0000000..11f7e4d --- /dev/null +++ b/anime-face-detector/main.py @@ -0,0 +1,170 @@ +import numpy as np +import cv2 +from faster_rcnn_wrapper import FasterRCNNSlim +import tensorflow as tf +import argparse +import os +import json +import time +from nms_wrapper import NMSType, NMSWrapper + + +def detect(sess, rcnn_cls, image): + # pre-processing image for Faster-RCNN + img_origin = image.astype(np.float32, copy=True) + img_origin -= np.array([[[102.9801, 115.9465, 112.7717]]]) + + img_shape = img_origin.shape + img_size_min = np.min(img_shape[:2]) + img_size_max = np.max(img_shape[:2]) + + img_scale = 600 / img_size_min + if np.round(img_scale * img_size_max) > 1000: + img_scale = 1000 / img_size_max + img = cv2.resize(img_origin, None, None, img_scale, img_scale, cv2.INTER_LINEAR) + img_info = np.array([img.shape[0], img.shape[1], img_scale], dtype=np.float32) + img = np.expand_dims(img, 0) + + # test image + _, scores, bbox_pred, rois = rcnn_cls.test_image(sess, img, img_info) + + # bbox transform + boxes = rois[:, 1:] / img_scale + + boxes = boxes.astype(bbox_pred.dtype, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1 + heights = boxes[:, 3] - boxes[:, 1] + 1 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + dx = bbox_pred[:, 0::4] + dy = 
bbox_pred[:, 1::4] + dw = bbox_pred[:, 2::4] + dh = bbox_pred[:, 3::4] + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + pred_boxes = np.zeros_like(bbox_pred, dtype=bbox_pred.dtype) + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h + # clipping edge + pred_boxes[:, 0::4] = np.maximum(pred_boxes[:, 0::4], 0) + pred_boxes[:, 1::4] = np.maximum(pred_boxes[:, 1::4], 0) + pred_boxes[:, 2::4] = np.minimum(pred_boxes[:, 2::4], img_shape[1] - 1) + pred_boxes[:, 3::4] = np.minimum(pred_boxes[:, 3::4], img_shape[0] - 1) + return scores, pred_boxes + + +def load_file_from_dir(dir_path): + ret = [] + for file in os.listdir(dir_path): + path_comb = os.path.join(dir_path, file) + if os.path.isdir(path_comb): + ret += load_file_from_dir(path_comb) + else: + ret.append(path_comb) + return ret + + +def fmt_time(dtime): + if dtime <= 0: + return '0:00.000' + elif dtime < 60: + return '0:%02d.%03d' % (int(dtime), int(dtime * 1000) % 1000) + elif dtime < 3600: + return '%d:%02d.%03d' % (int(dtime / 60), int(dtime) % 60, int(dtime * 1000) % 1000) + else: + return '%d:%02d:%02d.%03d' % (int(dtime / 3600), int((dtime % 3600) / 60), int(dtime) % 60, + int(dtime * 1000) % 1000) + + +def main(): + parser = argparse.ArgumentParser(description='Anime face detector demo') + parser.add_argument('-i', help='The input path of an image or directory', required=True, dest='input', type=str) + parser.add_argument('-o', help='The output json path of the detection result', dest='output') + parser.add_argument('-nms', help='Change the threshold for non maximum suppression', + dest='nms_thresh', default=0.3, type=float) + parser.add_argument('-conf', help='Change the threshold for class regression', dest='conf_thresh', + default=0.8, type=float) + parser.add_argument('-model', help='Specify a new path for model', dest='model', type=str, + default='model/res101_faster_rcnn_iter_60000.ckpt') + parser.add_argument('-nms-type', help='Type of nms', choices=['PY_NMS', 'CPU_NMS', 'GPU_NMS'], dest='nms_type', + default='CPU_NMS') + + args = parser.parse_args() + + assert os.path.exists(args.input), 'The input path does not exists' + + if os.path.isdir(args.input): + files = load_file_from_dir(args.input) + else: + files = [args.input] + file_len = len(files) + + if args.nms_type == 'PY_NMS': + nms_type = NMSType.PY_NMS + elif args.nms_type == 'CPU_NMS': + nms_type = NMSType.CPU_NMS + elif args.nms_type == 'GPU_NMS': + nms_type = NMSType.GPU_NMS + else: + raise ValueError('Incorrect NMS Type, not supported yet') + + nms = NMSWrapper(nms_type) + + cfg = tf.ConfigProto() + cfg.gpu_options.allow_growth = True + sess = tf.Session(config=cfg) + + net = FasterRCNNSlim() + saver = tf.train.Saver() + + saver.restore(sess, args.model) + + result = {} + + time_start = time.time() + + for idx, file in enumerate(files): + elapsed = time.time() - time_start + eta = (file_len - idx) * elapsed / idx if idx > 0 else 0 + print('[%d/%d] Elapsed: %s, ETA: %s >> %s' % (idx+1, file_len, fmt_time(elapsed), fmt_time(eta), file)) + img = cv2.imread(file) + scores, boxes = detect(sess, net, img) + boxes = boxes[:, 4:8] + scores = scores[:, 1] + keep = nms(np.hstack([boxes, scores[:, np.newaxis]]).astype(np.float32), args.nms_thresh) + boxes = 
boxes[keep, :] + scores = scores[keep] + inds = np.where(scores >= args.conf_thresh)[0] + scores = scores[inds] + boxes = boxes[inds, :] + + result[file] = [] + for i in range(scores.shape[0]): + x1, y1, x2, y2 = boxes[i, :].tolist() + new_result = {'score': float(scores[i]), + 'bbox': [x1, y1, x2, y2]} + result[file].append(new_result) + + if args.output is None: + cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2) + if args.output: + if ((idx+1) % 1000) == 0: + # saving the temporary result + with open(args.output, 'w') as f: + json.dump(result, f) + else: + cv2.imshow(file, img) + + if args.output: + with open(args.output, 'w') as f: + json.dump(result, f) + else: + cv2.waitKey() + + +if __name__ == '__main__': + main() diff --git a/anime-face-detector/make.bat b/anime-face-detector/make.bat new file mode 100644 index 0000000..b0d9bff --- /dev/null +++ b/anime-face-detector/make.bat @@ -0,0 +1,20 @@ +@echo off +if /i "%1" == "clean" goto clean +goto all + +:all +python setup.py build_ext --inplace +rd /s /q build + +goto exit + + + +:clean +del /f /s /q *.cpp +del /f /s /q *.c +del /f /s /q *.pyd + +goto exit + +:exit diff --git a/anime-face-detector/model/.gitignore b/anime-face-detector/model/.gitignore new file mode 100644 index 0000000..b1d31d3 --- /dev/null +++ b/anime-face-detector/model/.gitignore @@ -0,0 +1,5 @@ +# all pre-trained models +*.index +*.data-00000-of-00001 +*.meta +*.pkl diff --git a/anime-face-detector/nms/.gitignore b/anime-face-detector/nms/.gitignore new file mode 100644 index 0000000..40d7cb4 --- /dev/null +++ b/anime-face-detector/nms/.gitignore @@ -0,0 +1,2 @@ +*.c +*.cpp diff --git a/anime-face-detector/nms/__init__.py b/anime-face-detector/nms/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/anime-face-detector/nms/__init__.py diff --git a/anime-face-detector/nms/cpu_nms.pyx b/anime-face-detector/nms/cpu_nms.pyx new file mode 100644 index 0000000..71fbab1 --- /dev/null +++ b/anime-face-detector/nms/cpu_nms.pyx @@ -0,0 +1,68 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int64_t, ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + keep = [] + for _i in range(ndets): + i = 
order[_i] + if suppressed[i] == 1: + continue + keep.append(i) + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return keep diff --git a/anime-face-detector/nms/gpu_nms.hpp b/anime-face-detector/nms/gpu_nms.hpp new file mode 100644 index 0000000..68b6d42 --- /dev/null +++ b/anime-face-detector/nms/gpu_nms.hpp @@ -0,0 +1,2 @@ +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id); diff --git a/anime-face-detector/nms/gpu_nms.pyx b/anime-face-detector/nms/gpu_nms.pyx new file mode 100644 index 0000000..55878db --- /dev/null +++ b/anime-face-detector/nms/gpu_nms.pyx @@ -0,0 +1,31 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +assert sizeof(int) == sizeof(np.int32_t) + +cdef extern from "gpu_nms.hpp": + void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + +def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + np.int32_t device_id=0): + cdef int boxes_num = dets.shape[0] + cdef int boxes_dim = dets.shape[1] + cdef int num_out + cdef np.ndarray[np.int32_t, ndim=1] \ + keep = np.zeros(boxes_num, dtype=np.int32) + cdef np.ndarray[np.float32_t, ndim=1] \ + scores = dets[:, 4] + cdef np.ndarray[np.int64_t, ndim=1] \ + order = scores.argsort()[::-1] + cdef np.ndarray[np.float32_t, ndim=2] \ + sorted_dets = dets[order, :] + _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + keep = keep[:num_out] + return list(order[keep]) diff --git a/anime-face-detector/nms/nms_kernel.cu b/anime-face-detector/nms/nms_kernel.cu new file mode 100644 index 0000000..038a590 --- /dev/null +++ b/anime-face-detector/nms/nms_kernel.cu @@ -0,0 +1,144 @@ +// ------------------------------------------------------------------ +// Faster R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Shaoqing Ren +// ------------------------------------------------------------------ + +#include "gpu_nms.hpp" +#include <vector> +#include <iostream> + +#define CUDA_CHECK(condition) \ + /* Code block avoids redefinition of cudaError_t error */ \ + do { \ + cudaError_t error = condition; \ + if (error != cudaSuccess) { \ + std::cout << cudaGetErrorString(error) << std::endl; \ + } \ + } while (0) + +#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) +int const threadsPerBlock = sizeof(unsigned long long) * 8; + +__device__ inline float devIoU(float const * const a, float const * const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const 
float nms_overlap_thresh, + const float *dev_boxes, unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float *cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + const int col_blocks = DIVUP(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void _set_device(int device_id) { + int current_device; + CUDA_CHECK(cudaGetDevice(¤t_device)); + if (current_device == device_id) { + return; + } + // The call to cudaSetDevice must come before any calls to Get, which + // may perform initialization using the GPU. 
+ CUDA_CHECK(cudaSetDevice(device_id)); +} + +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id) { + _set_device(device_id); + + float* boxes_dev = NULL; + unsigned long long* mask_dev = NULL; + + const int col_blocks = DIVUP(boxes_num, threadsPerBlock); + + CUDA_CHECK(cudaMalloc(&boxes_dev, + boxes_num * boxes_dim * sizeof(float))); + CUDA_CHECK(cudaMemcpy(boxes_dev, + boxes_host, + boxes_num * boxes_dim * sizeof(float), + cudaMemcpyHostToDevice)); + + CUDA_CHECK(cudaMalloc(&mask_dev, + boxes_num * col_blocks * sizeof(unsigned long long))); + + dim3 blocks(DIVUP(boxes_num, threadsPerBlock), + DIVUP(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<<blocks, threads>>>(boxes_num, + nms_overlap_thresh, + boxes_dev, + mask_dev); + + std::vector<unsigned long long> mask_host(boxes_num * col_blocks); + CUDA_CHECK(cudaMemcpy(&mask_host[0], + mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks, + cudaMemcpyDeviceToHost)); + + std::vector<unsigned long long> remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long *p = &mask_host[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + *num_out = num_to_keep; + + CUDA_CHECK(cudaFree(boxes_dev)); + CUDA_CHECK(cudaFree(mask_dev)); +} diff --git a/anime-face-detector/nms/py_cpu_nms.py b/anime-face-detector/nms/py_cpu_nms.py new file mode 100644 index 0000000..54e7b25 --- /dev/null +++ b/anime-face-detector/nms/py_cpu_nms.py @@ -0,0 +1,38 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np + +def py_cpu_nms(dets, thresh): + """Pure Python NMS baseline.""" + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep diff --git a/anime-face-detector/nms_wrapper.py b/anime-face-detector/nms_wrapper.py new file mode 100644 index 0000000..ca900e8 --- /dev/null +++ b/anime-face-detector/nms_wrapper.py @@ -0,0 +1,29 @@ +from enum import Enum + + +class NMSType(Enum): + PY_NMS = 1 + CPU_NMS = 2 + GPU_NMS = 3 + + +default_nms_type = NMSType.PY_NMS + + +class NMSWrapper: + def __init__(self, nms_type=default_nms_type): + assert type(nms_type) == NMSType + if nms_type == NMSType.PY_NMS: + from nms.py_cpu_nms import py_cpu_nms + self._nms = py_cpu_nms + elif nms_type == NMSType.CPU_NMS: + from nms.cpu_nms import cpu_nms + self._nms = cpu_nms + elif nms_type == NMSType.GPU_NMS: + from nms.gpu_nms import gpu_nms + self._nms = gpu_nms + else: + raise 
ValueError('current nms type is not implemented yet')
+
+    def __call__(self, *args, **kwargs):
+        return self._nms(*args, **kwargs)
diff --git a/anime-face-detector/setup.py b/anime-face-detector/setup.py
new file mode 100644
index 0000000..dc634f5
--- /dev/null
+++ b/anime-face-detector/setup.py
@@ -0,0 +1,42 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import os
+from os.path import join as pjoin
+import numpy as np
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+import sys
+
+
+# Obtain the numpy include directory. This logic works across numpy versions.
+try:
+    numpy_include = np.get_include()
+except AttributeError:
+    numpy_include = np.get_numpy_include()
+
+# custom build_ext hook; no compiler customization is needed for the CPU-only build
+class custom_build_ext(build_ext):
+    def build_extensions(self):
+        build_ext.build_extensions(self)
+
+ext_modules = [
+    Extension(
+        "nms.cpu_nms",
+        ["nms/cpu_nms.pyx"],
+        extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] if sys.platform == 'linux' else [],
+        include_dirs=[numpy_include]
+    )
+]
+
+setup(
+    name='tf_faster_rcnn',
+    ext_modules=ext_modules,
+    # use the custom build_ext defined above
+    cmdclass={'build_ext': custom_build_ext},
+)
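As a quick sanity check of the NMS components above, here is a minimal sketch (an assumption: it is run from the repo root so `nms_wrapper` and the `nms` package are importable; `NMSType.PY_NMS` avoids the Cython build step, while `CPU_NMS` requires `make` to have been run first):

```python
# Toy smoke test for NMSWrapper / py_cpu_nms: three detections given as
# [x1, y1, x2, y2, score]. The first two boxes overlap heavily, so the
# lower-scoring one should be suppressed at an IoU threshold of 0.3.
import numpy as np
from nms_wrapper import NMSType, NMSWrapper

nms = NMSWrapper(NMSType.PY_NMS)  # pure-Python path, no build step required

dets = np.array([
    [10.0, 10.0, 60.0, 60.0, 0.95],
    [12.0, 12.0, 62.0, 62.0, 0.90],      # IoU with box 0 is roughly 0.86
    [100.0, 100.0, 150.0, 150.0, 0.80],
], dtype=np.float32)

keep = nms(dets, 0.3)  # indices of surviving boxes, ordered by score
print(keep)            # expected: [0, 2]
```

This mirrors how `main.py` calls the wrapper: detections are stacked as an N x 5 float32 array of boxes plus scores, and the returned indices are used to filter both `boxes` and `scores`.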