-rw-r--r-- | anime-face-detector/.gitignore | 110
-rw-r--r-- | anime-face-detector/LICENSE | 21
-rw-r--r-- | anime-face-detector/Makefile | 6
-rw-r--r-- | anime-face-detector/README.md | 89
-rw-r--r-- | anime-face-detector/asset/sample1.png | bin | 0 -> 1594270 bytes
-rw-r--r-- | anime-face-detector/asset/sample2.png | bin | 0 -> 1156897 bytes
-rw-r--r-- | anime-face-detector/asset/sample3.png | bin | 0 -> 1737489 bytes
-rw-r--r-- | anime-face-detector/faster_rcnn_wrapper.py | 204
-rw-r--r-- | anime-face-detector/main.py | 170
-rw-r--r-- | anime-face-detector/make.bat | 20
-rw-r--r-- | anime-face-detector/model/.gitignore | 5
-rw-r--r-- | anime-face-detector/nms/.gitignore | 2
-rw-r--r-- | anime-face-detector/nms/__init__.py | 0
-rw-r--r-- | anime-face-detector/nms/cpu_nms.pyx | 68
-rw-r--r-- | anime-face-detector/nms/gpu_nms.hpp | 2
-rw-r--r-- | anime-face-detector/nms/gpu_nms.pyx | 31
-rw-r--r-- | anime-face-detector/nms/nms_kernel.cu | 144
-rw-r--r-- | anime-face-detector/nms/py_cpu_nms.py | 38
-rw-r--r-- | anime-face-detector/nms_wrapper.py | 29
-rw-r--r-- | anime-face-detector/setup.py | 42
20 files changed, 981 insertions, 0 deletions
diff --git a/anime-face-detector/.gitignore b/anime-face-detector/.gitignore new file mode 100644 index 0000000..ff81ae6 --- /dev/null +++ b/anime-face-detector/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# idea pycharm data +.idea/ + +# cython build result +build/ diff --git a/anime-face-detector/LICENSE b/anime-face-detector/LICENSE new file mode 100644 index 0000000..3010384 --- /dev/null +++ b/anime-face-detector/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Zhou Xuebin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/anime-face-detector/Makefile b/anime-face-detector/Makefile new file mode 100644 index 0000000..1e9e686 --- /dev/null +++ b/anime-face-detector/Makefile @@ -0,0 +1,6 @@ +all: + python setup.py build_ext --inplace + rm -rf build +clean: + rm -rf */*.pyc + rm -rf */*.so diff --git a/anime-face-detector/README.md b/anime-face-detector/README.md new file mode 100644 index 0000000..72a242e --- /dev/null +++ b/anime-face-detector/README.md @@ -0,0 +1,89 @@ +# Anime-Face-Detector +A Faster-RCNN based anime face detector. 
+
+This detector is trained on 6000 training samples and evaluated on 641 test samples, randomly selected from a dataset crawled from the top 100 of the [pixiv daily ranking](https://www.pixiv.net/ranking.php?mode=daily).
+
+Thanks to the [OpenCV-based anime face detector](https://github.com/nagadomi/lbpcascade_animeface) written by nagadomi, which helped with labelling the data.
+
+The original implementation of Faster-RCNN using TensorFlow can be found [here](https://github.com/endernewton/tf-faster-rcnn).
+
+## Dependencies
+- Python 3.6 or 3.7
+- `tensorflow` < 2.0
+- `opencv-python`
+- `cython` (optional; can be skipped by passing the `-nms-type PY_NMS` argument)
+- Pre-trained ResNet-101 model
+
+## Usage
+1. Clone this repository
+   ```bash
+   git clone https://github.com/qhgz2013/anime-face-detector.git
+   ```
+2. Download the pre-trained model
+   Google Drive: [here](https://drive.google.com/open?id=1WjBgfOUqp4sdRd9BHs4TkdH2EcBtV5ri)
+   Baidu Netdisk: [here](https://pan.baidu.com/s/1bvpCp1sbD7t9qnta8IhpmA)
+3. Unzip the model file into the `model` directory
+4. Build the CPU NMS module (skip this step if using PY_NMS via `-nms-type PY_NMS`)
+   ```bash
+   make clean
+   make
+   ```
+   If using Windows PowerShell, run the build script with `cmd /C make.bat`.
+5. Run the demo
+   - Visualize the result (without an output path):
+     ```bash
+     python main.py -i /path/to/image.jpg
+     ```
+   - Save the results to a JSON file:
+     ```bash
+     python main.py -i /path/to/image.jpg -o /path/to/output.json
+     ```
+     Format: `{"image_path": [{"score": predicted_probability, "bbox": [min_x, min_y, max_x, max_y]}, ...], ...}`
+     Sample output file (a parsing sketch is included near the end of this README):
+     ```json
+     {"/path/to/image.jpg": [{"score": 0.9999708, "bbox": [551.3375, 314.50253, 729.2599, 485.25674]}]}
+     ```
+   - Detect a whole directory recursively:
+     ```bash
+     python main.py -i /path/to/dir -o /path/to/output.json
+     ```
+   - Customize the thresholds:
+     ```bash
+     python main.py -i /path/to/image.jpg -nms 0.3 -conf 0.8
+     ```
+   - Customize the model path:
+     ```bash
+     python main.py -i /path/to/image.jpg -model /path/to/model.ckpt
+     ```
+   - Customize the NMS type (CPU_NMS and PY_NMS are supported; GPU_NMS is not, because of the complicated build process on Windows):
+     ```bash
+     python main.py -i /path/to/image.jpg -nms-type PY_NMS
+     ```
+
+## Results
+**Mean AP for this model: 0.9086**
+
+![sample1](asset/sample1.png)
+Copyright info: [東方まとめ](https://www.pixiv.net/member_illust.php?mode=medium&illust_id=54275439) by [羽々斬](https://www.pixiv.net/member.php?id=2179695)
+
+![sample2](asset/sample2.png)
+Copyright info: [【C94】桜と刀](https://www.pixiv.net/member_illust.php?mode=medium&illust_id=69797346) by [幻像黒兎](https://www.pixiv.net/member.php?id=4462245)
+
+![sample3](asset/sample3.png)
+Copyright info: [アイドルマスター シンデレラガールズ](https://www.pixiv.net/member_illust.php?mode=medium&illust_id=69753772) by [我美蘭@1日目 東A-40a](https://www.pixiv.net/member.php?id=2003931)
+
+## About training
+
+This model was trained directly with [Faster-RCNN](https://github.com/endernewton/tf-faster-rcnn), using the following arguments:
+```bash
+python tools/trainval_net.py --weight data/imagenet_weights/res101.ckpt --imdb voc_2007_trainval --imdbval voc_2007_test --iters 60000 --cfg experiments/cfgs/res101.yml --net res101 --set ANCHOR_SCALES "[4,8,16,32]" ANCHOR_RATIOS "[1]" TRAIN.STEPSIZE "[50000]"
+```
+
+## Dataset
+
+We've uploaded the dataset to Google Drive [here](https://drive.google.com/open?id=1nDPimhiwbAWc2diok-6davhubNVe82pr); the dataset structure is similar to VOC2007 (as used in the original Faster-RCNN implementation).
+
+## Citation and declaration
+
+Feel free to cite this repo and dataset.
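As an aside on the output format above, here is a minimal sketch for consuming the JSON file written with `-o` (the `output.json` filename and the 0.8 score cutoff are illustrative assumptions; the key layout follows the sample in the Usage section):

```python
# Hypothetical consumer of the detector's JSON output (layout as shown in
# Usage: {"image_path": [{"score": ..., "bbox": [x1, y1, x2, y2]}, ...]}).
import json
import cv2

with open('output.json') as f:      # assumed path, as passed via -o
    results = json.load(f)

for image_path, faces in results.items():
    img = cv2.imread(image_path)
    for face in faces:
        if face['score'] < 0.8:     # illustrative cutoff, same as the -conf default
            continue
        x1, y1, x2, y2 = (int(v) for v in face['bbox'])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
    cv2.imwrite(image_path + '.boxes.png', img)  # save an annotated copy
```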
+This work is not related to my research team and lab, just my personal interest. diff --git a/anime-face-detector/asset/sample1.png b/anime-face-detector/asset/sample1.png Binary files differnew file mode 100644 index 0000000..857ee97 --- /dev/null +++ b/anime-face-detector/asset/sample1.png diff --git a/anime-face-detector/asset/sample2.png b/anime-face-detector/asset/sample2.png Binary files differnew file mode 100644 index 0000000..eda9ca0 --- /dev/null +++ b/anime-face-detector/asset/sample2.png diff --git a/anime-face-detector/asset/sample3.png b/anime-face-detector/asset/sample3.png Binary files differnew file mode 100644 index 0000000..583542d --- /dev/null +++ b/anime-face-detector/asset/sample3.png diff --git a/anime-face-detector/faster_rcnn_wrapper.py b/anime-face-detector/faster_rcnn_wrapper.py new file mode 100644 index 0000000..3ea09ff --- /dev/null +++ b/anime-face-detector/faster_rcnn_wrapper.py @@ -0,0 +1,204 @@ +import tensorflow as tf +from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_block, resnet_v1 +import tensorflow.contrib.slim as slim +from tensorflow.contrib.slim.python.slim.nets.resnet_utils import arg_scope, conv2d_same +import numpy as np + + +class FasterRCNNSlim: + + def __init__(self): + self._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), + resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), + resnet_v1_block('block3', base_depth=256, num_units=23, stride=1), + resnet_v1_block('block4', base_depth=512, num_units=3, stride=1)] + self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3]) + self._im_info = tf.placeholder(tf.float32, shape=[3]) + + self._anchor_scales = [4, 8, 16, 32] + self._num_scales = len(self._anchor_scales) + + self._anchor_ratios = [1] + self._num_ratios = len(self._anchor_ratios) + + self._num_anchors = self._num_scales * self._num_ratios + self._scope = 'resnet_v1_101' + + with arg_scope([slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose, slim.separable_conv2d, + slim.fully_connected], + weights_regularizer=tf.contrib.layers.l2_regularizer(0.0001), + biases_regularizer=tf.no_regularizer, + biases_initializer=tf.constant_initializer(0.0)): + # in _build_network + initializer = tf.random_normal_initializer(stddev=0.01) + initializer_bbox = tf.random_normal_initializer(stddev=0.001) + # in _image_to_head + with slim.arg_scope(self._resnet_arg_scope()): + # in _build_base + with tf.variable_scope(self._scope, self._scope): + net_conv = conv2d_same(self._image, 64, 7, stride=2, scope='conv1') + net_conv = tf.pad(net_conv, [[0, 0], [1, 1], [1, 1], [0, 0]]) + net_conv = slim.max_pool2d(net_conv, [3, 3], stride=2, padding='VALID', scope='pool1') + net_conv, _ = resnet_v1(net_conv, self._blocks[:-1], global_pool=False, include_root_block=False, + scope=self._scope) + with tf.variable_scope(self._scope, self._scope): + # in _anchor_component + with tf.variable_scope('ANCHOR-default'): + height = tf.to_int32(tf.ceil(self._im_info[0] / 16.0)) + width = tf.to_int32(tf.ceil(self._im_info[1] / 16.0)) + + shift_x = tf.range(width) * 16 + shift_y = tf.range(height) * 16 + shift_x, shift_y = tf.meshgrid(shift_x, shift_y) + sx = tf.reshape(shift_x, [-1]) + sy = tf.reshape(shift_y, [-1]) + shifts = tf.transpose(tf.stack([sx, sy, sx, sy])) + k = width * height + shifts = tf.transpose(tf.reshape(shifts, [1, k, 4]), perm=[1, 0, 2]) + + anchors = np.array([[-24, -24, 39, 39], [-56, -56, 71, 71], + [-120, -120, 135, 135], [-248, -248, 263, 263]], dtype=np.int32) + + a = 
anchors.shape[0] + anchor_constant = tf.constant(anchors.reshape([1, a, 4]), dtype=tf.int32) + length = k * a + anchors_tf = tf.reshape(anchor_constant + shifts, shape=[length, 4]) + anchors = tf.cast(anchors_tf, dtype=tf.float32) + self._anchors = anchors + self._anchor_length = length + + # in _region_proposal + rpn = slim.conv2d(net_conv, 512, [3, 3], trainable=False, weights_initializer=initializer, + scope='rpn_conv/3x3') + rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=False, + weights_initializer=initializer, padding='VALID', activation_fn=None, + scope='rpn_cls_score') + rpn_cls_score_reshape = self._reshape(rpn_cls_score, 2, 'rpn_cls_score_reshape') + rpn_cls_prob_reshape = self._softmax(rpn_cls_score_reshape, 'rpn_cls_prob_reshape') + # rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name='rpn_cls_pred') + rpn_cls_prob = self._reshape(rpn_cls_prob_reshape, self._num_anchors * 2, 'rpn_cls_prob') + rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=False, + weights_initializer=initializer, padding='VALID', activation_fn=None, + scope='rpn_bbox_pred') + + # in _proposal_layer + with tf.variable_scope('rois'): + post_nms_topn = 300 + nms_thresh = 0.7 + scores = rpn_cls_prob[:, :, :, self._num_anchors:] + scores = tf.reshape(scores, [-1]) + rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4]) + + boxes = tf.cast(self._anchors, rpn_bbox_pred.dtype) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + widths * 0.5 + ctr_y = boxes[:, 1] + heights * 0.5 + + dx = rpn_bbox_pred[:, 0] + dy = rpn_bbox_pred[:, 1] + dw = rpn_bbox_pred[:, 2] + dh = rpn_bbox_pred[:, 3] + + pred_ctr_x = dx * widths + ctr_x + pred_ctr_y = dy * heights + ctr_y + pred_w = tf.exp(dw) * widths + pred_h = tf.exp(dh) * heights + + pred_boxes0 = pred_ctr_x - pred_w * 0.5 + pred_boxes1 = pred_ctr_y - pred_h * 0.5 + pred_boxes2 = pred_ctr_x + pred_w * 0.5 + pred_boxes3 = pred_ctr_y + pred_h * 0.5 + + b0 = tf.clip_by_value(pred_boxes0, 0, self._im_info[1] - 1) + b1 = tf.clip_by_value(pred_boxes1, 0, self._im_info[0] - 1) + b2 = tf.clip_by_value(pred_boxes2, 0, self._im_info[1] - 1) + b3 = tf.clip_by_value(pred_boxes3, 0, self._im_info[0] - 1) + + proposals = tf.stack([b0, b1, b2, b3], axis=1) + indices = tf.image.non_max_suppression(proposals, scores, max_output_size=post_nms_topn, + iou_threshold=nms_thresh) + boxes = tf.to_float(tf.gather(proposals, indices)) + # rpn_scores = tf.reshape(tf.gather(scores, indices), [-1, 1]) + + batch_inds = tf.zeros([tf.shape(indices)[0], 1], dtype=tf.float32) + rois = tf.concat([batch_inds, boxes], 1) + + # in _crop_pool_layer + with tf.variable_scope('pool5'): + batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name='bath_id'), [1]) + bottom_shape = tf.shape(net_conv) + height = (tf.to_float(bottom_shape[1]) - 1) * 16.0 + width = (tf.to_float(bottom_shape[2]) - 1) * 16.0 + x1 = tf.slice(rois, [0, 1], [-1, 1], name='x1') / width + y1 = tf.slice(rois, [0, 2], [-1, 1], name='y1') / height + x2 = tf.slice(rois, [0, 3], [-1, 1], name='x2') / width + y2 = tf.slice(rois, [0, 4], [-1, 1], name='y2') / height + bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1)) + pool5 = tf.image.crop_and_resize(net_conv, bboxes, tf.to_int32(batch_ids), [7, 7], name='crops') + # in _head_to_tail + with slim.arg_scope(self._resnet_arg_scope()): + fc7, _ = resnet_v1(pool5, self._blocks[-1:], global_pool=False, include_root_block=False, + scope=self._scope) + fc7 = 
tf.reduce_mean(fc7, axis=[1, 2]) + with tf.variable_scope(self._scope, self._scope): + # in _region_classification + cls_score = slim.fully_connected(fc7, 2, weights_initializer=initializer, trainable=False, + activation_fn=None, scope='cls_score') + cls_prob = self._softmax(cls_score, 'cls_prob') + # cls_pred = tf.argmax(cls_score, 'cls_pred') + bbox_pred = slim.fully_connected(fc7, 2*4, weights_initializer=initializer_bbox, trainable=False, + activation_fn=None, scope='bbox_pred') + self._cls_score = cls_score + self._cls_prob = cls_prob + self._bbox_pred = bbox_pred + self._rois = rois + + stds = np.tile(np.array([0.1, 0.1, 0.2, 0.2]), 2) + means = np.tile(np.array([0.0, 0.0, 0.0, 0.0]), 2) + self._bbox_pred *= stds + self._bbox_pred += means + + @staticmethod + def _resnet_arg_scope(): + batch_norm_params = { + 'is_training': False, + 'decay': 0.997, + 'epsilon': 1e-5, + 'scale': True, + 'trainable': False, + 'updates_collections': tf.GraphKeys.UPDATE_OPS + } + with arg_scope([slim.conv2d], + weights_regularizer=slim.l2_regularizer(0.0001), + weights_initializer=slim.variance_scaling_initializer(), + trainable=False, + activation_fn=tf.nn.relu, + normalizer_fn=slim.batch_norm, + normalizer_params=batch_norm_params): + with arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc: + return arg_sc + + @staticmethod + def _reshape(bottom, num_dim, name): + input_shape = tf.shape(bottom) + with tf.variable_scope(name): + to_caffe = tf.transpose(bottom, [0, 3, 1, 2]) + reshaped = tf.reshape(to_caffe, [1, num_dim, -1, input_shape[2]]) + to_tf = tf.transpose(reshaped, [0, 2, 3, 1]) + return to_tf + + @staticmethod + def _softmax(bottom, name): + if name.startswith('rpn_cls_prob_reshape'): + input_shape = tf.shape(bottom) + bottom_reshaped = tf.reshape(bottom, [-1, input_shape[-1]]) + reshaped_score = tf.nn.softmax(bottom_reshaped, name=name) + return tf.reshape(reshaped_score, input_shape) + return tf.nn.softmax(bottom, name=name) + + def test_image(self, sess, image, im_info): + return sess.run([self._cls_score, self._cls_prob, self._bbox_pred, self._rois], feed_dict={ + self._image: image, + self._im_info: im_info + }) diff --git a/anime-face-detector/main.py b/anime-face-detector/main.py new file mode 100644 index 0000000..11f7e4d --- /dev/null +++ b/anime-face-detector/main.py @@ -0,0 +1,170 @@ +import numpy as np +import cv2 +from faster_rcnn_wrapper import FasterRCNNSlim +import tensorflow as tf +import argparse +import os +import json +import time +from nms_wrapper import NMSType, NMSWrapper + + +def detect(sess, rcnn_cls, image): + # pre-processing image for Faster-RCNN + img_origin = image.astype(np.float32, copy=True) + img_origin -= np.array([[[102.9801, 115.9465, 112.7717]]]) + + img_shape = img_origin.shape + img_size_min = np.min(img_shape[:2]) + img_size_max = np.max(img_shape[:2]) + + img_scale = 600 / img_size_min + if np.round(img_scale * img_size_max) > 1000: + img_scale = 1000 / img_size_max + img = cv2.resize(img_origin, None, None, img_scale, img_scale, cv2.INTER_LINEAR) + img_info = np.array([img.shape[0], img.shape[1], img_scale], dtype=np.float32) + img = np.expand_dims(img, 0) + + # test image + _, scores, bbox_pred, rois = rcnn_cls.test_image(sess, img, img_info) + + # bbox transform + boxes = rois[:, 1:] / img_scale + + boxes = boxes.astype(bbox_pred.dtype, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1 + heights = boxes[:, 3] - boxes[:, 1] + 1 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + dx = bbox_pred[:, 0::4] + dy = 
bbox_pred[:, 1::4] + dw = bbox_pred[:, 2::4] + dh = bbox_pred[:, 3::4] + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + pred_boxes = np.zeros_like(bbox_pred, dtype=bbox_pred.dtype) + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h + # clipping edge + pred_boxes[:, 0::4] = np.maximum(pred_boxes[:, 0::4], 0) + pred_boxes[:, 1::4] = np.maximum(pred_boxes[:, 1::4], 0) + pred_boxes[:, 2::4] = np.minimum(pred_boxes[:, 2::4], img_shape[1] - 1) + pred_boxes[:, 3::4] = np.minimum(pred_boxes[:, 3::4], img_shape[0] - 1) + return scores, pred_boxes + + +def load_file_from_dir(dir_path): + ret = [] + for file in os.listdir(dir_path): + path_comb = os.path.join(dir_path, file) + if os.path.isdir(path_comb): + ret += load_file_from_dir(path_comb) + else: + ret.append(path_comb) + return ret + + +def fmt_time(dtime): + if dtime <= 0: + return '0:00.000' + elif dtime < 60: + return '0:%02d.%03d' % (int(dtime), int(dtime * 1000) % 1000) + elif dtime < 3600: + return '%d:%02d.%03d' % (int(dtime / 60), int(dtime) % 60, int(dtime * 1000) % 1000) + else: + return '%d:%02d:%02d.%03d' % (int(dtime / 3600), int((dtime % 3600) / 60), int(dtime) % 60, + int(dtime * 1000) % 1000) + + +def main(): + parser = argparse.ArgumentParser(description='Anime face detector demo') + parser.add_argument('-i', help='The input path of an image or directory', required=True, dest='input', type=str) + parser.add_argument('-o', help='The output json path of the detection result', dest='output') + parser.add_argument('-nms', help='Change the threshold for non maximum suppression', + dest='nms_thresh', default=0.3, type=float) + parser.add_argument('-conf', help='Change the threshold for class regression', dest='conf_thresh', + default=0.8, type=float) + parser.add_argument('-model', help='Specify a new path for model', dest='model', type=str, + default='model/res101_faster_rcnn_iter_60000.ckpt') + parser.add_argument('-nms-type', help='Type of nms', choices=['PY_NMS', 'CPU_NMS', 'GPU_NMS'], dest='nms_type', + default='CPU_NMS') + + args = parser.parse_args() + + assert os.path.exists(args.input), 'The input path does not exists' + + if os.path.isdir(args.input): + files = load_file_from_dir(args.input) + else: + files = [args.input] + file_len = len(files) + + if args.nms_type == 'PY_NMS': + nms_type = NMSType.PY_NMS + elif args.nms_type == 'CPU_NMS': + nms_type = NMSType.CPU_NMS + elif args.nms_type == 'GPU_NMS': + nms_type = NMSType.GPU_NMS + else: + raise ValueError('Incorrect NMS Type, not supported yet') + + nms = NMSWrapper(nms_type) + + cfg = tf.ConfigProto() + cfg.gpu_options.allow_growth = True + sess = tf.Session(config=cfg) + + net = FasterRCNNSlim() + saver = tf.train.Saver() + + saver.restore(sess, args.model) + + result = {} + + time_start = time.time() + + for idx, file in enumerate(files): + elapsed = time.time() - time_start + eta = (file_len - idx) * elapsed / idx if idx > 0 else 0 + print('[%d/%d] Elapsed: %s, ETA: %s >> %s' % (idx+1, file_len, fmt_time(elapsed), fmt_time(eta), file)) + img = cv2.imread(file) + scores, boxes = detect(sess, net, img) + boxes = boxes[:, 4:8] + scores = scores[:, 1] + keep = nms(np.hstack([boxes, scores[:, np.newaxis]]).astype(np.float32), args.nms_thresh) + boxes = 
boxes[keep, :] + scores = scores[keep] + inds = np.where(scores >= args.conf_thresh)[0] + scores = scores[inds] + boxes = boxes[inds, :] + + result[file] = [] + for i in range(scores.shape[0]): + x1, y1, x2, y2 = boxes[i, :].tolist() + new_result = {'score': float(scores[i]), + 'bbox': [x1, y1, x2, y2]} + result[file].append(new_result) + + if args.output is None: + cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2) + if args.output: + if ((idx+1) % 1000) == 0: + # saving the temporary result + with open(args.output, 'w') as f: + json.dump(result, f) + else: + cv2.imshow(file, img) + + if args.output: + with open(args.output, 'w') as f: + json.dump(result, f) + else: + cv2.waitKey() + + +if __name__ == '__main__': + main() diff --git a/anime-face-detector/make.bat b/anime-face-detector/make.bat new file mode 100644 index 0000000..b0d9bff --- /dev/null +++ b/anime-face-detector/make.bat @@ -0,0 +1,20 @@ +@echo off +if /i "%1" == "clean" goto clean +goto all + +:all +python setup.py build_ext --inplace +rd /s /q build + +goto exit + + + +:clean +del /f /s /q *.cpp +del /f /s /q *.c +del /f /s /q *.pyd + +goto exit + +:exit diff --git a/anime-face-detector/model/.gitignore b/anime-face-detector/model/.gitignore new file mode 100644 index 0000000..b1d31d3 --- /dev/null +++ b/anime-face-detector/model/.gitignore @@ -0,0 +1,5 @@ +# all pre-trained models +*.index +*.data-00000-of-00001 +*.meta +*.pkl diff --git a/anime-face-detector/nms/.gitignore b/anime-face-detector/nms/.gitignore new file mode 100644 index 0000000..40d7cb4 --- /dev/null +++ b/anime-face-detector/nms/.gitignore @@ -0,0 +1,2 @@ +*.c +*.cpp diff --git a/anime-face-detector/nms/__init__.py b/anime-face-detector/nms/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/anime-face-detector/nms/__init__.py diff --git a/anime-face-detector/nms/cpu_nms.pyx b/anime-face-detector/nms/cpu_nms.pyx new file mode 100644 index 0000000..71fbab1 --- /dev/null +++ b/anime-face-detector/nms/cpu_nms.pyx @@ -0,0 +1,68 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int64_t, ndim=1] order = scores.argsort()[::-1] + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + keep = [] + for _i in range(ndets): + i = 
order[_i] + if suppressed[i] == 1: + continue + keep.append(i) + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return keep diff --git a/anime-face-detector/nms/gpu_nms.hpp b/anime-face-detector/nms/gpu_nms.hpp new file mode 100644 index 0000000..68b6d42 --- /dev/null +++ b/anime-face-detector/nms/gpu_nms.hpp @@ -0,0 +1,2 @@ +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id); diff --git a/anime-face-detector/nms/gpu_nms.pyx b/anime-face-detector/nms/gpu_nms.pyx new file mode 100644 index 0000000..55878db --- /dev/null +++ b/anime-face-detector/nms/gpu_nms.pyx @@ -0,0 +1,31 @@ +# -------------------------------------------------------- +# Faster R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np + +assert sizeof(int) == sizeof(np.int32_t) + +cdef extern from "gpu_nms.hpp": + void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + +def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + np.int32_t device_id=0): + cdef int boxes_num = dets.shape[0] + cdef int boxes_dim = dets.shape[1] + cdef int num_out + cdef np.ndarray[np.int32_t, ndim=1] \ + keep = np.zeros(boxes_num, dtype=np.int32) + cdef np.ndarray[np.float32_t, ndim=1] \ + scores = dets[:, 4] + cdef np.ndarray[np.int64_t, ndim=1] \ + order = scores.argsort()[::-1] + cdef np.ndarray[np.float32_t, ndim=2] \ + sorted_dets = dets[order, :] + _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + keep = keep[:num_out] + return list(order[keep]) diff --git a/anime-face-detector/nms/nms_kernel.cu b/anime-face-detector/nms/nms_kernel.cu new file mode 100644 index 0000000..038a590 --- /dev/null +++ b/anime-face-detector/nms/nms_kernel.cu @@ -0,0 +1,144 @@ +// ------------------------------------------------------------------ +// Faster R-CNN +// Copyright (c) 2015 Microsoft +// Licensed under The MIT License [see fast-rcnn/LICENSE for details] +// Written by Shaoqing Ren +// ------------------------------------------------------------------ + +#include "gpu_nms.hpp" +#include <vector> +#include <iostream> + +#define CUDA_CHECK(condition) \ + /* Code block avoids redefinition of cudaError_t error */ \ + do { \ + cudaError_t error = condition; \ + if (error != cudaSuccess) { \ + std::cout << cudaGetErrorString(error) << std::endl; \ + } \ + } while (0) + +#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) +int const threadsPerBlock = sizeof(unsigned long long) * 8; + +__device__ inline float devIoU(float const * const a, float const * const b) { + float left = max(a[0], b[0]), right = min(a[2], b[2]); + float top = max(a[1], b[1]), bottom = min(a[3], b[3]); + float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); + float interS = width * height; + float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); + float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); + return interS / (Sa + Sb - interS); +} + +__global__ void nms_kernel(const int n_boxes, const 
float nms_overlap_thresh, + const float *dev_boxes, unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = + min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 5]; + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; + const float *cur_box = dev_boxes + cur_box_idx * 5; + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; + } + } + const int col_blocks = DIVUP(n_boxes, threadsPerBlock); + dev_mask[cur_box_idx * col_blocks + col_start] = t; + } +} + +void _set_device(int device_id) { + int current_device; + CUDA_CHECK(cudaGetDevice(¤t_device)); + if (current_device == device_id) { + return; + } + // The call to cudaSetDevice must come before any calls to Get, which + // may perform initialization using the GPU. 
+ CUDA_CHECK(cudaSetDevice(device_id)); +} + +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id) { + _set_device(device_id); + + float* boxes_dev = NULL; + unsigned long long* mask_dev = NULL; + + const int col_blocks = DIVUP(boxes_num, threadsPerBlock); + + CUDA_CHECK(cudaMalloc(&boxes_dev, + boxes_num * boxes_dim * sizeof(float))); + CUDA_CHECK(cudaMemcpy(boxes_dev, + boxes_host, + boxes_num * boxes_dim * sizeof(float), + cudaMemcpyHostToDevice)); + + CUDA_CHECK(cudaMalloc(&mask_dev, + boxes_num * col_blocks * sizeof(unsigned long long))); + + dim3 blocks(DIVUP(boxes_num, threadsPerBlock), + DIVUP(boxes_num, threadsPerBlock)); + dim3 threads(threadsPerBlock); + nms_kernel<<<blocks, threads>>>(boxes_num, + nms_overlap_thresh, + boxes_dev, + mask_dev); + + std::vector<unsigned long long> mask_host(boxes_num * col_blocks); + CUDA_CHECK(cudaMemcpy(&mask_host[0], + mask_dev, + sizeof(unsigned long long) * boxes_num * col_blocks, + cudaMemcpyDeviceToHost)); + + std::vector<unsigned long long> remv(col_blocks); + memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); + + int num_to_keep = 0; + for (int i = 0; i < boxes_num; i++) { + int nblock = i / threadsPerBlock; + int inblock = i % threadsPerBlock; + + if (!(remv[nblock] & (1ULL << inblock))) { + keep_out[num_to_keep++] = i; + unsigned long long *p = &mask_host[0] + i * col_blocks; + for (int j = nblock; j < col_blocks; j++) { + remv[j] |= p[j]; + } + } + } + *num_out = num_to_keep; + + CUDA_CHECK(cudaFree(boxes_dev)); + CUDA_CHECK(cudaFree(mask_dev)); +} diff --git a/anime-face-detector/nms/py_cpu_nms.py b/anime-face-detector/nms/py_cpu_nms.py new file mode 100644 index 0000000..54e7b25 --- /dev/null +++ b/anime-face-detector/nms/py_cpu_nms.py @@ -0,0 +1,38 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ross Girshick +# -------------------------------------------------------- + +import numpy as np + +def py_cpu_nms(dets, thresh): + """Pure Python NMS baseline.""" + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep diff --git a/anime-face-detector/nms_wrapper.py b/anime-face-detector/nms_wrapper.py new file mode 100644 index 0000000..ca900e8 --- /dev/null +++ b/anime-face-detector/nms_wrapper.py @@ -0,0 +1,29 @@ +from enum import Enum + + +class NMSType(Enum): + PY_NMS = 1 + CPU_NMS = 2 + GPU_NMS = 3 + + +default_nms_type = NMSType.PY_NMS + + +class NMSWrapper: + def __init__(self, nms_type=default_nms_type): + assert type(nms_type) == NMSType + if nms_type == NMSType.PY_NMS: + from nms.py_cpu_nms import py_cpu_nms + self._nms = py_cpu_nms + elif nms_type == NMSType.CPU_NMS: + from nms.cpu_nms import cpu_nms + self._nms = cpu_nms + elif nms_type == NMSType.GPU_NMS: + from nms.gpu_nms import gpu_nms + self._nms = gpu_nms + else: + raise 
ValueError('current nms type is not implemented yet')
+
+    def __call__(self, *args, **kwargs):
+        return self._nms(*args, **kwargs)
diff --git a/anime-face-detector/setup.py b/anime-face-detector/setup.py
new file mode 100644
index 0000000..dc634f5
--- /dev/null
+++ b/anime-face-detector/setup.py
@@ -0,0 +1,42 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import os
+from os.path import join as pjoin
+import numpy as np
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+import sys
+
+
+# Obtain the numpy include directory. This logic works across numpy versions.
+try:
+    numpy_include = np.get_include()
+except AttributeError:
+    numpy_include = np.get_numpy_include()
+
+# custom build_ext hook; no compiler customization is needed for the CPU-only build
+class custom_build_ext(build_ext):
+    def build_extensions(self):
+        build_ext.build_extensions(self)
+
+ext_modules = [
+    Extension(
+        "nms.cpu_nms",
+        ["nms/cpu_nms.pyx"],
+        extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] if sys.platform == 'linux' else [],
+        include_dirs=[numpy_include]
+    )
+]
+
+setup(
+    name='tf_faster_rcnn',
+    ext_modules=ext_modules,
+    # use the custom build_ext defined above
+    cmdclass={'build_ext': custom_build_ext},
+)
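As a quick sanity check of the NMS components above, here is a minimal sketch (an assumption: it is run from the repo root so `nms_wrapper` and the `nms` package are importable; `NMSType.PY_NMS` avoids the Cython build step, while `CPU_NMS` requires `make` to have been run first):

```python
# Toy smoke test for NMSWrapper / py_cpu_nms: three detections given as
# [x1, y1, x2, y2, score]. The first two boxes overlap heavily, so the
# lower-scoring one should be suppressed at an IoU threshold of 0.3.
import numpy as np
from nms_wrapper import NMSType, NMSWrapper

nms = NMSWrapper(NMSType.PY_NMS)  # pure-Python path, no build step required

dets = np.array([
    [10.0, 10.0, 60.0, 60.0, 0.95],
    [12.0, 12.0, 62.0, 62.0, 0.90],      # IoU with box 0 is roughly 0.86
    [100.0, 100.0, 150.0, 150.0, 0.80],
], dtype=np.float32)

keep = nms(dets, 0.3)  # indices of surviving boxes, ordered by score
print(keep)            # expected: [0, 2]
```

This mirrors how `main.py` calls the wrapper: detections are stacked as an N x 5 float32 array of boxes plus scores, and the returned indices are used to filter both `boxes` and `scores`.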