aboutsummaryrefslogtreecommitdiffstats
path: root/anime-face-detector/nms
diff options
context:
space:
mode:
authorcarp <25677564+carp@users.noreply.github.com>2020-07-13 13:27:55 -0400
committercarp <25677564+carp@users.noreply.github.com>2020-07-13 13:27:55 -0400
commitcb961ed0d6896309336159bda34ed2f75fb60a84 (patch)
tree6d75fcd9bd32b2e884e299ea97e361513036b3a2 /anime-face-detector/nms
parente78f351e06e432a607ebadc8de3ea6d27315c088 (diff)
downloadyaoi-communism-cb961ed0d6896309336159bda34ed2f75fb60a84.tar.gz
yaoi-communism-cb961ed0d6896309336159bda34ed2f75fb60a84.zip
add face detection
Diffstat (limited to 'anime-face-detector/nms')
-rw-r--r--anime-face-detector/nms/.gitignore2
-rw-r--r--anime-face-detector/nms/__init__.py0
-rw-r--r--anime-face-detector/nms/cpu_nms.pyx68
-rw-r--r--anime-face-detector/nms/gpu_nms.hpp2
-rw-r--r--anime-face-detector/nms/gpu_nms.pyx31
-rw-r--r--anime-face-detector/nms/nms_kernel.cu144
-rw-r--r--anime-face-detector/nms/py_cpu_nms.py38
7 files changed, 285 insertions, 0 deletions
diff --git a/anime-face-detector/nms/.gitignore b/anime-face-detector/nms/.gitignore
new file mode 100644
index 0000000..40d7cb4
--- /dev/null
+++ b/anime-face-detector/nms/.gitignore
@@ -0,0 +1,2 @@
+*.c
+*.cpp
diff --git a/anime-face-detector/nms/__init__.py b/anime-face-detector/nms/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/anime-face-detector/nms/__init__.py
diff --git a/anime-face-detector/nms/cpu_nms.pyx b/anime-face-detector/nms/cpu_nms.pyx
new file mode 100644
index 0000000..71fbab1
--- /dev/null
+++ b/anime-face-detector/nms/cpu_nms.pyx
@@ -0,0 +1,68 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import numpy as np
+cimport numpy as np
+
+cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
+ return a if a >= b else b
+
+cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
+ return a if a <= b else b
+
+def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
+ cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
+ cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
+ cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
+ cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
+ cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
+
+ cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ cdef np.ndarray[np.int64_t, ndim=1] order = scores.argsort()[::-1]
+
+ cdef int ndets = dets.shape[0]
+ cdef np.ndarray[np.int_t, ndim=1] suppressed = \
+ np.zeros((ndets), dtype=np.int)
+
+ # nominal indices
+ cdef int _i, _j
+ # sorted indices
+ cdef int i, j
+ # temp variables for box i's (the box currently under consideration)
+ cdef np.float32_t ix1, iy1, ix2, iy2, iarea
+ # variables for computing overlap with box j (lower scoring box)
+ cdef np.float32_t xx1, yy1, xx2, yy2
+ cdef np.float32_t w, h
+ cdef np.float32_t inter, ovr
+
+ keep = []
+ for _i in range(ndets):
+ i = order[_i]
+ if suppressed[i] == 1:
+ continue
+ keep.append(i)
+ ix1 = x1[i]
+ iy1 = y1[i]
+ ix2 = x2[i]
+ iy2 = y2[i]
+ iarea = areas[i]
+ for _j in range(_i + 1, ndets):
+ j = order[_j]
+ if suppressed[j] == 1:
+ continue
+ xx1 = max(ix1, x1[j])
+ yy1 = max(iy1, y1[j])
+ xx2 = min(ix2, x2[j])
+ yy2 = min(iy2, y2[j])
+ w = max(0.0, xx2 - xx1 + 1)
+ h = max(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (iarea + areas[j] - inter)
+ if ovr >= thresh:
+ suppressed[j] = 1
+
+ return keep
diff --git a/anime-face-detector/nms/gpu_nms.hpp b/anime-face-detector/nms/gpu_nms.hpp
new file mode 100644
index 0000000..68b6d42
--- /dev/null
+++ b/anime-face-detector/nms/gpu_nms.hpp
@@ -0,0 +1,2 @@
+void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+ int boxes_dim, float nms_overlap_thresh, int device_id);
diff --git a/anime-face-detector/nms/gpu_nms.pyx b/anime-face-detector/nms/gpu_nms.pyx
new file mode 100644
index 0000000..55878db
--- /dev/null
+++ b/anime-face-detector/nms/gpu_nms.pyx
@@ -0,0 +1,31 @@
+# --------------------------------------------------------
+# Faster R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import numpy as np
+cimport numpy as np
+
+assert sizeof(int) == sizeof(np.int32_t)
+
+cdef extern from "gpu_nms.hpp":
+ void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
+
+def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
+ np.int32_t device_id=0):
+ cdef int boxes_num = dets.shape[0]
+ cdef int boxes_dim = dets.shape[1]
+ cdef int num_out
+ cdef np.ndarray[np.int32_t, ndim=1] \
+ keep = np.zeros(boxes_num, dtype=np.int32)
+ cdef np.ndarray[np.float32_t, ndim=1] \
+ scores = dets[:, 4]
+ cdef np.ndarray[np.int64_t, ndim=1] \
+ order = scores.argsort()[::-1]
+ cdef np.ndarray[np.float32_t, ndim=2] \
+ sorted_dets = dets[order, :]
+ _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
+ keep = keep[:num_out]
+ return list(order[keep])
diff --git a/anime-face-detector/nms/nms_kernel.cu b/anime-face-detector/nms/nms_kernel.cu
new file mode 100644
index 0000000..038a590
--- /dev/null
+++ b/anime-face-detector/nms/nms_kernel.cu
@@ -0,0 +1,144 @@
+// ------------------------------------------------------------------
+// Faster R-CNN
+// Copyright (c) 2015 Microsoft
+// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
+// Written by Shaoqing Ren
+// ------------------------------------------------------------------
+
+#include "gpu_nms.hpp"
+#include <vector>
+#include <iostream>
+
+#define CUDA_CHECK(condition) \
+ /* Code block avoids redefinition of cudaError_t error */ \
+ do { \
+ cudaError_t error = condition; \
+ if (error != cudaSuccess) { \
+ std::cout << cudaGetErrorString(error) << std::endl; \
+ } \
+ } while (0)
+
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+int const threadsPerBlock = sizeof(unsigned long long) * 8;
+
+__device__ inline float devIoU(float const * const a, float const * const b) {
+ float left = max(a[0], b[0]), right = min(a[2], b[2]);
+ float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
+ float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+ float interS = width * height;
+ float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
+ float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+ return interS / (Sa + Sb - interS);
+}
+
+__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
+ const float *dev_boxes, unsigned long long *dev_mask) {
+ const int row_start = blockIdx.y;
+ const int col_start = blockIdx.x;
+
+ // if (row_start > col_start) return;
+
+ const int row_size =
+ min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+ const int col_size =
+ min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+ __shared__ float block_boxes[threadsPerBlock * 5];
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * 5 + 0] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
+ block_boxes[threadIdx.x * 5 + 1] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
+ block_boxes[threadIdx.x * 5 + 2] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
+ block_boxes[threadIdx.x * 5 + 3] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
+ block_boxes[threadIdx.x * 5 + 4] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
+ const float *cur_box = dev_boxes + cur_box_idx * 5;
+ int i = 0;
+ unsigned long long t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
+ dev_mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+}
+
+void _set_device(int device_id) {
+ int current_device;
+ CUDA_CHECK(cudaGetDevice(&current_device));
+ if (current_device == device_id) {
+ return;
+ }
+ // The call to cudaSetDevice must come before any calls to Get, which
+ // may perform initialization using the GPU.
+ CUDA_CHECK(cudaSetDevice(device_id));
+}
+
+void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+ int boxes_dim, float nms_overlap_thresh, int device_id) {
+ _set_device(device_id);
+
+ float* boxes_dev = NULL;
+ unsigned long long* mask_dev = NULL;
+
+ const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
+
+ CUDA_CHECK(cudaMalloc(&boxes_dev,
+ boxes_num * boxes_dim * sizeof(float)));
+ CUDA_CHECK(cudaMemcpy(boxes_dev,
+ boxes_host,
+ boxes_num * boxes_dim * sizeof(float),
+ cudaMemcpyHostToDevice));
+
+ CUDA_CHECK(cudaMalloc(&mask_dev,
+ boxes_num * col_blocks * sizeof(unsigned long long)));
+
+ dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
+ DIVUP(boxes_num, threadsPerBlock));
+ dim3 threads(threadsPerBlock);
+ nms_kernel<<<blocks, threads>>>(boxes_num,
+ nms_overlap_thresh,
+ boxes_dev,
+ mask_dev);
+
+ std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
+ CUDA_CHECK(cudaMemcpy(&mask_host[0],
+ mask_dev,
+ sizeof(unsigned long long) * boxes_num * col_blocks,
+ cudaMemcpyDeviceToHost));
+
+ std::vector<unsigned long long> remv(col_blocks);
+ memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
+
+ int num_to_keep = 0;
+ for (int i = 0; i < boxes_num; i++) {
+ int nblock = i / threadsPerBlock;
+ int inblock = i % threadsPerBlock;
+
+ if (!(remv[nblock] & (1ULL << inblock))) {
+ keep_out[num_to_keep++] = i;
+ unsigned long long *p = &mask_host[0] + i * col_blocks;
+ for (int j = nblock; j < col_blocks; j++) {
+ remv[j] |= p[j];
+ }
+ }
+ }
+ *num_out = num_to_keep;
+
+ CUDA_CHECK(cudaFree(boxes_dev));
+ CUDA_CHECK(cudaFree(mask_dev));
+}
diff --git a/anime-face-detector/nms/py_cpu_nms.py b/anime-face-detector/nms/py_cpu_nms.py
new file mode 100644
index 0000000..54e7b25
--- /dev/null
+++ b/anime-face-detector/nms/py_cpu_nms.py
@@ -0,0 +1,38 @@
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+import numpy as np
+
+def py_cpu_nms(dets, thresh):
+ """Pure Python NMS baseline."""
+ x1 = dets[:, 0]
+ y1 = dets[:, 1]
+ x2 = dets[:, 2]
+ y2 = dets[:, 3]
+ scores = dets[:, 4]
+
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+ order = scores.argsort()[::-1]
+
+ keep = []
+ while order.size > 0:
+ i = order[0]
+ keep.append(i)
+ xx1 = np.maximum(x1[i], x1[order[1:]])
+ yy1 = np.maximum(y1[i], y1[order[1:]])
+ xx2 = np.minimum(x2[i], x2[order[1:]])
+ yy2 = np.minimum(y2[i], y2[order[1:]])
+
+ w = np.maximum(0.0, xx2 - xx1 + 1)
+ h = np.maximum(0.0, yy2 - yy1 + 1)
+ inter = w * h
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+ inds = np.where(ovr <= thresh)[0]
+ order = order[inds + 1]
+
+ return keep