diff options
Diffstat (limited to 'anime-face-detector/nms')
-rw-r--r-- | anime-face-detector/nms/.gitignore | 2 | ||||
-rw-r--r-- | anime-face-detector/nms/__init__.py | 0 | ||||
-rw-r--r-- | anime-face-detector/nms/cpu_nms.pyx | 68 | ||||
-rw-r--r-- | anime-face-detector/nms/gpu_nms.hpp | 2 | ||||
-rw-r--r-- | anime-face-detector/nms/gpu_nms.pyx | 31 | ||||
-rw-r--r-- | anime-face-detector/nms/nms_kernel.cu | 144 | ||||
-rw-r--r-- | anime-face-detector/nms/py_cpu_nms.py | 38 |
7 files changed, 0 insertions, 285 deletions
diff --git a/anime-face-detector/nms/.gitignore b/anime-face-detector/nms/.gitignore deleted file mode 100644 index 40d7cb4..0000000 --- a/anime-face-detector/nms/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.c -*.cpp diff --git a/anime-face-detector/nms/__init__.py b/anime-face-detector/nms/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/anime-face-detector/nms/__init__.py +++ /dev/null diff --git a/anime-face-detector/nms/cpu_nms.pyx b/anime-face-detector/nms/cpu_nms.pyx deleted file mode 100644 index 71fbab1..0000000 --- a/anime-face-detector/nms/cpu_nms.pyx +++ /dev/null @@ -1,68 +0,0 @@ -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- - -import numpy as np -cimport numpy as np - -cdef inline np.float32_t max(np.float32_t a, np.float32_t b): - return a if a >= b else b - -cdef inline np.float32_t min(np.float32_t a, np.float32_t b): - return a if a <= b else b - -def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): - cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] - cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] - cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] - cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] - cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] - - cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) - cdef np.ndarray[np.int64_t, ndim=1] order = scores.argsort()[::-1] - - cdef int ndets = dets.shape[0] - cdef np.ndarray[np.int_t, ndim=1] suppressed = \ - np.zeros((ndets), dtype=np.int) - - # nominal indices - cdef int _i, _j - # sorted indices - cdef int i, j - # temp variables for box i's (the box currently under consideration) - cdef np.float32_t ix1, iy1, ix2, iy2, iarea - # variables for computing overlap with box j (lower scoring box) - cdef 
np.float32_t xx1, yy1, xx2, yy2 - cdef np.float32_t w, h - cdef np.float32_t inter, ovr - - keep = [] - for _i in range(ndets): - i = order[_i] - if suppressed[i] == 1: - continue - keep.append(i) - ix1 = x1[i] - iy1 = y1[i] - ix2 = x2[i] - iy2 = y2[i] - iarea = areas[i] - for _j in range(_i + 1, ndets): - j = order[_j] - if suppressed[j] == 1: - continue - xx1 = max(ix1, x1[j]) - yy1 = max(iy1, y1[j]) - xx2 = min(ix2, x2[j]) - yy2 = min(iy2, y2[j]) - w = max(0.0, xx2 - xx1 + 1) - h = max(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (iarea + areas[j] - inter) - if ovr >= thresh: - suppressed[j] = 1 - - return keep diff --git a/anime-face-detector/nms/gpu_nms.hpp b/anime-face-detector/nms/gpu_nms.hpp deleted file mode 100644 index 68b6d42..0000000 --- a/anime-face-detector/nms/gpu_nms.hpp +++ /dev/null @@ -1,2 +0,0 @@ -void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, - int boxes_dim, float nms_overlap_thresh, int device_id); diff --git a/anime-face-detector/nms/gpu_nms.pyx b/anime-face-detector/nms/gpu_nms.pyx deleted file mode 100644 index 55878db..0000000 --- a/anime-face-detector/nms/gpu_nms.pyx +++ /dev/null @@ -1,31 +0,0 @@ -# -------------------------------------------------------- -# Faster R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- - -import numpy as np -cimport numpy as np - -assert sizeof(int) == sizeof(np.int32_t) - -cdef extern from "gpu_nms.hpp": - void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) - -def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, - np.int32_t device_id=0): - cdef int boxes_num = dets.shape[0] - cdef int boxes_dim = dets.shape[1] - cdef int num_out - cdef np.ndarray[np.int32_t, ndim=1] \ - keep = np.zeros(boxes_num, dtype=np.int32) - cdef np.ndarray[np.float32_t, ndim=1] \ - scores = dets[:, 4] - cdef np.ndarray[np.int64_t, 
ndim=1] \ - order = scores.argsort()[::-1] - cdef np.ndarray[np.float32_t, ndim=2] \ - sorted_dets = dets[order, :] - _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) - keep = keep[:num_out] - return list(order[keep]) diff --git a/anime-face-detector/nms/nms_kernel.cu b/anime-face-detector/nms/nms_kernel.cu deleted file mode 100644 index 038a590..0000000 --- a/anime-face-detector/nms/nms_kernel.cu +++ /dev/null @@ -1,144 +0,0 @@ -// ------------------------------------------------------------------ -// Faster R-CNN -// Copyright (c) 2015 Microsoft -// Licensed under The MIT License [see fast-rcnn/LICENSE for details] -// Written by Shaoqing Ren -// ------------------------------------------------------------------ - -#include "gpu_nms.hpp" -#include <vector> -#include <iostream> - -#define CUDA_CHECK(condition) \ - /* Code block avoids redefinition of cudaError_t error */ \ - do { \ - cudaError_t error = condition; \ - if (error != cudaSuccess) { \ - std::cout << cudaGetErrorString(error) << std::endl; \ - } \ - } while (0) - -#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) -int const threadsPerBlock = sizeof(unsigned long long) * 8; - -__device__ inline float devIoU(float const * const a, float const * const b) { - float left = max(a[0], b[0]), right = min(a[2], b[2]); - float top = max(a[1], b[1]), bottom = min(a[3], b[3]); - float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); - float interS = width * height; - float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); - float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); - return interS / (Sa + Sb - interS); -} - -__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, - const float *dev_boxes, unsigned long long *dev_mask) { - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int 
col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - __shared__ float block_boxes[threadsPerBlock * 5]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; - block_boxes[threadIdx.x * 5 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; - block_boxes[threadIdx.x * 5 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; - block_boxes[threadIdx.x * 5 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; - block_boxes[threadIdx.x * 5 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const float *cur_box = dev_boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { - t |= 1ULL << i; - } - } - const int col_blocks = DIVUP(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } -} - -void _set_device(int device_id) { - int current_device; - CUDA_CHECK(cudaGetDevice(&current_device)); - if (current_device == device_id) { - return; - } - // The call to cudaSetDevice must come before any calls to Get, which - // may perform initialization using the GPU. 
- CUDA_CHECK(cudaSetDevice(device_id)); -} - -void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, - int boxes_dim, float nms_overlap_thresh, int device_id) { - _set_device(device_id); - - float* boxes_dev = NULL; - unsigned long long* mask_dev = NULL; - - const int col_blocks = DIVUP(boxes_num, threadsPerBlock); - - CUDA_CHECK(cudaMalloc(&boxes_dev, - boxes_num * boxes_dim * sizeof(float))); - CUDA_CHECK(cudaMemcpy(boxes_dev, - boxes_host, - boxes_num * boxes_dim * sizeof(float), - cudaMemcpyHostToDevice)); - - CUDA_CHECK(cudaMalloc(&mask_dev, - boxes_num * col_blocks * sizeof(unsigned long long))); - - dim3 blocks(DIVUP(boxes_num, threadsPerBlock), - DIVUP(boxes_num, threadsPerBlock)); - dim3 threads(threadsPerBlock); - nms_kernel<<<blocks, threads>>>(boxes_num, - nms_overlap_thresh, - boxes_dev, - mask_dev); - - std::vector<unsigned long long> mask_host(boxes_num * col_blocks); - CUDA_CHECK(cudaMemcpy(&mask_host[0], - mask_dev, - sizeof(unsigned long long) * boxes_num * col_blocks, - cudaMemcpyDeviceToHost)); - - std::vector<unsigned long long> remv(col_blocks); - memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); - - int num_to_keep = 0; - for (int i = 0; i < boxes_num; i++) { - int nblock = i / threadsPerBlock; - int inblock = i % threadsPerBlock; - - if (!(remv[nblock] & (1ULL << inblock))) { - keep_out[num_to_keep++] = i; - unsigned long long *p = &mask_host[0] + i * col_blocks; - for (int j = nblock; j < col_blocks; j++) { - remv[j] |= p[j]; - } - } - } - *num_out = num_to_keep; - - CUDA_CHECK(cudaFree(boxes_dev)); - CUDA_CHECK(cudaFree(mask_dev)); -} diff --git a/anime-face-detector/nms/py_cpu_nms.py b/anime-face-detector/nms/py_cpu_nms.py deleted file mode 100644 index 54e7b25..0000000 --- a/anime-face-detector/nms/py_cpu_nms.py +++ /dev/null @@ -1,38 +0,0 @@ -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for 
details] -# Written by Ross Girshick -# -------------------------------------------------------- - -import numpy as np - -def py_cpu_nms(dets, thresh): - """Pure Python NMS baseline.""" - x1 = dets[:, 0] - y1 = dets[:, 1] - x2 = dets[:, 2] - y2 = dets[:, 3] - scores = dets[:, 4] - - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - xx1 = np.maximum(x1[i], x1[order[1:]]) - yy1 = np.maximum(y1[i], y1[order[1:]]) - xx2 = np.minimum(x2[i], x2[order[1:]]) - yy2 = np.minimum(y2[i], y2[order[1:]]) - - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (areas[i] + areas[order[1:]] - inter) - - inds = np.where(ovr <= thresh)[0] - order = order[inds + 1] - - return keep |