Guide
RefineDet is based on Caffe.
See Install and Configure Caffe on ubuntu 16.04
- ubuntu 16.04
- CUDA 9.2 + cudnn 7.1.4 (for caffe/tensorrt/anakin)
- opencv 3.3.0
- python 2.7
- caffe (from refinedet)
compile
git clone https://github.com/sfzhang15/RefineDet.git
cd RefineDet
mkdir build && cd build && cmake-gui ..
make -j8 && make pycaffe
options
USE_CUDNN True
USE_OPENCV True
WITH_PYTHON_LAYER True
BLAS atlas
CMAKE_INSTALL_PREFIX /home/kezunlin/program/refinedet/build/install
Tip: edit CMakeLists.txt (e.g. with vim) and comment out the examples and docs subdirectories:
#add_subdirectory(examples)
add_subdirectory(python)
add_subdirectory(matlab)
#add_subdirectory(docs)
fix gflags error
- caffe/include/caffe/common.hpp
- caffe/examples/mnist/convert_mnist_data.cpp
In both files, comment out the `#ifndef GFLAGS_GFLAGS_H_` guard so that the namespace alias is always defined:
// #ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
// #endif // GFLAGS_GFLAGS_H_
example
Two versions are provided:
- single version
- batch version
'''
In this example, we will load a RefineDet model and use it to detect objects.
'''
import argparse
import os
import sys
import numpy as np
import skimage.io as io
import cv2
# Make sure that caffe is on the python path:
# caffe_root is './', i.e. the current working directory; the relative
# 'models/...' and './images/...' paths used below are resolved against it.
# NOTE(review): presumably the script is launched from the RefineDet
# checkout -- confirm.
caffe_root = './'
os.chdir(caffe_root)
# Put <caffe_root>/python first on sys.path so that it takes precedence
# when resolving the `caffe` import below.
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe
# Label names indexed by the class_index reported for each detection.
classes = ['background', 'person']
def filter_boxs(boxs, threshold=0.4):
    """
    Keep the non-background boxes whose confidence reaches `threshold`.

    boxs: N*6 rows of (xmin,ymin,xmax,ymax,confidence,class_index); a numpy
        array or any other iterable of 6-element rows is accepted
    class_index: 0 background, 1 person
    confidence: 0-1
    threshold: minimum confidence (inclusive) for a box to be kept
    return:
    new_boxs `list` [b1,b2,b3,...] where each box is
        [xmin,ymin,xmax,ymax,confidence,class_index]; the coordinates and
        class_index are truncated to int, confidence is left untouched
    """
    new_boxs = []
    # Iterate over the rows themselves instead of range(boxs.shape[0]) so
    # that plain lists/tuples work as well as numpy arrays.
    for xmin, ymin, xmax, ymax, confidence, class_index in boxs:
        # class_index arrives as a float when the rows come from a numpy
        # array, so convert it before comparing and before returning it.
        label = int(class_index)
        if label > 0 and confidence >= threshold:
            new_boxs.append(
                [int(xmin), int(ymin), int(xmax), int(ymax), confidence, label])
    return new_boxs  # list [b1,b2,b3,...]
def save_results(counter, image_file, boxs, save_fig=False):
    """
    Draw the given boxes on an image and optionally save the result.

    counter: value used to build the output name
        'output/<counter>_results.jpg'
    image_file: path of the image, read with cv2.imread
    boxs: list of [xmin,ymin,xmax,ymax,confidence,class_index] with integer
        pixel coordinates (the format returned by filter_boxs)
    save_fig: when True the annotated image is written to disk; the
        'output' directory must already exist
    raises:
    IOError when image_file cannot be read
    """
    img = cv2.imread(image_file)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; fail here with a clear message instead of
        # deep inside a later drawing/writing call.
        raise IOError('Cannot read image: ' + image_file)
    # class_index is part of every box but is not needed for drawing: only
    # the confidence is rendered next to the rectangle.
    for xmin, ymin, xmax, ymax, confidence, _class_index in boxs:
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 0, 255), 3)  # bgr
        display_text = '%.2f' % (confidence)
        # Place the text slightly above the top-left corner of the box.
        cv2.putText(img, display_text, (xmin, ymin - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, color=(0, 0, 255), thickness=2)
    if save_fig:
        image_filepath = 'output/{0}_results.jpg'.format(counter)
        cv2.imwrite(image_filepath, img)
        print('Saved: ' + image_filepath)
def single():
    """
    Detect objects in the demo images, one forward pass per image.

    Loads the RefineDet ResNet-101 512x512 model on GPU 0, feeds every image
    through the network with a batch of one, filters the detections with
    filter_boxs and saves the annotated image through save_results.
    """
    caffe.set_device(0)
    caffe.set_mode_gpu()

    # make sure the directory used by save_results exists
    out_dir = "./output"
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # load model
    prototxt = 'models/ResNet/coco/refinedet_resnet101_512x512/deploy.prototxt'
    weights = 'models/ResNet/coco/refinedet_resnet101_512x512/coco_refinedet_resnet101_512x512_iter_75000.caffemodel'
    net = caffe.Net(prototxt, weights, caffe.TEST)

    # image preprocessing: a single 3-channel square input
    side = 512
    net.blobs['data'].reshape(1, 3, side, side)
    input_shape = net.blobs['data'].data.shape
    print("data_shape=", input_shape)  # 1, 3, 512, 512

    # caffe.io.load_image gives hwc, rgb, 0-1; the transformer is configured
    # to produce chw, bgr, 0-255 with the mean pixel subtracted
    transformer = caffe.io.Transformer({'data': input_shape})
    transformer.set_transpose('data', (2, 0, 1))     # hwc ===> chw
    transformer.set_channel_swap('data', (2, 1, 0))  # rgb ===> bgr
    transformer.set_raw_scale('data', 255)           # [0-1] ===> [0,255]
    transformer.set_mean('data', np.array([104, 117, 123]))  # bgr mean pixel

    image_paths = ["./images/1.png", "./images/2.png"]
    for counter, image_file in enumerate(image_paths):
        print("image_file=", image_file)
        image = caffe.io.load_image(image_file)  # hwc, rgb, 0-1
        print("image.shape=", image.shape)
        net_input = transformer.preprocess('data', image)
        print("transformed_image.shape=", net_input.shape)
        net.blobs['data'].data[...] = net_input

        detections = net.forward()['detection_out']
        print("detections.shape = ", detections.shape)  # 1, 1, 500, 7

        # one row per detection; columns 1..6 are
        # label, confidence, xmin, ymin, xmax, ymax
        rows = detections[0, 0]
        height = image.shape[0]
        width = image.shape[1]
        # the coordinates are multiplied by the original image size to get
        # pixel positions
        boxs = np.column_stack([
            rows[:, 3] * width,   # xmin
            rows[:, 4] * height,  # ymin
            rows[:, 5] * width,   # xmax
            rows[:, 6] * height,  # ymax
            rows[:, 2],           # confidence, 0-1
            rows[:, 1],           # label: 0 background, 1 person
        ])
        print("boxs = ", boxs.shape)  # 500,6

        kept = filter_boxs(boxs)
        print("new_boxs = ", len(kept))

        # save the annotated image
        save_results(counter, image_file, kept, save_fig=True)
def batch():
    """
    Detect objects in the demo images with a single batched forward pass.

    All images are preprocessed into one input blob, the network runs once,
    and the detections are then split per image, filtered with filter_boxs
    and drawn/saved through save_results.
    """
    caffe.set_device(0)
    caffe.set_mode_gpu()
    save_dir = "./output"
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # load model
    model_def = 'models/ResNet/coco/refinedet_resnet101_512x512/deploy.prototxt'
    model_weights = 'models/ResNet/coco/refinedet_resnet101_512x512/coco_refinedet_resnet101_512x512_iter_75000.caffemodel'
    net = caffe.Net(model_def, model_weights, caffe.TEST)
    # Number of detection rows assumed per image when splitting the output.
    # NOTE(review): the original note says detection_output_layer.cpp/.cu
    # were updated to keep 500 box results per image (500,7 + 500,7 instead
    # of 500,7 + 384,7) -- confirm the deployed layer matches.
    box_count_per_image = 500
    files = ["./images/1.png", "./images/2.png"]
    batch_size = len(files)
    # image preprocessing
    img_resize = 512
    net.blobs['data'].reshape(batch_size, 3, img_resize, img_resize)
    data_shape = net.blobs['data'].data.shape
    print("data_shape=", data_shape)  # batch_size, 3, 512, 512
    # by default, caffe use chw, bgr, 0-255, image-[104, 117, 123]
    transformer = caffe.io.Transformer({'data': data_shape})
    transformer.set_transpose('data', (2, 0, 1))  # hwc ===> chw
    transformer.set_channel_swap('data', (2, 1, 0))  # rgb===>bgr
    transformer.set_raw_scale('data', 255)  # [0-1]===> [0,255]
    transformer.set_mean('data', np.array([104, 117, 123]))  # bgr mean pixel
    # Remember every image's own shape: it is needed after the forward pass
    # to scale that image's boxes back to pixel coordinates.
    image_shapes = []
    for i, image_file in enumerate(files):
        print("image_file=", image_file)
        image = caffe.io.load_image(image_file)  # hwc, rgb, 0-1
        print("image.shape=", image.shape)
        image_shapes.append(image.shape)
        transformed_image = transformer.preprocess('data', image)
        print("transformed_image.shape=", transformed_image.shape)
        net.blobs['data'].data[i, :, :, :] = transformed_image
    detections = net.forward()['detection_out']
    print("detections.shape = ", detections.shape)  # 1, 1, batch_size*500, 7
    for i, image_file in enumerate(files):
        # Use the shape and path of image i, not those left over from the
        # last iteration of the loading loop.
        height, width = image_shapes[i][0], image_shapes[i][1]
        start = i * box_count_per_image
        end = (i + 1) * box_count_per_image
        print("start-end: ", start, end)
        det_label = detections[0, 0, start:end, 1]  # 0 back, 1 -person
        print(det_label[:10])
        det_conf = detections[0, 0, start:end, 2]  # 0-1
        det_xmin = detections[0, 0, start:end, 3] * width
        det_ymin = detections[0, 0, start:end, 4] * height
        det_xmax = detections[0, 0, start:end, 5] * width
        det_ymax = detections[0, 0, start:end, 6] * height
        boxs = np.column_stack([det_xmin, det_ymin, det_xmax, det_ymax, det_conf, det_label])
        print("boxs = ", boxs.shape)  # 500,6
        new_boxs = filter_boxs(boxs)
        print("new_boxs = ", len(new_boxs))
        # show result
        save_results(i, image_file, new_boxs, save_fig=True)
if __name__ == '__main__':
    # Script entry point: runs the batched demo. Uncomment single() to run
    # the one-image-per-forward-pass variant instead.
    #single()
    batch()
output
('data_shape=', (2, 3, 512, 512))
('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
('image_file=', './images/2.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
('detections.shape = ', (1, 1, 1000, 7))
('start-end: ', 0, 500)
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
('boxs = ', (500, 6))
('new_boxs = ', 3)
Saved: output/0_results.jpg
('start-end: ', 500, 1000)
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
('boxs = ', (500, 6))
('new_boxs = ', 6)
Saved: output/1_results.jpg
Reference
History
- 20181127: created.