voidget_color_table() { // cache color value in table[256] int divideWith = 10; uchar table[256]; for (int i = 0; i < 256; ++i) table[i] = divideWith* (i / divideWith); }
// C ptr []: faster but not safe Mat& ScanImageAndReduce_Cptr(Mat& I, const uchar* const table) { // accept only char type matrices CV_Assert(I.depth() != sizeof(uchar)); int channels = I.channels(); int nRows = I.rows; int nCols = I.cols* channels; if (I.isContinuous()) { nCols *= nRows; nRows = 1; } int i, j; uchar* p; for (i = 0; i < nRows; ++i) { p = I.ptr<uchar>(i); for (j = 0; j < nCols; ++j) { p[j] = table[p[j]]; } } return I; }
// MatIterator_<uchar>: safe but slow Mat& ScanImageAndReduce_Iterator(Mat& I, const uchar* const table) { // accept only char type matrices CV_Assert(I.depth() != sizeof(uchar)); constint channels = I.channels(); switch (channels) { case1: { MatIterator_<uchar> it, end; for (it = I.begin<uchar>(), end = I.end<uchar>(); it != end; ++it) *it = table[*it]; break; } case3: { MatIterator_<Vec3b> it, end; for (it = I.begin<Vec3b>(), end = I.end<Vec3b>(); it != end; ++it) { (*it)[0] = table[(*it)[0]]; (*it)[1] = table[(*it)[1]]; (*it)[2] = table[(*it)[2]]; } } } return I; }
OpenCV LUT
1 2 3 4 5 6 7 8 9 10 11
// LUT Mat& ScanImageAndReduce_LUT(Mat& I, const uchar* const table) { Mat lookUpTable(1, 256, CV_8U); uchar* p = lookUpTable.data; for (int i = 0; i < 256; ++i) p[i] = table[i];
cv::LUT(I, lookUpTable, I); return I; }
forEach
The forEach method of the Mat class utilizes all the cores on your machine to apply a function to every pixel.
// Parallel execution with function object. structForEachOperator { uchar m_table[256]; ForEachOperator(const uchar* const table) { for (size_t i = 0; i < 256; i++) { m_table[i] = table[i]; } }
voidoperator()(uchar& p, constint * position)const { // Perform a simple operation p = m_table[p]; } };
// forEach use multiple processors, very fast Mat& ScanImageAndReduce_forEach(Mat& I, const uchar* const table) { I.forEach<uchar>(ForEachOperator(table)); return I; }
forEach with lambda
1 2 3 4 5 6 7 8 9 10 11 12
// forEach lambda use multiple processors, very fast (lambda slower than ForEachOperator) Mat& ScanImageAndReduce_forEach_with_lambda(Mat& I, const uchar* const table) { I.forEach<uchar> ( [=](uchar &p, constint * position) -> void { p = table[p]; } ); return I; }
time cost
no foreach
[1 Cptr ] times=5000, total_cost=988 ms, avg_cost=0.1976 ms
[1 Cptr2 ] times=5000, total_cost=1704 ms, avg_cost=0.3408 ms
[2 atRandom] times=5000, total_cost=9611 ms, avg_cost=1.9222 ms
[3 Iterator] times=5000, total_cost=20195 ms, avg_cost=4.039 ms
[4 LUT ] times=5000, total_cost=899 ms, avg_cost=0.1798 ms
[1 Cptr ] times=10000, total_cost=2425 ms, avg_cost=0.2425 ms
[1 Cptr2 ] times=10000, total_cost=3391 ms, avg_cost=0.3391 ms
[2 atRandom] times=10000, total_cost=20024 ms, avg_cost=2.0024 ms
[3 Iterator] times=10000, total_cost=39980 ms, avg_cost=3.998 ms
[4 LUT ] times=10000, total_cost=103 ms, avg_cost=0.0103 ms
foreach
[5 forEach ] times=200000, total_cost=199 ms, avg_cost=0.000995 ms
[5 forEach lambda] times=200000, total_cost=521 ms, avg_cost=0.002605 ms
[5 forEach ] times=20000, total_cost=17 ms, avg_cost=0.00085 ms
[5 forEach lambda] times=20000, total_cost=23 ms, avg_cost=0.00115 ms
# import the necessary packages
import matplotlib.pyplot as plt
import cv2

# report which OpenCV build the notebook is running against
print(cv2.__version__)
%matplotlib inline
3.4.2
1 2 3 4 5 6
# load the original image, convert it to grayscale, and display
# it inline
image = cv2.imread("cat.jpg")
if image is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError('could not read image "cat.jpg"')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
print(image.shape)
#plt.imshow(image, cmap="gray")
(360, 480)
1
%load_ext cython
The cython extension is already loaded. To reload it, use:
%reload_ext cython
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
%%cython -a
def threshold_python(T, image):
    """Binarize `image` in place with an untyped pixel-by-pixel loop.

    Every pixel whose value is >= T becomes 255; every other pixel becomes 0.

    Parameters:
        T: threshold each pixel is compared against.
        image: 2-D array exposing `.shape` and `[y, x]` indexing; it is
            modified in place.

    Returns:
        The same `image` object that was passed in.
    """
    # grab the image dimensions
    h = image.shape[0]
    w = image.shape[1]

    # loop over the image, pixel by pixel
    for y in range(0, h):
        for x in range(0, w):
            # threshold the pixel
            image[y, x] = 255 if image[y, x] >= T else 0

    # return the thresholded image
    return image
1
%timeit threshold_python(5, image)
263 ms ± 20.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%%cython -a
import cython

@cython.boundscheck(False)
cpdef unsigned char[:, :] threshold_cython(int T, unsigned char [:, :] image):
    """Binarize `image` in place using C-typed loop variables.

    Every pixel whose value is >= T becomes 255; every other pixel becomes 0.
    Bounds checking is switched off for this function, so the loops rely on
    `image.shape` for their limits.

    Parameters:
        T: threshold each pixel is compared against.
        image: 2-D unsigned-char memoryview; it is modified in place.

    Returns:
        The same memoryview that was passed in.
    """
    # set the variable extension types
    cdef int x, y, w, h

    # grab the image dimensions
    h = image.shape[0]
    w = image.shape[1]

    # loop over the image
    for y in range(0, h):
        for x in range(0, w):
            # threshold the pixel
            image[y, x] = 255 if image[y, x] >= T else 0

    # return the thresholded image
    return image
numba
1
%timeit threshold_cython(5, image)
150 µs ± 7.14 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
from numba import njit
@njit
def threshold_njit(T, image):
    """Binarize `image` in place; the pixel loop is compiled by numba's njit.

    Every pixel whose value is >= T becomes 255; every other pixel becomes 0.

    Parameters:
        T: threshold each pixel is compared against.
        image: 2-D array exposing `.shape` and `[y, x]` indexing; it is
            modified in place.

    Returns:
        The thresholded `image`.
    """
    # grab the image dimensions
    h = image.shape[0]
    w = image.shape[1]

    # loop over the image, pixel by pixel
    for y in range(0, h):
        for x in range(0, w):
            # threshold the pixel
            image[y, x] = 255 if image[y, x] >= T else 0

    # return the thresholded image
    return image
1
%timeit threshold_njit(5, image)
43.5 µs ± 142 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)