# Guide
# Matplot (skimage / PIL Image)
import matplotlib.pyplot as plt
import matplotlib.image as img

# matplotlib decodes the file into an (h, w, c) array in RGB channel order.
image = img.imread("images/cat.jpg")
print(image.shape)
print(image[:5, :5, 0])  # top-left 5x5 patch of the first (red) channel

plt.imshow(image)
(360, 480, 3)
[[26 27 25 28 30]
[26 27 25 26 28]
[26 26 26 26 27]
[27 26 27 28 29]
[29 27 26 26 29]]
# PIL.Image
import matplotlib.pyplot as plt
from PIL import Image

# Image.open returns a PIL image object (not a NumPy array); imshow accepts it.
image = Image.open("images/cat.jpg")
print(image)

plt.imshow(image)
# skimage
import skimage
import skimage.io  # the io submodule is imported explicitly so skimage.io resolves

# NOTE(review): the right-hand side of this assignment was lost when the page
# was extracted; skimage.io.imread on the same sample image is assumed.
image = skimage.io.imread("images/cat.jpg")
# OpenCV
import cv2

# cv2.imread returns an (h, w, c) array whose channels are in BGR order.
image = cv2.imread("images/cat.jpg")
print(image.shape)
print(image[:5, :5, 0])  # channel 0 is blue here, not red

# `plt` comes from the earlier snippets. imshow expects RGB, so showing the
# BGR array directly renders the colors wrong.
plt.imshow(image)
(360, 480, 3)
[[49 50 47 48 50]
[51 52 48 48 50]
[51 51 49 48 49]
[50 49 49 48 49]
[52 50 49 48 49]]
The colors of our image are clearly wrong! Why is this?
The answer lies in a caveat with OpenCV. OpenCV represents RGB images as multi-dimensional NumPy arrays… but in reverse order! This means that OpenCV images are actually represented in BGR order rather than RGB!
import cv2

image = cv2.imread("images/cat.jpg")

# Reorder the channels from OpenCV's BGR to the RGB order imshow expects.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# `plt` comes from the earlier snippets.
plt.axis("off")
plt.imshow(rgb_image)
# Matplot VS. OpenCV
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img

image1 = img.imread("images/cat.jpg")  # RGB

import cv2
image = cv2.imread("images/cat.jpg")             # BGR
image2 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB

print(image1.dtype)
print(image1[:5, :5, 0])
print("")
print(image2.dtype)
print(image2[:5, :5, 0])

# Count the element-wise matches; the two loaders agree exactly when every
# element matches (image1.size is 360 * 480 * 3 for this image).
equal_count = np.sum(np.equal(image1, image2))
print(equal_count)
print(equal_count == image1.size)
[[26 27 25 28 30]
[26 27 25 26 28]
[26 26 26 26 27]
[27 26 27 28 29]
[29 27 26 26 29]]
[[26 27 25 28 30]
[26 27 25 26 28]
[26 26 26 26 27]
[27 26 27 28 29]
[29 27 26 26 29]]
caffe.io.load_image loads data in a normalized form (0-1)
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline   # IPython/Jupyter magic: uncomment when running in a notebook

plt.rcParams['image.interpolation'] = 'nearest'

import sys
caffe_root = '../'
# pycaffe is imported from the Caffe source tree, so add it to the path first.
sys.path.insert(0, caffe_root + 'python')
import caffe

# Color image: (h, w, c), values in [0, 1].
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
print("{} {}".format(image.shape, image.dtype))
print(image[:5, :5, 0])
plt.figure()
plt.imshow(image)

# The same color file loaded with color=False: a single trailing channel.
image2 = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg', color=False)
print(image2.shape)
gray_image2 = image2.squeeze()  # drop the singleton channel axis
print("{} {}".format(gray_image2.shape, gray_image2.dtype))
print(gray_image2[:5, :5])
plt.figure()
plt.imshow(gray_image2)

# A file that is already grayscale, loaded with color=False.
image3 = caffe.io.load_image(caffe_root + 'examples/images/cat_gray.jpg', color=False)
print(image3.shape)
gray_image3 = image3.squeeze()
print("{} {}".format(gray_image3.shape, gray_image3.dtype))
print(gray_image3[:5, :5])
plt.figure()
plt.imshow(gray_image3)
(360, 480, 3) float32
[[ 0.10196079 0.10588235 0.09803922 0.10980392 0.11764706]
[ 0.10196079 0.10588235 0.09803922 0.10196079 0.10980392]
[ 0.10196079 0.10196079 0.10196079 0.10196079 0.10588235]
[ 0.10588235 0.10196079 0.10588235 0.10980392 0.11372549]
[ 0.11372549 0.10588235 0.10196079 0.10196079 0.11372549]]
(360, 480, 1)
(360, 480) float32
[[ 0.19543412 0.19935569 0.18842432 0.19120707 0.1990502 ]
[ 0.19599961 0.19992118 0.19151255 0.19234589 0.20018902]
[ 0.19599961 0.19599961 0.19543412 0.19234589 0.19626746]
[ 0.19935569 0.19543412 0.19626746 0.19120707 0.19512863]
[ 0.20719883 0.19935569 0.19543412 0.19234589 0.19512863]]
(360, 480, 1)
(360, 480) float32
[[ 0.10196079 0.10588235 0.09803922 0.10980392 0.11372549]
[ 0.10196079 0.10588235 0.09803922 0.10196079 0.10980392]
[ 0.10196079 0.10588235 0.10196079 0.10196079 0.10588235]
[ 0.10588235 0.10196079 0.10588235 0.10980392 0.11372549]
[ 0.11764706 0.10196079 0.10196079 0.10588235 0.10980392]]
for Network input blob(m,c,h,w):
By default, Caffe networks use the BGR image format, just like OpenCV.
Caffe mean files use BGR ordering; the mean is calculated from the training images, not the test images. mu = np.array([104, 117, 123])  # BGR
pixel range in [0,255] with dtype float32.
(m,c,h,w), BGR order,[0,255] range,float32
input image: (h,w,c),RGB,[0,1],float32
transformed image: (c,h,w), BGR,[0,255] float32
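Note that the mean subtraction is carried out after the raw scaling ([0,1]->[0,255]) regardless of the order in which the setters below are called: the manual re-implementation further down scales by 255 first and then subtracts the mean, and reproduces the Transformer output.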
transformer.set_transpose('data', (2,0,1)) #(h,w,c)->(c,h,w)
transformer.set_channel_swap('data', (2,1,0)) # RGB->BGR
transformer.set_raw_scale('data', 255) # [0,1]->[0,255] float32
transformer.set_mean('data', mu) # subtract BGR
keep in mind that the Transformer is only required when using a deploy.prototxt -like network definition, so without the Data Layer. When using a Data Layer, things get easier to understand.
import numpy as np
import matplotlib.pyplot as plt

import sys
caffe_root = '../'
# pycaffe is imported from the Caffe source tree, so add it to the path first.
sys.path.insert(0, caffe_root + 'python')
import caffe

# Input image: (h, w, c), RGB, [0, 1], float32.
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
print("{} {}".format(image.shape, image.dtype))
print(image[:5, :5, 0])

# Average the mean image over axis 1 twice to get one mean value per channel
# (assumes the .npy mean is laid out as (c, h, w) in BGR order -- TODO confirm).
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)

data_shape = (10, 3, 227, 227)  # (m, c, h, w)
transformer = caffe.io.Transformer({'data': data_shape})
transformer.set_transpose('data', (2, 0, 1))     # (h,w,c) -> (c,h,w)
transformer.set_channel_swap('data', (2, 1, 0))  # RGB -> BGR
transformer.set_raw_scale('data', 255)           # [0,1] -> [0,255]
transformer.set_mean('data', mu)                 # subtract the BGR mean

transformed_image = transformer.preprocess('data', image)
print("")
print('original image:  {} {}'.format(image.shape, image.dtype))
print('transform image:  {} {}'.format(transformed_image.shape, transformed_image.dtype))
print(transformed_image[0, :5, :5])
(360, 480, 3) float32
[[ 0.10196079 0.10588235 0.09803922 0.10980392 0.11764706]
[ 0.10196079 0.10588235 0.09803922 0.10196079 0.10980392]
[ 0.10196079 0.10196079 0.10196079 0.10196079 0.10588235]
[ 0.10588235 0.10196079 0.10588235 0.10980392 0.11372549]
[ 0.11372549 0.10588235 0.10196079 0.10196079 0.11372549]]
original image: (360, 480, 3) float32
transform image: (3, 227, 227) float32
[[-53.86381531 -56.23903656 -53.54626465 -53.14715195 -51.32625961]
[-52.93947601 -55.71855164 -54.00423813 -54.76469803 -52.88771057]
[-53.89373398 -55.67879486 -55.4278717 -55.22265625 -53.47174454]
[-50.98455811 -51.3506012 -54.06866074 -52.09104156 -52.94168854]
[-49.92769241 -49.85874176 -52.08575439 -52.50840759 -51.3900528 ]]
input image: cv2.imread: (h,w,c),BGR,[0,255],uint8 (the transformed result is float32)
transformed image: (c,h,w), BGR order,[0,255] float32
Note that the mean subtraction is always carried out before scaling.
transformer.set_transpose('data', (2,0,1)) #(h,w,c)->(c,h,w)
transformer.set_mean('data', mu) # subtract BGR
import cv2

# cv2.imread already yields (h, w, c) in BGR order with values in [0, 255],
# so no channel swap and no raw scaling are configured below.
image = cv2.imread("test/cat.jpg")

data_shape = (10, 3, 227, 227)  # (m, c, h, w)
transformer = caffe.io.Transformer({'data': data_shape})
transformer.set_transpose('data', (2, 0, 1))  # (h,w,c) -> (c,h,w)
transformer.set_mean('data', mu)              # subtract the BGR mean; `mu` comes from the previous snippet

transformed_image = transformer.preprocess('data', image)
print("")
print('original image:  {} {}'.format(image.shape, image.dtype))
print('transform image:  {} {}'.format(transformed_image.shape, transformed_image.dtype))
print(transformed_image[0, :5, :5])
def deprocess_net_image(image):
    """Turn a network-input image back into a displayable image.

    Reverses the first axis, moves it to the end, adds back the per-channel
    offsets [123, 117, 104], clamps to [0, 255], rounds and casts to uint8.

    Args:
        image: array with the channel axis first, (c, h, w). In this guide it
            is the BGR, mean-subtracted output of ``transformer.preprocess``.

    Returns:
        A new (h, w, c) uint8 array; the input array is left untouched.
    """
    image = image.copy()              # work on a copy so the caller's array is not modified
    image = image[::-1]               # reverse the channel axis: BGR -> RGB
    image = image.transpose(1, 2, 0)  # (c,h,w) -> (h,w,c)
    image += [123, 117, 104]          # add the mean back (RGB order after the reversal)
    np.clip(image, 0, 255, out=image) # clamp to the displayable range
    return np.round(image).astype(np.uint8)


# `transformed_image` and `plt` come from the previous snippets.
image = deprocess_net_image(transformed_image)
print("{} {}".format(image.shape, image.dtype))
print(image[:5, :5, 0])
plt.imshow(image)
(227, 227, 3) uint8
[[27 27 29 29 30]
[26 26 28 27 28]
[27 27 27 26 28]
[27 28 25 28 27]
[26 29 28 28 28]]
import numpy as np

# A batch of two 3x4x4 "images", initially all zeros.
data = np.zeros((2, 3, 4, 4))
print(data)

image = np.arange(48).reshape(3, 4, 4)
print("")
print(image)

print('set image to data')
# Assigning through [...] broadcasts the single (3,4,4) image across the
# leading batch axis, so every entry of `data` becomes a copy of `image`.
data[...] = image
print(data)
[[[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]]
[[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]
[[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]
[ 0. 0. 0. 0.]]]]
[[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]]
[[16 17 18 19]
[20 21 22 23]
[24 25 26 27]
[28 29 30 31]]
[[32 33 34 35]
[36 37 38 39]
[40 41 42 43]
[44 45 46 47]]]
set image to data
[[[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]
[ 12. 13. 14. 15.]]
[[ 16. 17. 18. 19.]
[ 20. 21. 22. 23.]
[ 24. 25. 26. 27.]
[ 28. 29. 30. 31.]]
[[ 32. 33. 34. 35.]
[ 36. 37. 38. 39.]
[ 40. 41. 42. 43.]
[ 44. 45. 46. 47.]]]
[[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]
[ 12. 13. 14. 15.]]
[[ 16. 17. 18. 19.]
[ 20. 21. 22. 23.]
[ 24. 25. 26. 27.]
[ 28. 29. 30. 31.]]
[[ 32. 33. 34. 35.]
[ 36. 37. 38. 39.]
[ 40. 41. 42. 43.]
[ 44. 45. 46. 47.]]]]
import os
import sys
import cv2
import numpy as np

# Make sure that caffe is on the python path:
caffe_root = './'
os.chdir(caffe_root)
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe

# transformer + python code
data_shape = [1,3,512,512]
transformer = caffe.io.Transformer({'data':data_shape}) # resize
transformer.set_transpose('data', (2, 0, 1)) # hwc ===> chw
transformer.set_channel_swap('data', (2, 1, 0)) # rgb===>bgr
transformer.set_raw_scale('data', 255) # [0-1]===> [0,255]
transformer.set_mean('data', np.array([104, 117, 123])) # bgr mean pixel

image_file = "./images/1.png"
print("image_file=", image_file)
image = caffe.io.load_image(image_file) # hwc, rgb, 0-1
print("image.shape=", image.shape)

transformed_image = transformer.preprocess('data', image)
print("transformed_image.shape=", transformed_image.shape) # 3,512,512

b,g,r = transformed_image
print(b.shape) # 512,512
print(g.shape)
print(r.shape)
print("")
print(transformed_image[:,:5,:5])
('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
(512, 512)
(512, 512)
(512, 512)
[[[ -98. -98. -98. -98. -98. ]
[ -98. -98. -98. -98. -98. ]
[ -23.96776581 -28.58105469 -31.359375 -25.08592987 -28.90721893]
[ -8.21874237 -12.71092987 -15.46875 -15.27832031 -10.57226562]
[ -7.75 -12.12499237 -15. -15. -10.984375 ]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96776581 -48.58105469 -51.359375 -45.08592987 -48.90721893]
[ -26.21874237 -30.71092987 -33.46875 -33.27832031 -33.57226562]
[ -24.75 -29.12499237 -32. -32. -31.984375 ]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.96776581 -57.58105469 -60.359375 -54.08592987 -57.90721893]
[ -40.21874237 -44.71092987 -47.46875 -47.27832031 -44.572258 ]
[ -40.75 -45.12499237 -48. -48. -47.984375 ]]]
# python code
# Manual equivalent of the Transformer pipeline above; `image`, `cv2` and `np`
# come from the previous snippet.
print(image.shape)  # hwc,rgb,0-1 (1080, 1920, 3)
print(image.dtype)  # float32

# resize
image = cv2.resize(image, (512, 512))
print("image resize = ", image.shape)  # (512, 512, 3)

# hwc,rgb ===> chw, bgr
r, g, b = image[:, :, 0], image[:, :, 1], image[:, :, 2]
print(b.shape)  # (512, 512)
print(g.shape)  # (512, 512)
print(r.shape)  # (512, 512)

bgr = np.zeros([3, b.shape[0], b.shape[1]])
print(bgr.shape)
bgr[0, :, :] = b
bgr[1, :, :] = g
bgr[2, :, :] = r

# 0-1 ===>0-255
bgr = bgr * 255.

# -mean (applied after the scaling)
print("")
bgr[0] -= 104
bgr[1] -= 117
bgr[2] -= 123
print(bgr[:, :5, :5])
(1080, 1920, 3)
('image resize = ', (512, 512, 3))
(512, 512)
(512, 512)
(512, 512)
(3, 512, 512)
[[[ -97.99999988 -97.99999988 -97.99999988 -97.99999988 -97.99999988]
[ -97.99999988 -97.99999988 -97.99999988 -97.99999988 -97.99999988]
[ -23.9677673 -28.58105415 -31.35937387 -25.0859333 -28.90722105]
[ -8.21874478 -12.71093214 -15.46874815 -15.27831757 -10.5722701 ]
[ -7.74999434 -12.12499598 -14.99999771 -14.99999771 -10.98437318]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96776688 -48.58105373 -51.35937345 -45.08593288 -48.90722823]
[ -26.21874449 -30.71093184 -33.46874785 -33.27831727 -33.5722695 ]
[ -24.7499941 -29.12499574 -31.99999747 -31.99999747 -31.98437271]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.9677667 -57.58105356 -60.35937327 -54.0859327 -57.90722805]
[ -40.21874401 -44.71093136 -47.46874738 -47.2783168 -44.5722692 ]
[ -40.7499935 -45.12499514 -47.99999687 -47.99999687 -47.98437211]]]
# cv2.imread
# The image is already BGR in [0, 255], so only the transpose and the mean are
# configured. `caffe`, `cv2` and `np` come from the earlier snippet.
data_shape = [1, 3, 512, 512]
transformer = caffe.io.Transformer({'data': data_shape})
transformer.set_transpose('data', (2, 0, 1))              # hwc ===> chw
transformer.set_mean('data', np.array([104, 117, 123]))   # bgr mean pixel

image_file = "./images/1.png"
print("image_file=", image_file)
image = cv2.imread(image_file)  # hwc, bgr, 0-255
print("image.shape=", image.shape)

transformed_image = transformer.preprocess('data', image)
print("transformed_image.shape=", transformed_image.shape)

b, g, r = transformed_image
print(b.shape)
print(g.shape)
print(r.shape)
print("")
print(transformed_image[:, :5, :5])
('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
(512, 512)
(512, 512)
(512, 512)
[[[ -98. -98. -98. -98. -98. ]
[ -98. -98. -98. -98. -98. ]
[ -23.96777344 -28.58105469 -31.359375 -25.0859375 -28.90722656]
[ -8.21875 -12.7109375 -15.46875 -15.27832031 -10.57226562]
[ -7.75 -12.125 -15. -15. -10.984375 ]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96777344 -48.58105469 -51.359375 -45.0859375 -48.90722656]
[ -26.21875 -30.7109375 -33.46875 -33.27832031 -33.57226562]
[ -24.75 -29.125 -32. -32. -31.984375 ]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.96777344 -57.58105469 -60.35937119 -54.0859375 -57.90722656]
[ -40.21875 -44.7109375 -47.46875 -47.27832031 -44.57226562]
[ -40.75 -45.125 -48. -48. -47.984375 ]]]
# python code
# Manual equivalent of the Transformer pipeline for a cv2.imread image;
# `image`, `cv2` and `np` come from the previous snippet.
print(image.shape)  # hwc,bgr,0-255 (1080, 1920, 3)
print(image.dtype)  # uint8

# int8 ===>float32
image = image.astype('float32')  # key steps
print(image.dtype)  # float32

# resize
image = cv2.resize(image, (512, 512))
print("image resize = ", image.shape)  # (512, 512, 3)
print(image.dtype)  # float32

# hwc ===> chw
b, g, r = image[:, :, 0], image[:, :, 1], image[:, :, 2]
print(b.shape)  # (512, 512)
print(g.shape)  # (512, 512)
print(r.shape)  # (512, 512)

bgr = np.zeros([3, b.shape[0], b.shape[1]])
print(bgr.shape)

# -mean
# b, g and r are views into `image`, so these in-place subtractions also
# modify `image` itself.
b -= 104
g -= 117
r -= 123

bgr[0, :, :] = b
bgr[1, :, :] = g
bgr[2, :, :] = r

print(bgr[:, :5, :5])
# python code v2
# Compact version of the manual preprocessing; `filepath`, `cv2` and `np` are
# expected to be defined by the surrounding code.
image = cv2.imread(filepath)  # hwc, bgr,0-255
print(image.dtype)  # uint8

image = image.astype('float32')  # key steps
image = cv2.resize(image, (512, 512))
print("image resize = ", image.shape)  # (512, 512, 3)
print(image.dtype)  # float32

# The mean has one value per channel and broadcasts over the last (channel)
# axis, so it must be subtracted before the transpose.
image -= np.array((104.00698793, 116.66876762, 122.67891434))  # bgr mean
image = image.transpose((2, 0, 1))  # hwc ===>chw

print(image[:, :5, :5])
(1080, 1920, 3)
('image resize = ', (512, 512, 3))
(512, 512)
(512, 512)
(512, 512)
(3, 512, 512)
[[[ -98. -98. -98. -98. -98. ]
[ -98. -98. -98. -98. -98. ]
[ -23.96777344 -28.58105469 -31.359375 -25.0859375 -28.90722656]
[ -8.21875 -12.7109375 -15.46875 -15.27832031 -10.57226562]
[ -7.75 -12.125 -15. -15. -10.984375 ]]
[[-117. -117. -117. -117. -117. ]
[-117. -117. -117. -117. -117. ]
[ -43.96777344 -48.58105469 -51.359375 -45.0859375 -48.90722656]
[ -26.21875 -30.7109375 -33.46875 -33.27832031 -33.57226562]
[ -24.75 -29.125 -32. -32. -31.984375 ]]
[[-123. -123. -123. -123. -123. ]
[-123. -123. -123. -123. -123. ]
[ -52.96777344 -57.58105469 -60.359375 -54.0859375 -57.90722656]
[ -40.21875 -44.7109375 -47.46875 -47.27832031 -44.57226562]
[ -40.75 -45.125 -48. -48. -47.984375 ]]]
Matplot.imread : dims: (height,width,channels),order: RGB,range: [0,255] dtype: uint8, plot
OpenCV.imread : dims: (height,width,channels),order: BGR ,range: [0,255] dtype: uint8, plot
caffe.io.load_image : dims: (height,width,channels),order: RGB,range: [0,1] dtype: float32 (caffe_io_image = matplot_image/255.0) ,plot
caffe Network Input(Transformer) : dims: (m,c,h,w), order: BGR , range [0,255],dtype: float32, PLOT ERROR