0%

matplot RGB vs opencv BGR vs caffe images

Guide

Matplot (skimage/ PIL Image)

1
2
3
4
5
6
7
8
9
# matplotlib.image.imread on this JPEG: dims (height, width, channels),
# order RGB, range [0, 255], dtype uint8.
import matplotlib.pyplot as plt
import matplotlib.image as img

image = img.imread("images/cat.jpg")
print(image.shape)  # (360, 480, 3)
print(image[:5, :5, 0])  # top-left 5x5 patch of channel 0 (R)
#plt.axis("off")
plt.imshow(image)  # imshow interprets a 3-channel array as RGB
plt.show()
(360, 480, 3)
[[26 27 25 28 30]
 [26 27 25 26 28]
 [26 26 26 26 27]
 [27 26 27 28 29]
 [29 27 26 26 29]]

png

PIL.Image

1
2
3
4
5
6
7
8
9
# PIL Image.open: returns a PIL image object (not a NumPy array).
# The repr printed below reports mode=RGB and size=480x360, i.e. width x height.
# NOTE(review): np.asarray(image) presumably yields (height, width, channels),
# RGB, range [0,255], dtype uint8 for this file -- confirm before relying on it.

import matplotlib.pyplot as plt
from PIL import Image
image = Image.open("images/cat.jpg")
print(image)
# <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=480x360 at 0x7F258E0B8410>
plt.imshow(image)  # the PIL image is passed to imshow directly
plt.show()

png

skimage

1
2
# `import skimage` alone does not guarantee that the `io` submodule is loaded,
# so import it explicitly; this still binds the top-level name `skimage`.
import skimage.io
image = skimage.io.imread(image_filepath) # (height, width, channels), RGB, e.g. (608, 606, 3)

OpenCV

1
2
3
4
5
6
7
8
# OpenCV: dims: (height,width,channels),order: BGR,range: [0,255] dtype: uint8
import cv2
image = cv2.imread("images/cat.jpg")
print(image.shape)  # (360, 480, 3)
print(image[:5, :5, 0])  # channel 0 is Blue here, hence the different values
#plt.axis("off")
# Deliberately plot the BGR array as-is: imshow treats it as RGB, so the red
# and blue channels appear swapped (this is the "wrong colors" demonstration).
plt.imshow(image)
plt.show()
(360, 480, 3)
[[49 50 47 48 50]
 [51 52 48 48 50]
 [51 51 49 48 49]
 [50 49 49 48 49]
 [52 50 49 48 49]]

png

The colors of our image are clearly wrong! Why is this?

The answer lies in a caveat of OpenCV. OpenCV represents RGB images as multi-dimensional NumPy arrays… but in reverse order! This means that OpenCV images are actually stored in BGR order rather than RGB. Since plt.imshow interprets the array as RGB, the red and blue channels are swapped in the plot above.

1
2
3
4
5
6
7
import cv2
image = cv2.imread("images/cat.jpg")  # (h, w, c), BGR
# convert from BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.axis("off")  # hide the axes around the picture
plt.imshow(rgb_image)  # channels are now in the RGB order imshow expects
plt.show()

png

Matplot VS. OpenCV

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
image1 = img.imread("images/cat.jpg") # rgb

import cv2
image = cv2.imread("images/cat.jpg")
# convert from BGR to RGB
image2 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # rgb

# image1 and image2 should now be identical; verify it element by element.

print(image1.dtype)
print(image1[:5,:5,0])

print("")
print(image2.dtype)
print(image2[:5,:5,0])

# Count the matching elements and compare against the total number of
# elements instead of a hard-coded 360*480*3, so any image size works.
equal_count = np.sum(np.equal(image1, image2))
print(equal_count)
print(equal_count == image1.size)
uint8
[[26 27 25 28 30]
 [26 27 25 26 28]
 [26 26 26 26 27]
 [27 26 27 28 29]
 [29 27 26 26 29]]

uint8
[[26 27 25 28 30]
 [26 27 25 26 28]
 [26 26 26 26 27]
 [27 26 27 28 29]
 [29 27 26 26 29]]
518400
True

caffe.io.load_image

caffe.io.load_image loads an image as a float32 array in RGB order, with pixel values normalized to the range [0, 1].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# caffe.io.load_image: dims: (height,width,channels),order: RGB,range: [0,1] dtype: float32
# matplot: caffe_image = matplot_image/255.0

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# configure plotting
#plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['image.interpolation'] = 'nearest'
#plt.rcParams['image.cmap'] = 'gray'

import sys
caffe_root = '../' # this file should be run from {caffe_root}/examples (otherwise change this line)
sys.path.insert(0, caffe_root + 'python')

import caffe
#======================================================================
# load image
#======================================================================
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
print image.shape,image.dtype # (360, 480, 3) float32
print image[:5,:5,0]

plt.figure()
plt.imshow(image) # (360, 480, 3) RGB

#======================================================================
# load color image with color=False
#======================================================================
image2 = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg',color=False)
print image2.shape #(360, 480, 1)
gray_image2 = image2.squeeze()
print gray_image2.shape,gray_image2.dtype # (360, 480) float32
print gray_image2[:5,:5]

plt.figure()
plt.imshow(gray_image2) # (360, 480) gray


#======================================================================
# load color image with color=False
#======================================================================
image3 = caffe.io.load_image(caffe_root + 'examples/images/cat_gray.jpg',color=False)
print image3.shape #(360, 480, 1)
gray_image3 = image3.squeeze()
print gray_image3.shape,gray_image3.dtype # (360, 480) float32
print gray_image3[:5,:5]

plt.figure()
plt.imshow(gray_image3) # (360, 480) gray

plt.show()
(360, 480, 3) float32
[[ 0.10196079  0.10588235  0.09803922  0.10980392  0.11764706]
 [ 0.10196079  0.10588235  0.09803922  0.10196079  0.10980392]
 [ 0.10196079  0.10196079  0.10196079  0.10196079  0.10588235]
 [ 0.10588235  0.10196079  0.10588235  0.10980392  0.11372549]
 [ 0.11372549  0.10588235  0.10196079  0.10196079  0.11372549]]
(360, 480, 1)
(360, 480) float32
[[ 0.19543412  0.19935569  0.18842432  0.19120707  0.1990502 ]
 [ 0.19599961  0.19992118  0.19151255  0.19234589  0.20018902]
 [ 0.19599961  0.19599961  0.19543412  0.19234589  0.19626746]
 [ 0.19935569  0.19543412  0.19626746  0.19120707  0.19512863]
 [ 0.20719883  0.19935569  0.19543412  0.19234589  0.19512863]]
(360, 480, 1)
(360, 480) float32
[[ 0.10196079  0.10588235  0.09803922  0.10980392  0.11372549]
 [ 0.10196079  0.10588235  0.09803922  0.10196079  0.10980392]
 [ 0.10196079  0.10588235  0.10196079  0.10196079  0.10588235]
 [ 0.10588235  0.10196079  0.10588235  0.10980392  0.11372549]
 [ 0.11764706  0.10196079  0.10196079  0.10588235  0.10980392]]

png

png

png

caffe.io.Transformer

caffe.io.Transformer for Network input blob(m,c,h,w):

  • Caffe networks use the BGR image format by default, just like OpenCV.
  • Caffe mean files use BGR ordering as well; the mean is calculated from the training images, not the test images, e.g. mu = np.array([104, 117, 123]) # BGR
  • the pixel range is [0,255] with dtype float32.
  • in short: (m,c,h,w), BGR order, [0,255] range, float32

caffe.io.load_image

caffe.io.Transformer:

  • input image: caffe.io.load_image: (h,w,c),RGB,[0,1],float32
  • transformed image: (c,h,w), BGR,[0,255] float32

caffe.io.Transformer steps:

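Note that the Transformer applies its steps in a fixed order, regardless of the order in which the set_* methods are called: resize, transpose, channel swap, raw_scale, mean subtraction and finally input_scale. The mean is therefore subtracted after raw_scale (so it must be given in the [0,255] range) but before input_scale.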

  • transformer.set_transpose('data', (2,0,1)) # (h,w,c)->(c,h,w)
  • transformer.set_channel_swap('data', (2,1,0)) # RGB->BGR
  • transformer.set_raw_scale('data', 255) # [0,1]->[0,255] float32
  • transformer.set_mean('data', mu) # subtract the BGR mean

Keep in mind that the Transformer is only required when using a deploy.prototxt-like network definition, i.e. one without a Data layer. When using a Data layer, things get easier to understand.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
import matplotlib.pyplot as plt

import sys
caffe_root = '../' # this file should be run from {caffe_root}/examples (otherwise change this line)
sys.path.insert(0, caffe_root + 'python')

import caffe
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
# caffe.io.load_image: dims: (height,width,channels),order: RGB,range: [0,1] dtype: float32
print("%s %s" % (image.shape, image.dtype)) # (360, 480, 3) float32
print(image[:5,:5,0])

#plt.imshow(image)
#plt.show()

mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1) # average over both spatial axes -> one mean value per channel (BGR)

data_shape = (10, 3, 227, 227)
transformer = caffe.io.Transformer({'data': data_shape})

transformer.set_transpose('data', (2,0,1)) # h,w,c->c,h,w(012->201) move image channels to outermost dimension
transformer.set_channel_swap('data', (2,1,0)) # swap channels from RGB to BGR
transformer.set_raw_scale('data', 255) # rescale from [0, 1] to [0, 255]
transformer.set_mean('data', mu) # subtract the dataset-mean value(BGR) in each channel

# preprocess also resizes the image to the (227, 227) spatial size of data_shape
transformed_image = transformer.preprocess('data', image)
print("")
print('original image:  %s %s' % (image.shape, image.dtype)) # (360, 480, 3) float32
print('transform image:  %s %s' % (transformed_image.shape, transformed_image.dtype)) #(3, 227, 227) float32
print(transformed_image[0,:5,:5])

# By default, using CaffeNet, your net.blobs['data'].data.shape == (10, 3, 227, 227).
# The batch size of 10 matches the 10 crops of 227x227 (4 corners + center, and
# their mirrored versions) that caffe.io.oversample extracts from a 256x256 image
# to pass through the net.

# net.blobs['data'].reshape(50,3,227,227) # we can change network input mini-batch to 50 as we like
# net.blobs['data'].data[...] = transformed_image # --->(50,3,227,227) 50 images
(360, 480, 3) float32
[[ 0.10196079  0.10588235  0.09803922  0.10980392  0.11764706]
 [ 0.10196079  0.10588235  0.09803922  0.10196079  0.10980392]
 [ 0.10196079  0.10196079  0.10196079  0.10196079  0.10588235]
 [ 0.10588235  0.10196079  0.10588235  0.10980392  0.11372549]
 [ 0.11372549  0.10588235  0.10196079  0.10196079  0.11372549]]

original  image:  (360, 480, 3) float32
transform image:  (3, 227, 227) float32
[[-53.86381531 -56.23903656 -53.54626465 -53.14715195 -51.32625961]
 [-52.93947601 -55.71855164 -54.00423813 -54.76469803 -52.88771057]
 [-53.89373398 -55.67879486 -55.4278717  -55.22265625 -53.47174454]
 [-50.98455811 -51.3506012  -54.06866074 -52.09104156 -52.94168854]
 [-49.92769241 -49.85874176 -52.08575439 -52.50840759 -51.3900528 ]]

cv2.imread

caffe.io.Transformer:

  • input image: cv2.imread: (h,w,c),BGR,[0,255],uint8 (the Transformer casts it to float32)
  • transformed image: (c,h,w), BGR order,[0,255] float32

caffe.io.Transformer steps:

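Note that the mean is subtracted after the raw_scale step and before input_scale. Because cv2.imread already returns BGR pixels in the [0,255] range, neither a channel swap nor a raw_scale is needed here.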

  • transformer.set_transpose('data', (2,0,1)) # (h,w,c)->(c,h,w)
  • transformer.set_mean('data', mu) # subtract the BGR mean
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import cv2

image = cv2.imread("test/cat.jpg") # (h, w, c), BGR, [0, 255], uint8

data_shape = (10, 3, 227, 227)

# No channel swap and no raw_scale here: the OpenCV image is already BGR in [0, 255].
transformer = caffe.io.Transformer({'data': data_shape})
transformer.set_transpose('data', (2,0,1)) # h,w,c -> c,h,w
transformer.set_mean('data', mu) # subtract the per-channel BGR mean

transformed_image = transformer.preprocess('data', image)
print("")
print('original image:  %s %s' % (image.shape, image.dtype)) # (360, 480, 3) uint8
print('transform image:  %s %s' % (transformed_image.shape, transformed_image.dtype)) #(3, 227, 227) float32
print(transformed_image[0,:5,:5])

# By default, using CaffeNet, your net.blobs['data'].data.shape == (10, 3, 227, 227).
# The batch size of 10 matches the 10 crops of 227x227 (4 corners + center, and
# their mirrored versions) that caffe.io.oversample extracts from a 256x256 image
# to pass through the net.

# net.blobs['data'].reshape(50,3,227,227) # we can change network input mini-batch to 50 as we like
# net.blobs['data'].data[...] = transformed_image # --->(50,3,227,227) 50

deprocess transformed_image

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Helper function for deprocessing preprocessed images, e.g., for display.
def deprocess_net_image(image):
    """Turn a preprocessed network input back into a displayable image.

    Args:
        image: array of shape (c, h, w) in BGR channel order from which the
            per-channel mean has been subtracted (float32 in this guide).

    Returns:
        A new uint8 array of shape (h, w, c) in RGB order with values in
        [0, 255]. The input array is not modified.
    """
    # Means being undone:
    # [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
    image = image.copy()              # don't modify destructively
    image = image[::-1]               # BGR -> RGB
    image = image.transpose(1, 2, 0)  # CHW -> HWC
    image += [123, 117, 104]          # (approximately) undo mean subtraction, RGB order

    # clamp values in [0, 255]
    image[image < 0] = 0
    image[image > 255] = 255

    # round and cast from float32 to uint8
    image = np.round(image)
    image = np.require(image, dtype=np.uint8)

    return image

# Undo the Transformer preprocessing so the network input can be displayed.
image = deprocess_net_image(transformed_image)
#(h,w,c), RGB,[0,255], uint8

print("%s %s" % (image.shape, image.dtype)) # (227, 227, 3) uint8
print(image[:5,:5,0])
plt.imshow(image)
plt.show()
(227, 227, 3) uint8
[[27 27 29 29 30]
 [26 26 28 27 28]
 [27 27 27 26 28]
 [27 28 25 28 27]
 [26 29 28 28 28]]

png

set 3-dim image to 4-dim input blob data

1
2
3
4
5
6
7
8
9
10
import numpy as np
data = np.zeros((2,3,4,4))  # stands in for a (batch, c, h, w) input blob
print(data)
image = np.arange(48).reshape(3,4,4)  # stands in for one (c, h, w) image
print("")
print(image)

print('set image to data')
# The 3-dim image is broadcast along the leading batch axis, so every
# entry of the 4-dim array receives the same image.
data[...] = image
print(data)
[[[[ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]]

  [[ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]]

  [[ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]]]


 [[[ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]]

  [[ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]]

  [[ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]
   [ 0.  0.  0.  0.]]]]

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]
  [12 13 14 15]]

 [[16 17 18 19]
  [20 21 22 23]
  [24 25 26 27]
  [28 29 30 31]]

 [[32 33 34 35]
  [36 37 38 39]
  [40 41 42 43]
  [44 45 46 47]]]
set image to data
[[[[  0.   1.   2.   3.]
   [  4.   5.   6.   7.]
   [  8.   9.  10.  11.]
   [ 12.  13.  14.  15.]]

  [[ 16.  17.  18.  19.]
   [ 20.  21.  22.  23.]
   [ 24.  25.  26.  27.]
   [ 28.  29.  30.  31.]]

  [[ 32.  33.  34.  35.]
   [ 36.  37.  38.  39.]
   [ 40.  41.  42.  43.]
   [ 44.  45.  46.  47.]]]


 [[[  0.   1.   2.   3.]
   [  4.   5.   6.   7.]
   [  8.   9.  10.  11.]
   [ 12.  13.  14.  15.]]

  [[ 16.  17.  18.  19.]
   [ 20.  21.  22.  23.]
   [ 24.  25.  26.  27.]
   [ 28.  29.  30.  31.]]

  [[ 32.  33.  34.  35.]
   [ 36.  37.  38.  39.]
   [ 40.  41.  42.  43.]
   [ 44.  45.  46.  47.]]]]

transformer vs. python code

caffe.io.load_image

transformer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
import sys
import cv2
import numpy as np
# Make sure that caffe is on the python path:
caffe_root = './'
os.chdir(caffe_root)
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe


# caffe.io.load_image: transformer + python code
# Preprocess one image with caffe.io.Transformer; the next snippet reproduces
# the same result by hand for comparison.
data_shape = [1,3,512,512]

transformer = caffe.io.Transformer({'data':data_shape}) # preprocess resizes to the 512x512 of data_shape
transformer.set_transpose('data', (2, 0, 1)) # hwc ===> chw
transformer.set_channel_swap('data', (2, 1, 0)) # rgb===>bgr
transformer.set_raw_scale('data', 255) # [0-1]===> [0,255]
transformer.set_mean('data', np.array([104, 117, 123])) # bgr mean pixel

image_file = "./images/1.png"
print("image_file=", image_file)
image = caffe.io.load_image(image_file) # hwc, rgb, 0-1
print("image.shape=", image.shape)

transformed_image = transformer.preprocess('data', image) # chw, bgr, mean-subtracted
print("transformed_image.shape=", transformed_image.shape) # 3,512,512
b,g,r = transformed_image # unpack along the first (channel) axis
print(b.shape) # 512,512
print(g.shape)
print(r.shape)

print("")
print(transformed_image[:,:5,:5]) # top-left 5x5 patch of every channel

output

('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
(512, 512)
(512, 512)
(512, 512)

[[[ -98.          -98.          -98.          -98.          -98.        ]
  [ -98.          -98.          -98.          -98.          -98.        ]
  [ -23.96776581  -28.58105469  -31.359375    -25.08592987  -28.90721893]
  [  -8.21874237  -12.71092987  -15.46875     -15.27832031  -10.57226562]
  [  -7.75        -12.12499237  -15.          -15.          -10.984375  ]]

 [[-117.         -117.         -117.         -117.         -117.        ]
  [-117.         -117.         -117.         -117.         -117.        ]
  [ -43.96776581  -48.58105469  -51.359375    -45.08592987  -48.90721893]
  [ -26.21874237  -30.71092987  -33.46875     -33.27832031  -33.57226562]
  [ -24.75        -29.12499237  -32.          -32.          -31.984375  ]]

 [[-123.         -123.         -123.         -123.         -123.        ]
  [-123.         -123.         -123.         -123.         -123.        ]
  [ -52.96776581  -57.58105469  -60.359375    -54.08592987  -57.90721893]
  [ -40.21874237  -44.71092987  -47.46875     -47.27832031  -44.572258  ]
  [ -40.75        -45.12499237  -48.          -48.          -47.984375  ]]]
python code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Reproduce the Transformer result by hand, starting from the
# caffe.io.load_image output (hwc, rgb, 0-1).
print(image.shape) # (1080, 1920, 3)
print(image.dtype) # float32

# step 1: resize to the network input size
image = cv2.resize(image, (512, 512))
print("image resize = ", image.shape) # (512, 512, 3)

# step 2: split the channels (hwc, rgb) ...
r = image[:, :, 0]
g = image[:, :, 1]
b = image[:, :, 2]

print(b.shape) # (512, 512)
print(g.shape) # (512, 512)
print(r.shape) # (512, 512)

# ... and stack them again as chw in bgr order
bgr = np.zeros((3,) + b.shape)
print(bgr.shape)
bgr[0], bgr[1], bgr[2] = b, g, r

# step 3: rescale 0-1 ===> 0-255
bgr = bgr * 255.

# step 4: subtract the per-channel bgr mean
print("")
bgr -= np.array([104, 117, 123]).reshape(3, 1, 1)
print(bgr[:, :5, :5])

output

(1080, 1920, 3)
float32
('image resize = ', (512, 512, 3))
float32
(512, 512)
(512, 512)
(512, 512)
(3, 512, 512)

[[[ -97.99999988  -97.99999988  -97.99999988  -97.99999988  -97.99999988]
  [ -97.99999988  -97.99999988  -97.99999988  -97.99999988  -97.99999988]
  [ -23.9677673   -28.58105415  -31.35937387  -25.0859333   -28.90722105]
  [  -8.21874478  -12.71093214  -15.46874815  -15.27831757  -10.5722701 ]
  [  -7.74999434  -12.12499598  -14.99999771  -14.99999771  -10.98437318]]

 [[-117.         -117.         -117.         -117.         -117.        ]
  [-117.         -117.         -117.         -117.         -117.        ]
  [ -43.96776688  -48.58105373  -51.35937345  -45.08593288  -48.90722823]
  [ -26.21874449  -30.71093184  -33.46874785  -33.27831727  -33.5722695 ]
  [ -24.7499941   -29.12499574  -31.99999747  -31.99999747  -31.98437271]]

 [[-123.         -123.         -123.         -123.         -123.        ]
  [-123.         -123.         -123.         -123.         -123.        ]
  [ -52.9677667   -57.58105356  -60.35937327  -54.0859327   -57.90722805]
  [ -40.21874401  -44.71093136  -47.46874738  -47.2783168   -44.5722692 ]
  [ -40.7499935   -45.12499514  -47.99999687  -47.99999687  -47.98437211]]]

cv2.imread

transformer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# cv2.imread: transformer  + python code
# Same preprocessing as for caffe.io.load_image, except that the channel swap
# and the raw scale are left out (see the commented-out calls below).
data_shape = [1,3,512,512]

transformer = caffe.io.Transformer({'data':data_shape}) # preprocess resizes to the 512x512 of data_shape
transformer.set_transpose('data', (2, 0, 1)) # hwc ===> chw
#transformer.set_channel_swap('data', (2, 1, 0)) # rgb===>bgr (not needed: input is already bgr)
#transformer.set_raw_scale('data', 255) # [0-1]===> [0,255] (not needed: input is already 0-255)
transformer.set_mean('data', np.array([104, 117, 123])) # bgr mean pixel

image_file = "./images/1.png"
print("image_file=", image_file)
image = cv2.imread(image_file) # hwc, bgr, 0-255
print("image.shape=", image.shape)

transformed_image = transformer.preprocess('data', image) # chw, bgr, mean-subtracted
print("transformed_image.shape=", transformed_image.shape) # 3,512,512
b,g,r = transformed_image # unpack along the first (channel) axis
print(b.shape) # 512,512
print(g.shape)
print(r.shape)

print("")
print(transformed_image[:,:5,:5]) # top-left 5x5 patch of every channel

output

('image_file=', './images/1.png')
('image.shape=', (1080, 1920, 3))
('transformed_image.shape=', (3, 512, 512))
(512, 512)
(512, 512)
(512, 512)

[[[ -98.          -98.          -98.          -98.          -98.        ]
  [ -98.          -98.          -98.          -98.          -98.        ]
  [ -23.96777344  -28.58105469  -31.359375    -25.0859375   -28.90722656]
  [  -8.21875     -12.7109375   -15.46875     -15.27832031  -10.57226562]
  [  -7.75        -12.125       -15.          -15.          -10.984375  ]]

 [[-117.         -117.         -117.         -117.         -117.        ]
  [-117.         -117.         -117.         -117.         -117.        ]
  [ -43.96777344  -48.58105469  -51.359375    -45.0859375   -48.90722656]
  [ -26.21875     -30.7109375   -33.46875     -33.27832031  -33.57226562]
  [ -24.75        -29.125       -32.          -32.          -31.984375  ]]

 [[-123.         -123.         -123.         -123.         -123.        ]
  [-123.         -123.         -123.         -123.         -123.        ]
  [ -52.96777344  -57.58105469  -60.35937119  -54.0859375   -57.90722656]
  [ -40.21875     -44.7109375   -47.46875     -47.27832031  -44.57226562]
  [ -40.75        -45.125       -48.          -48.          -47.984375  ]]]
python code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Reproduce the Transformer result by hand, starting from the cv2.imread output.
print(image.shape) # hwc,bgr,0-255   (1080, 1920, 3)
print(image.dtype) # uint8

# uint8 ===> float32
image = image.astype('float32') # key steps
print(image.dtype) # float32

# resize
image = cv2.resize(image, (512,512))
print("image resize = ",image.shape) # (512, 512, 3)
print(image.dtype) # float32

# hwc ===> chw
# b, g and r are slices (views) of image, not copies.
b,g,r = image[:,:,0],image[:,:,1],image[:,:,2]

print(b.shape) # (512, 512)
print(g.shape) # (512, 512)
print(r.shape) # (512, 512)

bgr = np.zeros([3,b.shape[0],b.shape[1]]) # chw buffer (np.zeros defaults to float64)
print(bgr.shape)

# -mean
# In-place subtraction on the views, so image itself is modified as well.
b -= 104
g -= 117
r -= 123

bgr[0,:,:] = b
bgr[1,:,:] = g
bgr[2,:,:] = r


print(bgr[:,:5,:5]) # top-left 5x5 patch of every channel
python code v2
1
2
3
4
5
6
7
8
9
10
11
12
# Compact version of the manual cv2 preprocessing: cast, resize, subtract the
# mean while still in hwc layout, then transpose to chw.
# NOTE(review): `filepath` is not defined in the visible snippets (the earlier
# ones use `image_file`) -- set it before running.
image = cv2.imread(filepath) # hwc, bgr,0-255
print(image.dtype) # uint8

image = image.astype('float32') # key steps
image = cv2.resize(image, (512,512))
print("image resize = ",image.shape) # (512, 512, 3)
print(image.dtype) # float32

# The 3-element mean is broadcast over the last (channel) axis.
# NOTE(review): these are the unrounded means, whereas the other snippets use
# (104, 117, 123), so the values will differ slightly from theirs.
image -= np.array((104.00698793,116.66876762,122.67891434)) # bgr mean
image = image.transpose((2,0,1)) # hwc ===>chw

print(image[:,:5,:5]) # top-left 5x5 patch of every channel

output

(1080, 1920, 3)
uint8
float32
('image resize = ', (512, 512, 3))
float32
(512, 512)
(512, 512)
(512, 512)
(3, 512, 512)

[[[ -98.          -98.          -98.          -98.          -98.        ]
  [ -98.          -98.          -98.          -98.          -98.        ]
  [ -23.96777344  -28.58105469  -31.359375    -25.0859375   -28.90722656]
  [  -8.21875     -12.7109375   -15.46875     -15.27832031  -10.57226562]
  [  -7.75        -12.125       -15.          -15.          -10.984375  ]]

 [[-117.         -117.         -117.         -117.         -117.        ]
  [-117.         -117.         -117.         -117.         -117.        ]
  [ -43.96777344  -48.58105469  -51.359375    -45.0859375   -48.90722656]
  [ -26.21875     -30.7109375   -33.46875     -33.27832031  -33.57226562]
  [ -24.75        -29.125       -32.          -32.          -31.984375  ]]

 [[-123.         -123.         -123.         -123.         -123.        ]
  [-123.         -123.         -123.         -123.         -123.        ]
  [ -52.96777344  -57.58105469  -60.359375    -54.0859375   -57.90722656]
  [ -40.21875     -44.7109375   -47.46875     -47.27832031  -44.57226562]
  [ -40.75        -45.125       -48.          -48.          -47.984375  ]]]

Conclusions

  • matplotlib.image.imread (JPEG): dims: (height,width,channels), order: RGB, range: [0,255], dtype: uint8; can be plotted directly
  • cv2.imread: dims: (height,width,channels), order: BGR, range: [0,255], dtype: uint8; plots with red and blue swapped unless converted to RGB first
  • caffe.io.load_image: dims: (height,width,channels), order: RGB, range: [0,1], dtype: float32 (caffe_io_image = matplot_image/255.0); can be plotted directly
  • caffe network input (Transformer output): dims: (m,c,h,w), order: BGR, range: [0,255] before mean subtraction, dtype: float32; cannot be plotted directly (deprocess it first)

Reference

History

  • 20180816: created.