python numpy random shuffle and theano shuffle

Publish Date: 2018-08-07

Word Count: 974

Read Times: 6 Min

Read Count:

Examples of shuffle

"""
numpy.random.shuffle(x)
Modify a sequence in-place by shuffling its contents.

random.shuffle(list)只能对list进行随机打乱。

Parameters:
x : array_like
    The array or list to be shuffled.

Returns: None

This function only shuffles the array along the first index of a multi-dimensional array
（多维矩阵中，只对第一维（行）做打乱顺序操作）
"""
import numpy as np
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

np.random.shuffle(arr)
arr

array([1, 4, 7, 3, 0, 9, 5, 8, 2, 6])

arr = np.arange(9).reshape((3, 3))
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

# 对于矩阵，按照row进行打乱。
np.random.shuffle(arr)
arr

array([[3, 4, 5],
       [0, 1, 2],
       [6, 7, 8]])

# 对于矩阵，按照row进行打乱。
np.random.shuffle(arr)
arr

array([[3, 4, 5],
       [6, 7, 8],
       [0, 1, 2]])

# random.shuffle(list)
l = [i for i in xrange(0,10)]

import random
random.shuffle(l)
l

[2, 9, 5, 8, 6, 3, 7, 1, 4, 0]

random.shuffle(l)
l

[3, 9, 2, 1, 5, 7, 8, 4, 0, 6]

Shuffle two sequences at the same time

shuffle的状态依赖于random.seed,np.random.seed，可以使用time作为seed

random.seed(time.time())

2次shuffle的seed不一样，结果不一致

import random
import numpy as np
# Demo: shuffling two arrays WITHOUT a common seed.
train_data = (np.array([[1,1],
                       [2,2],
                       [3,3],
                       [4,4]]), np.array([11,22,33,44]))
# x and y are the very array objects stored inside train_data (no copies).
x = train_data[0]
y = train_data[1]

# Both seed calls below are left commented out, so the two shuffles draw
# from different points of the random stream: x and y get independent
# permutations and row i of x no longer lines up with y[i].
#np.random.seed(1)
np.random.shuffle(x)
print x

#np.random.seed(1)
np.random.shuffle(y)
print y

# The shuffles are in place, so the change is also visible through the
# train_data tuple.
print
print train_data

[[3 3]
 [1 1]
 [2 2]
 [4 4]]
[22 11 33 44]

(array([[3, 3],
       [1, 1],
       [2, 2],
       [4, 4]]), array([22, 11, 33, 44]))

2次shuffle的seed一样，结果保持一致

import random
import numpy as np
# Demo: shuffling two arrays WITH the same seed.
train_data = (np.array([[1,1],
                       [2,2],
                       [3,3],
                       [4,4]]), np.array([11,22,33,44]))
# x and y are the very array objects stored inside train_data (no copies).
x = train_data[0]
y = train_data[1]

# Reseeding with the same value before each shuffle makes both calls use
# the same random draws, so x and y receive the same permutation and
# row i of x still corresponds to y[i] afterwards.
seed = 1

np.random.seed(seed)
np.random.shuffle(x)
print x

np.random.seed(seed)
np.random.shuffle(y)
print y

# The shuffles are in place, so the change is also visible through the
# train_data tuple.
print
print train_data

[[4 4]
 [3 3]
 [1 1]
 [2 2]]
[44 33 11 22]

(array([[4, 4],
       [3, 3],
       [1, 1],
       [2, 2]]), array([44, 33, 11, 22]))

shuffle_data(x,y)

import time
import random
import numpy as np
train_data = (np.array([[1,1],
                       [2,2],
                       [3,3],
                       [4,4]]), np.array([11,22,33,44]))
x = train_data[0]
y = train_data[1]

def shuffle_data(x, y, seed=1):
    """Shuffle ``x`` and ``y`` in place using one shared permutation.

    Both sequences are reordered along their first axis by replaying the
    same random stream, so ``x[i]`` and ``y[i]`` still belong together
    afterwards. This only holds when ``x`` and ``y`` have the same length.

    Parameters
    ----------
    x, y : array_like
        Mutable sequences (e.g. NumPy arrays) that are shuffled in place.
    seed : int or None, optional
        Seed for the private generator. The default ``1`` reproduces the
        permutation of the previously hard-coded seed. ``None`` lets NumPy
        pick a fresh seed, giving a different (still shared) permutation
        on every call.

    Returns
    -------
    None
    """
    # Use a private RandomState instead of np.random.seed(): reseeding the
    # global generator would reset the random stream for the whole program.
    rng = np.random.RandomState(seed)
    start_state = rng.get_state()
    rng.shuffle(x)

    # Rewind so that y consumes exactly the same draws as x did.
    rng.set_state(start_state)
    rng.shuffle(y)

shuffle_data(x,y)
print x
print y
print 
print train_data

[[4 4]
 [3 3]
 [1 1]
 [2 2]]
[44 33 11 22]

(array([[4, 4],
       [3, 3],
       [1, 1],
       [2, 2]]), array([44, 33, 11, 22]))

Shuffle in theano with TensorVariable

import time
import random
import numpy as np
import theano
import theano.tensor as T

train_data = (np.array([[1,1],
                       [2,2],
                       [3,3],
                       [4,4]],dtype='float64'), np.array([11,22,33,44],dtype='int32'))
x = train_data[0]
y = train_data[1]

def shuffle_data(x, y, seed=1):
    """Shuffle ``x`` and ``y`` in place using one shared permutation.

    Both sequences are reordered along their first axis by replaying the
    same random stream, so ``x[i]`` and ``y[i]`` still belong together
    afterwards. This only holds when ``x`` and ``y`` have the same length.

    Parameters
    ----------
    x, y : array_like
        Mutable sequences (e.g. NumPy arrays) that are shuffled in place.
    seed : int or None, optional
        Seed for the private generator. The default ``1`` reproduces the
        permutation of the previously hard-coded seed. ``None`` lets NumPy
        pick a fresh seed, giving a different (still shared) permutation
        on every call.

    Returns
    -------
    None
    """
    # Use a private RandomState instead of np.random.seed(): reseeding the
    # global generator would reset the random stream for the whole program.
    rng = np.random.RandomState(seed)
    start_state = rng.get_state()
    rng.shuffle(x)

    # Rewind so that y consumes exactly the same draws as x did.
    rng.set_state(start_state)
    rng.shuffle(y)

# Wrap the NumPy arrays in Theano shared variables with borrow=True. In the
# recorded run the shared variables alias the train_data arrays (see the
# identity checks further down), which is why shuffling the arrays in place
# also changes what the shared variables hold.
# NOTE(review): presumably this aliasing only holds on the CPU backend - confirm.
shared_x = theano.shared(train_data[0],  borrow=True) 
shared_y = theano.shared(train_data[1],  borrow=True) 

# Alternative kept for reference: convert to floatX before sharing.
#shared_x = theano.shared(np.asarray(train_data[0], dtype=theano.config.floatX), borrow=True)  # no copy
#shared_y = theano.shared(np.asarray(train_data[1], dtype=theano.config.floatX), borrow=True)  # no copy

y_cast = T.cast(shared_y,"int32") 
# shared_y is already int32 here, and in the recorded run the cast returned
# shared_y itself (the `is` check below printed True), so y_cast is still a
# TensorSharedVariable(int32, vector).
# Author's note: if shared_y were float64, the cast would produce a new
# TensorVariable(int32, vector) instead - not exercised in this script.

print shared_x.type,shared_y.type,y_cast.type
print type(shared_x),type(shared_y),type(y_cast)
print shared_y is y_cast

print 'old train'
print train_data

# Disabled experiment: mutate the arrays directly instead of shuffling.
#print '\nupdate train'
#x[0] = 100
#y[0] = 100

print '\nshuffle train'
#shuffle_data(train_data[0],train_data[1])

# get_value(borrow=True) is used to obtain the underlying arrays; the two
# identity checks confirm they are the same objects as x and y.
originX = shared_x.get_value(borrow=True)
originY = shared_y.get_value(borrow=True)
print originX is x # True in the recorded run
print originY is y # True in the recorded run
# Shuffle the borrowed arrays in place with a shared permutation.
shuffle_data(originX,originY)

print train_data

# The shared variables report the shuffled order as well.
print '\nshared x and y'
print shared_x.get_value() 
print shared_y.get_value()

WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

WARNING (theano.sandbox.cuda): CUDA is installed, but device gpu is not available  (error: Unable to get the number of gpus available: no CUDA-capable device is detected)


TensorType(float64, matrix) TensorType(int32, vector) TensorType(int32, vector)
<class 'theano.tensor.sharedvar.TensorSharedVariable'> <class 'theano.tensor.sharedvar.TensorSharedVariable'> <class 'theano.tensor.sharedvar.TensorSharedVariable'>
True
old train
(array([[ 1.,  1.],
       [ 2.,  2.],
       [ 3.,  3.],
       [ 4.,  4.]]), array([11, 22, 33, 44], dtype=int32))

shuffle train
True
True
(array([[ 4.,  4.],
       [ 3.,  3.],
       [ 1.,  1.],
       [ 2.,  2.]]), array([44, 33, 11, 22], dtype=int32))

shared x and y
[[ 4.  4.]
 [ 3.  3.]
 [ 1.  1.]
 [ 2.  2.]]
[44 33 11 22]