Example 1: Theano linear regression with gradient descent

Series

code example

#!/usr/bin/python
# -*- coding: utf-8 -*-

# https://blogs.msdn.microsoft.com/lukassteindl/2015/12/13/linear-regression-example-with-python-and-theano/
# http://blog.csdn.net/vins_napoleon/article/details/38057927

from numpy import *
import numpy as np
import theano
import theano.tensor as T
import time

class Linear_Reg(object):
    """Linear model ``y_pred = x * w + b`` with scalar shared parameters.

    ``w`` and ``b`` are theano shared variables initialised to 0.0 and are
    also exposed together as the list ``params``.
    """

    def __init__(self, x):
        # Author's note: x and y may be a scalar, a vector of shape (n,) or a
        # matrix of shape (m, n); because w and b are scalars, x can be a
        # vector.
        weight = theano.shared(value=0.0, name='w')
        bias = theano.shared(value=0.0, name='b')
        self.w = weight
        self.b = bias
        self.y_pred = x * weight + bias
        self.params = [weight, bias]

    def msl(self, y):
        """Return the sum of squared differences between ``y`` and ``y_pred``."""
        residual = y - self.y_pred
        return T.sum(residual ** 2)

class Linear_Reg2(object):
    """Linear model ``y_pred = x * w + b`` with vector parameters of shape (1,).

    ``w`` and ``b`` are theano shared variables holding a single zero of dtype
    ``theano.config.floatX``; they are also exposed as the list ``params``.
    """

    def __init__(self, x):
        # Author's note: x and y may be a scalar, a vector of shape (1,) or a
        # matrix of shape (m, 1); because w and b are vectors, x can be a
        # scalar.
        weight = theano.shared(
            value=np.zeros((1,), dtype=theano.config.floatX), name='w')
        bias = theano.shared(
            value=np.zeros((1,), dtype=theano.config.floatX), name='b')
        self.w = weight
        self.b = bias
        self.y_pred = x * weight + bias
        self.params = [weight, bias]

    def msl(self, y):
        """Return the sum of squared differences between ``y`` and ``y_pred``."""
        residual = y - self.y_pred
        return T.sum(residual ** 2)

def test_type():
    """Print a symbolic float64 scalar and one element of a shared vector.

    Shows, for each of the two, the variable itself and its ``.type`` so the
    two types can be compared.  Returns None.
    """
    points_x = [1.1, 2.2]
    X = theano.shared(np.asarray(points_x, dtype=theano.config.floatX),
                      borrow=True)
    x = T.dscalar('x')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(x)
    print(x.type)
    print(X[0])
    print(X[0].type)

    # Output recorded by the original author:
    #x
    #TensorType(float64, scalar)
    #Subtensor{int64}.0
    #TensorType(float64, scalar)
    
def test_type2():
    """Print a symbolic float64 vector and a slice of a shared vector.

    Shows, for each of the two, the variable itself and its ``.type`` so the
    two types can be compared.  Returns None.
    """
    points_x = [1.1, 2.2]
    X = theano.shared(np.asarray(points_x, dtype=theano.config.floatX),
                      borrow=True)
    x = T.dvector('x')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(x)
    print(x.type)
    print(X[0:2])
    print(X[0:2].type)

    # Output recorded by the original author:
    #x
    #TensorType(float64, vector)
    #Subtensor{int64:int64:}.0
    #TensorType(float64, vector)
    

def run_model1(mode):
    """
    Fit ``y = w * x + b`` to the points in "data.csv" with mini-batch
    gradient descent, using the scalar-parameter model ``Linear_Reg``.

    Every training call receives its batch as a plain NumPy value through the
    inputs of the compiled function.  Prints the elapsed training time, the
    mean batch cost of the last epoch and the learned ``w`` and ``b``.
    Returns None.

    mode selects the symbolic input type and the batch size:
        "scalar": dscalar inputs, mini_batch_size = 1 (one sample per update)
        "vector": dvector inputs, mini_batch_size = 5, batches of shape (5,)
        "matrix": dmatrix inputs, mini_batch_size = 5, data reshaped to (N, 1)

    Raises ValueError for any other mode.
    """
    batch_sizes = {"scalar": 1, "vector": 5, "matrix": 5}
    if mode not in batch_sizes:
        # Fail early with a clear message instead of an UnboundLocalError
        # further down.
        raise ValueError(
            "mode must be 'scalar', 'vector' or 'matrix', got %r" % (mode,))
    # "scalar" must use 1 so that every X[i] is visited.
    mini_batch_size = batch_sizes[mode]

    eta = 0.000001
    epochs = 1000
    # Two comma-separated columns: x, y ((100, 2) per the author's note).
    points = np.genfromtxt("data.csv", delimiter=",")
    x_data = np.asarray(points[:, 0], dtype=theano.config.floatX)
    y_data = np.asarray(points[:, 1], dtype=theano.config.floatX)
    N = x_data.shape[0]

    if mode == "scalar":
        x = T.dscalar('tx')
        y = T.dscalar('ty')
    elif mode == "vector":
        x = T.dvector('tx')
        y = T.dvector('ty')
    else:  # "matrix"
        x_data = x_data.reshape(N, 1)
        y_data = y_data.reshape(N, 1)
        x = T.dmatrix('tx')
        y = T.dmatrix('ty')

    # Floor division keeps this an int on Python 3 as well; a trailing
    # partial batch (N % mini_batch_size samples) is never used.
    num_batches = N // mini_batch_size

    reg = Linear_Reg(x=x)
    cost = reg.msl(y)

    w_g = T.grad(cost=cost, wrt=reg.w)
    b_g = T.grad(cost=cost, wrt=reg.b)

    updates = [(reg.w, reg.w - eta * w_g),
               (reg.b, reg.b - eta * b_g)]

    # x, y are the function inputs: at call time they must be given numeric
    # (Python/NumPy) values, not theano variables.
    train_model = theano.function(inputs=[x, y],
                                  outputs=cost,
                                  updates=updates,
                                  )

    # Slice the batches once instead of re-fetching the full data set for
    # every batch of every epoch.  Scalar mode yields X[0], X[1], ...; the
    # other modes yield X[0:5], X[5:10], ...
    if mode == "scalar":
        batches = [(x_data[i], y_data[i]) for i in range(num_batches)]
    else:
        batches = [(x_data[i * mini_batch_size:(i + 1) * mini_batch_size],
                    y_data[i * mini_batch_size:(i + 1) * mini_batch_size])
                   for i in range(num_batches)]

    # time.clock was removed in Python 3.8; prefer perf_counter when present.
    timer = getattr(time, "perf_counter", None) or time.clock

    cost_t = 0.0
    start_time = timer()

    for _ in range(epochs):
        # One epoch: all batches take part once, i.e. num_batches updates.
        epoch_costs = [train_model(bx, by) for bx, by in batches]
        cost_t = np.mean(epoch_costs)

    end_time = timer()
    print('\nTotal time is ： %s  s' % (end_time - start_time,))
    print('last cost : %s' % (cost_t,))
    print('w value :  %s' % (reg.w.get_value(),))
    print('b value :  %s' % (reg.b.get_value(),))

    
def run_model2(mode):
    """
    Fit ``y = w * x + b`` to the points in "data.csv" with mini-batch
    gradient descent, using the vector-parameter model ``Linear_Reg2``
    (w and b have shape (1,)).

    The data lives in theano shared variables and the compiled training
    function takes only a batch index; ``givens`` substitutes the matching
    slice of the shared data for the symbolic inputs.  Prints the elapsed
    training time, the mean batch cost of the last epoch and the learned
    ``w`` and ``b``.  Returns None.

    mode selects the symbolic input type and the batch size:
        "scalar": dscalar inputs, mini_batch_size = 1
        "vector": dvector inputs, mini_batch_size = 1, batches of shape (1,)
        "matrix": dmatrix inputs, mini_batch_size = 5, data reshaped to (N, 1)

    Raises ValueError for any other mode.
    """
    batch_sizes = {"scalar": 1, "vector": 1, "matrix": 5}
    if mode not in batch_sizes:
        # Fail early with a clear message instead of an UnboundLocalError
        # further down.
        raise ValueError(
            "mode must be 'scalar', 'vector' or 'matrix', got %r" % (mode,))
    mini_batch_size = batch_sizes[mode]

    eta = 0.000001
    epochs = 1000
    # Two comma-separated columns: x, y ((100, 2) per the author's note).
    points = np.genfromtxt("data.csv", delimiter=",")
    x_data = np.asarray(points[:, 0], dtype=theano.config.floatX)
    y_data = np.asarray(points[:, 1], dtype=theano.config.floatX)
    N = x_data.shape[0]

    if mode == "scalar":
        # Batches are X[0], X[1], ...
        x = T.dscalar('tx')
        y = T.dscalar('ty')
    elif mode == "vector":
        # Batches are X[0:1], X[1:2], ... (mini_batch_size is 1 here).
        x = T.dvector('tx')
        y = T.dvector('ty')
    else:  # "matrix"
        # Batches are X[0:5], X[5:10], ...
        x_data = x_data.reshape(N, 1)
        y_data = y_data.reshape(N, 1)
        x = T.dmatrix('tx')
        y = T.dmatrix('ty')

    X = theano.shared(x_data, borrow=True)
    Y = theano.shared(y_data, borrow=True)

    # Floor division keeps this an int on Python 3 as well; a trailing
    # partial batch (N % mini_batch_size samples) is never used.
    num_batches = N // mini_batch_size

    batch_index = T.lscalar()  # symbolic int64 batch number
    reg = Linear_Reg2(x=x)
    cost = reg.msl(y)

    # Build one (parameter, new value) update pair per parameter:
    # [(w, w - eta * w_g), (b, b - eta * b_g)]
    params = [reg.w, reg.b]
    grads = T.grad(cost, params)
    updates = [(param, param - eta * grad)
               for param, grad in zip(params, grads)]

    # The targets of `updates` must be shared variables.
    # The only function input is the batch index, which must be a plain
    # Python number at call time.  `givens` replaces x, y with the selected
    # part of X, Y; both sides are theano variables and their types must
    # match (float64 scalar, vector or matrix, depending on mode).
    if mode == "scalar":
        x_batch = X[batch_index]
        y_batch = Y[batch_index]
    else:
        start = batch_index * mini_batch_size
        stop = (batch_index + 1) * mini_batch_size
        x_batch = X[start:stop]
        y_batch = Y[start:stop]

    train_model = theano.function(inputs=[batch_index],
                                  outputs=cost,
                                  updates=updates,
                                  givens={x: x_batch,
                                          y: y_batch})

    # time.clock was removed in Python 3.8; prefer perf_counter when present.
    timer = getattr(time, "perf_counter", None) or time.clock

    cost_t = 0.0
    start_time = timer()

    for _ in range(epochs):
        # One epoch: all batches take part once, i.e. num_batches updates.
        # The loop variable is deliberately not called `index`, so it does
        # not shadow the symbolic batch index above.
        epoch_costs = [train_model(i) for i in range(num_batches)]
        cost_t = np.mean(epoch_costs)

    end_time = timer()
    print('\nTotal time is ： %s  s' % (end_time - start_time,))
    print('last cost : %s' % (cost_t,))
    print('w value :  %s' % (reg.w.get_value(),))
    print('b value :  %s' % (reg.b.get_value(),))


"""
run:
Total time is ： 2.796644  s
last cost : 113.178407322
w value :  1.48497718432
b value :  0.0890071567283

run2:
Total time is ： 2.127487  s
last cost : 113.178407322
w value :  [ 1.48497718]
b value :  [ 0.08900716]

run3:
Total time is ： 0.480144  s
avg cost : 565.419900755
w value :  [ 1.48492605]
b value :  [ 0.08896923]
"""
  
def test():
    """Run both type-printing demos: ``test_type`` first, then ``test_type2``."""
    for demo in (test_type, test_type2):
        demo()
    
if __name__ == '__main__':
    # Alternative entry points, left disabled:
    #test()
    #run_model1("scalar")
    #run_model1("vector")
    #run_model1("matrix")

    # Train the vector-parameter model once per input mode, in this order.
    for selected_mode in ("scalar", "vector", "matrix"):
        run_model2(selected_mode)

Total time is ： 1.929041  s
last cost : 113.178407322
w value :  [ 1.48497718]
b value :  [ 0.08900716]

Total time is ： 2.040197  s
last cost : 113.178407322
w value :  [ 1.48497718]
b value :  [ 0.08900716]

Total time is ： 0.553913  s
last cost : 565.419900755
w value :  [ 1.48492605]
b value :  [ 0.08896923]

Reference

History

20180807: created.