While Caffe is made for deep networks, it can likewise represent “shallow” models like logistic regression for classification. We’ll do simple logistic regression on synthetic data that we’ll generate and save to HDF5 to feed vectors to Caffe. Once that model is done, we’ll add layers to improve accuracy. That’s what Caffe is about: define a model, experiment, and then deploy.
# Write out the data to HDF5 files in a temp directory.
# This file is assumed to be caffe_root/examples/hdf5_classification.ipynb
# NOTE(review): `os` is not imported in this excerpt; presumably an earlier
# cell does `import os` -- confirm before running.
dirname = os.path.abspath('./examples/hdf5_classification/data')
if not os.path.exists(dirname):
    os.makedirs(dirname)
# NOTE(review): `os`, `h5py`, `np`, `dirname`, the arrays `X`, `y`, `Xt`, `yt`
# and the paths `train_filename` / `test_filename` are not defined in this
# snippet; presumably they come from earlier cells -- confirm before running.

# HDF5DataLayer source should be a file containing a list of HDF5 filenames.
# To show this off, we'll list the same data file twice.
with h5py.File(train_filename, 'w') as f:
    f['data'] = X
    f['label'] = y.astype(np.float32)
with open(os.path.join(dirname, 'train.txt'), 'w') as f:
    f.write(train_filename + '\n')
    f.write(train_filename + '\n')

# HDF5 is pretty efficient, but can be further compressed.
comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
with h5py.File(test_filename, 'w') as f:
    f.create_dataset('data', data=Xt, **comp_kwargs)
    f.create_dataset('label', data=yt.astype(np.float32), **comp_kwargs)
with open(os.path.join(dirname, 'test.txt'), 'w') as f:
    f.write(test_filename + '\n')
Let’s define logistic regression in Caffe through Python net specification. This is a quick and natural way to define nets that sidesteps manually editing the protobuf model.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
from caffe import layers as L
from caffe import params as P

# NOTE(review): `logreg` is called below but is not defined in this excerpt;
# presumably it is a net-specification function taking (hdf5 list file,
# batch size) and returning a proto -- confirm against the original notebook.

# Write the training net definition, reading data listed in train.txt.
train_net_path = 'examples/hdf5_classification/logreg_auto_train.prototxt'
with open(train_net_path, 'w') as f:
    f.write(str(logreg('examples/hdf5_classification/data/train.txt', 10)))

# Write the test net definition, reading data listed in test.txt.
test_net_path = 'examples/hdf5_classification/logreg_auto_test.prototxt'
with open(test_net_path, 'w') as f:
    f.write(str(logreg('examples/hdf5_classification/data/test.txt', 10)))
Now, we’ll define our “solver” which trains the network by specifying the locations of the train and test nets we defined above, as well as setting values for various parameters used for learning, display, and “snapshotting”.
def solver(train_net_path, test_net_path):
    """Build the solver configuration for a train/test net pair.

    Args:
        train_net_path: Path of the training net definition (prototxt).
        test_net_path: Path of the test net definition (prototxt).

    Returns:
        The populated ``caffe_pb2.SolverParameter`` message.

    NOTE(review): ``caffe_pb2`` is not imported in this excerpt; presumably
    an earlier cell does ``from caffe.proto import caffe_pb2`` -- confirm.
    """
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and test networks.
    s.train_net = train_net_path
    s.test_net.append(test_net_path)

    s.test_interval = 1000  # Test after every 1000 training iterations.
    s.test_iter.append(250)  # Test 250 "batches" each time we test.

    s.max_iter = 10000  # # of times to update the net (training iterations)

    # Set the initial learning rate for stochastic gradient descent (SGD).
    s.base_lr = 0.01

    # Set `lr_policy` to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor `gamma`
    # every `stepsize` iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 5000

    # Set other optimization parameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000

    # Snapshots are files used to store networks we've trained. Here, we'll
    # snapshot every 10K iterations -- just once at the end of training.
    # For larger networks that take longer to train, you may want to set
    # snapshot < max_iter to save the network and training state to disk during
    # optimization, preventing disaster in case of machine crashes, etc.
    s.snapshot = 10000
    s.snapshot_prefix = 'examples/hdf5_classification/data/train'

    # We'll train on the CPU for fair benchmarking against scikit-learn.
    # Changing to GPU should result in much faster training!
    s.solver_mode = caffe_pb2.SolverParameter.CPU

    return s
# Serialize the solver configuration to a prototxt file.
solver_path = 'examples/hdf5_classification/logreg_solver.prototxt'
with open(solver_path, 'w') as f:
    f.write(str(solver(train_net_path, test_net_path)))
Time to learn and evaluate our Caffeinated logistic regression in Python.
I0224 00:32:03.232779 655 caffe.cpp:178] Use CPU.
I0224 00:32:03.391911 655 solver.cpp:48] Initializing solver from parameters:
train_net: "examples/hdf5_classification/logreg_auto_train.prototxt"
test_net: "examples/hdf5_classification/logreg_auto_test.prototxt"
......
I0224 00:32:04.087514 655 solver.cpp:406] Test net output #0: accuracy = 0.77
I0224 00:32:04.087532 655 solver.cpp:406] Test net output #1: loss = 0.593815 (* 1 = 0.593815 loss)
I0224 00:32:04.087541 655 solver.cpp:323] Optimization Done.
I0224 00:32:04.087548 655 caffe.cpp:222] Optimization Done.
If you look at the output or at logreg_auto_train.prototxt, you’ll see that the model is simple logistic regression. We can make it a little more advanced by introducing a non-linearity between the weights that take the input and the weights that give the output – now we have a two-layer network. That network is given in nonlinear_auto_train.prototxt, and that’s the only change made in nonlinear_logreg_solver.prototxt, which we will now use.
The final accuracy of the new network should be higher than logistic regression!
from caffe import layers as L
from caffe import params as P
def nonlinear_net(hdf5, batch_size):
    """Specify a two-layer net: InnerProduct -> ReLU -> InnerProduct.

    Args:
        hdf5: Passed as ``source`` to the HDF5Data layer (the scripts in this
            file pass a text file listing HDF5 filenames).
        batch_size: Batch size for the HDF5Data layer.

    Returns:
        The result of ``n.to_proto()`` for the assembled net specification.

    NOTE(review): ``caffe`` itself is not imported in this excerpt; presumably
    an earlier cell does ``import caffe`` -- confirm.
    """
    # one small nonlinearity, one leap for model kind
    n = caffe.NetSpec()
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
    # define a hidden layer of dimension 40
    n.ip1 = L.InnerProduct(n.data, num_output=40,
                           weight_filler=dict(type='xavier'))
    # transform the output through the ReLU (rectified linear) non-linearity
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    # score the (now non-linear) features
    n.ip2 = L.InnerProduct(n.ip1, num_output=2,
                           weight_filler=dict(type='xavier'))
    # same accuracy and loss as before
    n.accuracy = L.Accuracy(n.ip2, n.label)
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    return n.to_proto()
# Write the training net definition for the two-layer network.
train_net_path = 'examples/hdf5_classification/nonlinear_auto_train.prototxt'
with open(train_net_path, 'w') as f:
    f.write(str(nonlinear_net('examples/hdf5_classification/data/train.txt', 10)))

# Write the matching test net definition.
test_net_path = 'examples/hdf5_classification/nonlinear_auto_test.prototxt'
with open(test_net_path, 'w') as f:
    f.write(str(nonlinear_net('examples/hdf5_classification/data/test.txt', 10)))

# Write a solver file that points at the two nets written above.
solver_path = 'examples/hdf5_classification/nonlinear_logreg_solver.prototxt'
with open(solver_path, 'w') as f:
    f.write(str(solver(train_net_path, test_net_path)))