# Note: lenet_auto_solver.prototxt was edited here. This notebook is not run
# from caffe_root, so the relative net paths could not be used and were
# replaced with absolute ones.
# If a path in this file is wrong, the code that follows dies outright and
# cannot run -- so when nothing runs, first check the paths defined in this file.
!cat mnist/lenet_auto_solver.prototxt
# The train/test net protocol buffer definition
# train_net: "mnist/lenet_auto_train.prototxt"
train_net: "/home/xhb/caffe/caffe/examples/mnist/lenet_auto_train.prototxt"
# test_net: "mnist/lenet_auto_test.prototxt"
test_net: "/home/xhb/caffe/caffe/examples/mnist/lenet_auto_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "/home/xhb/caffe/caffe/examples/mnist/lenet"
# Note: this was run on a laptop, so CPU mode is used instead of GPU mode.
# The GPU variant that was replaced is kept here for reference:
#   caffe.set_device(0)
#   caffe.set_mode_gpu()
caffe.set_mode_cpu()

### load the solver and create train and test nets
# solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
solver = caffe.SGDSolver('mnist/lenet_auto_solver.prototxt')
為了大致瞭解網絡結構,我們可以檢查一下中間特徵(blob)以及各層參數的維度。
# List every blob of the training net with its shape;
# each output is (batch size, feature dim, spatial dim).
[(blob_name, blob.data.shape) for blob_name, blob in solver.net.blobs.items()]
[('data', (64, 1, 28, 28)),('label', (64,)),('conv1', (64, 20, 24, 24)),('pool1', (64, 20, 12, 12)),('conv2', (64, 50, 8, 8)),('pool2', (64, 50, 4, 4)),('fc1', (64, 500)),('score', (64, 10)),('loss', ())]
# List the weight shape of every parameterised layer. Only entry 0 of each
# layer's parameter list (the weights) is shown; the biases are omitted.
[(layer_name, layer_params[0].data.shape)
 for layer_name, layer_params in solver.net.params.items()]
[('conv1', (20, 1, 5, 5)),('conv2', (50, 20, 5, 5)),('fc1', (500, 800)),('score', (10, 500))]
%%time
niter = 200
test_interval = 25
n_test_batches = 100  # forward passes per full test
n_test_images = 1e4   # 100 batches x test batch size 100

# losses will also be stored in the log
train_loss = zeros(niter)
# One accuracy slot per full test. Integer ceiling division is used so that a
# trailing partial interval still gets a slot; `np.ceil(niter / test_interval)`
# floors first under Python 2 and then under-allocates the array.
test_acc = zeros((niter + test_interval - 1) // test_interval)
# Scores of the first 8 images of the current test batch, for every iteration.
output = zeros((niter, 8, 10))

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe

    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data

    # store the output on the first test batch
    # (start the forward pass at conv1 to avoid loading new data)
    solver.test_nets[0].forward(start='conv1')
    output[it] = solver.test_nets[0].blobs['score'].data[:8]

    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    # how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        # %-formatting prints the same text under Python 2 and Python 3.
        print('Iteration %d testing...' % it)
        correct = 0
        for test_it in range(n_test_batches):
            solver.test_nets[0].forward()
            # Count predictions (arg-max over the class axis) that match the labels.
            correct += (solver.test_nets[0].blobs['score'].data.argmax(1)
                        == solver.test_nets[0].blobs['label'].data).sum()
        test_acc[it // test_interval] = correct / n_test_images
Iteration 0 testing...
Iteration 25 testing...
Iteration 50 testing...
Iteration 75 testing...
Iteration 100 testing...
Iteration 125 testing...
Iteration 150 testing...
Iteration 175 testing...
CPU times: user 1min 21s, sys: 68 ms, total: 1min 21s
Wall time: 1min 20s
# For each of the 8 tracked test images: show the image itself, then a
# (label x iteration) map of its raw class scores over iterations 150-199.
for i in range(8):
    figure(figsize=(2, 2))
    imshow(solver.test_nets[0].blobs['data'].data[i, 0], cmap='gray')

    figure(figsize=(10, 2))
    # Transpose so that rows are the 10 labels and columns are iterations.
    imshow(output[150:200, i].T, interpolation='nearest', cmap='gray')
    xlabel('iteration')
    ylabel('label')
# Same plots as for the raw scores, but the scores are first turned into
# softmax probabilities so that each column (iteration) sums to 1.
for i in range(8):
    figure(figsize=(2, 2))
    imshow(solver.test_nets[0].blobs['data'].data[i, 0], cmap='gray')

    # Raw scores of image i for iterations 150-199: rows = labels, columns = iterations.
    scores = output[150:200, i].T
    # Softmax over the label axis (axis 0). Subtracting the per-column maximum
    # leaves the result mathematically unchanged but keeps exp() from
    # overflowing, and exp() is evaluated once instead of twice.
    exp_scores = exp(scores - scores.max(0))
    probs = exp_scores / exp_scores.sum(0)

    figure(figsize=(10, 2))
    imshow(probs, interpolation='nearest', cmap='gray')
    xlabel('iteration')
    ylabel('label')
examples_path = '/home/xhb/caffe/caffe/examples/'
train_net_path = examples_path + 'mnist/custom_auto_train.prototxt'
test_net_path = examples_path + 'mnist/custom_auto_test.prototxt'
solver_config_path = examples_path + 'mnist/custom_auto_solver.prototxt'


### define net
def custom_net(lmdb, batch_size):
    """Build the custom network definition and return it as a protobuf message.

    The net reads (data, label) pairs from an LMDB source, scales the data by
    1/255, computes 10 class scores and attaches a softmax loss.

    Args:
        lmdb: path of the LMDB database passed to the data layer as `source`.
        batch_size: batch size of the data layer.

    Returns:
        The result of `NetSpec.to_proto()`; `str()` of it is what gets written
        to the .prototxt files below.
    """
    # define your own net!
    n = caffe.NetSpec()

    # keep this data layer for all networks
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255), ntop=2)

    # EDIT HERE to try different networks
    # this single layer defines a simple linear classifier
    # (in particular this defines a multiway logistic regression)
    n.score = L.InnerProduct(n.data, num_output=10, weight_filler=dict(type='xavier'))

    # EDIT HERE this is the LeNet variant we have already tried
    # n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    # n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    # n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    # n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    # n.fc1 = L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'))
    # EDIT HERE consider L.ELU or L.Sigmoid for the nonlinearity
    # n.relu1 = L.ReLU(n.fc1, in_place=True)
    # n.score = L.InnerProduct(n.fc1, num_output=10, weight_filler=dict(type='xavier'))

    # keep this loss layer for all networks
    n.loss = L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()


# Write the train net (batch size 64) and the test net (batch size 100).
with open(train_net_path, 'w') as f:
    f.write(str(custom_net('mnist/mnist_train_lmdb', 64)))
with open(test_net_path, 'w') as f:
    f.write(str(custom_net('mnist/mnist_test_lmdb', 100)))

### define solver
from caffe.proto import caffe_pb2
s = caffe_pb2.SolverParameter()

# Set a seed for reproducible experiments:
# this controls for randomization in training.
s.random_seed = 0xCAFFE

# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
s.test_net.append(test_net_path)
s.test_interval = 500  # Test after every 500 training iterations.
s.test_iter.append(100)  # Test on 100 batches each time we test.

s.max_iter = 10000  # no. of times to update the net (training iterations)

# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "SGD"

# Set the initial learning rate for SGD.
s.base_lr = 0.01  # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking weighted average of current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4

# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
# s.lr_policy = 'fixed'

# Display the current training loss and accuracy every 1000 iterations.
s.display = 1000

# Snapshots are files used to store networks we've trained.
# We'll snapshot every 5K iterations -- twice during training.
s.snapshot = 5000
s.snapshot_prefix = 'mnist/custom_net'

# Train on the CPU, matching the caffe.set_mode_cpu() call used earlier in
# this file (the notebook is run on a machine without GPU mode).
# Use caffe_pb2.SolverParameter.GPU here to train on a GPU instead.
s.solver_mode = caffe_pb2.SolverParameter.CPU

# Write the solver definition to solver_config_path so it can be loaded below.
with open(solver_config_path, 'w') as f:
    f.write(str(s))

### load the solver and create train and test nets
# Rebind `solver` to None before creating the new solver.
# Ignore this line otherwise: it is a workaround for lmdb data
# (can't instantiate two solvers on the same data).
solver = None
# Build the solver from the configuration file at solver_config_path.
solver = caffe.get_solver(solver_config_path)

### solve
niter = 250  # EDIT HERE increase to train for longer
# Test ten times over the run. Floor division keeps the interval an integer
# (a float here cannot be used as an array index under Python 3), and the
# max() guard avoids a zero interval when niter < 10.
test_interval = max(1, niter // 10)
n_test_batches = 100  # forward passes per full test
n_test_images = 1e4   # 100 batches x test batch size 100

# losses will also be stored in the log
train_loss = zeros(niter)
# One accuracy slot per full test; integer ceiling division so that a trailing
# partial interval still gets a slot.
test_acc = zeros((niter + test_interval - 1) // test_interval)

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe

    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data

    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    # how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        # %-formatting prints the same text under Python 2 and Python 3.
        print('Iteration %d testing...' % it)
        correct = 0
        for test_it in range(n_test_batches):
            solver.test_nets[0].forward()
            # Count predictions (arg-max over the class axis) that match the labels.
            correct += (solver.test_nets[0].blobs['score'].data.argmax(1)
                        == solver.test_nets[0].blobs['label'].data).sum()
        test_acc[it // test_interval] = correct / n_test_images

# Plot train loss (left axis) and test accuracy (right axis, red) together.
_, ax1 = subplots()
ax2 = ax1.twinx()
ax1.plot(arange(niter), train_loss)
ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
ax2.set_title('Custom Test Accuracy: {:.2f}'.format(test_acc[-1]))
Iteration 0 testing...
Iteration 25 testing...
Iteration 50 testing...
Iteration 75 testing...
Iteration 100 testing...
Iteration 125 testing...
Iteration 150 testing...
Iteration 175 testing...
Iteration 200 testing...
Iteration 225 testing...
Text(0.5,1,u'Custom Test Accuracy: 0.88')