Compare commits
1 commit

| Author | SHA1 | Date |
|---|---|---|
| | be0d955f61 | |
@@ -0,0 +1,104 @@
require 'torch'
require 'optim'
require 'nn'  -- neural-network modules
require 'sys' -- for sys.clock(), used for the timing report below

local train_file_path = 'train.th7'
local test_file_path = 'test.th7'
local train_data = torch.load(train_file_path)
local test_data = torch.load(test_file_path)
local Y = train_data[{{},{2,5}}]          -- columns 2-5: the four targets
local X = train_data[{{},{6,-1}}]         -- columns 6 to end: the 350 features
local test_labels = test_data[{{},{2,5}}]
local test_X = test_data[{{},{6,-1}}]
local batch_size = 30
epochs = 3

model = nn.Sequential() -- define the container
ninputs = 350; noutputs = 4; nhiddens1 = 1024; nhiddens2 = 512; nhiddens3 = 256
model:add(nn.Linear(ninputs, nhiddens1))
model:add(nn.Sigmoid())
model:add(nn.Linear(nhiddens1, nhiddens2))
model:add(nn.Sigmoid())
model:add(nn.Linear(nhiddens2, nhiddens3))
model:add(nn.Sigmoid())
model:add(nn.Linear(nhiddens3, noutputs))

criterion = nn.AbsCriterion() -- alternative: nn.MSECriterion()
x, dl_dx = model:getParameters()

sgd_params = {
   learningRate = 0.01,
   learningRateDecay = 1e-08,
   weightDecay = 0,
   momentum = 0
}

function train(X, Y)
   current_loss = 0
   for batch = 1, (#train_data)[1], batch_size do
      local inputs = {}
      local targets = {}
      local x_start = batch
      local x_end = math.min(batch + batch_size - 1, (#train_data)[1])
      for i = x_start, x_end do
         table.insert(inputs, X[i])
         table.insert(targets, Y[i])
      end
      local feval = function(x_new)
         if x ~= x_new then
            x:copy(x_new)
         end
         dl_dx:zero()
         local f = 0
         for i = 1, #inputs do
            local loss_x = criterion:forward(model:forward(inputs[i]), targets[i])
            model:backward(inputs[i], criterion:backward(model.output, targets[i]))
            f = f + loss_x
         end
         return f / #inputs, dl_dx:div(#inputs)
      end
      _, fs = optim.adagrad(feval, x, sgd_params)
      current_loss = current_loss + fs[1]
   end
   current_loss = current_loss / ((#train_data)[1] / batch_size)
   print('train loss = ' .. current_loss)
   return current_loss
end

time = sys.clock()
local cumm_loss = 0.0
-- per-column test errors, declared here so the final report can see them
local loss1, loss2, loss3, loss4 = 0.0, 0.0, 0.0, 0.0
for j = 1, epochs do
   print(j)
   cumm_loss = train(X, Y)
   print('Final loss = ' .. cumm_loss)
   -- periodic test-set evaluation (note: never fires with epochs = 3)
   if j % 10 == 0 then
      print('id approx text')
      loss1, loss2, loss3, loss4 = 0.0, 0.0, 0.0, 0.0
      for i = 1, (#test_data)[1] do
         local myPrediction = model:forward(test_X[i])
         loss1 = loss1 + math.abs(myPrediction[1] - test_labels[i][1])
         loss2 = loss2 + math.abs(myPrediction[2] - test_labels[i][2])
         loss3 = loss3 + math.abs(myPrediction[3] - test_labels[i][3])
         loss4 = loss4 + math.abs(myPrediction[4] - test_labels[i][4])
      end
      loss1 = loss1 / (#test_data)[1]
      loss2 = loss2 / (#test_data)[1]
      loss3 = loss3 / (#test_data)[1]
      loss4 = loss4 / (#test_data)[1]
   end
end

-- time taken
time = sys.clock() - time
print("Time per epoch = " .. (time / epochs) .. '[s]')

print(loss1, loss2, loss3, loss4)
torch.save('estimation_model.dat', model)
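As a quick sanity check, the saved network can be reloaded and queried. A minimal sketch (not part of the commit), assuming test.th7 keeps the layout used above, with the features in columns 6 to the end:

local saved = torch.load('estimation_model.dat')
local check_data = torch.load('test.th7')
local sample = check_data[{1, {6, -1}}]  -- feature vector of the first test row
local prediction = saved:forward(sample) -- 1D tensor holding the 4 estimated targets
print(prediction)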
@@ -0,0 +1,130 @@
require 'rnn'
require 'optim'
require 'sys' -- for sys.clock()

batchSize = 30
rho = 10          -- number of time steps per training sequence
hiddenSize = 512
hiddenSize1 = 256
inputSize = 400
outputSize = 3
epochs = 100
xStart = 6
yStart = 2
yEnd = 4

local train_file_path = 'recurrent_train.th7'
local train_data = torch.load(train_file_path)
local Y = train_data[{{},{yStart,yEnd}}]
local X = train_data[{{},{xStart,-1}}]
seriesSize = (#train_data)[1]
print(seriesSize)
local test_file_path = 'recurrent_test.th7'
local test_data = torch.load(test_file_path)
local test_labels = test_data[{{},{yStart,yEnd}}]
local test_X = test_data[{{},{xStart,-1}}]

model = nn.Sequential()
model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize, rho)))
model:add(nn.Sequencer(nn.FastLSTM(hiddenSize, hiddenSize1, rho)))
model:add(nn.Sequencer(nn.Linear(hiddenSize1, outputSize)))

criterion = nn.SequencerCriterion(nn.AbsCriterion())

-- dummy dataset (task: predict the next item)
--dataset = torch.randn(seriesSize, inputSize)

-- define the indices of the batch elements
offsets = {}
for i = 1, batchSize do
   table.insert(offsets, i) -- alternative: math.ceil(math.random() * batchSize)
end
offsets = torch.LongTensor(offsets)

function nextBatch()
   local inputs, targets = {}, {}
   for step = 1, rho do
      -- get a batch of inputs
      table.insert(inputs, X:index(1, offsets))
      -- shift the batch indices by one, wrapping around at the end of the series
      offsets:add(1)
      for j = 1, batchSize do
         if offsets[j] > seriesSize then
            offsets[j] = 1
         end
      end
      -- the corresponding batch of targets
      table.insert(targets, Y[{{},{1,3}}]:index(1, offsets))
   end
   return inputs, targets
end

-- get weights and loss gradients w.r.t. the weights from the model
x, dl_dx = model:getParameters()

feval = function(x_new)
   -- copy the weights if they were changed by the optimizer
   if x ~= x_new then
      x:copy(x_new)
   end
   -- select a training batch
   local inputs, targets = nextBatch()
   -- reset gradients (gradients are always accumulated, to accommodate
   -- batch methods)
   dl_dx:zero()
   -- evaluate the loss function and its derivative w.r.t. x on the mini-batch
   local prediction = model:forward(inputs)
   local loss_x = criterion:forward(prediction, targets)
   model:backward(inputs, criterion:backward(prediction, targets))
   return loss_x, dl_dx
end

sgd_params = {
   learningRate = 0.01,
   learningRateDecay = 1e-08,
   weightDecay = 0,
   momentum = 0
}

time = sys.clock()
for j = 1, epochs do
   -- train one mini-batch of batchSize sequences in parallel
   _, fs = optim.adagrad(feval, x, sgd_params)
   print('error for iteration ' .. sgd_params.evalCounter .. ' is ' .. fs[1])
end

print('id approx text')
local loss1 = 0.0
local loss2 = 0.0
local loss3 = 0.0
local loss4 = 0.0 -- stays zero: this model has only three outputs
for i = 1, (#test_data)[1] do
   -- wrap the single test row as a length-1 sequence
   local inputs = { test_X[i] }
   local myPrediction = model:forward(inputs)
   loss1 = loss1 + math.abs(myPrediction[1][1] - test_labels[i][1])
   loss2 = loss2 + math.abs(myPrediction[1][2] - test_labels[i][2])
   loss3 = loss3 + math.abs(myPrediction[1][3] - test_labels[i][3])
   --loss4 = loss4 + math.abs(myPrediction[4] - test_labels[i][4])
end

loss1 = loss1 / (#test_data)[1]
loss2 = loss2 / (#test_data)[1]
loss3 = loss3 / (#test_data)[1]
--loss4 = loss4 / (#test_data)[1]

-- time taken
time = sys.clock() - time
print("Time per epoch = " .. (time / epochs) .. '[s]')

print(loss1, loss2, loss3, loss4)
torch.save('recurrent.dat', model)
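nn.Sequencer consumes a Lua table with one entry per time step (each entry a tensor, optionally batched) and returns a table of the same length. A minimal shape check on random data (hypothetical, not part of the commit; reuses the script's globals):

local seq = {}
for step = 1, rho do
   seq[step] = torch.randn(batchSize, inputSize) -- one random batch per step
end
local out = model:forward(seq)
print(#out, out[1]:size()) -- rho entries, each batchSize x outputSize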
@@ -0,0 +1,129 @@
require 'torch'
require 'optim'
require 'nn'

local train_file_path = 'train.th7'
local test_file_path = 'test.th7'
local train_data = torch.load(train_file_path)
local test_data = torch.load(test_file_path)
local train_labels = train_data[{{},{2,5}}]
local train_X = train_data[{{},{6,-1}}]
local test_labels = test_data[{{},{2,5}}]
local test_X = test_data[{{},{6,-1}}]
local batch_size = 30

model = nn.Sequential() -- define the container
ninputs = 350; noutputs = 4; nhiddens1 = 1024; nhiddens2 = 512; nhiddens3 = 256
--model:add(nn.Linear(ninputs, noutputs)) -- the single-module (linear) variant
model:add(nn.Linear(ninputs, nhiddens1))
model:add(nn.Sigmoid())
model:add(nn.Linear(nhiddens1, nhiddens2))
model:add(nn.Sigmoid())
model:add(nn.Linear(nhiddens2, nhiddens3))
model:add(nn.Sigmoid())
model:add(nn.Linear(nhiddens3, noutputs))

criterion = nn.AbsCriterion() -- alternative: nn.MSECriterion()
x, dl_dx = model:getParameters()

feval = function(x_new)
   if x ~= x_new then
      x:copy(x_new)
   end
   -- select a new training sample
   _nidx_ = (_nidx_ or 0) + 1
   if _nidx_ > (#train_data)[1] then _nidx_ = 1 end
   --local sample = data[_nidx_]
   local target = train_labels[_nidx_] -- this funny-looking syntax allows
   local inputs = train_X[_nidx_]      -- slicing of arrays
   -- reset gradients (gradients are always accumulated, to accommodate
   -- batch methods)
   dl_dx:zero()
   --print(inputs)
   --print(target)
   -- debugging leftover: verify that every feature is a plain number
   for i = 1, 350 do
      if type(inputs[i]) ~= 'number' then
         print(i)
         print(inputs[i])
         print(type(inputs[i]))
      end
   end
   --io.write("continue with this operation (y/n)?")
   --answer = io.read()
   -- evaluate the loss function and its derivative w.r.t. x, for that sample
   local loss_x = criterion:forward(model:forward(inputs), target)
   model:backward(inputs, criterion:backward(model.output, target))
   -- return loss(x) and dloss/dx
   return loss_x, dl_dx
end

-- Given the function above, we can now easily train the model using SGD.
-- For that, we need to define four key parameters:
--   + a learning rate: the size of the step taken at each stochastic
--     estimate of the gradient
--   + a weight decay, to regularize the solution (L2 regularization)
--   + a momentum term, to average steps over time
--   + a learning rate decay, to let the algorithm converge more precisely
sgd_params = {
   learningRate = 0.01,
   learningRateDecay = 1e-08,
   weightDecay = 0,
   momentum = 0
}

-- We're now good to go... all we have left to do is run over the dataset
-- for a certain number of iterations, and perform a stochastic update
-- at each iteration. The number of iterations is found empirically here,
-- but should typically be determined using cross-validation.
-- (this version makes a single outer pass over the training data)
for i = 1, 1 do
   print(i)
   -- this variable is used to estimate the average loss
   current_loss = 0
   -- an epoch is a full loop over our training data
   for i = 1, (#train_data)[1] do
      -- optim contains several optimization algorithms.
      -- All of these algorithms assume the same parameters:
      --   + a closure that computes the loss, and its gradient w.r.t. x,
      --     given a point x
      --   + a point x
      --   + some parameters, which are algorithm-specific
      _, fs = optim.adagrad(feval, x, sgd_params)
      -- Functions in optim all return two things:
      --   + the new x, found by the optimization method (here adagrad)
      --   + the value of the loss function at all points that were used by
      --     the algorithm; SGD-style methods only evaluate the function
      --     once, so that list just contains one value.
      current_loss = current_loss + fs[1]
   end
   -- report average error on epoch
   current_loss = current_loss / (#train_data)[1]
   print('train loss = ' .. current_loss)
end

----------------------------------------------------------------------
-- 5. Test the trained model.

-- Now that the model is trained, one can test it by evaluating it
-- on new samples.

-- (the comments below were apparently carried over from the Torch
-- regression tutorial this script was adapted from)
-- The text solves the model exactly using matrix techniques and determines
-- that
--   corn = 31.98 + 0.65 * fertilizer + 1.11 * insecticides

-- We compare our approximate results with the text's results.

print('id approx text')
local loss1 = 0.0
local loss2 = 0.0
local loss3 = 0.0
local loss4 = 0.0
for i = 1, (#test_data)[1] do
   local myPrediction = model:forward(test_X[i])
   loss1 = loss1 + math.abs(myPrediction[1] - test_labels[i][1])
   loss2 = loss2 + math.abs(myPrediction[2] - test_labels[i][2])
   loss3 = loss3 + math.abs(myPrediction[3] - test_labels[i][3])
   loss4 = loss4 + math.abs(myPrediction[4] - test_labels[i][4])
end

loss1 = loss1 / (#test_data)[1]
loss2 = loss2 / (#test_data)[1]
loss3 = loss3 / (#test_data)[1]
loss4 = loss4 / (#test_data)[1]

print(loss1, loss2, loss3, loss4)
torch.save('save.dat', model)
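The per-column error loops above repeat the same pattern once per output column; a small helper (hypothetical refactor, not part of the commit) computes the mean absolute error of any one column:

local function column_mae(net, inputs, labels, k)
   local total = 0.0
   local n = (#inputs)[1]
   for i = 1, n do
      local prediction = net:forward(inputs[i])
      total = total + math.abs(prediction[k] - labels[i][k])
   end
   return total / n
end
-- usage: print(column_mae(model, test_X, test_labels, 1))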
@@ -0,0 +1,109 @@
require 'rnn'
require 'optim'

-- generic-for iterator over from, from+step, ..., to
function range(from, to, step)
   step = step or 1
   return function(_, lastvalue)
      local nextvalue = lastvalue + step
      if step > 0 and nextvalue <= to or step < 0 and nextvalue >= to or
         step == 0
      then
         return nextvalue
      end
   end, nil, from - step
end
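-- usage example (added for illustration; range is defined here but never
-- called in the rest of this script):
--   for v in range(2, 10, 2) do print(v) end   -- prints 2, 4, 6, 8, 10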
local train_file_path = 'recurrent_train.th7'
local test_file_path = 'recurrent_test.th7'
local train_data = torch.load(train_file_path)
local test_data = torch.load(test_file_path)
local Y = train_data[{{},{2,5}}]
local X = train_data[{{},{6,-1}}]
local test_labels = test_data[{{},{2,5}}]
local test_X = test_data[{{},{6,-1}}]

batchSize = 5
rho = 10
hiddenSize1 = 1024
hiddenSize2 = 512
hiddenSize3 = 256
-- note: inputSize = 1 and outputSize = 1 do not match the widths of the X
-- and Y slices loaded above; this script looks like work in progress and is
-- kept as committed
inputSize = 1
outputSize = 1
seriesSize = 100

model = nn.Sequential()
model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize2, rho)))
model:add(nn.Sequencer(nn.FastLSTM(hiddenSize2, hiddenSize3, rho)))
--model:add(nn.Sequencer(nn.Linear(hiddenSize2, hiddenSize3, rho)))
--model:add(nn.Sequencer(nn.Sigmoid()))
model:add(nn.Sequencer(nn.Linear(hiddenSize3, outputSize)))

criterion = nn.SequencerCriterion(nn.MSECriterion())

-- dummy dataset (task: predict the next item)
--dataset = torch.randn(seriesSize, inputSize)

-- define the indices of the batch elements
offsets = {}
for i = 1, batchSize do
   table.insert(offsets, i)
end
offsets = torch.LongTensor(offsets)
print(offsets)

function nextBatch()
   local inputs, targets = {}, {}
   for step = 1, rho do
      -- get a batch of inputs
      table.insert(inputs, X:index(1, offsets))
      -- shift the batch indices by one, wrapping around at the end of the series
      offsets:add(1)
      for j = 1, batchSize do
         if offsets[j] > seriesSize then
            offsets[j] = 1
         end
      end
      -- the corresponding batch of targets
      table.insert(targets, Y:index(1, offsets))
   end
   return inputs, targets
end

-- get weights and loss gradients w.r.t. the weights from the model
x, dl_dx = model:getParameters()

feval = function(x_new)
   -- copy the weights if they were changed by the optimizer
   if x ~= x_new then
      x:copy(x_new)
   end
   -- select a training batch
   local inputs, targets = nextBatch()
   -- reset gradients (gradients are always accumulated, to accommodate
   -- batch methods)
   dl_dx:zero()
   -- evaluate the loss function and its derivative w.r.t. x on the mini-batch
   local prediction = model:forward(inputs)
   local loss_x = criterion:forward(prediction, targets)
   model:backward(inputs, criterion:backward(prediction, targets))
   return loss_x, dl_dx
end

sgd_params = {
   learningRate = 0.01,
   learningRateDecay = 1e-08,
   weightDecay = 0,
   momentum = 0
}

for i = 1, 2 do
   -- train one mini-batch of batchSize sequences in parallel
   _, fs = optim.adagrad(feval, x, sgd_params)
   if sgd_params.evalCounter % 100 == 0 then
      print('error for iteration ' .. sgd_params.evalCounter .. ' is ' .. fs[1] / rho)
   end
end
@@ -0,0 +1,144 @@
require 'rnn'
require 'optim'
require 'sys' -- for sys.clock()

batchSize = 30
rho = 20          -- number of time steps per training sequence
hiddenSize = 512
hiddenSize1 = 256
inputSize = 400
outputSize = 4
epochs = 10000
xStart = 6
yStart = 2
yEnd = 5

local train_file_path = 'recurrent_train.th7'
local train_data = torch.load(train_file_path)
local Y = train_data[{{},{yStart,yEnd}}]
local X = train_data[{{},{xStart,-1}}]
local place = train_data[{{},{1}}]
seriesSize = (#train_data)[1]
print(seriesSize)
local test_file_path = 'recurrent_test.th7'
local test_data = torch.load(test_file_path)
local test_labels = test_data[{{},{yStart,yEnd}}]
local test_X = test_data[{{},{xStart,-1}}]

model = nn.Sequential()
model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize, rho)))
model:add(nn.Sequencer(nn.FastLSTM(hiddenSize, hiddenSize1, rho)))
model:add(nn.Sequencer(nn.Linear(hiddenSize1, outputSize)))

criterion = nn.SequencerCriterion(nn.AbsCriterion())
--local method = 'xavier'
--local model_new = require('weight-init')(model, method)

-- define the indices of the batch elements; each call draws a fresh set of
-- random starting offsets (all within [1, batchSize] as written)
offsets = {}
function offset_(seed)
   offsets = {}
   math.randomseed(seed)
   for i = 1, batchSize do
      table.insert(offsets, math.ceil(math.random() * batchSize))
   end
   offsets = torch.LongTensor(offsets)
end

function nextBatch()
   local inputs, targets = {}, {}
   local nums = {} -- values from column 1; collected but never returned
   for step = 1, rho do
      -- get a batch of inputs
      table.insert(inputs, X:index(1, offsets))
      -- shift the batch indices by one, wrapping around at the end of the series
      offsets:add(1)
      for j = 1, batchSize do
         if offsets[j] > seriesSize then
            offsets[j] = 1
         end
      end
      -- the corresponding batch of targets
      table.insert(targets, Y[{{},{1,4}}]:index(1, offsets))
      table.insert(nums, place:index(1, offsets))
   end
   return inputs, targets
end

-- get weights and loss gradients w.r.t. the weights from the model
x, dl_dx = model:getParameters()

feval = function(x_new)
   -- copy the weights if they were changed by the optimizer
   if x ~= x_new then
      x:copy(x_new)
   end
   -- select a training batch
   local inputs, targets = nextBatch()
   -- reset gradients (gradients are always accumulated, to accommodate
   -- batch methods)
   dl_dx:zero()
   -- evaluate the loss function and its derivative w.r.t. x on the mini-batch
   local prediction = model:forward(inputs)
   local loss_x = criterion:forward(prediction, targets)
   model:backward(inputs, criterion:backward(prediction, targets))
   return loss_x, dl_dx
end

adagrad_params = {
   learningRate = 0.01,
   learningRateDecay = 1e-08,
   weightDecay = 0,
   momentum = 0
}

seed = 1
offset_(seed)
time = sys.clock()
for j = 1, epochs do
   -- re-randomize the batch offsets every 1000 iterations
   if j % 1000 == 0 then
      seed = seed + 1
      offset_(seed)
   end
   -- train one mini-batch of batchSize sequences in parallel
   _, fs = optim.adagrad(feval, x, adagrad_params)
   print('error for iteration ' .. adagrad_params.evalCounter .. ' is ' .. fs[1] / rho)
end

print('id approx text')
local loss1 = 0.0
local loss2 = 0.0
local loss3 = 0.0
local loss4 = 0.0
predict_batch = 100 -- assumes the test-set size is a multiple of predict_batch
for i = 1, (#test_data)[1], predict_batch do
   local inputs = {}
   for step = 0, predict_batch - 1 do
      -- get a batch of inputs
      table.insert(inputs, test_X[i + step])
   end
   local myPrediction = model:forward(inputs)
   for step = 1, predict_batch do
      loss1 = loss1 + math.abs(myPrediction[step][1] - test_labels[i+step-1][1])
      loss2 = loss2 + math.abs(myPrediction[step][2] - test_labels[i+step-1][2])
      loss3 = loss3 + math.abs(myPrediction[step][3] - test_labels[i+step-1][3])
      loss4 = loss4 + math.abs(myPrediction[step][4] - test_labels[i+step-1][4])
   end
end

loss1 = loss1 / (#test_data)[1]
loss2 = loss2 / (#test_data)[1]
loss3 = loss3 / (#test_data)[1]
loss4 = loss4 / (#test_data)[1]

-- time taken
time = sys.clock() - time
print("Time per epoch = " .. (time / epochs) .. '[s]')

print(loss1, loss2, loss3, loss4)
torch.save('recurrent3.dat', model)
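When the saved recurrent model is reloaded for inference, the LSTM state carried over from training should be cleared first. A minimal sketch (not part of the commit), assuming the rnn package's standard evaluate()/forget() API:

require 'rnn'
local net = torch.load('recurrent3.dat')
net:evaluate() -- switch off training-only behaviour
net:forget()   -- reset the hidden state before feeding new sequences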