-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathseboost_parallel_simulation.lua
More file actions
202 lines (157 loc) · 6.92 KB
/
seboost_parallel_simulation.lua
File metadata and controls
202 lines (157 loc) · 6.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
--- Recursively deep-copies a value.
-- Non-table values are returned as-is. Tables are cloned key-by-key
-- (both keys and values are deep-copied) and the original's metatable
-- is attached to the clone by reference.
-- @param obj  any value; only tables are actually cloned
-- @param seen internal memo table (optional); maps already-copied
--             tables to their clones so cyclic and shared subtables
--             do not cause infinite recursion
-- @return the deep copy (or `obj` itself when it is not a table)
local function copy2(obj, seen)
  if type(obj) ~= 'table' then return obj end
  seen = seen or {}
  -- Already cloned: reuse the clone (handles cycles and shared refs).
  if seen[obj] then return seen[obj] end
  local res = setmetatable({}, getmetatable(obj))
  -- Register the clone BEFORE recursing so self-references resolve.
  seen[obj] = res
  for k, v in pairs(obj) do
    res[copy2(k, seen)] = copy2(v, seen)
  end
  return res
end
--[[ An implementation of SEBOOST
ARGS:
- `opfunc` : a function that takes a single input (X), the point
of evaluation, and returns f(X) and df/dX.
- `x` : the initial point
- `config` : a table with configuration parameters for the optimizer
- `config.optMethod` : The base optimization method
- `config.momentum` : weight for SEBOOST's momentum direction
- `config.histSize` : number of previous directions to keep in memory
- `config.anchorPoints` : A tensor of values, each describing the number of iterations between an update of an anchor point
- `config.sesopUpdate` : The number of regular optimization steps between each boosting step
- `config.sesopData` : The training data to use for the boosting stage
- `config.sesopLabels` : The labels to use for the boosting stage
- `config.sesopBatchSize` : The number of samples to use for each optimization step
- `config.isCuda` : Whether to train using cuda or cpu
- `state` : a table describing the state of the optimizer; after each
call the state is modified
- `state.itr` : The current optimization iteration
- `state.dirs` : The set of directions to optimize in
- `state.anchors` : The current anchor points
- `state.aOpt` : The current set of optimal coefficients
- `state.dirIdx` : The next direction to override
RETURN:
- `x` : the new x vector
- `f(x)` : the function, evaluated before the update
]]
require 'optim'
--[[
SV:
We have 'config.numNodes' nodes.
All nodes start from the same 'x'.
Every node does 'state.nodeIters' baseMethod iterations.
Once all the nodes are done, we do a sesop iteration to "merge"
The states of the different nodes.
On true parallel execution, the nodes will execute in parallel,
so the time for one external iteration will be:
T_p = T(baseMethod)*state.nodeIters + T(sesop)
On our simulation, the time is:
T_s = T(baseMethod)*state.nodeIters*config.numNodes + T(sesop)
Assume there are k external iterations in an epoch.
So assume number of internal iterations in an epoch divides state.nodeIters.
I.e., state.nodeIters*k = number of iterations in an epoch:
T_s(epoch) = (T(baseMethod)*state.nodeIters*config.numNodes + T(sesop))*k =
T_s(baseMethod)*(number of iterations in an epoch)*config.numNodes + T(sesop)*k
T_p(epoch) = (T(baseMethod)*state.nodeIters + T(sesop))*k =
T(baseMethod)*(number of iterations in an epoch) + T(sesop)*k
In our first experiment, we assume T(sesop) is negligible, and we compute the expected parallel time as:
T_p(epoch) = T_s(epoch)/config.numNodes
When we plot the graphs, we should see an improvement as we increase the number of nodes.
]]
--- One step of the SEBOOST parallel-simulation optimizer.
-- Simulates `config.numNodes` nodes that each run `config.nodeIters`
-- base-method iterations from a common split point; once all nodes have
-- run, their displacement vectors are merged with a SESOP (CG) step.
-- Interface matches torch/optim: returns the updated x and the function
-- value(s) evaluated during the step.
function optim.seboost(opfunc, x, config, state)
  -- Get/update state, filling in defaults.
  local config = config or {}
  local state = state or config
  local isCuda = config.isCuda or false
  local sesopData = config.sesopData
  local sesopLabels = config.sesopLabels
  local sesopBatchSize = config.sesopBatchSize or 100
  state.itr = state.itr or 0

  -- Number of base-method iterations per simulated node.
  config.nodeIters = config.nodeIters or 100
  config.numNodes = config.numNodes or 2
  state.currNode = state.currNode or 0          -- start from node 0
  state.lastNodeXs = state.lastNodeXs or {}     -- last x reached by each node
  -- The first split point is the initial point.
  state.splitPoint = state.splitPoint or x:clone()
  state.sesopIteration = state.sesopIteration or 0
  local isMergeIter = false
  state.itr = state.itr + 1

  -- Node switch: the current simulated node has finished its quota of
  -- base-method iterations.
  if (config.numNodes > 1 and state.itr % (config.nodeIters + 1) == 0) then
    -- Save the node's final x location.
    state.lastNodeXs[state.currNode] = state.lastNodeXs[state.currNode] or x:clone()
    state.lastNodeXs[state.currNode]:copy(x)
    -- Progress to the next node.
    state.currNode = (state.currNode + 1) % config.numNodes
    -- Wrapped back to node 0: all nodes are done, run SESOP to merge.
    if (state.currNode == 0) then
      isMergeIter = true
    end
    -- The new node starts from the shared split point.
    x:copy(state.splitPoint)
  end

  -- Regular (non-merge) iteration: run one step of the base method,
  -- using the current node's private optimizer state.
  if (isMergeIter == false or config.numNodes == 1) then
    config.optConfig[state.currNode] = config.optConfig[state.currNode] or copy2(config.initState)
    -- BUGFIX: 'fx' was assigned without 'local', leaking a global variable.
    local fx
    x, fx = config.optMethod(opfunc, x, config.optConfig[state.currNode])
    return x, fx
  end

  -- Now x is the split point.
  ------------------------- SESOP Part ----------------------------
  -- One direction column per node: (x_node - splitPoint).
  state.dirs = state.dirs or torch.zeros(x:size(1), config.numNodes)
  -- Start the coefficients at the uniform average of the node directions.
  -- (The previous 'state.aOpt or torch.zeros(...)' initialization was dead
  -- code: it was overwritten unconditionally right here.)
  state.aOpt = torch.ones(config.numNodes) * (1 / config.numNodes)
  if (isCuda) then
    state.dirs = state.dirs:cuda()
    state.aOpt = state.aOpt:cuda()
  end

  -- Build the directions matrix, one column per node.
  for i = 0, config.numNodes - 1 do
    -- [{ {}, i+1 }] selects all rows of column i+1.
    state.dirs[{ {}, i + 1 }]:copy(state.lastNodeXs[i] - state.splitPoint)
  end

  -- Optimize the merge coefficients starting from the split point.
  local xInit = state.splitPoint

  -- Select the next mini batch for the boosting stage.
  -- BUGFIX: the previous 'subT % (N - batchSize)' wrap could produce 0,
  -- which is an invalid start index for the 1-based narrow(); wrap into
  -- the range [1, N - batchSize] instead.
  local subT = (state.sesopIteration * sesopBatchSize)
      % (sesopData:size(1) - sesopBatchSize) + 1
  local sesopInputs = sesopData:narrow(1, subT, sesopBatchSize)
  local sesopTargets = sesopLabels:narrow(1, subT, sesopBatchSize)
  -- NOTE(review): inputs/labels are assumed to already live on the GPU
  -- when isCuda is set (the original conversion here was commented out).

  -- Inner objective over the coefficient vector a: evaluate the model at
  -- xInit + dirs*a on the fixed mini batch; chain rule gives the gradient
  -- with respect to a as dirs^T * df/dx.
  local feval = function(a)
    local dirMat = state.dirs
    -- Note that opfunc also receives the batch.
    local afx, adfdx = opfunc(xInit + dirMat * a, sesopInputs, sesopTargets)
    return afx, (dirMat:t() * adfdx)
  end

  -- Run CG on the coefficients (config.maxIter bounds the CG iterations).
  config.maxIter = config.numNodes
  local _, fHist = optim.cg(feval, state.aOpt, config, state)

  -- Update the model weights: x = xInit + dirs * aOpt.
  x:copy(xInit)
  local sesopDir = state.dirs * state.aOpt
  x:add(sesopDir)

  -- The merged x becomes the new split point; the next node switch will
  -- hand this x to the next node.
  state.splitPoint:copy(x)
  state.sesopIteration = state.sesopIteration + 1

  return x, fHist
end
return optim