The multistart descent implemented in DGCG_algorithm/src/insertion_step.py (lines 63 to 188 in e5d2a31)
def multistart_descent(current_measure):
    """ Uses multistart descent to search for the global minimizing curve.
    The multistart method corresponds to descending multiple randomly
    generated curves and recording the resulting stationary point of each
    descent, expecting to find with this method the global minimizing curve.
    There are 2 main details:
        - To decrease the number of descents, this method routinely checks
          if the currently descended curve is close to the already known
          ones. If so, it stops and discards the curve.
        - The descended curves are proposed by the insertion_mod module.
          It consists of: already known curves, crossover curves, random ones.
    --------------------
    Inputs:
        current_measure (curves.measure class):
            the current iterate of the algorithm.
    Output:
        stationary_curves (list of curves.curve):
            list of curves, found stationary points of F(γ).
        energy_curves (list of floats, ordered):
            respective energy of the stationary_curves.
    Kwargs: None
    """
    logger = config.logger
    # needed initializations
    w_t = op.w_t(current_measure)
    energy_curves = []
    stationary_curves = []
    # load configuration parameters
    max_restarts = config.insertion_max_restarts
    insertion_min_restarts = config.insertion_min_restarts
    multistart_early_stop = config.multistart_early_stop  # this is a function
    prop_max_iter = config.multistart_proposition_max_iter
    #
    min_energy = np.inf
    tries = 0
    while tries <= insertion_min_restarts or \
            tries <= min(max_restarts,
                         multistart_early_stop(tries, len(energy_curves))):
        if len(energy_curves) > 0:
            min_energy = min(energy_curves)
        logger.status([1, 1, 1], tries, stationary_curves)
        # The insertion module proposes curves to descend with negative energy
        proposed_energy = np.inf
        num_iter = 0
        while proposed_energy >= 0 and num_iter < prop_max_iter:
            new_curve = insertion_mod.propose(w_t, stationary_curves,
                                              energy_curves)
            proposed_energy = opt.F(new_curve, w_t)
            num_iter += 1
        if num_iter == prop_max_iter:
            raise Exception('Reached maximum number of tolerated proposed ' +
                            'curves. Please inspect insertion_mod.propose ' +
                            'method')
        # descend the curve
        descent_iters = 0
        descent_max_iter = config.multistart_descent_max_iter
        descent_soft_max_iter = config.multistart_descent_soft_max_iter
        soft_max_threshold = config.multistart_descent_soft_max_threshold
        stepsize = config.multistart_descent_init_step
        lim_stepsize = config.multistart_descent_limit_stepsize
        inter_iters = config.multistart_inter_iteration_checkup
        #
        while descent_iters < descent_max_iter and stepsize > lim_stepsize:
            # This while-loop applies the gradient descent on curves, while
            # simultaneously checking at intermediate steps if certain
            # conditions are satisfied. These are the possible cases:
            # case 1: A stationary point is found. This is captured when the
            #         stepsize goes below lim_stepsize.
            # case 2: The descended curve got at some point close to the
            #         stationary set. The while breaks.
            # case 3: The descended curve is taking too much time to converge
            #         while not getting close enough to the taboo set
            #         (this is if descent_soft_max_iter is reached).
            # case 3.1: If the value F(γ) is 0.9-close to the best known
            #           case, the descent continues until descent_max_iter
            #           is reached.
            # case 3.2: If the value F(γ) is not close enough to the best
            #           known case, the while loop is ended.
            close_to_known_set = False
            new_curve, stepsize = gradient_descent(new_curve, w_t,
                                                   max_iter=inter_iters,
                                                   init_step=stepsize)
            descent_iters += inter_iters
            new_curve_energy = opt.F(new_curve, w_t)
            logger.status([1, 1, 2])
            if is_close_to_stationaries(new_curve, new_curve_energy,
                                        stationary_curves, energy_curves):
                # if the new_curve is too close to a stationary curve,
                # break and discard
                logger.status([1, 1, 3])
                close_to_known_set = True
                if descent_iters == inter_iters:
                    # It converged on the first set of iterations, so it does
                    # not count toward the iteration count
                    tries = tries - 1
                break
            if descent_iters >= descent_soft_max_iter:
                # check if the curve is getting somewhere good
                if new_curve_energy < min_energy*soft_max_threshold:
                    # It is going good
                    pass
                else:
                    # Just introduce it as it is into the stationary curve set
                    logger.status([1, 1, 4], new_curve_energy, min_energy)
                    # this is a way to exit simulating that the curve converged
                    stepsize = lim_stepsize/2
        if close_to_known_set:
            pass
        else:
            # In all the other cases, the descended curve is inserted into
            # the taboo set. We insert them in a sorted fashion.
            insert_index = np.searchsorted(energy_curves, new_curve_energy)
            energy_curves.insert(insert_index, new_curve_energy)
            stationary_curves.insert(insert_index, new_curve)
            # the insertion mod needs to know the order of the curves
            insertion_mod.update_crossover_memory(insert_index)
            if descent_iters >= descent_max_iter:
                # Reached maximum of iterations, added to stationary curves set
                logger.status([1, 1, 5])
            elif stepsize <= lim_stepsize:
                logger.status([1, 1, 7], stationary_curves)
            else:
                raise Exception('Unexpected descent case')
        tries = tries + 1
    return stationary_curves, energy_curves
can be implemented such that it allows simultaneous multistarts, i.e. parallelization or multithreading.
The main difficulty is that the multistart descent keeps track of the stationary points found so far, and it uses them to
accelerate the descent of the other starts by checking whether the currently descended curve is close to these stationary curves
(avoiding the waste of computing resources on descending a curve towards an already known stationary point).
When multithreading, this knowledge of the stationary points should be shared among threads.
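A minimal sketch of what that sharing could look like, under explicit assumptions: the callables `propose_curve`, `partial_descent` and `is_close` below are hypothetical stand-ins for `insertion_mod.propose`, a short burst of `gradient_descent` iterations (in the spirit of `multistart_inter_iteration_checkup`), and `is_close_to_stationaries` respectively. The point of the sketch is only the synchronization pattern: the stationary curves and their energies live in a single lock-protected structure that every worker consults during its descent.

```python
import threading
from concurrent.futures import ThreadPoolExecutor

import numpy as np


class SharedTabooSet:
    """Thread-safe stand-in for the (stationary_curves, energy_curves) pair."""

    def __init__(self):
        self._lock = threading.Lock()
        self.curves = []     # kept sorted by energy, as in the sequential code
        self.energies = []

    def is_known(self, curve, energy, is_close):
        # is_close plays the role of is_close_to_stationaries
        with self._lock:
            return is_close(curve, energy, self.curves, self.energies)

    def insert(self, curve, energy):
        with self._lock:
            idx = int(np.searchsorted(self.energies, energy))
            self.energies.insert(idx, energy)
            self.curves.insert(idx, curve)
            return idx       # could be forwarded to update_crossover_memory


def parallel_multistart(propose_curve, partial_descent, is_close,
                        n_starts, init_stepsize=1.0, lim_stepsize=1e-8,
                        n_workers=4):
    """Descend n_starts curves concurrently, sharing one taboo set.

    propose_curve()                       -> fresh initial curve
    partial_descent(curve, stepsize)      -> (curve, stepsize, energy) after a
                                             small batch of descent iterations
    is_close(curve, energy, curves, Es)   -> True if curve is near a known one
    All three callables are assumptions, not part of the repository API.
    """
    taboo = SharedTabooSet()

    def one_start(_):
        curve = propose_curve()
        stepsize = init_stepsize
        while True:
            curve, stepsize, energy = partial_descent(curve, stepsize)
            # Periodic check against the *shared* taboo set: if another
            # worker already found this stationary point, stop and discard.
            if taboo.is_known(curve, energy, is_close):
                return
            if stepsize <= lim_stepsize:
                break        # stationary point reached
        taboo.insert(curve, energy)

    with ThreadPoolExecutor(max_workers=n_workers) as pool:
        list(pool.map(one_start, range(n_starts)))
    return taboo.curves, taboo.energies
```

Since each descent is numerically heavy, in practice the same structure would probably be run with processes rather than threads (e.g. a multiprocessing manager or shared memory holding the taboo set) to sidestep the GIL; the sketch above only illustrates how the knowledge of stationary points could be shared.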
Another acceleration possibility is to throw many random curves and briefly descend them, then switch between the partially descended curves using their current "energy" F(γ)
as the decision criterion: the smaller this value, the better.
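A rough sketch of that scheduling idea, reusing the hypothetical `propose_curve` and `partial_descent` helpers from the previous sketch: keep the partially descended curves in a min-heap keyed by their current energy, always spend the next burst of iterations on the most promising curve, and retire a curve once its stepsize signals convergence.

```python
import heapq
import itertools


def prioritized_multistart(propose_curve, partial_descent, n_starts=20,
                           lim_stepsize=1e-8, max_bursts=10_000):
    """Interleave many partial descents, always advancing the curve with the
    smallest current energy F(γ).

    propose_curve()                  -> fresh initial curve
    partial_descent(curve, stepsize) -> (curve, stepsize, energy) after a
                                        short burst of descent iterations
    Both callables are assumptions standing in for insertion_mod.propose and
    the inner gradient_descent loop of the repository.
    """
    counter = itertools.count()   # tie-breaker so heapq never compares curves
    heap = []
    # briefly descend every proposed curve once to obtain an initial energy
    for _ in range(n_starts):
        curve, stepsize, energy = partial_descent(propose_curve(), 1.0)
        heapq.heappush(heap, (energy, next(counter), curve, stepsize))

    converged = []                # (energy, curve) of finished descents
    bursts = 0
    while heap and bursts < max_bursts:
        energy, _, curve, stepsize = heapq.heappop(heap)
        if stepsize <= lim_stepsize:
            converged.append((energy, curve))   # treat as a stationary point
            continue
        # spend another burst of iterations on the currently best curve
        curve, stepsize, energy = partial_descent(curve, stepsize)
        heapq.heappush(heap, (energy, next(counter), curve, stepsize))
        bursts += 1

    converged.sort(key=lambda pair: pair[0])
    return converged
```

This biases the computational budget towards the candidates most likely to become the global minimizer, and it could be combined with the shared taboo set above to discard partially descended curves that drift close to already known stationary points.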