I am implementing a simple CNN in Keras and trying to set layer-wise learning rates in Adam. I followed this tutorial. The modified Adam looks like this:
from keras import backend as K
from keras.legacy import interfaces
from keras.optimizers import Optimizer

class Adam_lr_mult(Optimizer):
    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0., amsgrad=False,
                 multipliers=None, debug_verbose=True, **kwargs):
        # ... (omitted)
        self.multipliers = multipliers    # dict mapping layer names to lr multipliers
        self.layerwise_lr = {}            # records the effective layer-wise lr
        self.debug_verbose = debug_verbose

    @interfaces.legacy_get_updates_support
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        # standard Adam bias correction of the step size
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))
        # ... (rest of get_updates omitted)