I'm trying to get learning-rate decay to work with the eager execution model here, with no luck. It looks like a bug to me, since the learning-rate tensor never seems to be updated. Am I missing something? Thanks in advance.
The code below learns some word embeddings. However, the learning-rate decay part does not work at all.
```python
import os

import tensorflow as tf
import tensorflow.contrib.eager as tfe

import word2vec_utils

# NUM_SAMPLED, VOCAB_SIZE, EMBED_SIZE, BATCH_SIZE, NUM_TRAIN_STEPS, SKIP_STEP,
# SKIP_WINDOW, DOWNLOAD_URL, EXPECTED_BYTES and VISUAL_FLD are module-level
# constants defined elsewhere in the original script.

tfe.enable_eager_execution()


class Word2Vec(tf.keras.Model):
    def __init__(self, vocab_size, embed_size, num_sampled=NUM_SAMPLED):
        super(Word2Vec, self).__init__()  # required when subclassing tf.keras.Model
        self.vocab_size = vocab_size
        self.num_sampled = num_sampled
        self.embed_matrix = tfe.Variable(tf.random_uniform(
            [vocab_size, embed_size]), name="embedding_matrix")
        self.nce_weight = tfe.Variable(tf.truncated_normal(
            [vocab_size, embed_size],
            stddev=1.0 / (embed_size ** 0.5)), name="weights")
        self.nce_bias = tfe.Variable(tf.zeros([vocab_size]), name="biases")

    def compute_loss(self, center_words, target_words):
        """Computes the forward pass of word2vec with the NCE loss."""
        embed = tf.nn.embedding_lookup(self.embed_matrix, center_words)
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=self.nce_weight,
                                             biases=self.nce_bias,
                                             labels=target_words,
                                             inputs=embed,
                                             num_sampled=self.num_sampled,
                                             num_classes=self.vocab_size))
        return loss


def gen():
    yield from word2vec_utils.batch_gen(DOWNLOAD_URL, EXPECTED_BYTES,
                                        VOCAB_SIZE, BATCH_SIZE, SKIP_WINDOW,
                                        VISUAL_FLD)


def main():
    dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),
                                             (tf.TensorShape([BATCH_SIZE]),
                                              tf.TensorShape([BATCH_SIZE, 1])))
    global_step = tf.train.get_or_create_global_step()
    starter_learning_rate = 1.0
    end_learning_rate = 0.01
    decay_steps = 1000
    learning_rate = tf.train.polynomial_decay(starter_learning_rate,
                                              global_step.numpy(),
                                              decay_steps, end_learning_rate,
                                              power=0.5)

    train_writer = tf.contrib.summary.create_file_writer('./checkpoints')
    train_writer.set_as_default()

    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.95)
    model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)
    total_loss = 0.0  # for average loss in the last SKIP_STEP steps

    checkpoint_dir = "./checkpoints/"
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    root = tfe.Checkpoint(optimizer=optimizer,
                          model=model,
                          optimizer_step=tf.train.get_or_create_global_step())

    while global_step < NUM_TRAIN_STEPS:
        for center_words, target_words in tfe.Iterator(dataset):
            with tf.contrib.summary.record_summaries_every_n_global_steps(100):
                if global_step >= NUM_TRAIN_STEPS:
                    break
                loss_batch, grads = grad_fn(center_words, target_words)
                tf.contrib.summary.scalar('loss', loss_batch)
                tf.contrib.summary.scalar('learning_rate', learning_rate)
                # print(grads)
                # print(len(grads))
                total_loss += loss_batch
                optimizer.apply_gradients(grads, global_step)
                if (global_step.numpy() + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(
                        global_step.numpy(), total_loss / SKIP_STEP))
                    total_loss = 0.0
                    root.save(file_prefix=checkpoint_prefix)


if __name__ == '__main__':
    main()
```
Note that with eager execution enabled, tf.Tensor objects represent concrete values (as opposed to symbolic handles of computation that would only be materialized by a Session.run() call).
As a result, in the snippet above, the line:
```python
learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step.numpy(),
                                          decay_steps, end_learning_rate,
                                          power=0.5)
```
evaluates the decay exactly once, with whatever value global_step happens to have at the moment of the call. Then, when the optimizer is created with:
```python
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.95)
```
it is handed that fixed, already-computed learning rate.
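To see this concretely, here is a minimal sketch of my own (assuming a TF 1.x build new enough to have tf.enable_eager_execution): the decayed value is computed at call time and never changes afterwards.

```python
import tensorflow as tf

tf.enable_eager_execution()

global_step = tf.train.get_or_create_global_step()

# Under eager execution this call is evaluated immediately: the result is a
# concrete tensor holding the decayed rate for the *current* step only.
learning_rate = tf.train.polynomial_decay(1.0, global_step.numpy(),
                                          decay_steps=1000,
                                          end_learning_rate=0.01, power=0.5)
print(float(learning_rate))  # 1.0 at step 0

global_step.assign_add(500)
print(float(learning_rate))  # still 1.0 -- the value was baked in at call time
```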
To decay the learning rate, you need to invoke tf.train.polynomial_decay repeatedly (with an updated value of global_step). One way to do this is to replicate what is done in the RNN example, using something like the following:
```python
starter_learning_rate = 1.0
learning_rate = tfe.Variable(starter_learning_rate)
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.95)
while global_step < NUM_TRAIN_STEPS:
    # ....
    learning_rate.assign(tf.train.polynomial_decay(starter_learning_rate, global_step,
                                                   decay_steps, end_learning_rate,
                                                   power=0.5))
```
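As an aside: if your TF 1.x build is recent enough, the tf.train optimizers can, under eager execution, also take a zero-argument callable for learning_rate, which they re-evaluate each time the rate is needed; that avoids the explicit assign. A sketch, reusing the names from the snippet above:

```python
# Callable form: re-evaluated on each use by the optimizer (eager mode only).
# starter_learning_rate, global_step, decay_steps and end_learning_rate are
# the same values as in the snippets above.
def decayed_lr():
    return tf.train.polynomial_decay(starter_learning_rate, global_step,
                                     decay_steps, end_learning_rate, power=0.5)

optimizer = tf.train.MomentumOptimizer(decayed_lr, momentum=0.95)
```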
With the variable approach, you've captured learning_rate in a variable that can be updated as training progresses. It also makes it simple to include the current learning_rate in the checkpoint (by passing it along when creating the Checkpoint object), for example:
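A minimal sketch of that, reusing the tfe.Checkpoint call from the question with the learning-rate variable added:

```python
# Track the mutable learning rate alongside the model and optimizer so it is
# saved and restored together with everything else.
root = tfe.Checkpoint(optimizer=optimizer,
                      model=model,
                      learning_rate=learning_rate,  # the tfe.Variable created above
                      optimizer_step=tf.train.get_or_create_global_step())
```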
Hope that helps.