callyblog
def noam_scheme(global_step, num_warmup_steps, num_train_steps, init_lr, warmup=True):
    """Build a learning-rate schedule tensor: linear decay with optional linear warmup.

    While ``global_step < num_warmup_steps`` (and ``warmup`` is True), the rate
    ramps up linearly as ``global_step / num_warmup_steps * init_lr``; afterwards
    it decays linearly (polynomial decay with power=1.0) from ``init_lr`` to 0
    over ``num_train_steps``.

    :param global_step: scalar tensor holding the current training step
    :param num_warmup_steps: number of warmup steps (Python int)
    :param num_train_steps: total number of training steps (Python int)
    :param init_lr: peak/initial learning rate (Python float)
    :param warmup: when True, apply the linear warmup phase
    :return: scalar float32 tensor with the learning rate for this step
    """
    # Linear decay from init_lr down to 0 over the full training run.
    lr = tf.train.polynomial_decay(
        tf.constant(value=init_lr, shape=[], dtype=tf.float32),
        global_step,
        num_train_steps,
        end_learning_rate=0.0,
        power=1.0,
        cycle=False,
    )

    if warmup:
        step_int = tf.cast(global_step, tf.int32)
        warmup_int = tf.constant(num_warmup_steps, dtype=tf.int32)

        step_f = tf.cast(step_int, tf.float32)
        warmup_f = tf.cast(warmup_int, tf.float32)

        # Fraction of the warmup phase completed, scaled to the peak rate.
        warmup_lr = init_lr * (step_f / warmup_f)

        # 1.0 during warmup, 0.0 afterwards — selects between the two schedules
        # without a graph-mode conditional.
        in_warmup = tf.cast(step_int < warmup_int, tf.float32)
        lr = in_warmup * warmup_lr + (1.0 - in_warmup) * lr

    return lr

 

分类:

技术点:

相关文章:

  • 2021-08-20
  • 2022-01-11
  • 2021-03-28
  • 2021-07-18
  • 2021-11-01
  • 2021-06-29
  • 2021-09-29
  • 2021-04-12
猜你喜欢
  • 2021-12-28
  • 2021-07-07
  • 2021-04-25
  • 2021-11-01
  • 2022-12-23
  • 2022-12-23
  • 2021-07-23
相关资源
相似解决方案