# Copyright 2022 Huawei Technologies Co., Ltd
# Copyright 2022 Aerospace Information Research Institute,
# Chinese Academy of Sciences.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
- """lr schedule"""
- import math
-
- import numpy as np
- from mindspore import Tensor
- from mindspore.ops import operations as P
- import mindspore.common.dtype as mstype
- from mindspore.nn.learning_rate_schedule import LearningRateSchedule
-
def build_lr(config):
    """Build the learning-rate schedule selected by config.lr_schedule.lr_type."""
    lr_config = config.lr_schedule
    device_num = config.device_num
    batch_size = config.train_config.batch_size
    _check_lr_config(lr_config, device_num=device_num, batch_size=batch_size, arch=config.arch)
    total_epochs = config.train_config.epoch
    warmup_epochs = lr_config.warmup_epochs
    steps_per_epoch = config.data_size
    print(f'steps_per_epoch: {steps_per_epoch}')
    total_steps = total_epochs * steps_per_epoch
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    decay_steps = total_steps - warmup_steps

    lr_type = lr_config.lr_type
    if lr_type == 'cosine_decay':
        lr = CosineDecayLR(
            lr_config.min_lr, lr_config.base_lr, decay_steps)
    elif lr_type == 'warmup':
        lr = WarmUpLR(
            lr_config.base_lr, warmup_steps, lr_config.warmup_lr)
    elif lr_type == 'warmup_cosine_decay':
        lr = WarmUpCosineDecayV1(
            lr_config.min_lr, lr_config.base_lr, warmup_steps, decay_steps, lr_config.warmup_lr)
    elif lr_type == 'warmup_cosine_decay_simmim':
        lr = WarmUpCosineDecayV2(
            lr_config.base_lr, total_steps, lr_config.min_lr,
            warmup_t=warmup_steps, warmup_lr_init=lr_config.warmup_lr)
    elif lr_type == 'warmup_multistep_decay':
        lr = WarmUpMultiStepDecay(lr_config.base_lr, warmup_steps, lr_config.warmup_lr,
                                  lr_config.factor, lr_config.multi_epochs, steps_per_epoch)
    else:
        raise ValueError(f"Unsupported lr_type: {lr_type}")
    return lr
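
# A minimal sketch of how build_lr can be wired up. The nested namespace layout
# below is an assumption inferred from the attribute accesses above
# (config.lr_schedule, config.train_config, config.data_size, config.arch),
# not a definitive config format:
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(
#         arch="simmim", device_num=8, data_size=1000,
#         train_config=SimpleNamespace(batch_size=128, epoch=100),
#         lr_schedule=SimpleNamespace(lr_type="warmup_cosine_decay",
#                                     base_lr=1e-3, min_lr=1e-5, warmup_lr=1e-6,
#                                     warmup_epochs=10),
#     )
#     schedule = build_lr(cfg)  # pass to a mindspore.nn optimizer as learning_rate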


def _check_lr_config(config, device_num=1, batch_size=128, arch="simmim"):
    """Scale the configured learning rates linearly with the global batch size."""
    if arch in ('simmim', "ringmo", "ringmo_mm"):
        config.base_lr = (config.base_lr * device_num * batch_size) / 512
        config.min_lr = (config.min_lr * device_num * batch_size) / 512
        config.warmup_lr = (config.warmup_lr * device_num * batch_size) / 512
    if arch == 'mae':
        # base_lr(5e-4) * device_num * batch_size / 256
        config.base_lr = (config.base_lr * device_num * batch_size) / 256
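
# Worked example of the linear scaling rule above: with device_num=8 and
# batch_size=128 the global batch is 1024, so for simmim/ringmo the configured
# rates are multiplied by 1024 / 512 = 2, while for mae the multiplier is
# 1024 / 256 = 4.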


class LearningRateWiseLayer(LearningRateSchedule):
    """Scale a wrapped schedule's output by a constant factor (used for layer-wise learning rates)."""
    def __init__(self, base_lr, lr_scale):
        super(LearningRateWiseLayer, self).__init__()
        self.base_lr = base_lr
        self.lr_scale = lr_scale

    def construct(self, global_step):
        lr = self.base_lr(global_step)
        return self.lr_scale * lr


class WarmUpCosineDecayV1(LearningRateSchedule):
    """Linear warmup followed by cosine decay; per-step values precomputed via lr_adjust."""
    def __init__(self, min_lr, max_lr, warmup_steps, decay_steps, start_warmup_value=0.):
        super(WarmUpCosineDecayV1, self).__init__()
        self.schedule = Tensor([lr_adjust(max_lr, min_lr, i, warmup_steps, decay_steps, start_warmup_value)
                                for i in range(warmup_steps + decay_steps)], mstype.float32)

    def construct(self, global_step):
        return self.schedule[global_step]


class MultiEpochsDecayLR(LearningRateSchedule):  # for simmim vit.
    """Step decay: divide the learning rate by `factor` at every milestone in `multi_epochs`."""
    def __init__(self, learning_rate, multi_epochs, steps_per_epoch=1, factor=10):
        super(MultiEpochsDecayLR, self).__init__()
        if not isinstance(multi_epochs, (list, tuple)):
            raise TypeError("multi_epochs must be list or tuple.")
        self.multi_epochs = Tensor(np.array(multi_epochs, dtype=np.float32) * steps_per_epoch)
        self.num = len(multi_epochs)
        self.start_learning_rate = learning_rate
        self.factor = factor
        self.pow = P.Pow()
        self.cast = P.Cast()
        self.less_equal = P.LessEqual()
        self.reduce_sum = P.ReduceSum()

    def construct(self, global_step):
        cur_step = self.cast(global_step, mstype.float32)
        # Count how many milestones have been passed and divide by factor once per milestone.
        epochs = self.cast(self.less_equal(self.multi_epochs, cur_step), mstype.float32)
        lr = self.start_learning_rate / self.pow(self.factor, self.reduce_sum(epochs, ()))
        return lr
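
# Worked example: MultiEpochsDecayLR(1e-3, multi_epochs=[700], steps_per_epoch=100, factor=10)
# returns 1e-3 for every step below 70000 and 1e-4 from step 70000 onwards.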


class WarmUpMultiStepDecay(LearningRateSchedule):
    """Linear warmup to `base_lr`, then multi-step decay handled by MultiEpochsDecayLR."""
    def __init__(self, base_lr, warmup_steps, start_warmup_value,
                 factor=10, multi_epochs=None, steps_per_epoch=1):
        super(WarmUpMultiStepDecay, self).__init__()
        if multi_epochs is None:
            multi_epochs = [700,]
        self.warmup_lr = WarmUpLR(base_lr, warmup_steps + 1, start_warmup_value)
        self.multisteps_lr = MultiEpochsDecayLR(base_lr, multi_epochs, steps_per_epoch, factor)

        self.warmup_steps = warmup_steps

    def construct(self, global_step):
        if global_step < self.warmup_steps:
            lr = self.warmup_lr(global_step)
        else:
            lr = self.multisteps_lr(global_step)

        return lr
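
# Worked example: WarmUpMultiStepDecay(1e-3, warmup_steps=10, start_warmup_value=5e-7,
# factor=10, multi_epochs=[700], steps_per_epoch=100) ramps roughly linearly from 5e-7
# towards 1e-3 over the first 10 steps, holds 1e-3 until step 70000, then drops to 1e-4.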


class WarmUpLR(LearningRateSchedule):
    """Linear warmup from `start_warmup_value` to `base_lr` over `warmup_steps` steps."""
    def __init__(self, base_lr, warmup_steps, start_warmup_value=0.):
        super(WarmUpLR, self).__init__()
        self.warmup_schedule = Tensor(
            np.linspace(start_warmup_value, base_lr, warmup_steps), mstype.float32)

    def construct(self, global_step):
        return self.warmup_schedule[global_step]


class CosineDecayLR(LearningRateSchedule):
    """Cosine decay from `max_lr` down to `min_lr` over `decay_steps` steps."""
    def __init__(self, min_lr, max_lr, decay_steps):
        super(CosineDecayLR, self).__init__()
        self.cosine_schedule = Tensor(
            [min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * i / decay_steps))
             for i in range(decay_steps)], mstype.float32)

    def construct(self, global_step):
        return self.cosine_schedule[global_step]
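
# Endpoint check for the cosine formula above: at i = 0, cos(0) = 1 gives exactly max_lr;
# as i approaches decay_steps, cos(pi) = -1 pushes the value down to min_lr.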


def lr_adjust(max_lr, min_lr, step, warmup_steps, decay_steps, start_warmup_value=0.):
    """Per-step LR: linear warmup to `max_lr`, then cosine decay towards `min_lr`."""
    if step < warmup_steps:
        # Interpolate from start_warmup_value to max_lr so the warmup line meets
        # the cosine branch exactly at max_lr when step == warmup_steps.
        lr = start_warmup_value + (max_lr - start_warmup_value) * step / warmup_steps
    else:
        lr = min_lr + (max_lr - min_lr) * 0.5 * (
            1. + math.cos(math.pi * (step - warmup_steps) / decay_steps))
    return lr
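
# Hand-off example: lr_adjust(max_lr=2.4e-3, min_lr=0., step=5, warmup_steps=5, decay_steps=5)
# falls on the cosine branch with cos(0) = 1, i.e. exactly max_lr = 2.4e-3, which is also the
# value the warmup line converges to as step approaches warmup_steps.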


class WarmUpCosineDecayV2(LearningRateSchedule):
    """Warmup plus cosine decay with optional restarts.

    The constructor arguments (t_initial, cycle_mul, cycle_decay, cycle_limit,
    warmup_t, warmup_lr_init, k_decay, ...) mirror a timm-style cosine scheduler;
    all per-step values are precomputed into a Tensor at construction time.
    """
    def __init__(self,
                 base_lr: float,
                 t_initial: int,
                 lr_min: float = 0.,
                 cycle_mul: float = 1.,
                 cycle_decay: float = 1.,
                 cycle_limit: int = 1,
                 warmup_t=0,
                 warmup_lr_init=0.,
                 warmup_prefix=False,
                 t_in_epochs=True,
                 k_decay=1.0) -> None:
        super(WarmUpCosineDecayV2, self).__init__()

        assert t_initial > 0
        assert lr_min >= 0

        self.t_initial = t_initial
        self.lr_min = lr_min
        self.cycle_mul = cycle_mul
        self.cycle_decay = cycle_decay
        self.cycle_limit = cycle_limit
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        self.warmup_prefix = warmup_prefix
        self.t_in_epochs = t_in_epochs
        self.k_decay = k_decay

        self.base_values = [base_lr,]
        if self.warmup_t:
            # Per-step increment applied during warmup.
            self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
        else:
            self.warmup_steps = [1 for _ in self.base_values]

        # Precompute the whole schedule so construct() is a simple lookup.
        self.lr_tensor = Tensor([self.get_epoch_values(i) for i in range(self.t_initial)], mstype.float32)

    def _get_lr(self, t):
        """Compute the list of learning rates (one per base value) at step t."""
        if t < self.warmup_t:
            lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps]
        else:
            if self.warmup_prefix:
                t = t - self.warmup_t

            if self.cycle_mul != 1:
                # Geometric cycles: each restart is cycle_mul times longer than the previous one.
                i1 = math.floor(math.log(1 - t / self.t_initial * (1 - self.cycle_mul), self.cycle_mul))
                t_i = self.cycle_mul ** i1 * self.t_initial
                t_curr = t - (1 - self.cycle_mul ** i1) / (1 - self.cycle_mul) * self.t_initial
            else:
                i1 = t // self.t_initial
                t_i = self.t_initial
                t_curr = t - (self.t_initial * i1)

            gamma = self.cycle_decay ** i1
            lr_max_values = [v * gamma for v in self.base_values]
            k = self.k_decay

            if i1 < self.cycle_limit:
                lrs = [
                    self.lr_min + 0.5 * (lr_max - self.lr_min) * (1 + math.cos(math.pi * t_curr ** k / t_i ** k))
                    for lr_max in lr_max_values
                ]
            else:
                lrs = [self.lr_min for _ in self.base_values]

        return lrs

    def get_epoch_values(self, epoch: int):
        if self.t_in_epochs:
            return self._get_lr(epoch)
        return None

    def get_update_values(self, num_updates: int):
        if not self.t_in_epochs:
            return self._get_lr(num_updates)
        return None

    def get_cycle_length(self, cycles=0):
        cycles = max(1, cycles or self.cycle_limit)
        if self.cycle_mul == 1.0:
            return self.t_initial * cycles
        return int(math.floor(-self.t_initial * (self.cycle_mul ** cycles - 1) / (1 - self.cycle_mul)))

    def construct(self, global_step):
        # lr_tensor has shape (t_initial, 1); return the scalar value for this step.
        return self.lr_tensor[global_step][0]


if __name__ == "__main__":
    # Quick smoke test: print the precomputed per-step values of the SimMIM-style schedule.
    # LR = WarmUpMultiStepDecay(0.001, 10, 5e-7, 10, [700, ])
    LR = WarmUpCosineDecayV2(
        0.001,
        t_initial=100,
        lr_min=2.5e-7,
        warmup_lr_init=2.5e-7,
        warmup_t=20,
        cycle_limit=1,
        t_in_epochs=True,
    )
    for i in range(100):
        print(i, LR(i))