# Copyright 2025 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Dataclasses for learning rate schedule config."""
|
|
from typing import List, Optional
|
|
|
|
import dataclasses
|
|
from official.modeling.hyperparams import base_config
|
|
|
|
|
|
@dataclasses.dataclass
class ConstantLrConfig(base_config.Config):
  """Configuration for constant learning rate.

  This class is a container for the constant learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to Constant.
    learning_rate: A float. The learning rate. Defaults to 0.1.
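
  Example (a construction sketch; the value is illustrative):
  ```python
  lr_cfg = ConstantLrConfig(learning_rate=0.05)
  ```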
  """
  name: str = 'Constant'
  learning_rate: float = 0.1


@dataclasses.dataclass
class StepwiseLrConfig(base_config.Config):
  """Configuration for stepwise learning rate decay.

  This class is a container for the piecewise constant learning rate
  scheduling configs. It will configure an instance of the
  PiecewiseConstantDecay Keras learning rate schedule.

  An example (from the Keras docs): use a learning rate that's 1.0 for the
  first 100001 steps, 0.5 for the next 10000 steps, and 0.1 for any
  additional steps:

  ```python
  boundaries = [100000, 110000]
  values = [1.0, 0.5, 0.1]
  ```

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PiecewiseConstantDecay.
    boundaries: A list of ints of strictly increasing entries. Defaults to
      None.
    values: A list of floats that specifies the values for the intervals
      defined by `boundaries`. It should have one more element than
      `boundaries`. The learning rate is computed as follows:
        [0, boundaries[0]] -> values[0]
        [boundaries[0], boundaries[1]] -> values[1]
        ...
        [boundaries[n-1], boundaries[n]] -> values[n]
        [boundaries[n], end] -> values[n+1]
      Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
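
  Example (a construction sketch matching the schedule above):
  ```python
  lr_cfg = StepwiseLrConfig(
      boundaries=[100000, 110000], values=[1.0, 0.5, 0.1])
  ```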
  """
  name: str = 'PiecewiseConstantDecay'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0


@dataclasses.dataclass
class ExponentialLrConfig(base_config.Config):
  """Configuration for exponential learning rate decay.

  This class is a container for the exponential learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      ExponentialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    decay_steps: A positive integer that is used for decay computation.
      Defaults to None.
    decay_rate: A float. Defaults to None.
    staircase: A boolean, if true, the learning rate is decreased at discrete
      intervals. Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
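
  Example (a construction sketch; values are illustrative):
  ```python
  lr_cfg = ExponentialLrConfig(
      initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.96)
  ```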
  """
  name: str = 'ExponentialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  decay_rate: Optional[float] = None
  staircase: Optional[bool] = None
  offset: int = 0


@dataclasses.dataclass
class PolynomialLrConfig(base_config.Config):
  """Configuration for polynomial learning rate decay.

  This class is a container for the polynomial learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PolynomialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    decay_steps: A positive integer that is used for decay computation.
      Defaults to None.
    end_learning_rate: A float. The minimal end learning rate. Defaults to
      0.0001.
    power: A float. The power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
      Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
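
  Example (a construction sketch; values are illustrative):
  ```python
  lr_cfg = PolynomialLrConfig(
      initial_learning_rate=0.1, decay_steps=10000, power=2.0)
  ```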
  """
  name: str = 'PolynomialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  end_learning_rate: float = 0.0001
  power: float = 1.0
  cycle: bool = False
  offset: int = 0


@dataclasses.dataclass
class CosineLrConfig(base_config.Config):
  """Configuration for cosine learning rate decay.

  This class is a container for the cosine learning rate decay configs,
  tf_keras.experimental.CosineDecay.

  Attributes:
    name: The name of the learning rate schedule. Defaults to CosineDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    decay_steps: A positive integer that is used for decay computation.
      Defaults to None.
    alpha: A float. Minimum learning rate value as a fraction of
      initial_learning_rate. Defaults to 0.0.
    offset: An int. The offset applied to steps. Defaults to 0.
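
  Example (a construction sketch; values are illustrative):
  ```python
  lr_cfg = CosineLrConfig(initial_learning_rate=0.1, decay_steps=10000)
  ```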
  """
  name: str = 'CosineDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  alpha: float = 0.0
  offset: int = 0


@dataclasses.dataclass
class DirectPowerLrConfig(base_config.Config):
  """Configuration for DirectPower learning rate decay.

  This class configures a schedule that follows lr * (step)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      DirectPowerDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    power: A float. Defaults to -0.5, for sqrt decay.
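
  Example (a construction sketch for inverse-sqrt decay):
  ```python
  lr_cfg = DirectPowerLrConfig(initial_learning_rate=1.0, power=-0.5)
  ```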
  """
  name: str = 'DirectPowerDecay'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5


@dataclasses.dataclass
class PowerAndLinearDecayLrConfig(base_config.Config):
  """Configuration for power and linear learning rate decay.

  The schedule has the following behavior. Let offset_step = step - offset.
  1) offset_step < 0, the actual learning rate equals initial_learning_rate.
  2) offset_step <= total_decay_steps * (1 - linear_decay_fraction), the
     actual learning rate equals lr * offset_step^power.
  3) total_decay_steps * (1 - linear_decay_fraction) <= offset_step <
     total_decay_steps, the actual learning rate equals lr * offset_step^power
     * (total_decay_steps - offset_step) / (total_decay_steps *
     linear_decay_fraction).
  4) offset_step >= total_decay_steps, the actual learning rate equals zero.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerAndLinearDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    total_decay_steps: An int. The total number of steps for power + linear
      decay. Defaults to None.
    power: A float. The order of the polynomial. Defaults to -0.5, for sqrt
      decay.
    linear_decay_fraction: A float. In the last `linear_decay_fraction *
      total_decay_steps` steps, the learning rate will be multiplied by a
      linear decay. Defaults to 0.1.
    offset: An int. The offset applied to steps. Defaults to 0.
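
  Example (a construction sketch; values are illustrative):
  ```python
  lr_cfg = PowerAndLinearDecayLrConfig(
      initial_learning_rate=1.0, total_decay_steps=100000,
      linear_decay_fraction=0.1)
  ```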
  """
  name: str = 'PowerAndLinearDecay'
  initial_learning_rate: Optional[float] = None
  total_decay_steps: Optional[int] = None
  power: float = -0.5
  linear_decay_fraction: float = 0.1
  offset: int = 0


@dataclasses.dataclass
class PowerDecayWithOffsetLrConfig(base_config.Config):
  """Configuration for power learning rate decay with step offset.

  The learning rate equals `pre_offset_learning_rate` if `step` < `offset`.
  Otherwise, the learning rate equals lr * (step - offset)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerDecayWithOffset.
    initial_learning_rate: A float. The initial learning rate. Defaults to
      None.
    power: A float. Defaults to -0.5, for sqrt decay.
    offset: An integer. Power decay happens after `offset` steps.
    pre_offset_learning_rate: A float. The constant learning rate before
      `offset` steps.
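
  Example (a construction sketch; values are illustrative):
  ```python
  lr_cfg = PowerDecayWithOffsetLrConfig(
      initial_learning_rate=1.0, offset=10000, pre_offset_learning_rate=1e-3)
  ```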
  """
  name: str = 'PowerDecayWithOffset'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5
  offset: int = 0
  pre_offset_learning_rate: float = 1.0e6


@dataclasses.dataclass
class StepCosineLrConfig(base_config.Config):
  """Configuration for stepwise cosine learning rate decay.

  This class is a container for the piecewise cosine learning rate scheduling
  configs. It will configure an instance of the StepCosineDecayWithOffset
  Keras learning rate schedule.

  ```python
  boundaries = [100000, 110000]
  values = [1.0, 0.5]
  lr_decayed_fn = (
      lr_schedule.StepCosineDecayWithOffset(
          boundaries,
          values))
  ```
  From step 0 to 100000, it will cosine decay from 1.0 to 0.5; from step
  100000 to 110000, it will cosine decay from 0.5 to 0.0.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      StepCosineDecayWithOffset.
    boundaries: A list of ints of strictly increasing entries. Defaults to
      None.
    values: A list of floats that specifies the values for the intervals
      defined by `boundaries`. It should have one more element than
      `boundaries`. The learning rate is computed as follows:
        [0, boundaries[0]] -> cosine from values[0] to values[1]
        [boundaries[0], boundaries[1]] -> values[1] to values[2]
        ...
        [boundaries[n-1], boundaries[n]] -> values[n] to values[n+1]
        [boundaries[n], end] -> values[n+1] to 0.
    offset: An int. The offset applied to steps. Defaults to 0.
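
  Example (a construction sketch matching the schedule above):
  ```python
  lr_cfg = StepCosineLrConfig(
      boundaries=[100000, 110000], values=[1.0, 0.5])
  ```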
  """
  name: str = 'StepCosineDecayWithOffset'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0


@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
  """Configuration for the linear warmup schedule.

  This class is a container for the linear warmup schedule configs.
  `warmup_learning_rate` is the initial learning rate; the final learning rate
  of the warmup period is the learning rate of the optimizer in use. The
  learning rate at each step increases linearly according to the following
  formula:
    lr(step) = warmup_learning_rate
        + step / warmup_steps * (final_learning_rate - warmup_learning_rate)
  Using warmup overrides the underlying learning rate schedule for the first
  `warmup_steps` steps.

  Attributes:
    name: The name of the warmup schedule. Defaults to linear.
    warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
    warmup_steps: Warmup steps. Defaults to None.
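
  Example (a construction sketch; values are illustrative):
  ```python
  warmup_cfg = LinearWarmupConfig(warmup_learning_rate=0.0, warmup_steps=500)
  ```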
  """
  name: str = 'linear'
  warmup_learning_rate: float = 0
  warmup_steps: Optional[int] = None


@dataclasses.dataclass
class PolynomialWarmupConfig(base_config.Config):
  """Configuration for the polynomial warmup schedule.

  This class is a container for the polynomial warmup schedule configs.

  Attributes:
    name: The name of the warmup schedule. Defaults to polynomial.
    power: Polynomial power. Defaults to 1.
    warmup_steps: Warmup steps. Defaults to None.
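
  Example (a construction sketch; values are illustrative):
  ```python
  warmup_cfg = PolynomialWarmupConfig(power=2.0, warmup_steps=1000)
  ```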
  """
  name: str = 'polynomial'
  power: float = 1
  warmup_steps: Optional[int] = None