From 876fcd8906fc935589e87b168581832a98417b3b Mon Sep 17 00:00:00 2001 From: lwh Date: Mon, 25 Jul 2022 19:39:27 +0800 Subject: [PATCH 01/27] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9=E7=BD=91?= =?UTF-8?q?=E7=BB=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/birealnet/birealnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index afcd59a..1f73e99 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -63,7 +63,7 @@ class HardBinaryConv(nn.Cell): self.padding = padding self.number_of_weights = in_chn * out_chn * kernel_size * kernel_size self.shape = (out_chn, in_chn, kernel_size, kernel_size) - self.weights = Parameter(ops.UniformReal()((self.number_of_weights,1)), requires_grad=True) + self.weights = Parameter(ops.UniformReal()((self.number_of_weights,1)) * 0.001, requires_grad=True) self.conv2d = ops.Conv2D(out_channel=out_chn, kernel_size=3, stride=self.stride, pad=self.padding, pad_mode="pad") self.mean = ops.ReduceMean(keep_dims=True) -- 2.34.1 From 41b7a13733576a01a3d4acfa7941e374150cfbd4 Mon Sep 17 00:00:00 2001 From: lwh Date: Tue, 26 Jul 2022 21:41:08 +0800 Subject: [PATCH 02/27] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E2=80=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/birealnet/birealnet.py | 6 +- src/models/birealnet/birealnet1.py | 170 +++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 src/models/birealnet/birealnet1.py diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index 1f73e99..dda3714 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -18,7 +18,7 @@ class AdaptiveAvgPool2d(nn.Cell): self.mean = ops.ReduceMean(True) def construct(self, x): - x = self.mean(x, (2, 3)) + x = self.mean(x[:, :, 0:7, 0:7 ], (-2,-1)) return x @@ -30,7 +30,7 @@ def conv3x3(in_planes, out_planes, stride=1): def conv1x1(in_planes, out_planes, stride=1): """1x1 convolution""" - + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, pad_mode="valid", has_bias=False) @@ -121,7 +121,7 @@ class BiRealNet(nn.Cell): self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.avgpool = AdaptiveAvgPool2d() - self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True) + self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True, weight_init='he_uniform', bias_init='uniform') def _make_layer(self, block, planes, blocks, stride=1): downsample = None diff --git a/src/models/birealnet/birealnet1.py b/src/models/birealnet/birealnet1.py new file mode 100644 index 0000000..1f73e99 --- /dev/null +++ b/src/models/birealnet/birealnet1.py @@ -0,0 +1,170 @@ +import os +from mindspore import nn, ops, Parameter +from mindspore.common import dtype as mstype + +__all__ = ['birealnet18', 'birealnet34'] + + +if os.getenv("DEVICE_TARGET") == "Ascend" and int(os.getenv("DEVICE_NUM")) > 1: + BatchNorm2d = nn.SyncBatchNorm +else: + BatchNorm2d = nn.BatchNorm2d + + +class AdaptiveAvgPool2d(nn.Cell): + """AdaptiveAvgPool2d""" + def __init__(self): + super(AdaptiveAvgPool2d, self).__init__() + self.mean = ops.ReduceMean(True) + + def construct(self, x): + x = self.mean(x, (2, 3)) + return x + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, pad_mode="pad", has_bias=False) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, pad_mode="valid", has_bias=False) + + +class BinaryActivation(nn.Cell): + + def __init__(self): + super(BinaryActivation, self).__init__() + + def construct(self, x): + out_forward = ops.Sign()(x) + #out_e1 = (x^2 + 2*x) + #out_e2 = (-x^2 + 2*x) + out_e_total = 0 + mask1 = x < -1 + mask2 = x < 0 + mask3 = x < 1 + out1 = (-1) * ops.Cast()(mask1, mstype.float32) + (x*x + 2*x) * (1-ops.Cast()(mask1, mstype.float32)) + out2 = out1 * ops.Cast()(mask2, mstype.float32) + (-x*x + 2*x) * (1-ops.Cast()(mask2, mstype.float32)) + out3 = out2 * ops.Cast()(mask3, mstype.float32) + 1 * (1- ops.Cast()(mask3, mstype.float32)) + out = ops.stop_gradient(out_forward) - ops.stop_gradient(out3) + out3 + + return out + + +class HardBinaryConv(nn.Cell): + + def __init__(self, in_chn, out_chn, kernel_size=3, stride=1, padding=1): + super(HardBinaryConv, self).__init__() + self.stride = stride + self.padding = padding + self.number_of_weights = in_chn * out_chn * kernel_size * kernel_size + self.shape = (out_chn, in_chn, kernel_size, kernel_size) + self.weights = Parameter(ops.UniformReal()((self.number_of_weights,1)) * 0.001, requires_grad=True) + self.conv2d = ops.Conv2D(out_channel=out_chn, kernel_size=3, stride=self.stride, pad=self.padding, pad_mode="pad") + self.mean = ops.ReduceMean(keep_dims=True) + + + def construct(self, x): + real_weights = ops.Reshape()(self.weights, self.shape) + scaling_factor = self.mean(self.mean(self.mean(ops.Abs()(real_weights), 3), 2), 1) + # print(scaling_factor, flush=True) + scaling_factor = ops.stop_gradient(scaling_factor) + binary_weights_no_grad = scaling_factor * ops.Sign()(real_weights) + cliped_weights = ops.clip_by_value(real_weights, -1.0, 1.0) + binary_weights = ops.stop_gradient(binary_weights_no_grad) - ops.stop_gradient(cliped_weights) + cliped_weights + # print(binary_weights, flush=True) + y = self.conv2d(x, binary_weights) + + return y + +class BasicBlock(nn.Cell): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + + self.binary_activation = BinaryActivation() + self.binary_conv = HardBinaryConv(inplanes, planes, stride=stride) + self.bn1 = BatchNorm2d(planes) + + self.downsample = downsample + self.stride = stride + + def construct(self, x): + residual = x + + out = self.binary_activation(x) + out = self.binary_conv(out) + out = self.bn1(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + +class BiRealNet(nn.Cell): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): + super(BiRealNet, self).__init__() + self.inplanes = 64 + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, pad_mode="pad", has_bias=False) + self.bn1 = BatchNorm2d(num_features=64) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = AdaptiveAvgPool2d() + self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.SequentialCell([ + nn.AvgPool2d(kernel_size=2, stride=stride), + conv1x1(self.inplanes, planes * block.expansion), + BatchNorm2d(num_features=planes * block.expansion) + ]) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.SequentialCell(layers) + + def construct(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = ops.Reshape()(x, (x.shape[0], -1)) + x = self.fc(x) + + return x + + +def birealnet18(pretrained=False, **kwargs): + """Constructs a BiRealNet-18 model. """ + model = BiRealNet(BasicBlock, [4, 4, 4, 4], **kwargs) + return model + + +def birealnet34(pretrained=False, **kwargs): + """Constructs a BiRealNet-34 model. """ + model = BiRealNet(BasicBlock, [6, 8, 12, 6], **kwargs) + return model + -- 2.34.1 From 1d049d4d50ba7493f5cf47060c75c21c2ceff528 Mon Sep 17 00:00:00 2001 From: lwh Date: Tue, 26 Jul 2022 22:07:13 +0800 Subject: [PATCH 03/27] =?UTF-8?q?fix:=E4=BF=AE=E6=94=B9=E7=BD=91=E7=BB=9C?= =?UTF-8?q?=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/birealnet/birealnet.py | 4 +- src/models/birealnet/birealnet1.py | 170 ----------------------------- 2 files changed, 2 insertions(+), 172 deletions(-) delete mode 100644 src/models/birealnet/birealnet1.py diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index dda3714..070fb34 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -30,7 +30,7 @@ def conv3x3(in_planes, out_planes, stride=1): def conv1x1(in_planes, out_planes, stride=1): """1x1 convolution""" - + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, pad_mode="valid", has_bias=False) @@ -121,7 +121,7 @@ class BiRealNet(nn.Cell): self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.avgpool = AdaptiveAvgPool2d() - self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True, weight_init='he_uniform', bias_init='uniform') + self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True) def _make_layer(self, block, planes, blocks, stride=1): downsample = None diff --git a/src/models/birealnet/birealnet1.py b/src/models/birealnet/birealnet1.py deleted file mode 100644 index 1f73e99..0000000 --- a/src/models/birealnet/birealnet1.py +++ /dev/null @@ -1,170 +0,0 @@ -import os -from mindspore import nn, ops, Parameter -from mindspore.common import dtype as mstype - -__all__ = ['birealnet18', 'birealnet34'] - - -if os.getenv("DEVICE_TARGET") == "Ascend" and int(os.getenv("DEVICE_NUM")) > 1: - BatchNorm2d = nn.SyncBatchNorm -else: - BatchNorm2d = nn.BatchNorm2d - - -class AdaptiveAvgPool2d(nn.Cell): - """AdaptiveAvgPool2d""" - def __init__(self): - super(AdaptiveAvgPool2d, self).__init__() - self.mean = ops.ReduceMean(True) - - def construct(self, x): - x = self.mean(x, (2, 3)) - return x - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, pad_mode="pad", has_bias=False) - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - - return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, pad_mode="valid", has_bias=False) - - -class BinaryActivation(nn.Cell): - - def __init__(self): - super(BinaryActivation, self).__init__() - - def construct(self, x): - out_forward = ops.Sign()(x) - #out_e1 = (x^2 + 2*x) - #out_e2 = (-x^2 + 2*x) - out_e_total = 0 - mask1 = x < -1 - mask2 = x < 0 - mask3 = x < 1 - out1 = (-1) * ops.Cast()(mask1, mstype.float32) + (x*x + 2*x) * (1-ops.Cast()(mask1, mstype.float32)) - out2 = out1 * ops.Cast()(mask2, mstype.float32) + (-x*x + 2*x) * (1-ops.Cast()(mask2, mstype.float32)) - out3 = out2 * ops.Cast()(mask3, mstype.float32) + 1 * (1- ops.Cast()(mask3, mstype.float32)) - out = ops.stop_gradient(out_forward) - ops.stop_gradient(out3) + out3 - - return out - - -class HardBinaryConv(nn.Cell): - - def __init__(self, in_chn, out_chn, kernel_size=3, stride=1, padding=1): - super(HardBinaryConv, self).__init__() - self.stride = stride - self.padding = padding - self.number_of_weights = in_chn * out_chn * kernel_size * kernel_size - self.shape = (out_chn, in_chn, kernel_size, kernel_size) - self.weights = Parameter(ops.UniformReal()((self.number_of_weights,1)) * 0.001, requires_grad=True) - self.conv2d = ops.Conv2D(out_channel=out_chn, kernel_size=3, stride=self.stride, pad=self.padding, pad_mode="pad") - self.mean = ops.ReduceMean(keep_dims=True) - - - def construct(self, x): - real_weights = ops.Reshape()(self.weights, self.shape) - scaling_factor = self.mean(self.mean(self.mean(ops.Abs()(real_weights), 3), 2), 1) - # print(scaling_factor, flush=True) - scaling_factor = ops.stop_gradient(scaling_factor) - binary_weights_no_grad = scaling_factor * ops.Sign()(real_weights) - cliped_weights = ops.clip_by_value(real_weights, -1.0, 1.0) - binary_weights = ops.stop_gradient(binary_weights_no_grad) - ops.stop_gradient(cliped_weights) + cliped_weights - # print(binary_weights, flush=True) - y = self.conv2d(x, binary_weights) - - return y - -class BasicBlock(nn.Cell): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - - self.binary_activation = BinaryActivation() - self.binary_conv = HardBinaryConv(inplanes, planes, stride=stride) - self.bn1 = BatchNorm2d(planes) - - self.downsample = downsample - self.stride = stride - - def construct(self, x): - residual = x - - out = self.binary_activation(x) - out = self.binary_conv(out) - out = self.bn1(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - - return out - -class BiRealNet(nn.Cell): - - def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): - super(BiRealNet, self).__init__() - self.inplanes = 64 - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, pad_mode="pad", has_bias=False) - self.bn1 = BatchNorm2d(num_features=64) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = AdaptiveAvgPool2d() - self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.SequentialCell([ - nn.AvgPool2d(kernel_size=2, stride=stride), - conv1x1(self.inplanes, planes * block.expansion), - BatchNorm2d(num_features=planes * block.expansion) - ]) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.SequentialCell(layers) - - def construct(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = ops.Reshape()(x, (x.shape[0], -1)) - x = self.fc(x) - - return x - - -def birealnet18(pretrained=False, **kwargs): - """Constructs a BiRealNet-18 model. """ - model = BiRealNet(BasicBlock, [4, 4, 4, 4], **kwargs) - return model - - -def birealnet34(pretrained=False, **kwargs): - """Constructs a BiRealNet-34 model. """ - model = BiRealNet(BasicBlock, [6, 8, 12, 6], **kwargs) - return model - -- 2.34.1 From c193e8e7ed355e0a2b16ad6160bdb42d41af0f3d Mon Sep 17 00:00:00 2001 From: lwh Date: Tue, 26 Jul 2022 22:18:08 +0800 Subject: [PATCH 04/27] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9lr=E5=92=8Cepoch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/configs/birealnet34.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 310701e..174a329 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -15,7 +15,7 @@ cutmix: 0. # ===== Learning Rate Policy ======== # optimizer: adam -base_lr: 0.002 +base_lr: 0.001 warmup_lr: 0.000006 min_lr: 0. lr_scheduler: lambda_lr @@ -33,7 +33,7 @@ label_smoothing: 0.1 loss_scale: 1024 weight_decay: 0. momentum: 0.9 -batch_size: 128 +batch_size: 256 # ===== Hardware setup ===== # num_parallel_workers: 16 -- 2.34.1 From 0822b51b5c6d27a457abe0fd762bc5ef1985052a Mon Sep 17 00:00:00 2001 From: lwh Date: Fri, 29 Jul 2022 20:07:41 +0800 Subject: [PATCH 05/27] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9epochs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/configs/birealnet34.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 174a329..d707579 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -28,7 +28,7 @@ keep_bn_fp32: True beta: [ 0.9, 0.999 ] clip_global_norm_value: 5. is_dynamic_loss_scale: True -epochs: 256 +epochs: 128 label_smoothing: 0.1 loss_scale: 1024 weight_decay: 0. -- 2.34.1 From 4fc6dfdf6178292d16524366de52c89dd61b4fa6 Mon Sep 17 00:00:00 2001 From: lwh Date: Fri, 29 Jul 2022 20:10:53 +0800 Subject: [PATCH 06/27] fix --- src/configs/birealnet34.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index d707579..2302744 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -28,12 +28,12 @@ keep_bn_fp32: True beta: [ 0.9, 0.999 ] clip_global_norm_value: 5. is_dynamic_loss_scale: True -epochs: 128 +epochs: 256 label_smoothing: 0.1 loss_scale: 1024 weight_decay: 0. momentum: 0.9 -batch_size: 256 +batch_size: 128 # ===== Hardware setup ===== # num_parallel_workers: 16 -- 2.34.1 From 8cfdc4eccb7b5a4a7e35eb543263c270f475bd57 Mon Sep 17 00:00:00 2001 From: lwh Date: Sat, 30 Jul 2022 16:10:22 +0800 Subject: [PATCH 07/27] =?UTF-8?q?fix:=20=E5=9C=A8256=E4=B8=AAepoch?= =?UTF-8?q?=E5=90=8E=E5=A2=9E=E5=8A=A044=E4=B8=AAlr=3D0.000001=E7=9A=84epo?= =?UTF-8?q?ch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/optimizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index d749668..e134448 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -33,6 +33,10 @@ def get_optimizer(args, model, batch_num): optim_type = args.optimizer.lower() params = get_param_groups(model) learning_rate = get_learning_rate(args, batch_num) + learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. + learning_rate += [0.000001] * (44 * batch_num) # add 44 epochs + args.epochs += 44 + step = int(args.start_epoch * batch_num) accumulation_step = int(args.accumulation_step) learning_rate = learning_rate[step::accumulation_step] @@ -41,7 +45,7 @@ def get_optimizer(args, model, batch_num): f"=> Start step: {step}\n" f"=> Total step: {train_step}\n" f"=> Accumulation step:{accumulation_step}") - learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. + if accumulation_step > 1: learning_rate = learning_rate * accumulation_step -- 2.34.1 From 6046bb44d401bc62a4859c05f32eeee060be5877 Mon Sep 17 00:00:00 2001 From: lwh Date: Sat, 30 Jul 2022 20:30:59 +0800 Subject: [PATCH 08/27] fix --- src/tools/optimizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index e134448..678cdd0 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -14,6 +14,7 @@ # ============================================================================ """Functions of optimizer""" import os +import numpy as np from mindspore.nn.optim import AdamWeightDecay, Adam from mindspore.nn.optim.momentum import Momentum @@ -34,7 +35,7 @@ def get_optimizer(args, model, batch_num): params = get_param_groups(model) learning_rate = get_learning_rate(args, batch_num) learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. - learning_rate += [0.000001] * (44 * batch_num) # add 44 epochs + learning_rate = np.append(learning_rate, [0.000001] * (44 * batch_num)) # add 44 epochs args.epochs += 44 step = int(args.start_epoch * batch_num) -- 2.34.1 From cbe4d2535ba49e8e66612c2c700a4e175c40a30d Mon Sep 17 00:00:00 2001 From: lwh Date: Sun, 31 Jul 2022 21:50:42 +0800 Subject: [PATCH 09/27] fix --- src/configs/birealnet34.yaml | 2 +- src/tools/optimizer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 2302744..c329db0 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -28,7 +28,7 @@ keep_bn_fp32: True beta: [ 0.9, 0.999 ] clip_global_norm_value: 5. is_dynamic_loss_scale: True -epochs: 256 +epochs: 300 label_smoothing: 0.1 loss_scale: 1024 weight_decay: 0. diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index 678cdd0..0563239 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -35,8 +35,8 @@ def get_optimizer(args, model, batch_num): params = get_param_groups(model) learning_rate = get_learning_rate(args, batch_num) learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. - learning_rate = np.append(learning_rate, [0.000001] * (44 * batch_num)) # add 44 epochs - args.epochs += 44 + # learning_rate = np.append(learning_rate, [0.000001] * (44 * batch_num)) # add 44 epochs + # args.epochs += 44 step = int(args.start_epoch * batch_num) accumulation_step = int(args.accumulation_step) -- 2.34.1 From 76219081b5080cb11ee5c312234b2bf4a0ce6e02 Mon Sep 17 00:00:00 2001 From: lwh Date: Mon, 1 Aug 2022 23:39:26 +0800 Subject: [PATCH 10/27] fix --- src/configs/birealnet34.yaml | 2 +- src/tools/optimizer.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index c329db0..2302744 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -28,7 +28,7 @@ keep_bn_fp32: True beta: [ 0.9, 0.999 ] clip_global_norm_value: 5. is_dynamic_loss_scale: True -epochs: 300 +epochs: 256 label_smoothing: 0.1 loss_scale: 1024 weight_decay: 0. diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index 0563239..fc4372d 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -35,8 +35,14 @@ def get_optimizer(args, model, batch_num): params = get_param_groups(model) learning_rate = get_learning_rate(args, batch_num) learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. - # learning_rate = np.append(learning_rate, [0.000001] * (44 * batch_num)) # add 44 epochs - # args.epochs += 44 + + additional_list = [] + additional_lr = 0.00000390625 + for additional_epoch in range(44): + for additional_step in range(batch_num): + additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) + learning_rate = np.append(learning_rate, additional_list) # add 44 epochs + args.epochs += 44 step = int(args.start_epoch * batch_num) accumulation_step = int(args.accumulation_step) -- 2.34.1 From 8bec1335ca2d69066f1d5b53cb3e425559d9cb57 Mon Sep 17 00:00:00 2001 From: lwh Date: Thu, 4 Aug 2022 21:35:45 +0800 Subject: [PATCH 11/27] fix --- src/data/imagenet.py | 6 ++++-- src/tools/optimizer.py | 14 +++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/data/imagenet.py b/src/data/imagenet.py index 86f2050..fc474aa 100644 --- a/src/data/imagenet.py +++ b/src/data/imagenet.py @@ -109,8 +109,10 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True): # RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count) ] else: - mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] - std = [0.229 * 255, 0.224 * 255, 0.225 * 255] + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + # mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] + # std = [0.229 * 255, 0.224 * 255, 0.225 * 255] # test transform complete if args.crop: transform_img = [ diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index fc4372d..6eeab7a 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -36,13 +36,13 @@ def get_optimizer(args, model, batch_num): learning_rate = get_learning_rate(args, batch_num) learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. - additional_list = [] - additional_lr = 0.00000390625 - for additional_epoch in range(44): - for additional_step in range(batch_num): - additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) - learning_rate = np.append(learning_rate, additional_list) # add 44 epochs - args.epochs += 44 + # additional_list = [] + # additional_lr = 0.00000390625 + # for additional_epoch in range(44): + # for additional_step in range(batch_num): + # additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) + # learning_rate = np.append(learning_rate, additional_list) # add 44 epochs + # args.epochs += 44 step = int(args.start_epoch * batch_num) accumulation_step = int(args.accumulation_step) -- 2.34.1 From 2819ffe0ca44bbfed639b5d1766baaad36ea5fce Mon Sep 17 00:00:00 2001 From: lwh Date: Thu, 4 Aug 2022 23:30:43 +0800 Subject: [PATCH 12/27] fix --- src/configs/birealnet34.yaml | 2 +- src/data/imagenet.py | 8 +++----- src/tools/optimizer.py | 14 +++++++------- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 2302744..f10e5e4 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -17,7 +17,7 @@ cutmix: 0. optimizer: adam base_lr: 0.001 warmup_lr: 0.000006 -min_lr: 0. +min_lr: 0.0000001 lr_scheduler: lambda_lr warmup_length: 0 diff --git a/src/data/imagenet.py b/src/data/imagenet.py index fc474aa..09ac259 100644 --- a/src/data/imagenet.py +++ b/src/data/imagenet.py @@ -106,13 +106,11 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True): transform_img += [ py_vision.ToTensor(), py_vision.Normalize(mean=mean, std=std), - # RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count) + RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count) ] else: - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - # mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] - # std = [0.229 * 255, 0.224 * 255, 0.225 * 255] + mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] + std = [0.229 * 255, 0.224 * 255, 0.225 * 255] # test transform complete if args.crop: transform_img = [ diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index 6eeab7a..bfc8eb3 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -36,13 +36,13 @@ def get_optimizer(args, model, batch_num): learning_rate = get_learning_rate(args, batch_num) learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. - # additional_list = [] - # additional_lr = 0.00000390625 - # for additional_epoch in range(44): - # for additional_step in range(batch_num): - # additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) - # learning_rate = np.append(learning_rate, additional_list) # add 44 epochs - # args.epochs += 44 + additional_list = [] + additional_lr = 0.0000078125 + for additional_epoch in range(44): + for additional_step in range(batch_num): + additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) + learning_rate = np.append(learning_rate, additional_list) # add 44 epochs + args.epochs += 44 step = int(args.start_epoch * batch_num) accumulation_step = int(args.accumulation_step) -- 2.34.1 From 924b37f810678fc0dff3c895c36d791400c03d15 Mon Sep 17 00:00:00 2001 From: lwh Date: Sat, 6 Aug 2022 00:51:45 +0800 Subject: [PATCH 13/27] fix --- src/configs/birealnet34.yaml | 2 +- src/data/imagenet.py | 2 +- src/tools/cell.py | 2 +- src/tools/optimizer.py | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index f10e5e4..9d90cea 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -17,7 +17,7 @@ cutmix: 0. optimizer: adam base_lr: 0.001 warmup_lr: 0.000006 -min_lr: 0.0000001 +min_lr: 0.000001 lr_scheduler: lambda_lr warmup_length: 0 diff --git a/src/data/imagenet.py b/src/data/imagenet.py index 09ac259..4c82c9a 100644 --- a/src/data/imagenet.py +++ b/src/data/imagenet.py @@ -102,7 +102,7 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True): vision.RandomHorizontalFlip(prob=0.5), py_vision.ToPIL() ] - # transform_img += [rand_augment_transform(auto_augment, aa_params)] + transform_img += [rand_augment_transform(auto_augment, aa_params)] transform_img += [ py_vision.ToTensor(), py_vision.Normalize(mean=mean, std=std), diff --git a/src/tools/cell.py b/src/tools/cell.py index 4fa2576..9d8de80 100644 --- a/src/tools/cell.py +++ b/src/tools/cell.py @@ -33,7 +33,7 @@ def cast_amp(net): print(f"=> using amp_level {args.amp_level}\n" f"=> change {args.arch} to fp16") net.to_float(mstype.float16) - cell_types = (nn.GELU, nn.Softmax, nn.Conv2d, nn.Conv1d, nn.BatchNorm2d, nn.LayerNorm, nn.SyncBatchNorm) + cell_types = (nn.GELU, nn.Softmax, nn.BatchNorm2d, nn.LayerNorm, nn.SyncBatchNorm) # cell_types = (nn.GELU, nn.Softmax, nn.Conv2d, nn.Conv1d, nn.BatchNorm2d, nn.LayerNorm, nn.ReLU, nn.Dense) print(f"=> cast {cell_types} to fp32 back") do_keep_fp32(net, cell_types) diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index bfc8eb3..c2b68b6 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -37,10 +37,9 @@ def get_optimizer(args, model, batch_num): learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. additional_list = [] - additional_lr = 0.0000078125 for additional_epoch in range(44): for additional_step in range(batch_num): - additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) + additional_list.append(args.min_lr) learning_rate = np.append(learning_rate, additional_list) # add 44 epochs args.epochs += 44 -- 2.34.1 From c8ff18c978e52f18fe97bd1c01c214531848fdcc Mon Sep 17 00:00:00 2001 From: lwh Date: Sat, 6 Aug 2022 20:19:45 +0800 Subject: [PATCH 14/27] fix --- src/data/imagenet.py | 4 ++-- src/models/birealnet/birealnet.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data/imagenet.py b/src/data/imagenet.py index 4c82c9a..86f2050 100644 --- a/src/data/imagenet.py +++ b/src/data/imagenet.py @@ -102,11 +102,11 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True): vision.RandomHorizontalFlip(prob=0.5), py_vision.ToPIL() ] - transform_img += [rand_augment_transform(auto_augment, aa_params)] + # transform_img += [rand_augment_transform(auto_augment, aa_params)] transform_img += [ py_vision.ToTensor(), py_vision.Normalize(mean=mean, std=std), - RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count) + # RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count) ] else: mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index 070fb34..26eb35d 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -5,10 +5,10 @@ from mindspore.common import dtype as mstype __all__ = ['birealnet18', 'birealnet34'] -if os.getenv("DEVICE_TARGET") == "Ascend" and int(os.getenv("DEVICE_NUM")) > 1: - BatchNorm2d = nn.SyncBatchNorm -else: - BatchNorm2d = nn.BatchNorm2d +# if os.getenv("DEVICE_TARGET") == "Ascend" and int(os.getenv("DEVICE_NUM")) > 1: +# BatchNorm2d = nn.SyncBatchNorm +# else: +BatchNorm2d = nn.BatchNorm2d class AdaptiveAvgPool2d(nn.Cell): -- 2.34.1 From 527265e15a8882784d53980008bc8fec97694202 Mon Sep 17 00:00:00 2001 From: lwh Date: Sun, 7 Aug 2022 22:17:48 +0800 Subject: [PATCH 15/27] fix --- src/args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/args.py b/src/args.py index ee343c0..48ccb64 100644 --- a/src/args.py +++ b/src/args.py @@ -75,7 +75,7 @@ def parse_arguments(): parser.add_argument("--num_classes", default=1000, type=int) parser.add_argument("--pretrained", dest="pretrained", default=None, type=str, help="use pre-trained model") parser.add_argument("--config_file", help="Config file to use (see configs dir)", default=None, required=False) - parser.add_argument("--seed", default=0, type=int, help="seed for initializing training. ") + parser.add_argument("--seed", default=42, type=int, help="seed for initializing training. ") parser.add_argument("--save_every", default=10, type=int, help="Save every ___ epochs(default:10)") parser.add_argument("--label_smoothing", type=float, help="Label smoothing to use, default 0.0", default=0.1) parser.add_argument("--image_size", default=224, help="Image Size.", type=int) -- 2.34.1 From 27a28af881b4c8abc0aed544bedb47065f08d862 Mon Sep 17 00:00:00 2001 From: lwh Date: Tue, 9 Aug 2022 00:53:18 +0800 Subject: [PATCH 16/27] fix --- src/tools/optimizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index c2b68b6..d42f744 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -37,9 +37,11 @@ def get_optimizer(args, model, batch_num): learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. additional_list = [] + additional_lr = 0.0000078125 for additional_epoch in range(44): for additional_step in range(batch_num): - additional_list.append(args.min_lr) + # additional_list.append(args.min_lr) + additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) learning_rate = np.append(learning_rate, additional_list) # add 44 epochs args.epochs += 44 -- 2.34.1 From 72fbb5a9458b707ad5521d91cd17b3f142250607 Mon Sep 17 00:00:00 2001 From: lwh Date: Wed, 10 Aug 2022 02:59:38 +0800 Subject: [PATCH 17/27] fix --- src/models/birealnet/birealnet.py | 2 +- src/tools/optimizer.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index 26eb35d..e2e5dbe 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -121,7 +121,7 @@ class BiRealNet(nn.Cell): self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.avgpool = AdaptiveAvgPool2d() - self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True) + self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True, weight_init='he_uniform', bias_init='he_uniform') def _make_layer(self, block, planes, blocks, stride=1): downsample = None diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index d42f744..8595cca 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -37,11 +37,11 @@ def get_optimizer(args, model, batch_num): learning_rate = learning_rate * args.batch_size * int(os.getenv("DEVICE_NUM", args.device_num)) / 512. additional_list = [] - additional_lr = 0.0000078125 + # additional_lr = 0.0000078125 for additional_epoch in range(44): for additional_step in range(batch_num): - # additional_list.append(args.min_lr) - additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) + additional_list.append(args.min_lr) + # additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) learning_rate = np.append(learning_rate, additional_list) # add 44 epochs args.epochs += 44 -- 2.34.1 From 94ba185f0428fc0adb058402ef90b84948c86ba8 Mon Sep 17 00:00:00 2001 From: lwh Date: Wed, 10 Aug 2022 03:12:16 +0800 Subject: [PATCH 18/27] fix --- src/models/birealnet/birealnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index e2e5dbe..47d773c 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -121,7 +121,7 @@ class BiRealNet(nn.Cell): self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.avgpool = AdaptiveAvgPool2d() - self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True, weight_init='he_uniform', bias_init='he_uniform') + self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True, weight_init='he_uniform', bias_init='uniform') def _make_layer(self, block, planes, blocks, stride=1): downsample = None -- 2.34.1 From 8cb4c2c1cac6471b73c9396fbc13ad5848f27d68 Mon Sep 17 00:00:00 2001 From: lwh Date: Wed, 10 Aug 2022 20:55:33 +0800 Subject: [PATCH 19/27] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9Dense=E5=B1=82?= =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/models/birealnet/birealnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/birealnet/birealnet.py b/src/models/birealnet/birealnet.py index 47d773c..26eb35d 100644 --- a/src/models/birealnet/birealnet.py +++ b/src/models/birealnet/birealnet.py @@ -121,7 +121,7 @@ class BiRealNet(nn.Cell): self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.avgpool = AdaptiveAvgPool2d() - self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True, weight_init='he_uniform', bias_init='uniform') + self.fc = nn.Dense(512 * block.expansion, num_classes, has_bias=True) def _make_layer(self, block, planes, blocks, stride=1): downsample = None -- 2.34.1 From 02e29eda7dcac4b29fcafbfa656a4ab4f2218e14 Mon Sep 17 00:00:00 2001 From: lwh Date: Thu, 11 Aug 2022 23:55:44 +0800 Subject: [PATCH 20/27] fix --- src/configs/birealnet34.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 9d90cea..b77d4b9 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -17,7 +17,7 @@ cutmix: 0. optimizer: adam base_lr: 0.001 warmup_lr: 0.000006 -min_lr: 0.000001 +min_lr: 0.000006 lr_scheduler: lambda_lr warmup_length: 0 -- 2.34.1 From 159bb8ad9f7056c61339b631ba20a0745466e30b Mon Sep 17 00:00:00 2001 From: lwh Date: Sat, 13 Aug 2022 00:21:51 +0800 Subject: [PATCH 21/27] fix --- src/configs/birealnet34.yaml | 2 +- src/tools/optimizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index b77d4b9..9d90cea 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -17,7 +17,7 @@ cutmix: 0. optimizer: adam base_lr: 0.001 warmup_lr: 0.000006 -min_lr: 0.000006 +min_lr: 0.000001 lr_scheduler: lambda_lr warmup_length: 0 diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index 8595cca..c8b9b3b 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -40,7 +40,7 @@ def get_optimizer(args, model, batch_num): # additional_lr = 0.0000078125 for additional_epoch in range(44): for additional_step in range(batch_num): - additional_list.append(args.min_lr) + additional_list.append(args.min_lr * 6) # additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) learning_rate = np.append(learning_rate, additional_list) # add 44 epochs args.epochs += 44 -- 2.34.1 From f81edb0a2adfd6d0ef7a0e21723d915796de1b75 Mon Sep 17 00:00:00 2001 From: lwh Date: Sun, 14 Aug 2022 17:18:26 +0800 Subject: [PATCH 22/27] fix --- src/configs/birealnet34.yaml | 2 +- src/tools/optimizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 9d90cea..014b93a 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -31,7 +31,7 @@ is_dynamic_loss_scale: True epochs: 256 label_smoothing: 0.1 loss_scale: 1024 -weight_decay: 0. +weight_decay: 0.05 momentum: 0.9 batch_size: 128 diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index c8b9b3b..8595cca 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -40,7 +40,7 @@ def get_optimizer(args, model, batch_num): # additional_lr = 0.0000078125 for additional_epoch in range(44): for additional_step in range(batch_num): - additional_list.append(args.min_lr * 6) + additional_list.append(args.min_lr) # additional_list.append(additional_lr * (1.0 - additional_epoch / 44)) learning_rate = np.append(learning_rate, additional_list) # add 44 epochs args.epochs += 44 -- 2.34.1 From cc87f3f804f83385b892ab4fbaf00b09bb9f91d7 Mon Sep 17 00:00:00 2001 From: lwh Date: Mon, 15 Aug 2022 16:24:11 +0800 Subject: [PATCH 23/27] fix --- src/configs/birealnet34.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 014b93a..0bca5e8 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -31,12 +31,12 @@ is_dynamic_loss_scale: True epochs: 256 label_smoothing: 0.1 loss_scale: 1024 -weight_decay: 0.05 +weight_decay: 0. momentum: 0.9 batch_size: 128 # ===== Hardware setup ===== # -num_parallel_workers: 16 +num_parallel_workers: 32 device_target: Ascend # ===== Model config ===== # -- 2.34.1 From 70d0f850ea02be6f6c17d1eddde115f7c7bdcf1e Mon Sep 17 00:00:00 2001 From: lwh Date: Tue, 16 Aug 2022 11:18:20 +0800 Subject: [PATCH 24/27] fix --- src/configs/birealnet34.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 0bca5e8..feebece 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -33,7 +33,7 @@ label_smoothing: 0.1 loss_scale: 1024 weight_decay: 0. momentum: 0.9 -batch_size: 128 +batch_size: 256 # ===== Hardware setup ===== # num_parallel_workers: 32 -- 2.34.1 From e3b143f585ceec7916775365270d752fa85dffd2 Mon Sep 17 00:00:00 2001 From: lwh Date: Wed, 17 Aug 2022 16:02:03 +0800 Subject: [PATCH 25/27] fix --- src/configs/birealnet34.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index feebece..e63fb6c 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -28,9 +28,9 @@ keep_bn_fp32: True beta: [ 0.9, 0.999 ] clip_global_norm_value: 5. is_dynamic_loss_scale: True -epochs: 256 +epochs: 128 label_smoothing: 0.1 -loss_scale: 1024 +loss_scale: 512 weight_decay: 0. momentum: 0.9 batch_size: 256 -- 2.34.1 From 92cad80e356f0b12aecb0ae82968bb16353902e8 Mon Sep 17 00:00:00 2001 From: lwh Date: Wed, 17 Aug 2022 19:07:40 +0800 Subject: [PATCH 26/27] fix --- src/configs/birealnet34.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index e63fb6c..8ab8ba5 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -28,12 +28,12 @@ keep_bn_fp32: True beta: [ 0.9, 0.999 ] clip_global_norm_value: 5. is_dynamic_loss_scale: True -epochs: 128 +epochs: 256 label_smoothing: 0.1 loss_scale: 512 weight_decay: 0. momentum: 0.9 -batch_size: 256 +batch_size: 128 # ===== Hardware setup ===== # num_parallel_workers: 32 -- 2.34.1 From d1ad24c872f9cf6fba0bb2b526b46da4970547d5 Mon Sep 17 00:00:00 2001 From: lwh Date: Wed, 17 Aug 2022 20:49:33 +0800 Subject: [PATCH 27/27] fix --- src/configs/birealnet34.yaml | 2 +- src/tools/optimizer.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/configs/birealnet34.yaml b/src/configs/birealnet34.yaml index 8ab8ba5..0bca5e8 100644 --- a/src/configs/birealnet34.yaml +++ b/src/configs/birealnet34.yaml @@ -30,7 +30,7 @@ clip_global_norm_value: 5. is_dynamic_loss_scale: True epochs: 256 label_smoothing: 0.1 -loss_scale: 512 +loss_scale: 1024 weight_decay: 0. momentum: 0.9 batch_size: 128 diff --git a/src/tools/optimizer.py b/src/tools/optimizer.py index 8595cca..895013a 100644 --- a/src/tools/optimizer.py +++ b/src/tools/optimizer.py @@ -89,20 +89,20 @@ def get_param_groups(network): decay_params = [] no_decay_params = [] - for x in network.trainable_params(): - if len(x.shape) == 4 or x.name=='classifier.0.weight' or x.name == 'classifier.0.bias': - decay_params.append(x) - else: - no_decay_params.append(x) - # for x in network.trainable_params(): - # parameter_name = x.name - # if parameter_name.endswith(".weight"): - # # Dense or Conv's weight using weight decay + # if len(x.shape) == 4 or x.name=='classifier.0.weight' or x.name == 'classifier.0.bias': # decay_params.append(x) # else: - # # all bias not using weight decay - # # bn weight bias not using weight decay, be carefully for now x not include LN # no_decay_params.append(x) + for x in network.trainable_params(): + parameter_name = x.name + if parameter_name.endswith(".weight"): + # Dense or Conv's weight using weight decay + decay_params.append(x) + else: + # all bias not using weight decay + # bn weight bias not using weight decay, be carefully for now x not include LN + no_decay_params.append(x) + return [{'params': no_decay_params, 'weight_decay': 0.0}, {'params': decay_params}] -- 2.34.1