|
- import os
- import sys
- import time
- import math
-
- from bisect import bisect_right
- import numpy as np
- import operator
- from functools import reduce
- import torch.nn as nn
- import torch
- import torch.nn.functional as F
- import torch.nn.init as init
-
-
- __all__ = ['cal_param_size', 'cal_multi_adds', 'AverageMeter',
- 'adjust_lr', 'DistillKL', 'correct_num']
-
def cal_param_size(model):
    """Return the total number of scalar parameters in `model`."""
    return sum(p.numel() for p in model.parameters())
-
-
count_ops = 0  # global multiply-add accumulator, reset by cal_multi_adds


def measure_layer(layer, x, multi_add=1):
    """Accumulate the multiply-add count of `layer` applied to input `x`.

    Only Conv2d and Linear layers contribute; any other layer adds 0.
    The count is added to the global `count_ops` and also returned.

    Args:
        layer: the module being measured.
        x: the input tensor about to be fed to `layer` (NCHW for Conv2d).
        multi_add: 1 to count a multiply-add as one op, 2 to count
            multiplies and adds separately.

    Returns:
        The number of ops contributed by this layer (also added to count_ops).
    """
    global count_ops
    delta_ops = 0
    # Identify the layer by its printed class name, e.g. "Conv2d(...)".
    type_name = str(layer)[:str(layer).find('(')].strip()

    if type_name in ['Conv2d']:
        # Standard conv output-size formula, including dilation (the
        # original ignored dilation; for the default dilation of 1 this
        # reduces to (size + 2p - k) // stride + 1, i.e. the same result).
        out_h = (x.size(2) + 2 * layer.padding[0]
                 - layer.dilation[0] * (layer.kernel_size[0] - 1) - 1) \
                // layer.stride[0] + 1
        out_w = (x.size(3) + 2 * layer.padding[1]
                 - layer.dilation[1] * (layer.kernel_size[1] - 1) - 1) \
                // layer.stride[1] + 1
        # in_ch * out_ch * kH * kW mults per output position, divided by
        # groups; bias is deliberately excluded (as in the original).
        delta_ops = (layer.in_channels * layer.out_channels
                     * layer.kernel_size[0] * layer.kernel_size[1]
                     * out_h * out_w // layer.groups * multi_add)

    elif type_name in ['Linear']:
        # One multiply per weight entry; bias excluded as in the original.
        delta_ops = layer.weight.numel() * multi_add

    count_ops += delta_ops
    return delta_ops
-
def is_leaf(module):
    """Return True when `module` has no child modules."""
    return next(module.children(), None) is None
-
-
def should_measure(module):
    """Only leaf modules (no children) are measured directly."""
    return is_leaf(module)
-
def cal_multi_adds(model, shape=(2, 3, 32, 32)):
    """Count the multiply-adds of one forward pass of `model`.

    Temporarily wraps every leaf module's `forward` so that each call is
    measured via `measure_layer`, runs a zero tensor of `shape` through the
    model, then restores the original forwards.

    Args:
        model: the network to measure.
        shape: NCHW shape of the dummy input.

    Returns:
        The accumulated op count (global `count_ops`).
    """
    global count_ops
    count_ops = 0
    dummy_input = torch.zeros(shape)

    def counting_forward(module):
        # Closure over `module`: measure first, then run the saved forward.
        def forward_with_count(x):
            measure_layer(module, x)
            return module.old_forward(x)
        return forward_with_count

    def attach_hooks(net):
        for sub in net.children():
            if should_measure(sub):
                # Stash the real forward on the module so it can be restored.
                sub.old_forward = sub.forward
                sub.forward = counting_forward(sub)
            else:
                attach_hooks(sub)

    def detach_hooks(net):
        for sub in net.children():
            if is_leaf(sub) and hasattr(sub, 'old_forward'):
                sub.forward = sub.old_forward
                sub.old_forward = None
            else:
                detach_hooks(sub)

    attach_hooks(model)
    model.forward(dummy_input)
    detach_hooks(model)

    return count_ops
-
def correct_num(output, target, topk=(1,)):
    """Count correct predictions at each cutoff in `topk`.

    Args:
        output: (batch, num_classes) tensor of class scores.
        target: (batch,) tensor of ground-truth class indices.
        topk: cutoffs k at which to evaluate.

    Returns:
        A list of 0-dim float tensors; entry i is the NUMBER of samples
        whose target appears among the top ``topk[i]`` scores. These are
        raw counts, not precision — divide by batch size for accuracy@k.
    """
    maxk = max(topk)

    # (batch, maxk) indices of the highest-scoring classes, best first.
    _, pred = output.topk(maxk, 1, True, True)
    # Boolean (batch, maxk): does each ranked prediction match the target?
    correct = pred.eq(target.view(-1, 1).expand_as(pred))

    # Each row contributes at most one hit within its first k columns.
    return [correct[:, :k].float().sum() for k in topk]
-
class DistillKL(nn.Module):
    """Knowledge-distillation loss (Hinton et al., "Distilling the
    Knowledge in a Neural Network"): temperature-scaled KL divergence
    between student and teacher logits."""

    def __init__(self, T):
        super(DistillKL, self).__init__()
        self.T = T  # softmax temperature

    def forward(self, y_s, y_t):
        """Return KL(teacher || student) on temperature-softened
        distributions, scaled by T^2 to keep gradient magnitudes
        comparable across temperatures."""
        log_p_student = F.log_softmax(y_s / self.T, dim=1)
        p_teacher = F.softmax(y_t / self.T, dim=1)
        kl = F.kl_div(log_p_student, p_teacher, reduction='batchmean')
        return kl * self.T * self.T
-
-
-
def adjust_lr(optimizer, epoch, args, step=0, all_iters_per_epoch=0):
    """Set the learning rate for the current point in training.

    Linear warmup for the first `args.warmup_epoch` epochs, then either a
    multistep decay (x0.1 at each milestone) or a cosine schedule,
    according to `args.lr_type`. An unrecognized lr_type yields 0.

    Args:
        optimizer: optimizer whose param groups receive the new lr.
        epoch: current epoch (0-based).
        args: namespace with warmup_epoch, init_lr, lr_type, milestones, epochs.
        step: current iteration within the epoch (warmup only).
        all_iters_per_epoch: iterations per epoch (warmup only).

    Returns:
        The learning rate that was applied.
    """
    if epoch < args.warmup_epoch:
        # Ramp linearly over the total number of warmup iterations.
        done_iters = 1 + step + epoch * all_iters_per_epoch
        total_iters = args.warmup_epoch * all_iters_per_epoch
        cur_lr = args.init_lr * float(done_iters) / total_iters
    else:
        post = epoch - args.warmup_epoch
        if args.lr_type == 'multistep':
            cur_lr = args.init_lr * 0.1 ** bisect_right(args.milestones, post)
        elif args.lr_type == 'cosine':
            cur_lr = args.init_lr * 0.5 * (1. + math.cos(math.pi * post / args.epochs))
        else:
            cur_lr = 0.

    for group in optimizer.param_groups:
        group['lr'] = cur_lr

    return cur_lr
-
-
class AverageMeter(object):
    """Tracks the most recent value and a running (weighted) average."""

    def __init__(self, name, fmt=':f'):
        self.name = name  # label used when formatting
        self.fmt = fmt    # format spec applied to val/avg in __str__
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val` observed `n` times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

    def __str__(self):
        template = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return template.format(**self.__dict__)
|