|
- # -*- coding: utf-8 -*-
-
- from __future__ import print_function, division
-
- import sys
- # sys.path.append('/home/xujiahong/openI_benchmark/BoT_person_reID/')
-
- import yaml
- import copy
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torch.optim import lr_scheduler
- from torchvision import datasets, transforms
- import matplotlib
-
- from data_utils.model_train import ft_net
- from data_utils.label_smooth import LSR_loss
- from data_utils.triplet import TripletLoss
-
- matplotlib.use('agg')
- #import matplotlib.pyplot as plt
- #from PIL import Image
- import time
- import os
- from utils.random_erasing import RandomErasing
- from utils.model_complexity import compute_model_complexity
- import yaml
- from utils.autoaugment import ImageNetPolicy
- from utils.util import save_network, get_stream_logger
- from config.mainconfig import OUTPUT_RESULT_DIR, CONFIG_PATH
- from prepare_dir import prepare_dirs
- import scipy.io
-
- version = torch.__version__
-
-
-
def train(config_file_path: str, logger):
    """Train a re-ID classifier described by a YAML config and save checkpoints.

    Parses the YAML configuration at ``config_file_path``, builds the training
    dataloader, the ``ft_net`` model, and the optimizer/scheduler, then runs the
    training loop with label-smoothing + triplet loss and a linear lr warm-up.
    Checkpoints and loss/error curves are written to ``OUTPUT_RESULT_DIR``.

    Args:
        config_file_path: Path to the UTF-8 YAML config file.
        logger: A ``logging.Logger``-like object used for progress output.
    """
    # ----- parse the yaml config file --------------------------------------
    with open(config_file_path, encoding='utf-8') as f:
        opts = yaml.load(f, Loader=yaml.SafeLoader)

    data_dir = opts['input']['dataset']['data_dir']
    data_name = data_dir.split('/')[-1]
    logger.info("dataset name: %s" % (data_name))

    cfg = opts['input']['config']
    nclass = cfg['nclass']
    num_epochs = cfg['num_epochs']
    adam = cfg['adam']
    name = "trained_" + cfg['name']

    batchsize = cfg['batchsize']
    inputsize = cfg['inputsize']
    w = cfg['w']
    h = cfg['h']
    stride = cfg['stride']
    pool = cfg['pool']
    erasing_p = cfg['erasing_p']
    lr = cfg['lr']
    droprate = cfg['droprate']
    warm_epoch = cfg['warm_epoch']

    save_path = OUTPUT_RESULT_DIR

    # ----- training-time augmentation pipeline -----------------------------
    # interpolation=3 is PIL.Image.BICUBIC.
    if h == w:
        resize_size = (inputsize, inputsize)
        crop_size = inputsize
    else:
        resize_size = (h, w)
        crop_size = (h, w)
    transform_train_list = [
        transforms.Resize(resize_size, interpolation=3),
        transforms.Pad(15),
        transforms.RandomResizedCrop(size=crop_size, scale=(0.75, 1.0),
                                     ratio=(0.75, 1.3333), interpolation=3),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]

    if erasing_p > 0:
        # RandomErasing operates on tensors, so it must follow ToTensor/Normalize.
        transform_train_list = transform_train_list + [
            RandomErasing(probability=erasing_p, mean=[0.0, 0.0, 0.0])]

    # AutoAugment policy is applied on the PIL image, before everything else.
    transform_train_list_aug = [ImageNetPolicy()] + transform_train_list

    data_transforms = {
        'train': transforms.Compose(transform_train_list),
        'train_aug': transforms.Compose(transform_train_list_aug),
    }

    # ----- data loading ----------------------------------------------------
    image_datasets = {}
    image_datasets['train'] = datasets.ImageFolder(
        os.path.join(data_dir, 'bounding_box_train'), data_transforms['train'])
    dataloaders = {x: torch.utils.data.DataLoader(
        image_datasets[x], batch_size=batchsize,
        shuffle=True, num_workers=8, pin_memory=True)  # 8 workers may work faster
        for x in ['train']}

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train']}
    use_gpu = torch.cuda.is_available()
    logger.info('use_gpu: %s' % use_gpu)

    # ----- model and optimizer ---------------------------------------------
    # NOTE: model parameters must be created (and moved to the right device)
    # before the optimizer is built over them.
    model = ft_net(class_num=nclass, droprate=droprate, stride=stride,
                   init_model=None, pool=pool, return_f=True)
    if use_gpu:
        model = model.cuda()

    # Classifier head trains at full lr; pretrained backbone at 0.1 * lr.
    ignored_params = list(map(id, model.classifier.parameters()))
    base_params = filter(lambda p: id(p) not in ignored_params,
                         model.parameters())
    optimizer_ft = optim.SGD([
        {'params': base_params, 'lr': 0.1 * lr},
        {'params': model.classifier.parameters(), 'lr': lr},
    ], weight_decay=5e-4, momentum=0.9, nesterov=True)

    if adam:
        # Adam uses a single lr for all parameter groups.
        optimizer_ft = optim.Adam(model.parameters(), lr, weight_decay=5e-4)

    # Decay LR by a factor of 0.1 at epoch 35.
    exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer_ft, milestones=[35],
                                                gamma=0.1)

    # Label-smoothing cross entropy + batch-hard triplet loss.
    # Only move losses to GPU when one is available (was an unconditional
    # .cuda() that crashed on CPU-only machines).
    criterion = [LSR_loss(), TripletLoss(margin=0.3)]
    if use_gpu:
        criterion = [c.cuda() for c in criterion]

    # ----- model complexity report -----------------------------------------
    input_shape = (1, 3, inputsize, inputsize) if h == w else (1, 3, h, w)
    params, FLOPs = compute_model_complexity(model, input_shape, verbose=False,
                                             only_conv_linear=True)
    logger.info('number of params (M): %.2f' % (params / 1e6))
    logger.info('FLOPs (G): %.2f' % (FLOPs / 1e9))

    # ----- training loop ---------------------------------------------------
    y_loss = {'train': []}  # loss history
    y_err = {'train': []}

    since = time.time()

    warm_up = 0.1  # start from 0.1 * lr
    # Number of iterations over which warm-up ramps to 1.0 (first warm_epoch epochs).
    warm_iteration = round(dataset_sizes['train'] / batchsize) * warm_epoch

    best_model_wts = model.state_dict()
    best_loss = 9999
    best_epoch = 0

    for epoch in range(num_epochs):
        logger.info('Epoch {}/{}'.format(epoch + 1, num_epochs))
        # Only a training phase is configured; the 'val' branch is kept for
        # symmetry should a validation phase be added.
        for phase in ['train']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            idx = 0
            # Iterate over data.
            for data in dataloaders[phase]:
                idx += 1
                inputs, labels = data  # the label is the vehicle/person ID
                # NOTE: do not unpack into h/w — that would clobber the config values.
                now_batch_size = inputs.shape[0]
                if now_batch_size < batchsize:  # skip the last partial batch
                    continue

                if use_gpu:
                    # detach: these tensors never require gradients
                    inputs = inputs.cuda().detach()
                    labels = labels.cuda().detach()

                # zero the parameter gradients
                optimizer_ft.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs = model(inputs)
                else:
                    outputs = model(inputs)

                # outputs = [fc_result, feature]
                feature = outputs[1]
                fc = outputs[0]
                _, preds = torch.max(fc.data, 1)

                loss = (criterion[0](fc, labels) +
                        criterion[1](feature, labels)[0])

                # Linear lr warm-up implemented by scaling the loss.
                if epoch < warm_epoch and phase == 'train':
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                # backward + optimize only in the training phase
                if phase == 'train':
                    loss.backward()
                    optimizer_ft.step()
                    if idx % 50 == 0:
                        logger.info('Iteration:%d loss:%.4f accuracy:%.4f' % (
                            idx, loss.item(),
                            float(torch.sum(preds == labels.data)) / now_batch_size))

                # statistics; loss.item() for torch >= 0.4, loss.data[0] before
                if int(version[0]) > 0 or int(version[2]) > 3:
                    running_loss += loss.item() * now_batch_size
                else:
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))

                # free references early to reduce peak GPU memory
                del loss, outputs, inputs, preds
            # ----- end of one epoch over the dataloader --------------------

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            logger.info('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)

            # checkpoint every epoch
            save_network(model, save_path, name, epoch + 1)

            if phase == 'train':
                # step the scheduler after the optimizer (torch >= 1.1 order)
                exp_lr_scheduler.step()

        time_elapsed = time.time() - since
        logger.info('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))

        # track the best (lowest-train-loss) weights
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            best_epoch = epoch
            best_model_wts = copy.deepcopy(model.state_dict())

    logger.info('Best epoch: {:d} Best Train Loss: {:4f}'.format(
        best_epoch, best_loss))

    # load best model weights and save them as the final checkpoint
    model.load_state_dict(best_model_wts)
    save_network(model, save_path, name, 'last')

    # save loss/error history for later curve plotting
    loss_name = 'train_loss.mat'
    error_name = 'train_error.mat'
    scipy.io.savemat(os.path.join(save_path, loss_name), y_loss)
    scipy.io.savemat(os.path.join(save_path, error_name), y_err)

    # recompute here so this works even when num_epochs == 0
    time_elapsed = time.time() - since
    logger.info('total train time: %.2f minutes' % (time_elapsed / 60))
    logger.info('total train epochs: %d epochs' % num_epochs)
-
if __name__ == "__main__":
    # Script entry point: create the output directory layout, set up a
    # console logger, then train with the project-level config path.
    prepare_dirs()
    logger = get_stream_logger('TRAIN')
    train(CONFIG_PATH, logger)
-
-
-
-
-
-
-
-
-
|