# 1y2 / dcase2020
"""
 @file   00_train.py
 @brief  Script for training

"""

########################################################################
# import default python-library
########################################################################
import os
import glob
import sys
########################################################################


########################################################################
# import additional python-library
########################################################################
import numpy
import math
import numpy as np
# import tensorflow.keras
import random

import librosa
import librosa.core
import librosa.feature
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
#import librosa.core
# from import
from tqdm import tqdm
# original lib
import common as com
import keras_model 

########################################################################


########################################################################
# load parameter.yaml
########################################################################
param = com.yaml_load()
########################################################################


########################################################################
# set seed
########################################################################
# Seed every random number generator this script can draw from, not only
# TensorFlow's: DataGenerator.on_epoch_end() shuffles with np.random, so
# without seeding NumPy the batch order differs from run to run.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
########################################################################


########################################################################
# visualizer
########################################################################
class visualizer(object):
    """
    Holds one matplotlib figure and draws/saves a loss curve on it.
    """

    def __init__(self):
        # matplotlib is imported here, at construction time, rather than at
        # module import time.
        import matplotlib.pyplot as pyplot

        self.plt = pyplot
        self.fig = pyplot.figure(figsize=(30, 10))
        pyplot.subplots_adjust(wspace=0.3, hspace=0.3)

    def loss_plot(self, loss, val_loss):
        """
        Draw the training and validation loss curves on the figure.

        loss : list [ float ]
            training loss, one value per epoch.
        val_loss : list [ float ]
            validation loss, one value per epoch.

        return   : None
        """
        axes = self.fig.add_subplot(1, 1, 1)
        axes.cla()

        # Training curve first, validation second: the legend below labels
        # the lines in the order they were plotted.
        for series in (loss, val_loss):
            axes.plot(series)

        axes.set_title("Model loss")
        axes.set_xlabel("Epoch")
        axes.set_ylabel("Loss")
        axes.legend(["Train", "Validation"], loc="upper right")

    def save_figure(self, name):
        """
        Write the current pyplot figure to disk.

        name : str
            path of the png file to write.

        return : None
        """
        self.plt.savefig(name)


########################################################################

def file_list_generator(target_dir,
                        dir_name="train",
                        ext="wav"):
    """
    List, in sorted order, the files with a given extension in a directory.

    target_dir : str
        base directory path of the dev_data or eval_data
    dir_name : str or None (default="train")
        sub-directory of target_dir to search;
        when None, target_dir itself is searched
    ext : str (default="wav")
        file extension to match (without the leading dot)

    return :
        files : list [ str ]
            sorted absolute paths of the matching files
            (empty when nothing matches; an error is logged in that case)
    """
    com.logger.info("target_dir : {}".format(target_dir))

    # Build the glob pattern "<target_dir>[/<dir_name>]/*.<ext>".
    path_parts = [target_dir]
    if dir_name is not None:
        path_parts.append(dir_name)
    path_parts.append("*.{ext}".format(ext=ext))
    pattern = os.path.abspath("/".join(path_parts))

    files = sorted(glob.glob(pattern))
    if not files:
        # logger.error(), not logger.exception(): there is no active
        # exception here, so exception() would only append a meaningless
        # "NoneType: None" traceback to the log record.
        com.logger.error("no_wav_file!!")

    com.logger.info("train_file num : {num}".format(num=len(files)))
    return files


########################################################################
# Data Loader
########################################################################
class DataGenerator(Sequence):
    """
    Keras Sequence serving fixed-length crops of a feature array as
    autoencoder batches (input and target are the same array).

    Only the first path in list_IDs is read. It is opened memory-mapped and
    indexed as a 3-D array: axis 0 selects a sample, axis 1 is the axis the
    crops are taken along, axis 2 is kept whole.
    """

    def __init__(self, list_IDs, batch_size=32, dim=(32,128), shuffle=True, step=8):
        """
        list_IDs : list [ str ]
            paths of .npy feature files; only list_IDs[0] is loaded
        batch_size : int (default=32)
            number of crops per batch
        dim : tuple (default=(32,128))
            (crop length along axis 1, size of axis 2) of one crop
        shuffle : bool (default=True)
            reshuffle the crops at construction and after every epoch
        step : int (default=8)
            hop between consecutive crop start positions

        raises : ValueError
            when the sequences in the file are shorter than dim[0]
        """
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.step = step

        # Memory-map the array so crops are read from disk on demand.
        self.data = np.load(self.list_IDs[0], mmap_mode='r')
        n_samples, n_frames = self.data.shape[0], self.data.shape[1]
        if n_frames < self.dim[0]:
            raise ValueError(
                "sequence length {} is shorter than the crop length {}".format(
                    n_frames, self.dim[0]))

        # Candidate start positions. The last one may overshoot the final
        # valid start; it is clamped when the crop is actually taken.
        starts = np.arange(n_frames - self.dim[0] + self.step, step=self.step)
        self.max = len(starts)

        # Full Cartesian product (sample, start): every sample index is
        # repeated once per start, and the start vector is tiled once per
        # sample, so position i of both vectors describes one unique crop.
        self.indexes = np.repeat(np.arange(n_samples), self.max)
        self.indexes_start = np.tile(starts, n_samples)

        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.indexes) // self.batch_size

    def __getitem__(self, index):
        """
        Build batch number `index`.

        return : tuple
            (X, X) where X has shape (batch_size, *dim, 1)
        """
        batch = slice(index * self.batch_size, (index + 1) * self.batch_size)
        X = self.__data_generation(self.indexes[batch], self.indexes_start[batch])
        X = X.reshape((self.batch_size, *self.dim, 1))
        return X, X

    def on_epoch_end(self):
        """Reshuffle the crops after each epoch when shuffling is enabled."""
        if self.shuffle:
            # One permutation applied to both vectors keeps each sample
            # index paired with its start position.
            order = np.random.permutation(len(self.indexes))
            self.indexes = self.indexes[order]
            self.indexes_start = self.indexes_start[order]

    def __data_generation(self, indexes, index_start):
        """
        Cut one crop per (sample index, start position) pair.

        return : numpy.ndarray
            array of shape (batch_size, *dim)
        """
        X = np.empty((self.batch_size, *self.dim))

        for i, (id_file, id_start) in enumerate(zip(indexes, index_start)):
            x = self.data[id_file]
            # Clamp so the crop never runs past the end of the sequence.
            start = min(id_start, x.shape[0] - self.dim[0])
            X[i] = x[start:start + self.dim[0], :]
        return X
            

########################################################################


########################################################################
# main 00_train.py
########################################################################
if __name__ == "__main__":
    # Train one autoencoder per machine-type directory returned by
    # com.select_dirs(), skipping machine types whose final model file
    # already exists.

    # check mode
    # "development": mode == True
    # "evaluation": mode == False
    # mode, target = com.command_line_chk()
    # if mode is None:
    #     sys.exit(-1)

    # make output directory
    os.makedirs(param["model_directory"], exist_ok=True)

    # initialize the visualizer (bound to its own name so the class itself
    # is not shadowed)
    loss_visualizer = visualizer()

    # mode and target are hard-coded instead of being read from the command line.
    mode = True
    # Was the undefined bare name `fan`, which raised NameError.
    # NOTE(review): presumably com.select_dirs expects the machine type as a
    # string -- confirm against common.py.
    target = "fan"

    # load base_directory list
    dirs = com.select_dirs(param=param, mode=mode, target=target)

    # Let TensorFlow grow GPU memory on demand instead of reserving it all.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # loop of the base directory (machine types)
    for idx, target_dir in enumerate(dirs):
        print("\n===========================")
        print("[{idx}/{total}] {dirname}".format(dirname=target_dir, idx=idx+1, total=len(dirs)))

        # set path
        machine_type = os.path.split(target_dir)[1]

        model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                    machine_type=machine_type)
        best_model_filepath = "{model}/bestmodel_{machine_type}_".format(model=param["model_directory"],
                                                                    machine_type=machine_type)
        history_img = "{model}/history__{machine_type}.png".format(model=param["model_directory"],
                                                                    machine_type=machine_type)
        features_file_path = "{features}/{machine_type}".format(features=param["features_directory"],
                                                                    machine_type=machine_type)
        features_dir_path = os.path.abspath(features_file_path)

        # a finished model for this machine type means there is nothing to do
        if os.path.exists(model_file_path):
            com.logger.info("model exists")
            continue

        # get npy files list (features files)
        list_files_npy_train = file_list_generator(features_dir_path, dir_name="train", ext="npy")
        list_files_npy_val = file_list_generator(features_dir_path, dir_name="val", ext="npy")

        if not list_files_npy_train or not list_files_npy_val:
            # logger.error(), not logger.exception(): no exception is being
            # handled here, so there is no traceback to attach.
            com.logger.error("no_npy_files!!")
            sys.exit(-1)

        shape0_feat = param["autoencoder"]["shape0"]
        shape1_feat = param["feature"]["n_mels"]

        # load data
        gen_train = DataGenerator(list_files_npy_train,
                                  batch_size=param["fit"]["batch_size"],
                                  dim=(shape0_feat, shape1_feat),
                                  step=param["step"])
        gen_val = DataGenerator(list_files_npy_val,
                                batch_size=param["fit"]["batch_size"],
                                dim=(shape0_feat, shape1_feat),
                                shuffle=False,
                                step=param["step"])

        # A single context-managed session; the extra InteractiveSession that
        # used to be opened here on every iteration was never closed.
        with tf.compat.v1.Session(config=config):
            # train model
            print("============== MODEL TRAINING ==============")

            # checkpoint: keep the best model by validation loss, and stop
            # once validation loss has not improved for 10 epochs
            model_checkpoint = ModelCheckpoint(best_model_filepath+"{epoch:02d}_2_27.hdf5", monitor='val_loss', verbose=1, save_best_only=True)
            early = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0.0001)

            # create model
            model = keras_model.get_model((shape0_feat, shape1_feat), param["autoencoder"]["latentDim"])
            # model.load_weights(best_model_filepath+"31.hdf5")
            model.summary()

            # train model
            model.compile(**param["fit"]["compile"])
            # Model.fit accepts a keras Sequence directly; fit_generator is deprecated.
            history = model.fit(gen_train,
                                validation_data=gen_val,
                                epochs=param["fit"]["epochs"],
                                verbose=param["fit"]["verbose"],
                                callbacks=[model_checkpoint, early])

            loss_visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
            loss_visualizer.save_figure(history_img)
            model.save(model_file_path)
            com.logger.info("save_model -> {}".format(model_file_path))
            print("============== END TRAINING ==============")