|
import os
import numpy as np
import random

# Silence TensorFlow C++ logging (3 = errors only). Must be set in the
# environment BEFORE `import tensorflow` for the native code to pick it up.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
from src_npu.network import GraphNet
from npu_bridge.npu_init import *

# Seed every RNG source used by the pipeline for reproducibility.
# NOTE(review): this constant duplicates the 'random_seed' flag defined below
# (default 1000); the flag value is never applied here — confirm which one is
# meant to be authoritative.
random_seed = 1000
tf.set_random_seed(random_seed)
np.random.seed(random_seed)
# NOTE(review): setting PYTHONHASHSEED at runtime does not affect hash
# randomization of the already-running interpreter — it only applies to
# subprocesses; verify this is the intent.
os.environ['PYTHONHASHSEED'] = str(random_seed)
random.seed(random_seed)
-
# Command-line flags. Flag names and defaults form the script's CLI interface
# and are kept unchanged; only misleading help strings are corrected.
flags = tf.app.flags

# Customize
flags.DEFINE_string('data_type', 'adapt', 'dataset variant: lgcn, adapt')
flags.DEFINE_integer('save_checkpoint_steps', 1000, '# of steps between checkpoint saves')
flags.DEFINE_integer('early_stop_steps', 10, 'val stat stops')
flags.DEFINE_integer('random_seed', 1000, 'random seed')
flags.DEFINE_boolean('allow_mix_precision', True, 'enable NPU mixed-precision training')
flags.DEFINE_boolean('allow_auto_tune', False, 'enable NPU auto-tune (GA mode)')
flags.DEFINE_boolean('allow_profiling', False, 'enable NPU profiling output')

# Training stop criteria.
# (was: help text copy-pasted from summary_interval / learning_rate)
flags.DEFINE_integer('epoch_num_limit', 200, 'maximum number of training epochs')
flags.DEFINE_float('train_accuracy_limit', 0.81, 'training accuracy threshold')

# Ori
flags.DEFINE_integer('max_step', 10000, '# of step for training')
flags.DEFINE_integer('summary_interval', 10, '# of step to save summary')
flags.DEFINE_float('learning_rate', 0.1, 'learning rate')
flags.DEFINE_boolean('is_train', True, 'is train')
flags.DEFINE_integer('class_num', 7, 'output class number')
# Debug
flags.DEFINE_string('logdir', './logdir', 'Log dir')
flags.DEFINE_string('modeldir', './modeldir', 'Model dir')
flags.DEFINE_string('model_name', 'model', 'Model file name')
flags.DEFINE_integer('reload_step', 0, 'Reload step to continue training')
flags.DEFINE_integer('test_step', 0, 'Test or predict model at this step')
# network architecture
flags.DEFINE_integer('ch_num', 1, 'channel number')
flags.DEFINE_integer('layer_num', 1, 'block number')
flags.DEFINE_float('adj_keep_r', 0.53, 'adjacency dropout keep rate')
flags.DEFINE_float('keep_r', 0.14, 'dropout keep rate')
flags.DEFINE_float('weight_decay', 5e-4, 'Weight for L2 loss on embedding matrix.')
flags.DEFINE_integer('k', 8, 'top k')
flags.DEFINE_string('first_conv', 'simple_conv', 'simple_conv, chan_conv')
flags.DEFINE_string('second_conv', 'simple_conv', 'graph_conv, simple_conv')
flags.DEFINE_boolean('use_batch', True, 'use batch training')
flags.DEFINE_integer('batch_size', 2500, 'batch size number')
flags.DEFINE_integer('center_num', 1500, 'start center number')
-
def main(_):
    """Build the NPU-aware session config and launch GraphNet training.

    Args:
        _: unused positional argument supplied by ``tf.app.run``.
    """
    import json  # stdlib; local import keeps this function self-contained

    FLAGS = tf.app.flags.FLAGS  # hoist the repeated flag lookups

    config = tf.ConfigProto()
    custom_op = config.graph_options.rewrite_options.custom_optimizers.add()
    custom_op.name = "NpuOptimizer"
    custom_op.parameter_map["use_off_line"].b = True  # run training on the Ascend AI processor
    custom_op.parameter_map["mix_compile_mode"].b = True
    # Disable TF graph rewrites that conflict with the NPU optimizer.
    config.graph_options.rewrite_options.remapping = RewriterConfig.OFF
    config.graph_options.rewrite_options.memory_optimization = RewriterConfig.OFF

    if FLAGS.allow_auto_tune:
        custom_op.parameter_map["auto_tune_mode"].s = tf.compat.as_bytes("GA")
    if FLAGS.allow_mix_precision:
        custom_op.parameter_map["precision_mode"].s = tf.compat.as_bytes("allow_mix_precision")
    if FLAGS.allow_profiling:
        profiling_dir = os.path.join(os.getcwd(), "npu_profiling")
        # exist_ok avoids the exists()/makedirs() race of the original.
        os.makedirs(profiling_dir, exist_ok=True)
        # json.dumps produces a valid option string even if the path needs
        # escaping, and drops the stray continuation whitespace the original
        # %-formatted literal embedded.
        options = json.dumps({
            "output": profiling_dir,
            "task_trace": "on",
            "aicpu": "on",
        })
        custom_op.parameter_map["profiling_mode"].b = True
        custom_op.parameter_map["profiling_options"].s = tf.compat.as_bytes(options)

    GraphNet(tf.Session(config=config), FLAGS).train()
-
-
# Script entry point: tf.app.run() parses the flags defined above and then
# invokes main().
if __name__ == '__main__':
    tf.app.run()
|