#40 master

Merged
unicorn merged 4 commits from master into unicorn-patch-1 1 year ago
  1. train_asteroid.py (+7 -8)
  2. train_clipnorm_new.py (+7 -6)
  3. train_ln_adam.py (+7 -6)

+ 7
- 8
train_asteroid.py View File

@@ -42,7 +42,7 @@ parser.add_argument('--sample_rate', default=8000, type=int,
help='Sample rate')
parser.add_argument('--segment', default=4, type=float, # 取音频的长度,2s。#数据集语音长度要相同
help='Segment length (seconds)')
parser.add_argument('--batch_size', default=3, type=int, # 需要抛弃的音频长度
parser.add_argument('--batch_size', default=2, type=int, # 需要抛弃的音频长度
help='Batch size')

# Network architecture
@@ -62,7 +62,7 @@ parser.add_argument('--norm', default='gln', type=str,
help='gln = "Global Norm", cln = "Cumulative Norm", ln = "Layer Norm"')
parser.add_argument('--dropout', default=0.0, type=float,
help='dropout')
parser.add_argument('--num_layers', default=4, type=int,
parser.add_argument('--num_layers', default=6, type=int,
help='Number of Dual-Path-Block')
parser.add_argument('--K', default=250, type=int,
help='The length of chunk')
@@ -70,7 +70,7 @@ parser.add_argument('--num_spks', default=2, type=int,
help='The number of speakers')

# optimizer
parser.add_argument('--lr', default=0.001, type=float,
parser.add_argument('--lr', default=1e-3, type=float,
help='Init learning rate')
parser.add_argument('--l2', default=1e-5, type=float,
help='weight decay (L2 penalty)')
@@ -110,8 +110,7 @@ def preprocess(args):
print("preprocess done")

def main(args):
# context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=True)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=True)

if args.run_distribute:
print("distribute")
@@ -162,8 +161,8 @@ def main(args):
tr_dataset = DatasetGenerator(args.train_dir, args.batch_size,
sample_rate=args.sample_rate, segment=args.segment)
tr_loader = ds.GeneratorDataset(tr_dataset, ["mixture", "lens", "sources"],
shuffle=False, num_shards=rank_size, shard_id=rank_id)
tr_loader = tr_loader.batch(4)
shuffle=True, num_shards=rank_size, shard_id=rank_id)
tr_loader = tr_loader.batch(2)
num_steps = tr_loader.get_dataset_size()
end_time = time.perf_counter()
print("preparing data use: {}min".format((end_time - start_time) / 60))
@@ -187,7 +186,7 @@ def main(args):
loss_cb = LossMonitor(1)
cb = [time_cb, loss_cb]

config_ck = CheckpointConfig(save_checkpoint_steps=200, keep_checkpoint_max=5)
config_ck = CheckpointConfig(save_checkpoint_steps=100, keep_checkpoint_max=5)
ckpt_cb = ModelCheckpoint(prefix='DPRNN',
directory=save_ckpt,
config=config_ck)


+ 7
- 6
train_clipnorm_new.py View File

@@ -42,7 +42,7 @@ parser.add_argument('--sample_rate', default=8000, type=int,
help='Sample rate')
parser.add_argument('--segment', default=4, type=float, # 取音频的长度,2s。#数据集语音长度要相同
help='Segment length (seconds)')
parser.add_argument('--batch_size', default=2, type=int, # 需要抛弃的音频长度
parser.add_argument('--batch_size', default=3, type=int, # 需要抛弃的音频长度
help='Batch size')

# Network architecture
@@ -62,7 +62,7 @@ parser.add_argument('--norm', default='gln', type=str,
help='gln = "Global Norm", cln = "Cumulative Norm", ln = "Layer Norm"')
parser.add_argument('--dropout', default=0.0, type=float,
help='dropout')
parser.add_argument('--num_layers', default=6, type=int,
parser.add_argument('--num_layers', default=4, type=int,
help='Number of Dual-Path-Block')
parser.add_argument('--K', default=250, type=int,
help='The length of chunk')
@@ -70,7 +70,7 @@ parser.add_argument('--num_spks', default=2, type=int,
help='The number of speakers')

# optimizer
parser.add_argument('--lr', default=1e-3, type=float,
parser.add_argument('--lr', default=0.001, type=float,
help='Init learning rate')
parser.add_argument('--l2', default=1e-5, type=float,
help='weight decay (L2 penalty)')
@@ -110,7 +110,8 @@ def preprocess(args):
print("preprocess done")

def main(args):
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=True)
# context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=True)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

if args.run_distribute:
print("distribute")
@@ -159,7 +160,7 @@ def main(args):
tr_dataset = DatasetGenerator(args.train_dir, args.batch_size,
sample_rate=args.sample_rate, segment=args.segment)
tr_loader = ds.GeneratorDataset(tr_dataset, ["mixture", "lens", "sources"],
shuffle=True, num_shards=rank_size, shard_id=rank_id)
shuffle=False, num_shards=rank_size, shard_id=rank_id)
tr_loader = tr_loader.batch(4)
num_steps = tr_loader.get_dataset_size()

@@ -184,7 +185,7 @@ def main(args):
loss_cb = LossMonitor(1)
cb = [time_cb, loss_cb]
config_ck = CheckpointConfig(save_checkpoint_steps=100, keep_checkpoint_max=5)
config_ck = CheckpointConfig(save_checkpoint_steps=200, keep_checkpoint_max=5)
ckpt_cb = ModelCheckpoint(prefix='DPRNN',
directory=save_ckpt,
config=config_ck)


+ 7
- 6
train_ln_adam.py View File

@@ -41,7 +41,7 @@ parser.add_argument('--sample_rate', default=8000, type=int,
help='Sample rate')
parser.add_argument('--segment', default=4, type=float, # 取音频的长度,2s。#数据集语音长度要相同
help='Segment length (seconds)')
parser.add_argument('--batch_size', default=3, type=int, # 需要抛弃的音频长度
parser.add_argument('--batch_size', default=2, type=int, # 需要抛弃的音频长度
help='Batch size')

# Network architecture
@@ -61,7 +61,7 @@ parser.add_argument('--norm', default='gln', type=str,
help='gln = "Global Norm", cln = "Cumulative Norm", ln = "Layer Norm"')
parser.add_argument('--dropout', default=0.0, type=float,
help='dropout')
parser.add_argument('--num_layers', default=4, type=int,
parser.add_argument('--num_layers', default=6, type=int,
help='Number of Dual-Path-Block')
parser.add_argument('--K', default=250, type=int,
help='The length of chunk')
@@ -69,7 +69,7 @@ parser.add_argument('--num_spks', default=2, type=int,
help='The number of speakers')

# optimizer
parser.add_argument('--lr', default=0.001, type=float,
parser.add_argument('--lr', default=1e-3, type=float,
help='Init learning rate')
parser.add_argument('--l2', default=1e-5, type=float,
help='weight decay (L2 penalty)')
@@ -109,7 +109,8 @@ def preprocess(args):
print("preprocess done")

def main(args):
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=True)
# context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=True)
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

if args.run_distribute:
print("distribute")
@@ -154,7 +155,7 @@ def main(args):
tr_dataset = DatasetGenerator(args.train_dir, args.batch_size,
sample_rate=args.sample_rate, segment=args.segment)
tr_loader = ds.GeneratorDataset(tr_dataset, ["mixture", "lens", "sources"],
shuffle=True, num_shards=rank_size, shard_id=rank_id)
shuffle=False, num_shards=rank_size, shard_id=rank_id)
tr_loader = tr_loader.batch(4)
num_steps = tr_loader.get_dataset_size()
# build model
@@ -172,7 +173,7 @@ def main(args):
loss_cb = LossMonitor(1)
cb = [time_cb, loss_cb]

config_ck = CheckpointConfig(save_checkpoint_steps=200, keep_checkpoint_max=5)
config_ck = CheckpointConfig(save_checkpoint_steps=100, keep_checkpoint_max=5)
ckpt_cb = ModelCheckpoint(prefix='DPRNN',
directory=save_ckpt,
config=config_ck)


Loading…
Cancel
Save