# Repository: LGOODSKY/mindcv_fixed
# (forked from jasonhuang/mindcv_fixed)

 
			
			   
				 
					
						
						
							
import glob
import os
import re

import pandas as pd

# Script options:
#   clear_empty_urls - when merging README tables, blank the Download/Config/Log
#                      cells that do not reference a .ckpt / .yaml(.yml) / .log file.
#   rm_columns       - columns dropped from the final merged benchmark table.
clear_empty_urls = True
rm_columns = ['Infer T.', 'Log']

# get all training recipes
recipes = sorted(glob.glob('configs/*/*.yaml'))
print('Total number of training recipes: ', len(recipes))
ar = glob.glob('configs/*/*_ascend.yaml')
print('Ascend training recipes: ', len(ar))
gr = glob.glob('configs/*/*_gpu.yaml')
print('GPU training recipes: ', len(gr))
# list the recipes that are neither Ascend nor GPU specific;
# sorted so the listing is reproducible between runs
for item in sorted(set(recipes) - set(ar) - set(gr)):
    print(item)

# The model name is the folder that directly contains the recipe
# (configs/<model>/<recipe>.yaml). os.path is used instead of splitting on '/'
# because glob returns paths with the OS-native separator.
models_with_train_rec = sorted(
    {os.path.basename(os.path.dirname(recipe)) for recipe in recipes}
)

print('\n==> Models with training recipes: ', len(models_with_train_rec))
print(models_with_train_rec)

# Collect every sub-folder of ./configs and derive the README path each one
# is expected to provide.
config_dirs = []
for entry in os.listdir('./configs'):
    if os.path.isdir('configs/' + entry):
        config_dirs.append(entry)
config_dirs.sort()

print('\nTotal number of config folders: ', len(config_dirs))
print('==> Configs w/o training rec: ', set(config_dirs)- set(models_with_train_rec))
readmes = ['configs/{}/README.md'.format(folder) for folder in config_dirs]

# Report (but do not abort on) folders that ship no README.
for readme in readmes:
    if os.path.exists(readme):
        continue
    print('Missing readme: ', readme)

# check yaml and reported performance

# merge readme reported results
print('\r\n ')
output_path = './benchmark_results.md'

kw = ['Model', 'Top', 'Download', 'Config']  # currently unused

# process table head
head = '| Model           | Context   |  Top-1 (%)  | Top-5 (%)  |  Params (M)    | Train T. | Infer T. |  Download | Config | Log |'
head_sep = "|----------------|------------|-------|-------|----------|-------|--------|---|-----|-----|"
# A README line is accepted as a table row only when it splits into as many
# '|'-separated fields as the reference head.
num_fields = len(head.split('|'))

attrs = head.replace(' ','')[1:-1].split('|')
print('table attrs: ', attrs)

result_kw = ['Results', 'Benchmark', 'Result'] #TODO: unify this name
head_detect_kw = ['Model', 'Top', 'Config']  # currently unused

# (cell index counted from the row end, substrings that mark a real link)
# for the Download, Config and Log columns; index -1 is the text after the
# closing '|'.
_LINK_CELLS = ((-4, ('.ckpt',)), (-3, ('.yaml', '.yml')), (-2, ('.log',)))


def _blank_cell(cell):
    """Return *cell* with its text removed but its padding whitespace kept."""
    text = cell.strip()
    return cell.replace(text, '') if text else cell


def _clean_table_row(line, clear_urls):
    """Normalise one table row and optionally blank placeholder link cells.

    Args:
        line: raw README line already known to have ``num_fields`` fields.
        clear_urls: if True, Download/Config/Log cells that do not contain
            the expected file extension are emptied.

    Returns:
        Tuple ``(model_name, row)`` where ``model_name`` is the stripped first
        column and ``row`` is the row text terminated by exactly one newline.
    """
    cells = line.split('|')
    if clear_urls:
        # Work on the individual cells: replacing the placeholder text in the
        # whole line would also delete identical text in other columns.
        for idx, markers in _LINK_CELLS:
            if not any(marker in cells[idx] for marker in markers):
                cells[idx] = _blank_cell(cells[idx])
    # Strip surrounding whitespace and re-add the newline so a last line
    # without '\n' cannot fuse with the next row written to the output.
    return cells[1].strip(), '|'.join(cells).strip() + '\n'


# process each model readme
parsed_models = []
parsed_model_specs  = []
with open(output_path, 'w') as fout:
    fout.write(head + '\n')
    fout.write(head_sep + '\n')

    for r in readmes:
        # state: 0 = looking for the results heading, 1 = looking for the
        # table head, 2 = reading table rows, 3 = table finished
        state = 0
        print('parsing ', r)
        results = []
        try:
            fp = open(r)
        except FileNotFoundError:
            # README paths are derived from folder names, so the file may not
            # exist; skip it instead of aborting the whole merge.
            print('Missing readme: ', r)
            continue

        with fp:
            for line in fp:
                if state == 0:
                    compact = line.strip().replace(' ', '')
                    if any(f"##{name}" in compact for name in result_kw):
                        state = 1
                # detect head
                elif state == 1:
                    if '|Model|Context' in line.replace(' ', ''):
                        if len(line.split('|')) == num_fields:
                            state = 2
                        else:
                            print('Detect head, but format is incorrect:')
                            print(line)
                # get table values
                elif state == 2:
                    if len(line.split('|')) != num_fields:
                        # first non-table line ends the table
                        state = 3
                        parsed_models.append(r.split('/')[1])
                        break
                    model_name, row = _clean_table_row(line, clear_empty_urls)
                    # skip the '---' separator row and rows without a model name
                    if '--' not in row and re.search(r'[a-zA-Z]', model_name):
                        results.append(row)
                        fout.write(row)
                        parsed_model_specs.append(model_name)

        # A table that runs to the end of the file has no terminating line;
        # it still counts as parsed when rows were collected.
        if state == 2 and results:
            state = 3
            parsed_models.append(r.split('/')[1])

        if state == 0:
            print('Fail to get Results')
        elif state == 1:
            print('Fail to get table head')
        elif state == 2:
            print('Fail to get table values')

print('Parsed models in benchmark: ', len(parsed_models))
print('Parsed model specs in benchmark: ', len(parsed_model_specs))
print('Readme using inconsistent result table format: \r\n', set(config_dirs) - set(parsed_models))

def md_to_pd(md_fp, md_has_col_name=True, save_csv=False):
    """Load a pipe-style Markdown table from a file into a DataFrame.

    Every line that contains at least one '|' is treated as a table row. The
    second such row is assumed to be the header separator (``|---|---|``) and
    is discarded.

    Args:
        md_fp: path of the Markdown file to read.
        md_has_col_name: if True, the first row supplies the column names;
            otherwise every remaining row is data with default column labels.
        save_csv: if True, also write the data rows (no header line, no index)
            to the path obtained by replacing '.md' with '.csv' in ``md_fp``.

    Returns:
        pandas.DataFrame whose cells are whitespace-stripped strings. A file
        without any table row yields an empty DataFrame.
    """
    rows = []
    with open(md_fp) as f:
        for row in f:
            if '|' not in row:
                continue

            # Trim surrounding whitespace first, then drop at most one leading
            # and one trailing '|'. Slicing at fixed offsets would corrupt rows
            # that have trailing spaces or lack a final newline.
            text = row.strip()
            if text.startswith('|'):
                text = text[1:]
            if text.endswith('|'):
                text = text[:-1]

            # Split line and ignore column whitespace
            rows.append([col.strip() for col in text.split('|')])

    # Get rid of syntactical sugar to indicate header (2nd row)
    rows = rows[:1] + rows[2:]
    print(rows)
    if md_has_col_name and rows:
        df = pd.DataFrame(data=rows[1:], columns=rows[0])
    else:
        # also covers the empty-table case, where there is no header row
        df = pd.DataFrame(rows)

    if save_csv:
        df.to_csv(md_fp.replace('.md', '.csv'), index=False, header=False)
    return df

# Reload the merged table (also dumping it as CSV), drop the columns listed in
# rm_columns, then overwrite the Markdown file with the re-rendered table.
df = md_to_pd(output_path, save_csv=True)
print(df)

df = df.drop(columns=rm_columns)

print(df)

md_doc = df.to_markdown(mode='w', index=False, tablefmt='pipe')

# The context manager guarantees the file is flushed and closed even if a
# write fails.
with open(output_path, 'w') as fout:
    fout.write(md_doc)

    # write notes
    fout.write('\n#### Notes\n')
    fout.write('- All models are trained on ImageNet-1K training set and the top-1 accuracy is reported on the validation set.\n- Context: device x pieces - G/F, G - graph mode, F - pynative mode with ms function, where D910 is Ascend 910 NPU.')