|
- #!/usr/bin/env python3
- # Copyright (c) Facebook, Inc. and its affiliates.
- #
- # This source code is licensed under the MIT license found in the
- # LICENSE file in the root directory of this source tree.
- """
- Data pre-processing: build vocabularies and binarize training data.
- """
- # import torch
- from nptdms import TdmsFile
- # import matplotlib.pyplot as plt
- import numpy as np
- # import pandas as pd
-
- from scipy import signal
- import argparse
- #import glob
- import os
- import random
-
- import pandas as pd
-
-
def get_parser():
    """Build the command-line argument parser for the TDMS export script.

    Returns:
        argparse.ArgumentParser: parser with paths, file extensions, and
        scenario/repeat/sampling options.
    """
    parser = argparse.ArgumentParser()
    # Raw strings: the Windows paths contain backslash sequences (\E, \F, \D)
    # that are invalid escape sequences in a normal string literal.
    parser.add_argument(
        "--root",
        default=r"D:\Experimental_Data\For_GNN\Data_original\Four_storeys",
        metavar="DIR",
        help="root directory containing the TDMS files to index",
    )
    parser.add_argument(
        "--save_path",
        default=r"D:\Experimental_Data\For_GNN\Data_manifest\Four_storeys",
        metavar="DIR",
        help="root directory where the exported CSV/XLSX files are saved",
    )
    parser.add_argument(
        "--ext", default=".tdms", type=str, metavar="EXT",
        help="extension of the input TDMS files",
    )
    parser.add_argument(
        "--ext1", default=".xlsx", type=str, metavar="EXT",
        help="extension of the exported spreadsheet files",
    )
    parser.add_argument(
        "--scenario", default=1, type=int, metavar="N",
        help="number of scenarios",
    )
    parser.add_argument(
        "--repeat", default=20, type=int, metavar="N",
        help="number of repeats for each scenario",
    )
    parser.add_argument(
        "--sampling_fre", default=40960, type=int, metavar="N",
        help="target sampling frequency (samples per recording)",
    )
    parser.add_argument(
        "--sampling_num", default=5000, type=int, metavar="N",
        help="number of samples to keep in the exported window",
    )
    # parser.add_argument("--seed", default=42, type=int, metavar="N", help="random seed")
    return parser
-
def find_max_acc(search_path):
    """Return the largest sample value found across all channels of a TDMS file.

    The original implementation computed a per-channel max into a local
    DataFrame, overwrote it on every iteration, and always returned ``None``;
    the result was discarded. This version keeps the running maximum over
    every channel of every group and returns it (``None`` for a file with no
    data), which is backward compatible since callers ignored the old
    always-``None`` return value.

    Args:
        search_path: path to a ``.tdms`` file readable by ``TdmsFile.open``.

    Returns:
        float | None: the overall maximum sample value, or ``None`` when the
        file contains no channel data.
    """
    overall_max = None
    with TdmsFile.open(search_path) as tdms_file:
        for group in tdms_file.groups():  # groups() iterates every group in the file
            for channel in group.channels():  # channels() iterates every channel in the group
                data = np.asarray(channel[:])  # materialize channel samples as a numpy array
                if data.size == 0:
                    continue
                channel_max = float(data.max())
                if overall_max is None or channel_max > overall_max:
                    overall_max = channel_max
    return overall_max
-
-
-
-
def main(args, dest, dest1):
    """Process one (scenario, repeat) TDMS recording and export its channels.

    Reads ``<args.root>/<dest>/<dest1><args.ext>``. From the "Force" channel
    it derives a scale factor ``1 / (0.2 * max(force))`` and the index of the
    force peak, and writes the scale out as ``Force.csv``. Every other
    channel is written as a scaled ``.xlsx`` file under
    ``<args.save_path>/<dest>/<dest1>/``, resampled to ``args.sampling_fre``
    samples when its length differs, keeping a window of
    ``args.sampling_num`` samples starting near the force peak.

    Args:
        args: parsed namespace from ``get_parser()``.
        dest: scenario index (subdirectory name under ``args.root``).
        dest1: repeat index (file stem inside the scenario directory).
    """
    # assert args.valid_percent >= 0 and args.valid_percent <= 1.0
    dest11=str(dest)
    dest111=str(dest1)
    save_dir = os.path.join(args.save_path, dest11, dest111)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # dir_path = os.path.join(args.save_path, args.dest, args.dest1)
    # dir_path = os.path.realpath(args.root)
    search_path = os.path.join(args.root, dest11, dest111 + args.ext)
    # rand = random.Random(args.seed)

    with TdmsFile.open(search_path) as tdms_file:
        for group in tdms_file.groups():  # groups() iterates every group in the TDMS file
            group_name = group.name
            for channel in group.channels():  # channels() iterates every channel in this group
                channel_name = channel.name
                if channel_name=="Force":
                    channel_name = channel.name
                    channel = tdms_file[group_name][channel_name]  # re-read the channel by index
                    all_channel_data = channel[:]  # all samples of this channel as a numpy array
                    num = np.array(all_channel_data)
                    df = pd.DataFrame(num)  # wrap the samples in a DataFrame
                    data_max = df.max()
                    data_idx = df.idxmax()[0].item()  # sample index of the force peak
                    # print("data_idx:",data_idx)
                    # print("data_idx:",type(data_idx))
                    scale = 1 / (0.2 * data_max)  # normalization factor derived from peak force
                    df1 = pd.DataFrame(scale)  # single-value frame holding the scale factor
                    save_dir1 = os.path.join(save_dir, channel_name)
                    df1.to_csv(save_dir1 + ".csv", sep = ',', float_format='%.5f', header=False, index=False)
                    continue
                # NOTE(review): `scale`, `data_idx`, and `df1` below are only defined
                # if a "Force" channel was iterated BEFORE this one — TODO confirm
                # that channel ordering guarantees this.
                save_dir1 = os.path.join(save_dir, channel_name)
                channel = tdms_file[group_name][channel_name]  # re-read the channel by index
                all_channel_data = channel[:]  # all samples of this channel as a numpy array
                num = np.array(all_channel_data)
                df = pd.DataFrame(num)  # wrap the samples in a DataFrame
                sampling_size = df.shape[0]
                if sampling_size == args.sampling_fre:
                    writer = pd.ExcelWriter(save_dir1+ args.ext1)
                    # df1 = df[data_idx, data_idx + args.sampling_num]
                    # NOTE(review): this writes `df1` — the Force *scale* frame from the
                    # branch above — not this channel's data; the commented-out slice
                    # suggests a windowed `df` slice was intended. Looks like a bug — confirm.
                    df1.to_excel(writer, 'sheet_1', float_format='%.5f', header=False, index=False)
                    # writer.save()
                    writer.close()
                else:
                    # Length mismatch: resample the channel to the target frequency.
                    df = signal.resample(df, args.sampling_fre)
                    df = pd.DataFrame(df)
                    # print("data_idx:",data_idx)
                    # print("data_idx + args.sampling_num:", data_idx + args.sampling_num)
                    # Map the force-peak index into the resampled timebase.
                    index = round(data_idx / sampling_size * args.sampling_fre)
                    # NOTE(review): `index` is derived from the Force channel's original
                    # length, but `sampling_size` here is THIS channel's length — assumes
                    # all channels share one length; TODO confirm.
                    df1 = scale * df[index:index + args.sampling_num]
                    writer = pd.ExcelWriter(save_dir1 + args.ext1)
                    df1.to_excel(writer, 'sheet_1', float_format='%.5f', header=False, index=False)
                    # writer.save()
                    writer.close()
-
- if __name__ == "__main__":
- parser = get_parser()
- args = parser.parse_args()
-
- for dest in range(0,args.scenario):
- for dest1 in range(1,args.repeat+1):
- print("正在处理第{}个scenario,第{}几个repeat".format(str(dest), str(dest1)))
- main(args, dest, dest1)
- print("数据处理过程已经结束!")
|