|
- import os
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- import torch
- import torch.nn as nn
- from sklearn.preprocessing import StandardScaler, MinMaxScaler
- from torch.utils.data import Dataset, DataLoader
- from tqdm import tqdm
- from pyts.image import GramianAngularField, MarkovTransitionField
-
-
def split_ndarray_by_rows(input_array, step, sequence_length):
    """Slice ``input_array`` into (possibly overlapping) row windows.

    A window of ``sequence_length`` consecutive rows is taken starting at
    every ``step``-th row; windows that would run past the end of the array
    are dropped.

    Returns a list of ``[sequence_length, num_cols]`` sub-arrays.
    """
    num_rows = input_array.shape[0]
    return [
        input_array[start:start + sequence_length, :]
        for start in range(0, num_rows, step)
        if start + sequence_length <= num_rows
    ]
-
-
def data2classes(used_data):
    """Map the selected sensor CSV files to the list of fault-type classes.

    "TYPE0" is always included as the first class; each file in
    ``used_data`` contributes its associated fault types in order.

    Returns a ``(classes, num_classes)`` tuple.
    """
    fault_dict = {
        "data_motor.csv": ("TYPE1", "TYPE2", "TYPE3", "TYPE4",),
        "data_gearbox.csv": ("TYPE5", "TYPE6", "TYPE7", "TYPE8",
                             "TYPE9", "TYPE10", "TYPE11", "TYPE12"),
        "data_leftaxlebox.csv": ("TYPE13", "TYPE14", "TYPE15", "TYPE16",),
        "data_rightaxlebox.csv": (),
    }
    classes = ["TYPE0"]
    for name in used_data:
        classes.extend(fault_dict[name])
    return classes, len(classes)
-
-
def data2features(used_data):
    """Return the total number of sensor channels across the selected CSV files."""
    fault_dict = {
        "data_motor.csv": 9,
        "data_gearbox.csv": 6,
        "data_leftaxlebox.csv": 3,
        "data_rightaxlebox.csv": 3,
    }
    return sum(fault_dict[name] for name in used_data)
-
-
class TimeSeriesDataset(Dataset):
    """Dataset that turns multivariate time-series CSVs into image tensors.

    Each fixed-length window of the series is encoded per feature channel
    with three time-series-to-image transforms (GASF, GADF, MTF), producing
    a ``[3 * num_features, img_size, img_size]`` tensor suitable for
    vision-style classifiers.

    Expected directory layout under ``root_dir``:
        <root_dir>/<TYPEx>/<SampleN>/<data_*.csv>

    Args:
        root_dir: Root directory containing one folder per class.
        sequence_length: Number of rows per window.
        used_data: CSV file names to load per sample folder
            (defaults to ``["data_leftaxlebox.csv"]``).
        mode: "train" uses Sample2/Sample3; "valid" uses Sample1.
        img_size: Side length of each generated encoding image.
    """

    def __init__(self, root_dir, sequence_length, used_data=None, mode="train", img_size=224):
        if used_data is None:
            used_data = ["data_leftaxlebox.csv", ]
        self.used_data = used_data
        self.img_size = img_size
        self.classes, self.num_classes = data2classes(used_data)
        assert mode in ["train", "valid"]
        # Sample1 is held out for validation; Sample2/3 are used for training.
        if mode == "train":
            self.sample_list = ["Sample2", "Sample3"]
        else:
            self.sample_list = ["Sample1"]

        self.root_dir = root_dir
        self.sequence_length = sequence_length
        # enumerate avoids the O(n^2) repeated list.index lookups of the original.
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}
        self.file_list = self._get_file_list()

        data_list = []
        target_list = []
        for file_path, class_name in self.file_list:
            if class_name not in self.classes:
                continue
            data = self._read_merge_csv_files(file_path)
            # Sliding windows of `sequence_length` rows with 50% overlap.
            split_list = split_ndarray_by_rows(data, sequence_length // 2, sequence_length)
            data_list.extend(split_list)

            # One-hot label, repeated once per window extracted from this file.
            target_data = np.zeros(self.num_classes)
            target_data[self.class_to_idx[class_name]] = 1
            target_list.extend([target_data] * len(split_list))

        self.data = np.stack(data_list, axis=0)      # [n, seq_len, num_features]
        self.target = np.stack(target_list, axis=0)  # [n, num_classes]
        self.size = self.target.shape[0]

        # Shuffle data and targets with the SAME permutation so pairs stay aligned.
        indices = np.random.permutation(self.size)
        self.data = self.data[indices, :]
        self.target = self.target[indices, :]
        print(f"{mode} Dataset Size: {self.size}")

        # Time-series -> image encoders, applied per feature channel in __getitem__.
        self.gasf = GramianAngularField(image_size=self.img_size, method='summation')
        self.gadf = GramianAngularField(image_size=self.img_size, method='difference')
        self.mtf = MarkovTransitionField(image_size=self.img_size)

    def _get_file_list(self):
        """Return (sample_dir, class_name) pairs for every class/sample combination."""
        file_list = []
        for cls_name in self.classes:
            cls_dir = os.path.join(self.root_dir, cls_name)
            for sample in self.sample_list:
                file_list.append((os.path.join(cls_dir, sample), cls_name))
        return file_list

    def _read_merge_csv_files(self, folder_path):
        """Read the selected CSV files in ``folder_path`` and concatenate their
        columns into one ``[rows, total_features]`` array.

        Raises:
            FileNotFoundError: if none of ``self.used_data`` files are present
                (the original code crashed with an opaque concatenate error).
        """
        all_data = []
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.csv') and file_name in self.used_data:
                file_path = os.path.join(folder_path, file_name)
                data = pd.read_csv(file_path)
                all_data.append(data.values)
        if not all_data:
            raise FileNotFoundError(
                f"No CSV files from {self.used_data} found in {folder_path}")
        # Stack the feature columns of each file side by side.
        return np.concatenate(all_data, axis=1)

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # [seq_len, num_features] -> [num_features, seq_len]
        sequence_data = self.data[idx, :, :].transpose((1, 0))
        feats = sequence_data.shape[0]
        target_data = np.zeros(shape=(feats * 3, self.img_size, self.img_size), dtype=np.float32)
        # Channel layout: [0, feats) -> GASF, [feats, 2*feats) -> GADF,
        # [2*feats, 3*feats) -> MTF.
        # BUGFIX: the original applied GASF to the first two groups and never
        # used self.mtf, even though it was constructed in __init__.
        for i in range(feats):
            row = sequence_data[i, :].reshape((1, -1))
            target_data[i, :, :] = self.gasf.fit_transform(row)
            target_data[i + feats, :, :] = self.gadf.fit_transform(row)
            target_data[i + feats * 2, :, :] = self.mtf.fit_transform(row)

        target_data = torch.tensor(target_data, dtype=torch.float32)

        # One-hot target label as float tensor (matches the encoding built in __init__).
        target = torch.tensor(self.target[idx, :], dtype=torch.float32)

        return target_data, target
-
-
- # Example usage
- if __name__ == "__main__":
- # Define parameters
- root_dir = r"D:\1\2024phm\Preliminary stage\Data_Pre Stage\Training data"
- sequence_length = 4096
-
- # Create dataset
- dataset = TimeSeriesDataset(root_dir, sequence_length)
-
- # Create data loader
- dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
-
- # Iterate over batches
- for batch in dataloader:
- sequence_data, target = batch
- # Do something with the data, e.g., train a model
- print("Sequence data:", sequence_data.shape)
- print("Target:", target)
|