|
- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- The file structure is as follows (the tree shows the MRSpineSeg layout as an example; the datasets prepared here use the directory names given in dataset_addr):
- MRSpineSeg
- |--MRI_train.zip
- |--MRI_spine_seg_raw
- │   └── MRI_train
- │       └── train
- │           ├── Mask
- │           └── MR
- ├── MRI_spine_seg_phase0
- │   ├── images
- │   ├── labels
- │   │   ├── Case129.npy
- │   │   ├── ...
- │   ├── train_list.txt
- │   └── val_list.txt
- └── MRI_train.zip
-
- support:
- 1. download and uncompress the file.
- 2. save the normalized data as the above format.
- 3. split the training data and save the split result in train_list.txt and val_list.txt (all the data is used for training, since this is the training split)
-
- """
- import os
- import sys
- import zipfile
- import functools
- import numpy as np
-
- sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".."))
-
- from prepare import Prep
- from preprocess_utils import resample, normalize, label_remap
- from medicalseg.utils import wrapped_partial
-
# Download link table, keyed by dataset name and then by archive name.
# Both entries are currently empty strings.
urls = dict(
    Promise12=dict(Promise12=""),
    # https://drive.google.com/file/d/1TtrjnlnJ1yqr5m4LUGMelKTQXtvZaru-/view?usp=sharing
    Prostate_mri=dict(Prostate_mri=""),
)

# Keyword arguments for ``Prep_prostate(**dataset_addr[name])``.
# Images and labels live in the same directories for both datasets.
# NOTE(review): ``filter_key`` presumably separates image files from
# "segmentation" label files inside those directories - confirm against Prep.
dataset_addr = dict(
    Promise12=dict(
        dataset_root="data/Promise12",
        raw_dataset_dir="Promise12_raw",
        images_dir=(
            "prostate/TrainingData_Part1",
            "prostate/TrainingData_Part2",
            "prostate/TrainingData_Part3",
        ),
        labels_dir=(
            "prostate/TrainingData_Part1",
            "prostate/TrainingData_Part2",
            "prostate/TrainingData_Part3",
        ),
        images_dir_test="prostate/TestData",
        phase_dir="Promise12_phase0/",
        urls=urls["Promise12"],
        valid_suffix=("mhd", "mhd"),
        filter_key=(dict(segmentation=False), dict(segmentation=True)),
        uncompress_params=dict([("format", "zip"), ("num_files", 1)]),
    ),
    Prostate_mri=dict(
        dataset_root="data/Prostate_mri",
        raw_dataset_dir="Prostate_mri_raw",
        images_dir=(
            "Processed_data_nii/BIDMC",
            "Processed_data_nii/BMC",
            "Processed_data_nii/HK",
            "Processed_data_nii/I2CVB",
            "Processed_data_nii/RUNMC",
            "Processed_data_nii/UCL",
        ),
        labels_dir=(
            "Processed_data_nii/BIDMC",
            "Processed_data_nii/BMC",
            "Processed_data_nii/HK",
            "Processed_data_nii/I2CVB",
            "Processed_data_nii/RUNMC",
            "Processed_data_nii/UCL",
        ),
        phase_dir="Prostate_mri_phase0/",
        urls=urls["Prostate_mri"],
        valid_suffix=("nii.gz", "nii.gz"),
        filter_key=(dict(segmentation=False), dict(segmentation=True)),
        uncompress_params=dict([("format", "zip"), ("num_files", 1)]),
    ),
)

# Keyword arguments for ``prep.generate_dataset_json(**dataset_profile[name])``.
dataset_profile = dict(
    Promise12=dict(
        modalities=('MRI-T2', ),
        labels={0: "Background", 1: "prostate"},
        dataset_name="Promise12",
        dataset_description="These cases include a transversal T2-weighted MR image of the prostate. The training set is a representative set of the types of MR images acquired in a clinical setting. The data is multi-center and multi-vendor and has different acquistion protocols (e.g. differences in slice thickness, with/without endorectal coil). The set is selected such that there is a spread in prostate sizes and appearance. For each of the cases in the training set, a reference segmentation is also included.",
        license_desc="",
        dataset_reference="https://promise12.grand-challenge.org/Details/",
    ),
    Prostate_mri=dict(
        modalities=('MRI-T2', ),
        labels={0: "Background", 1: "prostate"},
        dataset_name="Prostate_mri",
        dataset_description="This is a well-organized multi-site dataset for prostate MRI segmentation, which contains prostate T2-weighted MRI data (with segmentation mask) collected from six different data sources out of three public datasets. ",
        license_desc="",
        dataset_reference="https://liuquande.github.io/SAML/",
    ),
)
-
-
class Prep_prostate(Prep):
    """Dataset preparation for prostate MRI segmentation data.

    Forwards the dataset locations to ``Prep`` and registers the
    preprocessing callables for images, labels and test images.
    """

    def __init__(self,
                 dataset_root="data/TemDataSet",
                 raw_dataset_dir="TemDataSet_seg_raw/",
                 images_dir="train_imgs",
                 labels_dir="train_labels",
                 phase_dir="phase0",
                 urls=None,
                 valid_suffix=("nii.gz", "nii.gz"),
                 filter_key=(None, None),
                 uncompress_params=None,
                 images_dir_test=""):
        """Configure dataset paths and the preprocessing steps.

        Every argument is passed positionally to ``Prep.__init__``; the
        precise meaning of each one is defined by that base class, which is
        not part of this file.

        Args:
            dataset_root: Forwarded to ``Prep``.
            raw_dataset_dir: Forwarded to ``Prep``.
            images_dir: Forwarded to ``Prep``.
            labels_dir: Forwarded to ``Prep``.
            phase_dir: Forwarded to ``Prep``.
            urls: Forwarded to ``Prep``.
            valid_suffix: Forwarded to ``Prep``.
            filter_key: Forwarded to ``Prep``.
            uncompress_params: Forwarded to ``Prep``. ``None`` (the default)
                stands for ``{"format": "zip", "num_files": 1}``.
            images_dir_test: Forwarded to ``Prep``.
        """
        # Build the default dict per call so that instances never share (and
        # cannot accidentally mutate) one module-level default object.
        if uncompress_params is None:
            uncompress_params = {"format": "zip", "num_files": 1}

        super().__init__(dataset_root, raw_dataset_dir, images_dir, labels_dir,
                         phase_dir, urls, valid_suffix, filter_key,
                         uncompress_params, images_dir_test)

        # todo: make params set automatically
        # NOTE(review): ``order`` is presumably the interpolation order used
        # by ``resample`` (1 for images, 0 for labels) - confirm against
        # preprocess_utils.resample.
        self.preprocess = {
            "images": [
                normalize,
                wrapped_partial(
                    resample, new_shape=[512, 512, 24], order=1),
            ],
            "labels": [
                wrapped_partial(
                    resample, new_shape=[512, 512, 24], order=0),
            ],
            "images_test": [normalize, ],
        }

    def generate_txt(self, split=1.0):
        """Generate train_list.txt, val_list.txt and, if test images exist, test_list.txt.

        The files are written into ``self.phase_path`` through
        ``self.split_files_txt``. The label file name for each image is the
        image file name with ".npy" replaced by "_segmentation.npy".

        Args:
            split: Passed unchanged to ``self.split_files_txt`` for both the
                train and the val list.
        """
        train_txt = os.path.join(self.phase_path, 'train_list.txt')
        val_txt = os.path.join(self.phase_path, 'val_list.txt')

        # Only datasets that ship test images get a test list. Writing it
        # unconditionally fails for datasets without a test directory.
        has_test = bool(self.image_files_test)
        test_file_npy = os.listdir(self.image_path_test) if has_test else []

        image_files_npy = os.listdir(self.image_path)
        # Derive labels from the image names so both lists share one order.
        label_files_npy = [
            name.replace(".npy", "_segmentation.npy")
            for name in image_files_npy
        ]

        self.split_files_txt(
            train_txt, image_files_npy, label_files_npy, split=split)
        self.split_files_txt(
            val_txt, image_files_npy, label_files_npy, split=split)

        if has_test:
            test_txt = os.path.join(self.phase_path, 'test_list.txt')
            self.split_files_txt(test_txt, test_file_npy)
-
-
if __name__ == "__main__":
    # TODO: Prostate_mri has files with the same name in different
    # directories, which causes a file overlap problem.
    # TODO: MSD_prostate is not supported yet, because it has four channels
    # and resample will have a bug.
    target = "Promise12"

    # Build the pipeline from the address table, then run the steps in order:
    # dataset json -> load/save the data -> file lists.
    prep = Prep_prostate(**dataset_addr[target])
    prep.generate_dataset_json(**dataset_profile[target])
    prep.load_save()
    prep.generate_txt()
|