|
- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """
- Example folder structure (other tasks are laid out the same way):
- Task04_Hippocampus
- ├── Task04_Hippocampus_phase0
- │ ├── images # images after preprocessing
- │ │ ├── hippocampus_001.npy
- │ │ ├── ...
- │ │ └── hippocampus_394.npy
- │ ├── labels # labels after preprocessing
- │ │ ├── hippocampus_001.npy
- │ │ ├── ...
- │ │ └── hippocampus_394.npy
- │ ├── train_list.txt
- │ └── val_list.txt
- ├── Task04_Hippocampus_raw
- │ ├── dataset.json
- │ └── Task04_Hippocampus
- │ └── Task04_Hippocampus
- │ ├── dataset.json
- │ ├── imagesTr # training images
- │ │ ├── hippocampus_001.nii.gz
- │ │ ├── ...
- │ │ └── hippocampus_394.nii.gz
- │ ├── imagesTs # testing images
- │ │ ├── hippocampus_002.nii.gz
- │ │ ├── ...
- │ │ └── hippocampus_392.nii.gz
- │ └── labelsTr # training labels
- │ ├── hippocampus_001.nii.gz
- │ ├── ...
- │ └── hippocampus_394.nii.gz
- └── Task04_Hippocampus.tar # downloaded tar archive
-
- This script supports:
- 1. Downloading and uncompressing the task archive.
- 2. Preprocessing the scans and labels, then saving them as .npy files.
- 3. Updating dataset.json.
- 4. Splitting the training data and saving the result in train_list.txt and val_list.txt.
- """
-
- import os
- import os.path as osp
- import sys
- import zipfile
- import functools
- import numpy as np
-
- sys.path.append(osp.join(osp.dirname(osp.realpath(__file__)), ".."))
-
- from prepare import Prep
- from preprocess_utils import HUNorm, resample, parse_msd_basic_info
- from medicalseg.utils import wrapped_partial
-
# Download table for the MSD tasks, keyed by integer task id.
# Each value is a single-entry dict {archive file name: download URL}.
# Prep_msd derives the task name from the archive file name (the part before
# the first ".") and passes the inner dict to the base class as `urls`.
# The trailing "# 3d" / "# 4d" notes presumably record the dimensionality of
# the task's image volumes -- TODO confirm against the dataset description.
tasks = {
    1: {
        "Task01_BrainTumour.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/975fea1d4c8549b883b2b4bb7e6a82de84392a6edd054948b46ced0f117fd701?responseContentDisposition=attachment%3B%20filename%3DTask01_BrainTumour.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A50%3A30Z%2F-1%2F%2F283ea6f8700c129903e3278ea38a54eac2cf087e7f65197268739371898aa1b3"
    },  # 4d
    2: {
        "Task02_Heart.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/44a1e00baf55489db5d95d79f2e56e7230b6f87687604ab0889e0deb45ba289e?responseContentDisposition=attachment%3B%20filename%3DTask02_Heart.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A30%3A22Z%2F-1%2F%2F3c23a084e9bbbc57d8d6435eb014b7fb8c4160395a425bc94da5b55a08fc14de"
    },  # 3d
    3: {
        "Task03_Liver.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/e641b1b7f364472c885147b6c500842f559ee6ae03494b78b5d140d53db35907?responseContentDisposition=attachment%3B%20filename%3DTask03_Liver.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A49%3A33Z%2F-1%2F%2F83b1b4e70026a2a568dcfbbf60fb06f0ae27a847e7ebe5ba7b2efe60fc6b16a5"
    },  # 3d
    4: {
        "Task04_Hippocampus.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/1bf93142b1284f69a2a2a4e84248a0fe2bdb76c3b4ba4ddf82754e23d8820dfe?responseContentDisposition=attachment%3B%20filename%3DTask04_Hippocampus.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-02-14T17%3A09%3A53Z%2F-1%2F%2Fc53aa0df7f8810277261a00458d0af93df886c354c27498607bb8e2fb64a3d90"
    },  # 3d
    5: {
        "Task05_Prostate.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/aca74eceef674a74bff647998413ebf25a33ad44e04643d7b796e05eecbc9891?responseContentDisposition=attachment%3B%20filename%3DTask05_Prostate.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A28%3A58Z%2F-1%2F%2F610d78c178a2f5eeb5d8f6c7ec48ef52f7d6899b5ed8484f213ff1e03d266bd8"
    },  # 4d
    6: {
        "Task06_Lung.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/c42c621dc5c0490baaec935e1efd899478615f02add040649764c80c5f46805a?responseContentDisposition=attachment%3B%20filename%3DTask06_Lung.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A59%3A27Z%2F-1%2F%2Fd4a6b5b382136af96395a8acc6d18d4e88ac744314c517f19f3a71417be3d12c"
    },  # 3d
    7: {
        "Task07_Pancreas.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/d94f22313d764d808b15b240da0335a9cf0ca0e806ce418f9213f9db9e56a5a8?responseContentDisposition=attachment%3B%20filename%3DTask07_Pancreas.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A34%3A45Z%2F-1%2F%2F3a17fb265c8fcdac91de8f15e7e2352a31783bbb121755ad27c28685ce047afa"
    },  # 3d
    8: {
        "Task08_HepaticVessel.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/51ff9421bfa648449f12e65a68862215c6b5b85f91de49aab1c16626c62c3af6?responseContentDisposition=attachment%3B%20filename%3DTask08_HepaticVessel.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A35%3A23Z%2F-1%2F%2Fa664645e0b0c99e351f31352701dbe163de3fbe6e96eac11539629b5e6658360"
    },  # 3d
    9: {
        "Task09_Spleen.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/c02462f396f14b13a50d2c9ff01f86fc471c7bff8df24994af7bd8b2298dc843?responseContentDisposition=attachment%3B%20filename%3DTask09_Spleen.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A45%3A46Z%2F-1%2F%2Faf6f10f658fbe9569eb423fc1b7bd464aead582ef89cd7c135dcae002bc3cb09"
    },  # 3d
    10: {
        "Task10_Colon.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/062aa5a52cc44597a87f56c5ef1371c7acb52f73a2c946be9fea347dedec5058?responseContentDisposition=attachment%3B%20filename%3DTask10_Colon.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A42%3A03Z%2F-1%2F%2F106546582e748224f0833e100fc74d1bf3ff7fe4f4370d43bb487b10c3f5deae"
    },  # 3d
}
-
-
class Prep_msd(Prep):
    """Dataset preparation for one Medical Segmentation Decathlon (MSD) task.

    Configures the ``Prep`` base class with the directory layout and download
    URL of the selected task, and registers the per-volume preprocessing
    applied to images and labels.

    Args:
        task_id: Key into the module-level ``tasks`` table.

    Raises:
        KeyError: If ``task_id`` is not a key of ``tasks``.
    """

    def __init__(self, task_id):
        # Every entry of ``tasks`` holds exactly one "<task name>.tar" key;
        # the part before the first "." names all directories of the task.
        archive_name = next(iter(tasks[task_id]))
        task_name = archive_name.split('.')[0]
        print(f"Preparing task {task_id} {task_name}")
        super().__init__(
            dataset_root=f"data/{task_name}",
            raw_dataset_dir=f"{task_name}_raw/",
            images_dir=f"{task_name}/{task_name}/imagesTr",
            labels_dir=f"{task_name}/{task_name}/labelsTr",
            phase_dir=f"{task_name}_phase0/",
            urls=tasks[task_id],
            valid_suffix=("nii.gz", "nii.gz"),
            filter_key=(None, None),
            uncompress_params={"format": "tar",
                               "num_files": 1})

        # Preprocessing callables, listed separately for images and labels.
        # NOTE(review): ``order`` is presumably the interpolation order of
        # ``resample`` (1 = linear for images, 0 = nearest neighbour so that
        # label ids are never blended) -- confirm in preprocess_utils.
        self.preprocess = {
            "images": [
                HUNorm,
                wrapped_partial(
                    resample, new_shape=[128, 128, 128], order=1),
            ],
            "labels": [
                wrapped_partial(
                    resample, new_shape=[128, 128, 128], order=0),
            ],
        }

    def generate_txt(self, train_split=0.75):
        """Generate ``train_list.txt`` and ``val_list.txt`` in the phase dir.

        Args:
            train_split: Forwarded unchanged to ``split_files_txt`` for both
                list files (presumably the fraction of cases used for
                training -- confirm against ``Prep.split_files_txt``).
        """
        train_txt = osp.join(self.phase_path, 'train_list.txt')
        val_txt = osp.join(self.phase_path, 'val_list.txt')

        # os.listdir() returns entries in arbitrary order. Sort both listings
        # so the image list and the label list line up index-for-index and
        # the split is the same on every run and every filesystem.
        image_files_npy = sorted(os.listdir(self.image_path))
        label_files_npy = sorted(os.listdir(self.label_path))

        for txt_path in (train_txt, val_txt):
            self.split_files_txt(txt_path, image_files_npy, label_files_npy,
                                 train_split)
-
-
def parse_task_id(argv):
    """Return the integer task id given on the command line.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``); exactly one extra argument is expected.

    Returns:
        The task id parsed with ``int()``.

    Raises:
        SystemExit: With exit status 1, after printing a usage hint, when the
            argument count is wrong or the argument is not an integer.
    """
    usage = "Example usage: \n\t python tools/prepare_msd.py 1 # for preparing MSD task 1"

    if len(argv) != 2:
        print(f"Please provide task id. {usage}")
        # Stop here: continuing would fail on argv[1] or ignore extra args.
        sys.exit(1)

    try:
        return int(argv[1])
    except ValueError:
        print(
            f"Expecting number as command line argument, got {argv[1]}. {usage}"
        )
        # Stop here: there is no task id to continue with.
        sys.exit(1)


def main(argv=None):
    """Prepare the MSD task selected on the command line.

    Args:
        argv: Argument vector to parse; defaults to ``sys.argv``.

    Raises:
        SystemExit: With exit status 1 when the arguments are invalid or the
            task id is not a key of ``tasks``.
    """
    task_id = parse_task_id(sys.argv if argv is None else argv)
    if task_id not in tasks:
        print(
            f"Unknown MSD task id {task_id}, expected one of {sorted(tasks)}."
        )
        sys.exit(1)

    prep = Prep_msd(task_id)

    # dataset.json is looked up in the parent directory of the image dir.
    json_path = osp.join(osp.dirname(prep.image_dir), "dataset.json")
    prep.generate_dataset_json(**parse_msd_basic_info(json_path))

    prep.load_save()
    prep.generate_txt()


if __name__ == "__main__":
    main()
|