class ToCHWImage(object):
    """Transpose an HWC image to CHW layout.

    required keys: image
    modified keys: image
    """

    def __init__(self, **kwargs):
        # No configuration needed; **kwargs accepted for pipeline uniformity.
        pass

    def __call__(self, data: dict):
        image = data['image']
        # PIL images have no axis-transpose API; convert to ndarray first.
        if isinstance(image, Image.Image):
            image = np.array(image)
        # Move the channel axis to the front: (H, W, C) -> (C, H, W).
        data['image'] = image.transpose((2, 0, 1))
        return data
The input to each transformation is always a dict, which contains data info such as img_path, the raw label, etc.
Each transformation API should clarify the required keys in the input dict and the modified and/or added keys in the output dict.
Available transformations can be checked in mindocr/data/transforms/*_transform.py
# import and check available transforms
from mindocr.data.transforms import general_transforms, det_transforms, rec_transforms
# NOTE(review): each bracketed list below is captured notebook cell output
# (the value of the __all__ expression above it), not executable code.
general_transforms.__all__
['DecodeImage', 'NormalizeImage', 'ToCHWImage', 'PackLoaderInputs']
det_transforms.__all__
['DetLabelEncode',
'MakeBorderMap',
'MakeShrinkMap',
'EastRandomCropData',
'PSERandomCrop']
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
import os

# Label file produced by the ic15 converter script: one sample per line,
# formatted as "<relative image path>\t<json annotation>".
label_fp = '/Users/Samit/Data/datasets/ic15/det/train/train_icdar2015_label.txt'
root_dir = '/Users/Samit/Data/datasets/ic15/det/train'

with open(label_fp, 'r') as f:
    data_lines = f.readlines()

# Pick a single sample to walk through the transform pipeline.
idx = 3
img_path, annot = data_lines[idx].strip().split('\t')
img_path = os.path.join(root_dir, img_path)
print('img_path', img_path)
print('raw annotation: ', annot)
img_path /Users/Samit/Data/datasets/ic15/det/train/ch4_training_images/img_612.jpg
raw annotation: [{"transcription": "where", "points": [[483, 197], [529, 174], [530, 197], [485, 221]]}, {"transcription": "people", "points": [[531, 168], [607, 136], [608, 166], [532, 198]]}, {"transcription": "meet", "points": [[613, 128], [691, 100], [691, 131], [613, 160]]}, {"transcription": "###", "points": [[695, 299], [888, 315], [931, 635], [737, 618]]}, {"transcription": "###", "points": [[709, 19], [876, 8], [880, 286], [713, 296]]}, {"transcription": "###", "points": [[530, 270], [660, 246], [661, 300], [532, 324]]}, {"transcription": "###", "points": [[113, 356], [181, 359], [180, 387], [112, 385]]}, {"transcription": "###", "points": [[281, 328], [369, 338], [366, 361], [279, 351]]}, {"transcription": "###", "points": [[66, 314], [183, 313], [183, 328], [68, 330]]}]
#img_path = '/Users/Samit/Data/datasets/ic15/det/train/ch4_training_images/img_1.jpg'
# Decode the file at data['img_path'] into an RGB image under data['image'].
decode_image = general_transforms.DecodeImage(img_mode='RGB')
# TODO: check the input keys and output keys for the trans. func.
data = {'img_path': img_path}
data = decode_image(data)
img = data['image']
# visualize
from mindocr.utils.visualize import show_img, show_imgs
show_img(img)
# Benchmark: average wall-clock decode time over `att` repeated runs.
import time
start = time.time()
att = 100
for i in range(att):
    img = decode_image(data)['image']
avg = (time.time() - start) / att
print('avg reading time: ', avg)
avg reading time: 0.004545390605926514
# Parse the raw json annotation string: DetLabelEncode reads data['label']
# and adds data['polys'] (box coordinates) and data['texts'] (transcriptions).
# Fix: the original cell rebound the name `decode_image` (an image decoder)
# to a DetLabelEncode instance — use a dedicated, accurate name instead.
data['label'] = annot
label_encode = det_transforms.DetLabelEncode()
data = label_encode(data)
#print(data['polys'])
print(data['texts'])
# visualize
from mindocr.utils.visualize import draw_boxes
res = draw_boxes(data['image'], data['polys'])
show_img(res)
['where', 'people', 'meet', '###', '###', '###', '###', '###', '###']
from mindocr.data.transforms.general_transforms import RandomCropWithBBox
import copy

# Random crop that keeps the annotated text boxes inside the crop window.
#crop_data = det_transforms.EastRandomCropData(size=(640, 640))
crop_data = RandomCropWithBBox(crop_size=(640, 640))

show_img(data['image'])
for _ in range(2):
    # Crop a deep copy each round so the source sample stays untouched.
    sample = copy.deepcopy(data)
    cropped = crop_data(sample)
    show_img(draw_boxes(cropped['image'], cropped['polys']))

# Random brightness/saturation jitter on another untouched copy.
random_color_adj = general_transforms.RandomColorAdjust(brightness=0.4, saturation=0.5)
sample = copy.deepcopy(data)
#data_cache['image'] = data_cache['image'][:,:, ::-1]
adjusted = random_color_adj(sample)
#print(data_adj)
show_img(adjusted['image'], is_bgr_img=True)
Dear OpenI User
Thank you for your continuous support of the OpenI Qizhi Community AI Collaboration Platform. To protect your usage rights and ensure network security, we updated the OpenI Qizhi Community AI Collaboration Platform Usage Agreement in January 2024. The updated agreement specifies that users are prohibited from using intranet penetration tools. After you click "Agree and continue", you can continue to use our services. Thank you for your cooperation and understanding.
For the full agreement, please refer to the 《OpenI Qizhi Community AI Collaboration Platform Usage Agreement》.