|
- import os
- import random
- import re
-
def get_dir(devkit_dir, year, type):
    """Build the path of one data folder inside a VOC devkit.

    Args:
        devkit_dir: Root directory of the devkit.
        year: Dataset year as a string; it is appended to the literal
            ``'VOC'`` to form the per-year folder name.
        type: Sub-path under the per-year folder (the callers in this file
            pass ``'ImageSets/Main'``, ``'Annotations'`` and
            ``'JPEGImages'``).

    Returns:
        ``devkit_dir``, ``'VOC' + year`` and ``type`` joined with
        ``os.path.join``.
    """
    year_folder = 'VOC' + year
    return os.path.join(devkit_dir, year_folder, type)
-
def walk_dir(devkit_dir, year):
    """Collect (image, annotation) path pairs for one year, grouped by split.

    Every file under ``<devkit_dir>/VOC<year>/ImageSets/Main`` whose name is
    ``<lowercase letters>_trainval.txt``, ``..._train.txt`` or ``..._val.txt``
    is read. The first whitespace-separated token of each non-blank line is
    taken as the image name; the matching ``<name>.xml`` under ``Annotations``
    and ``<name>.jpg`` under ``JPEGImages`` must both exist.

    Args:
        devkit_dir: Root directory of the devkit.
        year: Dataset year as a string (e.g. ``'2012'``).

    Returns:
        A tuple ``(trainval_list, train_list, val_list)``. Each list holds
        ``(img_path, ann_path)`` tuples. An image name is listed at most once
        per split, even if several list files of that split mention it.

    Raises:
        AssertionError: If an annotation or image file referenced by a list
            file does not exist.
    """
    # Folder holding the per-class list files of each split.
    filelist_dir = get_dir(devkit_dir, year, 'ImageSets/Main')
    # Folder holding the annotation (.xml) files.
    annotation_dir = get_dir(devkit_dir, year, 'Annotations')
    # Folder holding the image (.jpg) files.
    img_dir = get_dir(devkit_dir, year, 'JPEGImages')

    trainval_list = []
    train_list = []
    val_list = []

    # One entry per split: file-name pattern, target list, and the set of
    # image names already added to that list. The "seen" sets are kept per
    # split on purpose: a single shared set would drop every validation image
    # that had already been added through a train/trainval file.
    # The dot is escaped and the whole name must match, so names such as
    # 'x_valXtxt' or 'x_val.txt.bak' are no longer picked up.
    splits = (
        (re.compile(r'[a-z]+_trainval\.txt'), trainval_list, set()),
        (re.compile(r'[a-z]+_train\.txt'), train_list, set()),
        (re.compile(r'[a-z]+_val\.txt'), val_list, set()),
    )

    for dirpath, dirnames, filenames in os.walk(filelist_dir):
        # Sort in place so traversal (and therefore output order) does not
        # depend on the file system's directory ordering.
        dirnames.sort()
        for fname in sorted(filenames):
            for pattern, img_ann_list, seen in splits:
                if pattern.fullmatch(fname):
                    break
            else:
                # Not a train / trainval / val list file.
                continue

            # Join with the directory the file was actually found in;
            # os.walk also descends into sub-directories.
            fpath = os.path.join(dirpath, fname)
            with open(fpath) as flist:
                for line in flist:
                    fields = line.split()
                    if not fields:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    name_prefix = fields[0]
                    # Skip names this split already contains.
                    if name_prefix in seen:
                        continue
                    seen.add(name_prefix)

                    ann_path = os.path.join(annotation_dir, name_prefix + '.xml')
                    img_path = os.path.join(img_dir, name_prefix + '.jpg')
                    # Raised explicitly (rather than via ``assert``) so the
                    # check still runs under ``python -O``; the exception type
                    # and message are unchanged.
                    if not os.path.isfile(ann_path):
                        raise AssertionError('file %s not found.' % ann_path)
                    if not os.path.isfile(img_path):
                        raise AssertionError('file %s not found.' % img_path)

                    img_ann_list.append((img_path, ann_path))

    return trainval_list, train_list, val_list
-
def _write_filelist(path, pairs):
    """Write one ``<image path> <annotation path>`` line per pair to *path*."""
    with open(path, 'w') as fout:
        fout.writelines(img + ' ' + ann + '\n' for img, ann in pairs)


def prepare_filelist(devkit_dir, years, output_dir):
    """Write ``trainval.txt``, ``train.txt`` and ``val.txt`` into *output_dir*.

    The lists returned by ``walk_dir`` for every requested year are
    concatenated per split. Only the trainval list is shuffled before being
    written; the train and val lists keep the order in which they were
    collected.

    Args:
        devkit_dir: Root directory of the devkit.
        years: Iterable of year strings, each passed to ``walk_dir``.
        output_dir: Existing directory that receives the three list files.
            Existing files with those names are overwritten.
    """
    trainval_list = []
    train_list = []
    val_list = []

    # Gather the three splits of every requested year.
    for year in years:
        trainval, train, val = walk_dir(devkit_dir, year)
        trainval_list.extend(trainval)
        train_list.extend(train)
        val_list.extend(val)

    # Shuffle the combined training+validation list in place.
    random.shuffle(trainval_list)

    # Each output line is "<image path> <annotation path>".
    _write_filelist(os.path.join(output_dir, 'trainval.txt'), trainval_list)
    _write_filelist(os.path.join(output_dir, 'train.txt'), train_list)
    _write_filelist(os.path.join(output_dir, 'val.txt'), val_list)
-
-
if __name__ == '__main__':
    # Script entry point: read the 2012 data from the 'VOCdevkit' folder and
    # write the generated list files into the current working directory.
    prepare_filelist(devkit_dir='VOCdevkit', years=['2012'], output_dir='.')
-
-
-
-
|