Skip to content

Commit 6b42945

Browse files
committed
* 更新制作VOC数据集
1 parent 50bf9ed commit 6b42945

File tree

4 files changed

+47
-93
lines changed

4 files changed

+47
-93
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
# dataset_tools
22
数据集工具合集
33

4+
```bash
5+
python main.py --dataset_type paddle_detection --input_dataset_dir F:\数据集\VOC数据集\箱门检测数据集\ContainVOC --save_dataset_dir E:\Data\VOC数据集\箱门检测数据集\ContainVOC
6+
```

dataset_tools/jade_create_object_dection_datasets.py

Lines changed: 38 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def CreateDarknetVocDataset(dir,save_path,image_files,dataset_type,remove_label=
161161
shutil.copy(os.path.join(dir, DIRECTORY_IMAGES, image_file), save_image_path)
162162
convert_voc_to_yolo(os.path.join(dir,DIRECTORY_ANNOTATIONS,image_file[:-4] + ".xml"),os.path.join(save_label_path,image_file[:-4] + ".txt"),VOC_CLASSES)
163163
else:
164-
print(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml"))
164+
print("未找到类别:{}".format(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml")))
165165

166166

167167

@@ -177,8 +177,38 @@ def CreateDarknetVocDatasets(dir,save_path,rate,VOC_CLASSES):
177177
CreateDarknetVocDataset(dir,save_path,test_image_files,"test",VOC_CLASSES=VOC_CLASSES)
178178

179179

180-
181-
180+
def generate_new_datasets(dir, dataset_name, root_path, image_file_list, no_pretrained_images_dir, output):
    """Copy usable image/annotation pairs into a new VOC dataset and record them in a split list.

    For each image in ``image_file_list`` the image and its Pascal-VOC XML are
    copied under ``root_path`` and a line of the form
    ``"<dataset_name>/<images_dir>/<img> <dataset_name>/<annotations_dir>/<img>.xml"``
    is appended to ``ImageSets/Main/<output>.txt``. Samples that cannot be used
    (empty image file, or an XML with no recognizable class names) are copied to
    ``no_pretrained_images_dir`` and removed from the source directory so they
    are not retried on the next run. Finally the split list is duplicated as
    ``<output>_var.txt``.

    :param dir: source dataset root containing DIRECTORY_IMAGES / DIRECTORY_ANNOTATIONS
    :param dataset_name: dataset name used as the path prefix inside the list file
    :param root_path: destination dataset root
    :param image_file_list: image file names (with extension) to process
    :param no_pretrained_images_dir: directory receiving rejected images
    :param output: split name, e.g. "train" or "test"
    :return: None
    """
    Main_path = os.path.join(root_path, "ImageSets", "Main")
    # Hoist loop-invariant work: destination directories and the split-list
    # file handle were previously (re)created once per image.
    save_image_path = CreateSavePath(os.path.join(root_path, DIRECTORY_IMAGES))
    save_xml_path = CreateSavePath(os.path.join(root_path, DIRECTORY_ANNOTATIONS))
    list_file = os.path.join(Main_path, output + ".txt")
    # Opening in append mode here also guarantees the list file exists, so the
    # final shutil.copy below can no longer raise FileNotFoundError when every
    # sample is rejected (the original only created the file inside the loop).
    with open(list_file, "a") as f:
        for image_file in image_file_list:
            is_success = False
            img_file = dataset_name + "/" + DIRECTORY_IMAGES + "/" + image_file
            xml_file = dataset_name + "/" + DIRECTORY_ANNOTATIONS + "/" + image_file[:-4] + ".xml"
            with open(os.path.join(dir, DIRECTORY_IMAGES, image_file), "rb") as f2:
                # NOTE(review): the original skips only when the file is empty
                # AND ReadChinesePath(...) is not None; kept byte-for-byte —
                # confirm whether an empty file should be skipped unconditionally.
                if len(f2.read()) == 0 and ReadChinesePath(os.path.join(dir, DIRECTORY_IMAGES, image_file)) != None:
                    pass
                else:
                    class_name_list = GetXmlClassesNames(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml"))
                    if len(class_name_list) > 0:
                        shutil.copy(os.path.join(dir, DIRECTORY_IMAGES, image_file), save_image_path)
                        shutil.copy(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml"), save_xml_path)
                        is_success = True
                        f.write(img_file + " " + xml_file + "\n")
                    else:
                        print("未找到类别:{}".format(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml")))
            if is_success is False:
                # Quarantine the rejected image, then delete the originals so a
                # re-run does not process the same bad sample again.
                shutil.copy(os.path.join(dir, DIRECTORY_IMAGES, image_file), os.path.join(no_pretrained_images_dir, image_file))
                try:
                    os.remove(os.path.join(dir, DIRECTORY_IMAGES, image_file))
                    os.remove(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml"))
                except Exception as e:
                    # Best effort: the XML may legitimately not exist.
                    print("删除失败,{}".format(e))
                print("未找到类别:{}".format(os.path.join(dir, DIRECTORY_ANNOTATIONS, image_file[:-4] + ".xml")))
    # Keep a "_var" duplicate of the split list, as downstream tooling expects.
    shutil.copy(list_file, os.path.join(Main_path, output + "_var.txt"))
182212

183213

184214
##制作VOC数据集
@@ -190,106 +220,26 @@ def CreateVOCDataset(dir, datasetname,save_path=None,rate=0.95):
190220
:param rate:
191221
:return:
192222
"""
223+
no_pretrained_dir = CreateSavePath(os.path.join(os.path.dirname(dir),"no_pretrained"))
224+
193225
root_path = os.path.join(save_path,datasetname)
194226
dataset_name = datasetname
195227
Annotations = DIRECTORY_ANNOTATIONS
196228
JPEGImages = DIRECTORY_IMAGES
229+
no_pretrained_images_dir = CreateSavePath(os.path.join(no_pretrained_dir,JPEGImages))
197230

198231
if os.path.exists(os.path.join(root_path, "ImageSets", "Main")) is not True:
199232
os.makedirs(os.path.join(root_path, "ImageSets", "Main"))
200233
else:
201234
shutil.rmtree(os.path.join(root_path, "ImageSets", "Main"))
202235
os.makedirs(os.path.join(root_path, "ImageSets", "Main"))
203-
Main_path = os.path.join(root_path, "ImageSets", "Main")
204236
image_files = os.listdir(os.path.join(dir, JPEGImages))
205237
train_image_files = random.sample(image_files, int(len(image_files) *rate))
206238
test_image_files = [file for file in image_files if file not in train_image_files]
207239

208-
for train_image_file in train_image_files:
209-
with open(os.path.join(Main_path, "train_var.txt"), "a") as f:
210-
# with open(os.path.join(Main_path, "train.txt"), "a") as f:
211-
image_file = dataset_name + "/" + JPEGImages + "/" + train_image_file
212-
xml_file = dataset_name + "/" + Annotations + "/" + train_image_file[:-4] + ".xml"
213-
filename = train_image_file[:-4]
214-
save_image_path = CreateSavePath(os.path.join(root_path,DIRECTORY_IMAGES))
215-
save_xml_path = CreateSavePath(os.path.join(root_path,DIRECTORY_ANNOTATIONS))
216-
with open(os.path.join(dir,JPEGImages,train_image_file),"rb") as f2:
217-
if len(f2.read()) == 0:
218-
pass
219-
else:
220-
class_name_list = GetXmlClassesNames(os.path.join(dir, DIRECTORY_ANNOTATIONS, train_image_file[:-4] + ".xml"))
221-
if len(class_name_list) > 0:
222-
shutil.copy(os.path.join(dir, JPEGImages, train_image_file), save_image_path)
223-
shutil.copy(os.path.join(dir, DIRECTORY_ANNOTATIONS, train_image_file[:-4] + ".xml"), save_xml_path)
224-
f.write(filename + "\n")
225-
else:
226-
print(os.path.join(dir, DIRECTORY_ANNOTATIONS, train_image_file[:-4] + ".xml"))
227-
# f.write(image_file + " " + xml_file + "\n")
228-
229-
for test_image_file in test_image_files:
230-
with open(os.path.join(Main_path, "test_var.txt"), "a") as f:
231-
# with open(os.path.join(Main_path, "test.txt"), "a") as f:
232-
image_file = dataset_name + "/" + JPEGImages + "/" + test_image_file
233-
xml_file = dataset_name + "/" + Annotations + "/" + test_image_file[:-4] + ".xml"
234-
filename = test_image_file[:-4]
235-
save_image_path = CreateSavePath(os.path.join(root_path, DIRECTORY_IMAGES))
236-
save_xml_path = CreateSavePath(os.path.join(root_path, DIRECTORY_ANNOTATIONS))
237-
with open(os.path.join(dir,JPEGImages,test_image_file),"rb") as f2:
238-
if len(f2.read()) == 0:
239-
pass
240-
else:
241-
class_name_list = GetXmlClassesNames(
242-
os.path.join(dir, DIRECTORY_ANNOTATIONS, test_image_file[:-4] + ".xml"))
243-
if len(class_name_list) > 0:
244-
shutil.copy(os.path.join(dir, JPEGImages, test_image_file), save_image_path)
245-
shutil.copy(os.path.join(dir, DIRECTORY_ANNOTATIONS, test_image_file[:-4] + ".xml"), save_xml_path)
246-
f.write(filename + "\n")
247-
else:
248-
print(os.path.join(dir, DIRECTORY_ANNOTATIONS, test_image_file[:-4] + ".xml"))
249-
250-
for train_image_file in train_image_files:
251-
with open(os.path.join(Main_path, "train.txt"), "a") as f:
252-
# with open(os.path.join(Main_path, "train.txt"), "a") as f:
253-
image_file = dataset_name + "/" + JPEGImages + "/" + train_image_file
254-
xml_file = dataset_name + "/" + Annotations + "/" + train_image_file[:-4] + ".xml"
255-
filename = train_image_file[:-4]
256-
with open(os.path.join(dir,JPEGImages,train_image_file),"rb") as f2:
257-
if len(f2.read()) == 0:
258-
pass
259-
else:
260-
class_name_list = GetXmlClassesNames(
261-
os.path.join(dir, DIRECTORY_ANNOTATIONS, train_image_file[:-4] + ".xml"))
262-
if len(class_name_list) > 0:
263-
shutil.copy(os.path.join(dir, JPEGImages, train_image_file), save_image_path)
264-
shutil.copy(os.path.join(dir, DIRECTORY_ANNOTATIONS, train_image_file[:-4] + ".xml"), save_xml_path)
265-
f.write(image_file + " " + xml_file + "\n")
266-
else:
267-
print(os.path.join(dir, DIRECTORY_ANNOTATIONS, train_image_file[:-4] + ".xml"))
268-
269-
# f.write(filename + "\n")
270-
271-
272-
for test_image_file in test_image_files:
273-
with open(os.path.join(Main_path, "test.txt"), "a") as f:
274-
# with open(os.path.join(Main_path, "test.txt"), "a") as f:
275-
image_file = dataset_name + "/" + JPEGImages + "/" + test_image_file
276-
xml_file = dataset_name + "/" + Annotations + "/" + test_image_file[:-4] + ".xml"
277-
filename = test_image_file[:-4]
278-
with open(os.path.join(dir,JPEGImages,test_image_file),"rb") as f2:
279-
if len(f2.read()) == 0:
280-
pass
281-
else:
282-
class_name_list = GetXmlClassesNames(
283-
os.path.join(dir, DIRECTORY_ANNOTATIONS, test_image_file[:-4] + ".xml"))
284-
if len(class_name_list) > 0:
285-
shutil.copy(os.path.join(dir, JPEGImages, test_image_file), save_image_path)
286-
shutil.copy(os.path.join(dir, DIRECTORY_ANNOTATIONS, test_image_file[:-4] + ".xml"),
287-
save_xml_path)
288-
f.write(image_file + " " + xml_file + "\n")
289-
else:
290-
print(os.path.join(dir, DIRECTORY_ANNOTATIONS, test_image_file[:-4] + ".xml"))
240+
generate_new_datasets(dir,dataset_name,root_path,train_image_files,no_pretrained_images_dir,"train")
241+
generate_new_datasets(dir,dataset_name,root_path,test_image_files,no_pretrained_images_dir,"test")
291242

292-
# f.write(filename + "\n")
293243

294244
def CreateLabelList(dir):
295245
"""

main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from dataset_tools.jade_create_object_dection_datasets import CreateYearsDatasets
1212

1313
def test_create_paddle_years_datasets(args):
    """Build a PaddleDetection-style VOC dataset from the parsed CLI args.

    Uses an 80/20 train/test split (rate=0.8) and writes the result under
    ``args.save_dataset_dir``.
    """
    CreateYearsDatasets(args.input_dataset_dir, None, save_path=args.save_dataset_dir, rate=0.8)
1515

1616
if __name__ == '__main__':
1717
import argparse
@@ -23,9 +23,9 @@ def test_create_paddle_years_datasets(args):
2323
args = parser.parse_args()
2424
print(list(args.voc_labels))
2525
if args.dataset_type == "paddle_detection":
26-
CreateYearsDatasets(args.input_dataset_dir, None, save_path=args.save_dataset_dir, rate=0.95)
26+
CreateYearsDatasets(args.input_dataset_dir, None, save_path=args.save_dataset_dir, rate=0.8)
2727
elif args.dataset_type == "yolo_detection":
28-
CreateDarknetVocDatasets(args.input_dataset_dir, save_path=args.save_dataset_dir, rate=0.95, VOC_CLASSES=args.voc_labels)
28+
CreateDarknetVocDatasets(args.input_dataset_dir, save_path=args.save_dataset_dir, rate=0.8, VOC_CLASSES=args.voc_labels)
2929

3030
#removeNolabelDatasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集\2022-03-09")
3131
#create_text_detection_datasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集",r"E:\Data\字符检测识别数据集\定制版箱号关键点数据集",0.95)

test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ def testCreateYearsDatasets():
1313
"""
1414
制作Paddle Voc 数据集
1515
"""
16-
CreateYearsDatasets(r"F:\数据集\VOC数据集\验残集装箱检测数据集", save_path=r"E:\Data\VOC数据集\验残集装箱检测数据集")
16+
VOC_CLASSES = ["FRONTEND","DOOREND","UPEND","slide","bromine_tank"]
17+
CreateYearsDatasets(r"F:\数据集\VOC数据集\箱门检测数据集\ContainVOC", save_path=r"E:\Data\VOC数据集\箱门检测数据集\ContainVOC")
1718

1819
def testCreateYearsDarknetVocDatasets():
1920
# VOC_CLASSES = ["container"]
@@ -22,4 +23,4 @@ def testCreateYearsDarknetVocDatasets():
2223

2324

2425
if __name__ == '__main__':
25-
testCreateYearsDarknetVocDatasets()
26+
testCreateYearsDatasets()

0 commit comments

Comments
 (0)