Skip to content

Commit 2942c32

Browse files
committed
update dataset 操作
1 parent caefbb4 commit 2942c32

2 files changed

Lines changed: 27 additions & 2 deletions

File tree

dataset_tools/jade_create_paddle_text_detection_datasets.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import shutil
1414
import cv2
1515
import random
16+
from dataset_tools import *
1617

1718

1819
def sortPoints(points, label):
@@ -200,9 +201,32 @@ def removeNolabelDatasets(root_path):
200201
os.remove(os.path.join(root_path, GetLastDir(image_path)[:-4] + ".json"))
201202
os.remove(image_path)
202203
else:
204+
print("清除图片")
203205
os.remove(image_path)
204206
progressBar.update()
205207

208+
def removeNolabelVocDatasets(root_path):
    """Delete images that carry no usable annotation from a VOC-style dataset tree.

    Every sub-directory of ``root_path`` is scanned. Images are listed from its
    ``DIRECTORY_IMAGES`` folder and matched against ``<stem>.xml`` in its
    ``DIRECTORY_ANNOTATIONS`` folder:

    * annotation exists but ``ProcessXml`` yields no bounding boxes ->
      both the annotation file and the image are deleted;
    * annotation file does not exist -> the image is deleted;
    * annotation exists and has at least one box -> nothing is touched.

    Args:
        root_path: Directory whose immediate sub-directories each hold one
            dataset split. Plain files directly under it are ignored.

    NOTE(review): the branch layout (``else`` belonging to the existence check,
    progress update once per image) was reconstructed from a flattened diff --
    confirm against the repository copy.
    """
    for day in os.listdir(root_path):
        day_dir = os.path.join(root_path, day)
        if not os.path.isdir(day_dir):
            continue

        image_path_list = GetAllImagesPath(os.path.join(day_dir, DIRECTORY_IMAGES))
        progressBar = ProgressBar(len(image_path_list))
        for image_path in image_path_list:
            # splitext instead of a fixed [:-4] slice: a 4-letter extension such
            # as ".jpeg" previously produced "name..xml", the lookup failed and
            # a correctly labelled image was deleted.
            # presumably GetLastDir returns the final path component -- verify.
            stem = os.path.splitext(GetLastDir(image_path))[0]
            xml_path = os.path.join(day_dir, DIRECTORY_ANNOTATIONS, stem + ".xml")

            if os.path.exists(xml_path):
                _, _, bboxes, _, _, _, _ = ProcessXml(xml_path)
                # len() rather than truthiness: bboxes may be an array-like
                # whose bool() is ambiguous.
                if len(bboxes) == 0:
                    print("清除{},清除{}".format(image_path, xml_path))
                    os.remove(xml_path)
                    os.remove(image_path)
            else:
                print("清除{}".format(image_path))
                os.remove(image_path)
            progressBar.update()
228+
229+
206230

207231
def GetContaNumberPath(image_path_list):
208232
ContaNumber_list = []

main.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
if __name__ == '__main__':
1313

1414
#removeNolabelDatasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集\2022-03-09")
15-
create_text_detection_datasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集",r"E:\Data\字符检测识别数据集\定制版箱号关键点数据集",0.95)
15+
#create_text_detection_datasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集",r"E:\Data\字符检测识别数据集\定制版箱号关键点数据集",0.95)
1616
#CreatePaddleOCRDatasets(root_path="E:\Data\字符检测识别数据集\镇江大港厂内车牌关键点检测数据集", save_path="E:\Data\OCR\镇江大港厂内车牌识别数据集",dataset_type="镇江厂内车牌数据集")
17-
# CreateYearsDatasets(r"F:\数据集\VOC数据集\定制版顶相机箱号检测数据集")
17+
removeNolabelVocDatasets(r"F:\数据集\VOC数据集\验残集装箱检测数据集")
18+
CreateYearsDatasets(r"F:\数据集\VOC数据集\验残集装箱检测数据集")

0 commit comments

Comments
 (0)