Skip to content

Commit fb271b6

Browse files
committed
update jade packing
1 parent e2364d3 commit fb271b6

2 files changed

Lines changed: 27 additions & 5 deletions

File tree

dataset_tools/jade_create_paddle_text_detection_datasets.py

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -205,12 +205,29 @@ def removeNolabelDatasets(root_path):
205205
os.remove(image_path)
206206
progressBar.update()
207207

208+
209+
def get_no_exists_index(arr1, arr2):
    """Locate the first element that is present in only one of two sequences.

    ``arr1`` is scanned first; ``arr2`` is scanned only when every element of
    ``arr1`` is also found in ``arr2``.

    Args:
        arr1: First sequence of items.
        arr2: Second sequence of items.

    Returns:
        A 2-tuple ``(list_index, index)``:

        * ``(0, i)`` when ``arr1[i]`` is the first item of ``arr1`` that is
          not in ``arr2``;
        * ``(1, i)`` when all of ``arr1`` is in ``arr2`` and ``arr2[i]`` is
          the first item of ``arr2`` that is not in ``arr1``;
        * ``(None, None)`` when neither sequence has an item missing from
          the other (including when both are empty).
    """
    # Sets give O(1) membership tests. If any item is unhashable, fall back
    # to the original sequences so membership still works for every input.
    try:
        lookup1, lookup2 = set(arr1), set(arr2)
    except TypeError:
        lookup1, lookup2 = arr1, arr2

    for i, item in enumerate(arr1):
        if item not in lookup2:
            return 0, i

    # The whole of arr2 must be scanned before concluding nothing is missing.
    for i, item in enumerate(arr2):
        if item not in lookup1:
            return 1, i

    # Always return a 2-tuple so callers can unpack the result safely.
    return None, None
218+
208219
def removeNolabelVocDatasets(root_path):
209220
for day in os.listdir(root_path):
210221
if os.path.isdir(os.path.join(root_path,day)):
211222
image_path_list = GetAllImagesPath(os.path.join(root_path, day, DIRECTORY_IMAGES))
212223
progressBar = ProgressBar(len(image_path_list))
224+
xml_path_list = GetFilesWithLastNamePath(os.path.join(root_path,day,DIRECTORY_ANNOTATIONS),".xml")
225+
xml_name_list = []
226+
for xml_path in xml_path_list:
227+
xml_name_list.append(GetLastDir(xml_path).split(".")[0])
228+
image_name_list = []
213229
for image_path in image_path_list:
230+
image_name_list.append(GetLastDir(image_path.split(".")[0]))
214231
if os.path.exists(
215232
os.path.join(root_path, day, DIRECTORY_ANNOTATIONS, GetLastDir(image_path)[:-4] + ".xml")):
216233
imagename, shape, bboxes, labels_text, labels, difficult, truncated = ProcessXml(
@@ -226,8 +243,13 @@ def removeNolabelVocDatasets(root_path):
226243
os.remove(image_path)
227244
progressBar.update()
228245

229-
230-
246+
list_index,index = get_no_exists_index(image_name_list,xml_name_list)
247+
if list_index == 0:
248+
print("需要删除图片,{}".format(image_path_list[index]))
249+
os.remove(image_path_list[index])
250+
elif list_index == 1:
251+
print("需要标注文件,{}".format(xml_path_list[index]))
252+
os.remove(xml_path_list[index])
231253
def GetContaNumberPath(image_path_list):
232254
ContaNumber_list = []
233255
for file in image_path_list:

main.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
#removeNolabelDatasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集\2022-03-09")
1515
#create_text_detection_datasets(r"F:\数据集\关键点检测数据集\定制版箱号关键点数据集",r"E:\Data\字符检测识别数据集\定制版箱号关键点数据集",0.95)
1616
#CreatePaddleOCRDatasets(root_path="E:\Data\字符检测识别数据集\镇江大港厂内车牌关键点检测数据集", save_path="E:\Data\OCR\镇江大港厂内车牌识别数据集",dataset_type="镇江厂内车牌数据集")
17-
# removeNolabelVocDatasets(r"F:\数据集\VOC数据集\验残集装箱检测数据集")
18-
# CreateYearsDatasets(r"F:\数据集\VOC数据集\验残集装箱检测数据集")
17+
#removeNolabelVocDatasets(r"F:\数据集\验残数据集\残损检测数据集")
18+
CreateYearsDatasets(r"F:\数据集\验残数据集\残损检测数据集")
1919
#create_text_detection_datasets(r"F:\数据集\关键点检测数据集\箱号关键点数据集",r'E:\Data\字符检测识别数据集\箱号关键点数据集')
20-
CreatePaddleOCRDatasets(r'E:\Data\字符检测识别数据集\箱号关键点数据集', save_path="E:\Data\OCR\箱号识别数据集",dataset_type="箱号数据集")
20+
#CreatePaddleOCRDatasets(r'E:\Data\字符检测识别数据集\箱号关键点数据集', save_path="E:\Data\OCR\箱号识别数据集",dataset_type="箱号数据集")

0 commit comments

Comments
 (0)