@@ -295,7 +295,7 @@ def SplitDataSets(image_path_list, ContaNumber_list,split_rate):
295295
296296
297297
298- def CreateTextDetDatasets (root_path , save_root_path , split_rate = 0.9 ):
298+ def CreateTextDetDatasets (root_path , save_root_path , split_rate = 0.9 , max_candidates = 0 ):
299299 ##
300300 removeNolabelDatasets (root_path )
301301 image_path_list = GetAllImagesPath (root_path )
@@ -308,14 +308,15 @@ def CreateTextDetDatasets(root_path, save_root_path, split_rate=0.9):
308308 os .remove (os .path .join (save_path , "test_icdar2015_label.txt" ))
309309 index = 0
310310 ##分割数据集应该是从整体数据集中挑选出重复的数据
311-
312311 train_image_files , test_image_files = SplitDataSets (image_path_list , ContaNumber_list ,split_rate )
313312
314313 progressBar = ProgressBar (len (train_image_files ))
315314 for image_path in train_image_files :
316315 shutil .copyfile (image_path , os .path .join (save_image_path , GetLastDir (image_path )))
317316 result = readjsonContent (os .path .join (root_path , GetLastDir (image_path )[:- 4 ] + ".json" ))
318-
317+ result_list = json .loads (result )
318+ if len (result_list ) > max_candidates :
319+ max_candidates = len (result_list )
319320 with open (os .path .join (save_path , "train_icdar2015_label.txt" ), "ab" ) as f :
320321 content = "image/" + GetLastDir (image_path ) + "\t " + result
321322 f .write ((content + "\n " ).encode ("utf-8" ))
@@ -325,11 +326,15 @@ def CreateTextDetDatasets(root_path, save_root_path, split_rate=0.9):
325326 for image_path in test_image_files :
326327 shutil .copyfile (image_path , os .path .join (save_image_path , GetLastDir (image_path )))
327328 result = readjsonContent (os .path .join (root_path , GetLastDir (image_path )[:- 4 ] + ".json" ))
329+ result_list = json .loads (result )
330+ if len (result_list ) > max_candidates :
331+ max_candidates = len (result_list )
328332 with open (os .path .join (save_path , "test_icdar2015_label.txt" ), "ab" ) as f :
329333 content = "image/" + GetLastDir (image_path ) + "\t " + result
330334 f .write ((content + "\n " ).encode ("utf-8" ))
331335 progresstestBar .update ()
332336 createDatasets (save_root_path )
337+ return max_candidates
333338
334339def create_text_detection_datasets (root_path ,save_path ,split_rate = 0.95 ):
335340 if os .path .exists (save_path ):
@@ -338,5 +343,7 @@ def create_text_detection_datasets(root_path,save_path,split_rate=0.95):
338343 except :
339344 print ("文件夹删除失败,文件夹名称为:{}" .format (save_path ))
340345 file_list = os .listdir (root_path )
346+ max_candidates = 0
341347 for file_name in file_list :
342- CreateTextDetDatasets (os .path .join (root_path , file_name ),save_path ,split_rate )
348+ max_candidates = CreateTextDetDatasets (os .path .join (root_path , file_name ),save_path ,split_rate ,max_candidates )
349+ print (max_candidates )
0 commit comments