@@ -98,6 +98,9 @@ def save_evaluation_results(results: Union["EvaluationResult", Dict[str, Any]],
9898 else :
9999 results_dict = results
100100
101+ # 移除extracted_content和extracted_content_list字段以减少文件大小
102+ results_dict = DataSaver ._remove_content_fields (results_dict )
103+
101104 if format .lower () == "json" :
102105 with open (file_path , 'w' , encoding = 'utf-8' ) as f :
103106 json .dump (results_dict , f , indent = 2 , ensure_ascii = False )
@@ -265,6 +268,30 @@ def _save_jsonl_list(data_list: List[Dict[str, Any]], file_path: Union[str, Path
265268 json .dump (item , f , ensure_ascii = False )
266269 f .write ('\n ' )
267270
@staticmethod
def _remove_content_fields(data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``data`` without the extracted-content fields.

    Every ``extracted_content`` and ``extracted_content_list`` key is
    dropped, at any nesting depth reachable through dicts and lists, so
    that the saved file stays small. The input is never modified.

    The dropped values are skipped *before* anything is copied, so the
    large payloads are not duplicated in memory just to be thrown away.
    All remaining non-container values are deep-copied, which keeps the
    result independent of ``data``.

    Args:
        data: Results mapping to clean. Only ``dict`` and ``list``
            containers are traversed; any other value (tuples included)
            is copied as an opaque leaf.

    Returns:
        A new structure with the same content minus the dropped keys.
        Dict and list subclasses are returned as plain ``dict``/``list``.
    """
    import copy

    dropped_keys = ("extracted_content", "extracted_content_list")

    def strip(node):
        if isinstance(node, dict):
            # Filter first, then recurse: dropped payloads are never visited.
            return {
                key: strip(value)
                for key, value in node.items()
                if key not in dropped_keys
            }
        if isinstance(node, list):
            return [strip(item) for item in node]
        # Leaf value: copy it so the caller's object is never shared.
        return copy.deepcopy(node)

    return strip(data)
294+
268295 @staticmethod
269296 def append_intermediate_results (results : List [Dict [str , Any ]],
270297 file_path : Union [str , Path ]) -> None :
0 commit comments