@@ -126,32 +126,45 @@ def save_summary_report(results: Union["EvaluationResult", List["EvaluationResul
126126 file_path: Output CSV file path
127127 """
128128 import csv
129-
129+ from importlib import metadata as importlib_metadata
130+
130131 file_path = Path (file_path )
131132 file_path .parent .mkdir (parents = True , exist_ok = True )
132-
133- # Convert EvaluationResult objects to dicts and ensure we have a list
133+
134+ # Convert the results into a list of dicts
134135 def to_dict_if_needed (item ):
135136 return item .to_dict () if hasattr (item , 'to_dict' ) else item
136-
137+
137138 if isinstance (results , list ):
138139 results_list = [to_dict_if_needed (item ) for item in results ]
139140 else :
140141 results_list = [to_dict_if_needed (results )]
141-
142- # Prepare CSV data
142+
143143 csv_data = []
144-
145144 for result in results_list :
146- # Extract basic info
147145 metadata = result .get ('metadata' , {})
148146 error_analysis = result .get ('error_analysis' , {})
147+
148+ # Look up the installed version of the extractor package
149+ extractor_name = metadata .get ('extractor_name' , 'unknown' )
150+ try :
151+ # Map extractor names to their package names
152+ package_mapping = {
153+ 'llm-webkit' : 'llm_web_kit' ,
154+ 'magic-html' : 'magic_html' ,
155+ 'trafilatura' : 'trafilatura' ,
156+ 'resiliparse' : 'resiliparse'
157+ }
158+ package_name = package_mapping .get (extractor_name , extractor_name )
159+ extractor_version = importlib_metadata .version (package_name )
160+ except importlib_metadata .PackageNotFoundError :
161+ extractor_version = 'unknown'
149162 row = {
150163 'extractor' : metadata .get ('extractor_name' , 'unknown' ),
151164 'dataset' : metadata .get ('dataset_name' , 'unknown' ),
152165 'total_samples' : metadata .get ('total_samples' , 0 ),
153166 'success_rate' : error_analysis .get ('success_rate' , 0.0 ),
154- 'extractor_version' : metadata . get ( 'version' , 'unknown' )
167+ 'extractor_version' : extractor_version ,
155168 }
156169
157170 # Add all available metrics from overall_metrics
@@ -170,7 +183,7 @@ def get_sort_key(row):
170183 # Write CSV file
171184 if csv_data :
172185 # Define field order: basic info first, then overall, then other metrics alphabetically
173- basic_fields = ['extractor' , 'dataset' , 'total_samples' , 'success_rate' , 'extractor_version ' ]
186+ basic_fields = ['extractor' ,'extractor_version' , 'dataset' , 'total_samples' , 'success_rate' ]
174187
175188 # Get all metric fields from the data
176189 all_fields = set ()
0 commit comments