Skip to content

Commit c791758

Browse files
committed
add extractor version in results
1 parent 839938f commit c791758

3 files changed

Lines changed: 9 additions & 4 deletions

File tree

webmainbench/data/saver.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -146,12 +146,12 @@ def to_dict_if_needed(item):
146146
# Extract basic info
147147
metadata = result.get('metadata', {})
148148
error_analysis = result.get('error_analysis', {})
149-
150149
row = {
151150
'extractor': metadata.get('extractor_name', 'unknown'),
152151
'dataset': metadata.get('dataset_name', 'unknown'),
153152
'total_samples': metadata.get('total_samples', 0),
154-
'success_rate': error_analysis.get('success_rate', 0.0)
153+
'success_rate': error_analysis.get('success_rate', 0.0),
154+
'extractor_version': metadata.get('version', 'unknown')
155155
}
156156

157157
# Add all available metrics from overall_metrics
@@ -170,7 +170,7 @@ def get_sort_key(row):
170170
# Write CSV file
171171
if csv_data:
172172
# Define field order: basic info first, then overall, then other metrics alphabetically
173-
basic_fields = ['extractor', 'dataset', 'total_samples', 'success_rate']
173+
basic_fields = ['extractor', 'dataset', 'total_samples', 'success_rate','extractor_version']
174174

175175
# Get all metric fields from the data
176176
all_fields = set()

webmainbench/evaluator/evaluator.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ class EvaluationResult:
2323
extractor_name: str
2424
timestamp: str
2525
total_samples: int
26+
version: str # 抽取器版本
2627

2728
# Overall metrics
2829
overall_metrics: Dict[str, float]
@@ -48,6 +49,7 @@ def to_dict(self) -> Dict[str, Any]:
4849
"extractor_name": self.extractor_name,
4950
"timestamp": self.timestamp,
5051
"total_samples": self.total_samples,
52+
"version": self.version
5153
},
5254
"overall_metrics": self.overall_metrics,
5355
"sample_results": self.sample_results,
@@ -72,6 +74,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "EvaluationResult":
7274
error_analysis=data.get("error_analysis"),
7375
extractor_config=data.get("extractor_config"),
7476
metric_config=data.get("metric_config"),
77+
version=metadata.get("version", "unknown"),
7578
)
7679

7780

@@ -182,6 +185,8 @@ def evaluate(self,
182185
error_analysis=error_analysis,
183186
extractor_config=extractor.get_config(),
184187
metric_config=self.metric_config,
188+
# 新增:传入抽取器版本(从extractor对象获取)
189+
version=getattr(extractor, 'version', 'unknown'),
185190
)
186191

187192
return evaluation_result

webmainbench/extractors/llm_webkit_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,7 @@ def process_logit(self, prompt_token_ids: List[int], input_ids: List[int], logit
155155
class LlmWebkitExtractor(BaseExtractor):
156156
"""Advanced LLM-WebKit extractor with intelligent content classification."""
157157

158-
version = "2.0.0"
158+
version = "4.0.1"
159159
description = "Advanced LLM-WebKit extractor with intelligent content classification"
160160

161161
# 分类提示模板

0 commit comments

Comments
 (0)