Skip to content

Commit 27245c7

Browse files
authored
Merge pull request #26 from pekopoke/dev
Dev: add ground-truth (gt) and predicted (pre) code, formula, table, and text fields to the result JSONL
2 parents 0051ce9 + 2a8a7e2 commit 27245c7

3 files changed

Lines changed: 32 additions & 10 deletions

File tree

examples/basic_usage.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,7 @@ def demo_multi_extraction():
804804
# 配置文件路径
805805
data_dir = Path("../data")
806806
# dataset_path = data_dir / "sample_dataset.jsonl"
807-
dataset_path = "/home/lulindong/Pycharm_projects/cc/WebMainBench_llm-webkit_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl"
807+
dataset_path = "/home/lulindong/Pycharm_projects/cc/WebMainBench_1904_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl"
808808

809809
print(f"📂 数据集文件: {dataset_path}")
810810

@@ -957,7 +957,7 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
957957
print("1. 从真实数据集加载预处理HTML数据...")
958958

959959
# 使用DataLoader加载真实的样本数据
960-
dataset_path = Path("data/WebMainBench_dataset_sample2.jsonl")
960+
dataset_path = Path("/home/lulindong/Pycharm_projects/cc/WebMainBench_1904_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl")
961961
print(f"📂 数据集文件: {dataset_path}")
962962

963963
if not dataset_path.exists():
@@ -969,7 +969,6 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
969969
dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
970970
dataset.name = "real_preprocessed_html_test"
971971
dataset.description = "基于真实数据的预处理HTML功能测试"
972-
973972

974973
print(f"✅ 真实数据集加载成功,包含 {len(dataset)} 个样本")
975974
print("📋 真实数据样本包含:")
@@ -1069,15 +1068,22 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
10691068
print(f" ⏱️ 提取时间: {sample_result.get('extraction_time', 0):.3f}秒")
10701069
else:
10711070
print(f" ❌ 提取失败")
1072-
10731071
# 7. 保存结果
10741072
print(f"\n7. 💾 保存评测结果...")
10751073

10761074
results_dir = Path("results")
10771075
results_dir.mkdir(exist_ok=True)
1078-
1079-
results_path = results_dir / "preprocessed_html_evaluation_results.json"
1080-
report_path = results_dir / "preprocessed_html_evaluation_report.csv"
1076+
# 新增:保存带抽取结果的增强数据集(JSONL格式)
1077+
jsonl_dataset_path = results_dir / f"{extractor.name}_preprocessed_html_dataset_with_results.jsonl"
1078+
DataSaver.save_dataset_with_extraction(
1079+
results=result,
1080+
dataset=dataset, # 原始数据集对象
1081+
file_path=jsonl_dataset_path,
1082+
extractor_name="llm-webkit" # 抽取器名称前缀
1083+
)
1084+
print(f"✅ 带抽取结果的JSONL数据集已保存到: {jsonl_dataset_path}")
1085+
results_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_results.json"
1086+
report_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_report.csv"
10811087

10821088
DataSaver.save_evaluation_results(result, results_path)
10831089
DataSaver.save_summary_report(result, report_path)
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
extractor,dataset,total_samples,success_rate,overall,code_edit,formula_edit,table_TEDS,table_edit,text_edit
2-
llm-webkit,real_preprocessed_html_test,2,1.0,0.0091,0.0,0.0,0.0,0.0,0.0453
1+
extractor,dataset,total_samples,success_rate,overall,code_edit,formula_edit,table_TEDS,table_edit,text_edit
2+
llm-webkit,real_preprocessed_html_test,2,1.0,0.0091,0.0,0.0,0.0,0.0,0.0453

webmainbench/data/saver.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ def save_dataset_with_extraction(results: Union["EvaluationResult", Dict[str, An
239239

240240
# Add extraction results if available
241241
extraction_result = extraction_map.get(sample.id)
242+
from webmainbench.metrics.base import BaseMetric
242243
if extraction_result:
243244
# Add extracted content with extractor name prefix
244245
sample_dict[f'{extractor_name}_content'] = extraction_result.get('extracted_content', '')
@@ -251,7 +252,22 @@ def save_dataset_with_extraction(results: Union["EvaluationResult", Dict[str, An
251252
for metric_name, metric_data in metrics.items():
252253
if isinstance(metric_data, dict) and metric_data.get('success', False):
253254
sample_dict[f'{extractor_name}_{metric_name}_score'] = metric_data.get('score', 0)
254-
255+
256+
# 解析预测值(predicted)
257+
predicted_content = extraction_result.get('extracted_content', '')
258+
predicted_parts = BaseMetric._extract_from_markdown(predicted_content) # 关键:解析预测内容
259+
for part_type in ['code', 'formula', 'table', 'text']:
260+
sample_dict[f'{extractor_name}_predicted_{part_type}'] = predicted_parts.get(part_type, '')
261+
262+
# 解析真实值(groundtruth)
263+
groundtruth_content = sample_dict.get('groundtruth_content', '')
264+
groundtruth_parts = BaseMetric._extract_from_markdown(groundtruth_content) # 关键:解析真实内容
265+
for part_type in ['code', 'formula', 'table', 'text']:
266+
sample_dict[f'{extractor_name}_groundtruth_{part_type}'] = groundtruth_parts.get(part_type,
267+
'')
268+
269+
270+
255271
enriched_samples.append(sample_dict)
256272

257273
# Save as JSONL

0 commit comments

Comments
 (0)