Skip to content

Commit 6ace3ce

Browse files
authored
Merge pull request #12 from e06084/main
feat: add save_dataset_with_extraction
2 parents e3acc26 + 917a7c8 commit 6ace3ce

10 files changed

Lines changed: 627 additions & 57 deletions

examples/basic_usage.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,11 +699,76 @@ def hello_world():
699699
print("✅ LLM-WebKit 6项指标评测完成!")
700700

701701

702+
def demo_dataset_with_extraction(
    *,
    dataset_path=None,
    model_path="/Users/chupei/model/checkpoint-3296",
    results_dir="results",
):
    """Demonstrate saving a dataset enriched with extracted content.

    Loads a JSONL dataset, evaluates it with an extractor, then writes
    three artifacts into ``results_dir``: the dataset with the extraction
    fields attached, the evaluation results (JSON) and a summary report
    (CSV).

    Args:
        dataset_path: Path of the JSONL dataset to load. Defaults to
            ``data/sample_dataset.jsonl`` when ``None``.
        model_path: Checkpoint path passed to the ``llm-webkit`` extractor
            as its ``model_path`` config entry. The default is the
            machine-specific path this example was written with; pass
            your own checkpoint location to run it elsewhere.
        results_dir: Directory that receives the output files.
    """
    print("=== 演示:保存带有抽取内容的数据集 ===")

    from webmainbench import DataLoader, DataSaver, Evaluator, ExtractorFactory
    from pathlib import Path

    # Load the dataset from disk.
    if dataset_path is None:
        dataset_path = Path("data") / "sample_dataset.jsonl"

    print(f"📂 从文件加载数据集: {dataset_path}")
    dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
    dataset.name = "WebMainBench_with_extraction"
    dataset.description = "演示抽取内容保存的测试数据集"

    print(f"📊 加载数据集完成,包含 {len(dataset.samples)} 个样本")

    # Create the extractor. Falling back to the mock extractor is a
    # deliberate best-effort so the demo still runs without the model.
    try:
        extractor = ExtractorFactory.create("llm-webkit", config={"model_path": model_path})
        print(f"🤖 使用抽取器: {extractor.name}")
    except Exception as e:
        print(f"⚠️ LLM-WebKit抽取器创建失败,使用mock抽取器: {e}")
        extractor = ExtractorFactory.create("mock")

    # Run the evaluation.
    evaluator = Evaluator()
    result = evaluator.evaluate(dataset, extractor)

    print(f"✅ 评测完成,总体得分: {result.overall_metrics.get('overall', 0):.4f}")

    # Save the dataset together with the extracted content.
    results_dir = Path(results_dir)
    # Make sure the output directory exists before anything is written;
    # this is a no-op when it is already there.
    results_dir.mkdir(parents=True, exist_ok=True)
    enriched_dataset_path = results_dir / f"{dataset.name}_with_{extractor.name}_extraction.jsonl"

    DataSaver.save_dataset_with_extraction(
        results=result,
        dataset=dataset,
        file_path=enriched_dataset_path,
        extractor_name=extractor.name,
    )

    print(f"💾 已保存带有抽取内容的数据集到: {enriched_dataset_path}")

    # Save the evaluation results and the summary report.
    evaluation_results_path = results_dir / f"{dataset.name}_{extractor.name}_evaluation_results.json"
    summary_report_path = results_dir / f"{dataset.name}_{extractor.name}_evaluation_report.csv"

    DataSaver.save_evaluation_results(result, evaluation_results_path)
    DataSaver.save_summary_report(result, summary_report_path)

    print(f"📊 已保存评测结果到: {evaluation_results_path}")
    print(f"📈 已保存摘要报告到: {summary_report_path}")

    # List the fields that were added to the saved dataset.
    print("\n📋 保存的新字段包括:")
    print(f" - {extractor.name}_content: 抽取的内容")
    print(f" - {extractor.name}_content_list: 抽取的结构化内容列表")
    print(f" - {extractor.name}_success: 抽取是否成功")
    print(f" - {extractor.name}_time: 抽取耗时")
    print(f" - {extractor.name}_*_score: 各项指标分数")
765+
702766
if __name__ == "__main__":
703767
try:
704768
demo_basic_mock_evaluation()
705769
demo_llm_webkit_evaluation() # 使用LLM-WebKit评测示例
706770
demo_extractor_comparison()
771+
demo_dataset_with_extraction() # 演示保存带有抽取内容的数据集
707772
print("\n✅ 示例运行完成!")
708773

709774
except Exception as e:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
extractor,dataset,total_samples,success_rate,overall,code_edit,formula_edit,table_TEDS,table_edit,text_edit
2+
llm-webkit,WebMainBench_with_extraction,4,1.0,0.8597,0.6215,1.0,1.0,0.9073,0.7695

0 commit comments

Comments
 (0)