@@ -699,11 +699,76 @@ def hello_world():
699699 print ("✅ LLM-WebKit 6项指标评测完成!" )
700700
701701
def demo_dataset_with_extraction():
    """Demo: evaluate a dataset and save it together with the extracted content.

    Workflow:
      1. Load ``data/sample_dataset.jsonl`` with ``DataLoader.load_jsonl``.
      2. Create the ``llm-webkit`` extractor; if creation raises, fall back
         to the ``mock`` extractor so the demo can still run.
      3. Evaluate the dataset with ``Evaluator``.
      4. Under ``results/`` write the enriched dataset (JSONL), the
         evaluation results (JSON) and the summary report (CSV).

    Takes no arguments and returns nothing; progress is reported via ``print``.
    """
    print("=== 演示:保存带有抽取内容的数据集 ===")

    from pathlib import Path

    from webmainbench import DataLoader, DataSaver, Evaluator, ExtractorFactory

    # Load the dataset from disk (path is relative to the working directory).
    dataset_path = Path("data") / "sample_dataset.jsonl"

    print(f"📂 从文件加载数据集: {dataset_path}")
    dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
    dataset.name = "WebMainBench_with_extraction"
    dataset.description = "演示抽取内容保存的测试数据集"

    print(f"📊 加载数据集完成,包含 {len(dataset.samples)} 个样本")

    # Build the extractor. The broad catch is intentional: any failure to
    # set up LLM-WebKit (missing model, missing dependency, ...) should
    # degrade to the mock extractor rather than abort the demo.
    try:
        extractor = ExtractorFactory.create(
            "llm-webkit",
            config={"model_path": "/Users/chupei/model/checkpoint-3296"},
        )
        print(f"🤖 使用抽取器: {extractor.name}")
    except Exception as e:
        print(f"⚠️ LLM-WebKit抽取器创建失败,使用mock抽取器: {e}")
        extractor = ExtractorFactory.create("mock")

    # Run the evaluation.
    evaluator = Evaluator()
    result = evaluator.evaluate(dataset, extractor)

    print(f"✅ 评测完成,总体得分: {result.overall_metrics.get('overall', 0):.4f}")

    # Make sure the output directory exists before anything is written to it;
    # this is a no-op when the directory is already there.
    results_dir = Path("results")
    results_dir.mkdir(parents=True, exist_ok=True)

    # Save the dataset enriched with the extractor's output.
    enriched_dataset_path = (
        results_dir / f"{dataset.name}_with_{extractor.name}_extraction.jsonl"
    )

    DataSaver.save_dataset_with_extraction(
        results=result,
        dataset=dataset,
        file_path=enriched_dataset_path,
        extractor_name=extractor.name,
    )

    print(f"💾 已保存带有抽取内容的数据集到: {enriched_dataset_path}")

    # Save the evaluation results and the summary report.
    evaluation_results_path = (
        results_dir / f"{dataset.name}_{extractor.name}_evaluation_results.json"
    )
    summary_report_path = (
        results_dir / f"{dataset.name}_{extractor.name}_evaluation_report.csv"
    )

    DataSaver.save_evaluation_results(result, evaluation_results_path)
    DataSaver.save_summary_report(result, summary_report_path)

    print(f"📊 已保存评测结果到: {evaluation_results_path}")
    print(f"📈 已保存摘要报告到: {summary_report_path}")

    # Describe the per-extractor fields added to the saved dataset.
    # NOTE(review): this list is informational text only; confirm it matches
    # what DataSaver.save_dataset_with_extraction actually writes.
    print("\n 📋 保存的新字段包括:")
    print(f" - {extractor.name}_content: 抽取的内容")
    print(f" - {extractor.name}_content_list: 抽取的结构化内容列表")
    print(f" - {extractor.name}_success: 抽取是否成功")
    print(f" - {extractor.name}_time: 抽取耗时")
    print(f" - {extractor.name}_*_score: 各项指标分数")
765+
702766if __name__ == "__main__" :
703767 try :
704768 demo_basic_mock_evaluation ()
705769 demo_llm_webkit_evaluation () # 使用LLM-WebKit评测示例
706770 demo_extractor_comparison ()
771+ demo_dataset_with_extraction () # 演示保存带有抽取内容的数据集
707772 print ("\n ✅ 示例运行完成!" )
708773
709774 except Exception as e :
0 commit comments