1+ #!/usr/bin/env python3
2+ """
3+ WebMainBench 基本使用示例
4+ """
5+
6+ import json
7+ from pathlib import Path
8+
9+ # 导入 WebMainBench 模块
10+ from webmainbench import (
11+ DataLoader , DataSaver , BenchmarkDataset , DataSample ,
12+ ExtractorFactory , MainHTMLEvaluator ,
13+ format_results , setup_logging
14+ )
15+
16+
def load_benchdata(dataset_path: str) -> "BenchmarkDataset":
    """Load the benchmark dataset stored as a JSONL file.

    Args:
        dataset_path: Location of the JSONL dataset file.

    Returns:
        The object returned by ``DataLoader.load_jsonl`` (called with
        ``include_results=False``), with its ``name`` and ``description``
        attributes overwritten by the fixed labels used for this demo.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
    """
    dataset_path = Path(dataset_path)
    print(f"📂 数据集文件: {dataset_path} ")

    if not dataset_path.exists():
        print(f"❌ 数据文件不存在: {dataset_path} ")
        print("请确保已运行数据提取命令创建样本数据集")
        # Fail loudly instead of returning None: a None result contradicts the
        # declared return type and only postpones the failure to the caller's
        # first use of the dataset (e.g. ``len(dataset)``).
        raise FileNotFoundError(f"数据文件不存在: {dataset_path}")

    # Load the dataset and tag it with the labels used by this demo.
    dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
    dataset.name = "real_preprocessed_html_test"
    dataset.description = "基于真实数据的预处理HTML功能测试"
    return dataset
31+
32+
def load_extractor(model_path: str):
    """Create the ``dripper`` extractor for the given model checkpoint.

    Args:
        model_path: Value passed to the factory under the ``model_path``
            config key.

    Returns:
        Whatever ``ExtractorFactory.create`` returns for the ``"dripper"``
        extractor name.
    """
    dripper_config = {"model_path": model_path}
    return ExtractorFactory.create("dripper", config=dripper_config)
36+
37+
def save_results(result_file: Path, results: list[dict]):
    """Write ``results`` to ``result_file`` in JSON Lines format.

    Each element of ``results`` is serialized with ``json.dumps`` (non-ASCII
    characters are kept as-is) and written on its own line. An existing file
    is overwritten.

    Args:
        result_file: Destination path; opened for writing as UTF-8.
        results: JSON-serializable records, one per output line.
    """
    with result_file.open("w", encoding="utf-8") as f:
        # Terminate every record with a bare newline. A separator with a
        # trailing space would indent each following record and leave a
        # whitespace-only last line, which strict JSONL readers reject.
        f.writelines(
            json.dumps(res, ensure_ascii=False) + "\n" for res in results
        )
42+
43+
44+
def demo_llm_webkit_with_preprocessed_html_evaluation(
    dataset_path: str = "data/WebMainBench_llm-webkit_v1_WebMainBench_1827_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl",
    model_path: str = "/home/qiujiuantao/project/html-alg-project/dripper/0.6B_ckpt",
):
    """Run the preprocessed-HTML evaluation demo end to end.

    Loads the dataset, builds the extractor, evaluates it with
    ``MainHTMLEvaluator``, prints the overall metrics and timing statistics,
    and writes the results under the ``results`` directory.

    Args:
        dataset_path: JSONL dataset to evaluate. Defaults to the sample
            dataset path this demo was written for.
        model_path: Model checkpoint handed to ``load_extractor``. The default
            is a machine-specific path; pass your own when running elsewhere.

    Raises:
        RuntimeError: If the dataset loader returns ``None``.
    """
    print("\n === LLM-WebKit 预处理HTML功能演示 ===\n ")

    # Configure logging.
    setup_logging(level="INFO")

    # 1. Load the dataset that carries preprocessed HTML.
    print("1. 从真实数据集加载预处理HTML数据...")
    dataset = load_benchdata(dataset_path)
    if dataset is None:
        # Stop with a clear error rather than failing on len(None) below.
        raise RuntimeError(f"数据集加载失败: {dataset_path}")
    print(f"✅ 真实数据集加载成功,包含 {len(dataset)} 个样本")

    # 2. Create the extractor.
    print("2. 创建预处理HTML模式的LLM-WebKit抽取器...")
    extractor = load_extractor(model_path)
    print("✅ 抽取器创建成功")
    print("📋 配置信息:")
    print(" - 跳过LLM推理: 是(直接处理预处理HTML)")
    print()

    # 4. Run the evaluation over every sample (max_samples=None).
    print("4. 开始评测...")
    print("=" * 50)

    evaluator = MainHTMLEvaluator()
    result = evaluator.evaluate(
        dataset=dataset,
        extractor=extractor,
        max_samples=None,
    )

    # 5. Show the evaluation results.
    print("\n 5. 📊 预处理HTML模式评测结果:")
    print("=" * 50)

    results_dict = result.to_dict()
    metrics = results_dict.get('overall_metrics', {})

    # Overall metrics.
    print("\n 🏆 综合指标:")
    for key, value in metrics.items():
        print(f" {key} : {value:.4f} ")

    # Timing statistics, computed over successfully extracted samples only.
    print("\n ⚡ 性能统计:")
    sample_results = results_dict.get('sample_results', [])
    if sample_results:
        extraction_times = [
            s.get('extraction_time', 0)
            for s in sample_results
            if s.get('extraction_success')
        ]
        if extraction_times:
            avg_time = sum(extraction_times) / len(extraction_times)
            print(f" 平均提取时间: {avg_time:.3f} 秒")
            # A missing 'extraction_time' counts as 0, so the average can be
            # 0; skip the throughput line instead of dividing by zero.
            if avg_time > 0:
                print(f" 处理速度: {1 / avg_time:.1f} 样本/秒")

        success_count = sum(
            1 for s in sample_results if s.get('extraction_success', False)
        )
        print(f" 成功样本数: {success_count} /{len(dataset)} ")

    # 6. Save the results.
    print("\n 6. 💾 保存评测结果...")

    results_dir = Path("results")
    results_dir.mkdir(exist_ok=True)

    # Per-sample results as JSONL.
    jsonl_dataset_path = results_dir / f"{extractor.name} _preprocessed_html_dataset_with_results.jsonl"
    save_results(jsonl_dataset_path, result.sample_results)
    print(f"✅ 结果已保存到: {jsonl_dataset_path} ")

    print(f"✅ 带抽取结果的JSONL数据集已保存到: {jsonl_dataset_path} ")

    # Detailed results (JSON) and the summary report (CSV).
    results_path = results_dir / f"{extractor.name} _preprocessed_html_evaluation_results.json"
    report_path = results_dir / f"{extractor.name} _preprocessed_html_evaluation_report.csv"

    DataSaver.save_evaluation_results(result, results_path)
    DataSaver.save_summary_report(result, report_path)

    print(f"✅ 详细结果已保存到: {results_path} ")
    print(f"✅ CSV报告已保存到: {report_path} ")
128+
129+
130+
if __name__ == "__main__":
    # Script entry point: run the demo and report any failure together with
    # its traceback instead of letting the exception propagate.
    import traceback

    try:
        demo_llm_webkit_with_preprocessed_html_evaluation()
        print("\n ✅ 示例运行完成!")
    except Exception as exc:
        print(f"\n ❌ 运行出错: {exc} ")
        traceback.print_exc()
0 commit comments