File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -804,7 +804,7 @@ def demo_multi_extraction():
804804 # 配置文件路径
805805 data_dir = Path ("../data" )
806806 # dataset_path = data_dir / "sample_dataset.jsonl"
807- dataset_path = "/home/lulindong/Pycharm_projects/cc/WebMainBench_2456_v2_WebMainBench_dataset_merge_with_llm_webkit .jsonl"
807+ dataset_path = "/home/lulindong/Pycharm_projects/cc/WebMainBench_2456_v3_WebMainBench_dataset_merge_with_llm_webkit .jsonl"
808808
809809 print (f"📂 数据集文件: { dataset_path } " )
810810
@@ -1090,17 +1090,16 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
10901090
10911091 print (f"✅ 详细结果已保存到: { results_path } " )
10921092 print (f"✅ CSV报告已保存到: { report_path } " )
1093-
10941093
10951094
10961095if __name__ == "__main__" :
10971096 try :
10981097 # demo_basic_mock_evaluation()
10991098 # demo_llm_webkit_evaluation() # 使用LLM-WebKit评测示例
1100- demo_llm_webkit_with_preprocessed_html_evaluation ()
1099+ # demo_llm_webkit_with_preprocessed_html_evaluation()
11011100 # demo_extractor_comparison()
11021101 # demo_dataset_with_extraction() # 演示保存带有抽取内容的数据集
1103- # demo_multi_extraction() # 演示多个抽取器同时评测
1102+ demo_multi_extraction () # 演示多个抽取器同时评测
11041103 print ("\n ✅ 示例运行完成!" )
11051104
11061105 except Exception as e :
Original file line number Diff line number Diff line change @@ -16,7 +16,7 @@ class TrafilaturaInferenceConfig:
1616 favor_precision : bool = True
1717 favor_recall : bool = True
1818 include_comments : bool = False
19- include_tables : bool = False
19+ include_tables : bool = True
2020 # 可根据需要添加更多trafilatura支持的参数
2121 include_images : bool = False
2222 include_links : bool = False
You can’t perform that action at this time.
0 commit comments