@@ -738,7 +738,7 @@ def demo_multi_extraction():
738738 # 配置文件路径
739739 data_dir = Path ("../data" )
740740 # dataset_path = data_dir / "sample_dataset.jsonl"
741- dataset_path = "/home/lulindong/Pycharm_projects/cc/WebMainBench_1904_v1_WebMainBench_dataset_merge_with_llm_webkit .jsonl"
741+ dataset_path = "/home/lulindong/Pycharm_projects/cc/1827_split_jsonl/1-200 .jsonl"
742742
743743 print (f"📂 数据集文件: { dataset_path } " )
744744
@@ -889,7 +889,7 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
889889
890890 # 1. 从真实数据集加载包含预处理HTML的数据
891891 print ("1. 从真实数据集加载预处理HTML数据..." )
892- dataset_path = Path ("data/WebMainBench_1827_v1_WebMainBench_dataset_merge_with_llm_webkit .jsonl" )
892+ dataset_path = Path ("/home/lulindong/Pycharm_projects/cc/1827_split_jsonl/1-200 .jsonl" )
893893 print (f"📂 数据集文件: { dataset_path } " )
894894
895895 # 加载数据集
@@ -939,6 +939,7 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
939939
940940 print (f"\n 📝 内容提取质量:" )
941941 print (f" text_edit: { metrics .get ('text_edit' , 0 ):.4f} " )
942+ print (f" formula_edit: { metrics .get ('formula_edit' , 0 ):.4f} " )
942943 print (f" code_edit: { metrics .get ('code_edit' , 0 ):.4f} " )
943944 print (f" table_edit: { metrics .get ('table_edit' , 0 ):.4f} " )
944945 print (f" table_TEDS: { metrics .get ('table_TEDS' , 0 ):.4f} " )
@@ -984,10 +985,10 @@ def demo_llm_webkit_with_preprocessed_html_evaluation():
984985 try :
985986 # demo_basic_mock_evaluation()
986987 # demo_llm_webkit_evaluation() # 使用LLM-WebKit评测示例
987- demo_llm_webkit_with_preprocessed_html_evaluation ()
988+ # demo_llm_webkit_with_preprocessed_html_evaluation()
988989 # demo_extractor_comparison()
989990 # demo_dataset_with_extraction() # 演示保存带有抽取内容的数据集
990- # demo_multi_extraction() # 演示多个抽取器同时评测
991+ demo_multi_extraction () # 演示多个抽取器同时评测
991992 print ("\n ✅ 示例运行完成!" )
992993
993994 except Exception as e :
0 commit comments