Skip to content

Commit 782c8ff

Browse files
committed
x
1 parent c4f026f commit 782c8ff

5 files changed

Lines changed: 52 additions & 113 deletions
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
extractor,dataset,total_samples,success_rate,overall,code_edit,formula_edit,table_TEDS,table_edit,text_edit
2-
llm-webkit,sample_dataset,4,0.75,0.8667,1.0,1.0,1.0,1.0,0.3333
2+
llm-webkit,sample_dataset,4,0.5,0.9,1.0,1.0,1.0,1.0,0.5

results/sample_dataset_llm-webkit_evaluation_results.json

Lines changed: 19 additions & 107 deletions
Large diffs are not rendered by default.

results/sample_dataset_with_llm-webkit_extraction.jsonl

Lines changed: 4 additions & 4 deletions
Large diffs are not rendered by default.

webmainbench/data/saver.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ def save_evaluation_results(results: Union["EvaluationResult", Dict[str, Any]],
9898
else:
9999
results_dict = results
100100

101+
# 移除extracted_content和extracted_content_list字段以减少文件大小
102+
results_dict = DataSaver._remove_content_fields(results_dict)
103+
101104
if format.lower() == "json":
102105
with open(file_path, 'w', encoding='utf-8') as f:
103106
json.dump(results_dict, f, indent=2, ensure_ascii=False)
@@ -265,6 +268,30 @@ def _save_jsonl_list(data_list: List[Dict[str, Any]], file_path: Union[str, Path
265268
json.dump(item, f, ensure_ascii=False)
266269
f.write('\n')
267270

271+
@staticmethod
def _remove_content_fields(data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``data`` without the extracted-content fields.

    Every ``extracted_content`` and ``extracted_content_list`` key is
    dropped, at any nesting depth, so that the saved results file stays
    small. The input object is never modified.

    The structure is rebuilt while walking it instead of deep-copying
    everything first and deleting afterwards: the dropped values are the
    large ones, so copying them only to discard them wastes time and
    memory.

    Args:
        data: Results structure made of nested dicts, lists and tuples.

    Returns:
        A new structure independent of ``data``. Dicts are returned as
        plain ``dict`` objects with the original key order; kept leaf
        values are deep-copied.
    """
    import copy

    dropped_keys = ('extracted_content', 'extracted_content_list')

    def strip(node):
        if isinstance(node, dict):
            # Skip the unwanted keys up front so their values are never copied.
            return {
                key: strip(value)
                for key, value in node.items()
                if key not in dropped_keys
            }
        if isinstance(node, list):
            return [strip(item) for item in node]
        # Plain tuples may hold dicts too; namedtuples are left to deepcopy
        # so that their type is preserved.
        if isinstance(node, tuple) and not hasattr(node, '_fields'):
            return tuple(strip(item) for item in node)
        # Anything else is a leaf: copy it so the result shares no mutable
        # state with the caller's data.
        return copy.deepcopy(node)

    return strip(data)
294+
268295
@staticmethod
269296
def append_intermediate_results(results: List[Dict[str, Any]],
270297
file_path: Union[str, Path]) -> None:

webmainbench/extractors/llm_webkit_extractor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ def _generate_with_transformers(self, prompt: str) -> str:
459459

460460
except Exception as e:
461461
print(f"⚠️ transformers生成失败: {e}")
462-
return "{}"
462+
raise RuntimeError(f"transformers生成失败: {e}")
463463

464464
def _extract_json_from_text(self, text: str) -> str:
465465
"""从生成的文本中提取JSON"""

0 commit comments

Comments
 (0)