Skip to content

Commit c4f026f

Browse files
committed
feat: update llm-webkit extract
1 parent 07b095c commit c4f026f

1 file changed

Lines changed: 5 additions & 9 deletions

File tree

webmainbench/extractors/llm_webkit_extractor.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -370,20 +370,16 @@ def _load_vllm_model(self):
370370
trust_remote_code=True
371371
)
372372

373-
# vLLM配置
373+
# vLLM配置 - 参考ray_test_qa.py的简化配置
374374
model_kwargs = {
375375
"model": self.inference_config.model_path,
376376
"trust_remote_code": True,
377377
"dtype": self.inference_config.dtype,
378378
"tensor_parallel_size": self.inference_config.tensor_parallel_size,
379-
"max_model_len": self.inference_config.max_tokens,
380-
"max_num_batched_tokens": max(self.inference_config.max_tokens, 8192),
381-
"gpu_memory_utilization": self.inference_config.gpu_memory_utilization,
382-
"enforce_eager": self.inference_config.enforce_eager,
383-
"disable_custom_all_reduce": True,
384-
"load_format": "auto",
385379
}
386380

381+
print(f"🔧 vLLM配置: {model_kwargs}")
382+
387383
self.model = LLM(**model_kwargs)
388384

389385
# 初始化token状态管理器
@@ -397,8 +393,8 @@ def _load_vllm_model(self):
397393
print("✅ vLLM模型加载成功!")
398394

399395
except Exception as e:
400-
print(f"⚠️ vLLM加载失败,回退到transformers: {e}")
401-
self._load_transformers_model()
396+
print(f"vLLM加载失败: {e}")
397+
raise RuntimeError(f"vLLM模型加载失败: {e}")
402398

403399
def _create_prompt(self, simplified_html: str) -> str:
404400
"""创建分类提示."""

0 commit comments

Comments (0)