@@ -370,20 +370,16 @@ def _load_vllm_model(self):
370370 trust_remote_code = True
371371 )
372372
373- # vLLM配置
373+ # vLLM配置 - 参考ray_test_qa.py的简化配置
374374 model_kwargs = {
375375 "model" : self .inference_config .model_path ,
376376 "trust_remote_code" : True ,
377377 "dtype" : self .inference_config .dtype ,
378378 "tensor_parallel_size" : self .inference_config .tensor_parallel_size ,
379- "max_model_len" : self .inference_config .max_tokens ,
380- "max_num_batched_tokens" : max (self .inference_config .max_tokens , 8192 ),
381- "gpu_memory_utilization" : self .inference_config .gpu_memory_utilization ,
382- "enforce_eager" : self .inference_config .enforce_eager ,
383- "disable_custom_all_reduce" : True ,
384- "load_format" : "auto" ,
385379 }
386380
381+ print (f"🔧 vLLM配置: { model_kwargs } " )
382+
387383 self .model = LLM (** model_kwargs )
388384
389385 # 初始化token状态管理器
@@ -397,8 +393,8 @@ def _load_vllm_model(self):
397393 print ("✅ vLLM模型加载成功!" )
398394
399395 except Exception as e :
400- print (f"⚠️ vLLM加载失败,回退到transformers : { e } " )
401- self . _load_transformers_model ( )
396+ print (f"❌ vLLM加载失败: { e } " )
397+ raise RuntimeError ( f"vLLM模型加载失败: { e } " )
402398
403399 def _create_prompt (self , simplified_html : str ) -> str :
404400 """创建分类提示."""
0 commit comments