If you are starting from an old image, update vLLM first.
Deploying the service works exactly as before; the only change is one extra flag, --task=classify:
python3 -m vllm.entrypoints.openai.api_server \
--model /your_model_path \
--tokenizer /still_your_path \
--tensor-parallel-size=1 --trust-remote-code \
--max-model-len=32000 \
--task=classify \
--served-model-name=your_model_name \
--port=8000
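Once the server is up, a quick sanity check helps before wiring in the async client. This is a minimal sketch, assuming the server runs locally on port 8000 as launched above and that your vLLM build serves the classification API at /classify (the route enabled by --task=classify); the test sentence is just a placeholder.

import requests

# Hypothetical smoke test; host and port follow the launch command above.
resp = requests.post(
    "http://localhost:8000/classify",
    json={"model": "your_model_name", "input": ["a single test sentence"]},
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["data"][0]["probs"])  # per-class probabilities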
Client code for sending requests:
import asyncio
import aiohttp

# vLLM's classification endpoint; the port matches the launch command above.
CLASSIFY_API_URL = "http://localhost:8000/classify"

async def call_vllm_api_batch_async(prompts, session):
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": "your_model_name",
        "input": prompts,
        "temperature": 0.01,  # I use 0.01
    }
    async with session.post(CLASSIFY_API_URL, json=payload, headers=headers) as response:
        response.raise_for_status()
        data = await response.json()
        return data.get("data", [])
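For reference, each element of the returned data list looks roughly like this. This is a sketch, not the exact schema: the driver below relies only on the probs field, and the other key is illustrative and may vary by vLLM version.

# Illustrative shape of one classification result; only "probs" is relied on.
example_item = {
    "index": 0,                   # position of the input within the batch (assumed)
    "probs": [0.02, 0.95, 0.03],  # per-class probabilities
}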
The batching driver, wrapped as a coroutine so it can run on its own:

async def classify_all(final_infs_with_clean_q, prompt, batch_size=100):
    tasks = []
    results = []
    async with aiohttp.ClientSession() as session:
        for i in range(0, len(final_infs_with_clean_q), batch_size):
            batch = final_infs_with_clean_q[i : i + batch_size]
            # item[1] holds the cleaned question text
            input_batch = [f"{prompt}:\n\n{item[1]}" for item in batch]
            if not input_batch:
                continue
            tasks.append(call_vllm_api_batch_async(input_batch, session))
        if not tasks:
            return []
        # Fire all batches concurrently while the session is still open.
        api_outputs = await asyncio.gather(*tasks)
    for batch_outputs in api_outputs:
        for output in batch_outputs:
            probs = output["probs"]
            max_prob = max(probs)
            max_index = probs.index(max_prob)
            results.append({
                "score": max_prob,   # probability of the predicted class
                "label": max_index,  # index of the highest-probability class
            })
    return results
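To drive the whole thing end to end (a sketch; the item tuples and the instruction string are placeholders, not values from the original code):

# Hypothetical driver: data items follow the (id, cleaned_question) shape
# that item[1] above assumes; the instruction text is illustrative.
data = [
    ("id-1", "What is the capital of France?"),
    ("id-2", "How do I sort a list in Python?"),
]
instruction = "Classify the following question"

results = asyncio.run(classify_all(data, instruction))
for r in results:
    print(r["label"], r["score"])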