```
def chat(client, messages, model):
    """Stream a chat completion, echoing tokens to stdout as they arrive.

    Args:
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        messages: List of chat message dicts to send.
        model: Model name to use.

    Returns:
        The complete assistant reply as a single string.
    """
    print()
    start = time.time()
    answer = []
    for chunk in client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
        # BUG FIX: the keyword was misspelled 'stream_optiOns' — Python
        # keyword arguments are case-sensitive, so the call raised
        # TypeError. With the correct 'stream_options' and
        # include_usage=True, the stream's final chunk carries token-usage
        # statistics.
        stream_options={'include_usage': True},
        temperature=0,
    ):
        # The final usage-only chunk has an empty 'choices' list; skip the
        # content handling, but it stays bound to 'chunk' so usage can be
        # read after the loop.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content or ''
        # Suppress leading whitespace-only deltas until real content starts.
        if answer or piece.strip():
            answer.append(piece)
            print(piece, end='', flush=True)
    end = time.time()
    # 'chunk' is the last item yielded — the usage chunk, present because
    # include_usage=True was requested above.
    count = chunk.usage.total_tokens
    print(f'\n\n{end - start:.2f} secs, {count} tokens')
    return ''.join(answer)
```
最新版客户端,加上参数 stream_options={'include_usage': True}(注意拼写:是 stream_options,不是 stream_optiOns),那么返回的最后一个 chunk 会带有 token 消耗数量。