 kwargs["knowledge"] = "\n".join(knowledges)
 gen_conf = dialog.llm_setting

-msg = [{"role": m["role"], "content": m["content"]}
-       for m in messages if m["role"] != "system"]
+msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
+msg.extend([{"role": m["role"], "content": m["content"]}
+            for m in messages if m["role"] != "system"])
 used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
+assert len(msg) >= 2, f"message_fit_in has bug: {msg}"

 if "max_tokens" in gen_conf:
     gen_conf["max_tokens"] = min(
         gen_conf["max_tokens"],
         max_tokens - used_token_count)

 if stream:
     answer = ""
-    for ans in chat_mdl.chat_streamly(prompt_config["system"].format(**kwargs), msg, gen_conf):
+    for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf):
         answer = ans
         yield {"answer": answer, "reference": {}}
     yield decorate_answer(answer)
 else:
     answer = chat_mdl.chat(
-        prompt_config["system"].format(
-            **kwargs), msg, gen_conf)
+        msg[0]["content"], msg[1:], gen_conf)
     chat_logger.info("User: {}|Assistant: {}".format(
         msg[-1]["content"], answer))
     yield decorate_answer(answer)
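What the change does: previously the formatted system prompt was passed to chat() / chat_streamly() as a separate argument, so message_fit_in() trimmed only the user/assistant turns and the system prompt was never counted against the model's token window. After the change the system prompt is prepended as msg[0] before message_fit_in() caps the whole list at 97% of max_tokens, the new assert guards that the system message plus at least one turn survive the trim, and both call sites read the prompt back from msg[0]["content"] and pass the remaining turns as msg[1:]. The sketch below only illustrates the trimming idea under those assumptions; it is not the real message_fit_in, and count_tokens is a hypothetical stand-in for an actual tokenizer.

def count_tokens(text):
    # Hypothetical stand-in: assume roughly one token per four characters.
    return max(1, len(text) // 4)

def fit_in_sketch(msg, max_length):
    # Keep msg[0] (the system prompt) and as many of the most recent
    # turns as fit; drop the oldest non-system turns first.
    used = sum(count_tokens(m["content"]) for m in msg)
    if used <= max_length:
        return used, msg
    budget = max_length - count_tokens(msg[0]["content"])
    tail = []
    for m in reversed(msg[1:]):
        cost = count_tokens(m["content"])
        if cost > budget:
            break
        tail.append(m)
        budget -= cost
    kept = [msg[0]] + list(reversed(tail))
    return sum(count_tokens(m["content"]) for m in kept), kept

Called with max_length = int(max_tokens * 0.97), a trim like this is what makes the later split into msg[0]["content"] (system prompt) and msg[1:] (history) safe at both call sites.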