Przeglądaj źródła

Limit the system context length of conversation messages (#962)

### What problem does this PR solve?

Addresses issue #951.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.7.0
KevinHuSh 1 rok temu
rodzic
commit
daa4799385
No account linked to committer's email address
1 zmieniony plik: 8 dodań i 5 usunięć
  1. api/db/services/dialog_service.py (+8, -5)

api/db/services/dialog_service.py (+8, -5) Wyświetl plik

kwargs["knowledge"] = "\n".join(knowledges) kwargs["knowledge"] = "\n".join(knowledges)
gen_conf = dialog.llm_setting gen_conf = dialog.llm_setting
msg = [{"role": m["role"], "content": m["content"]}
for m in messages if m["role"] != "system"]
msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
msg.extend([{"role": m["role"], "content": m["content"]}
for m in messages if m["role"] != "system"])
used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97)) used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
assert len(msg) >= 2, f"message_fit_in has bug: {msg}"
if "max_tokens" in gen_conf: if "max_tokens" in gen_conf:
gen_conf["max_tokens"] = min( gen_conf["max_tokens"] = min(
gen_conf["max_tokens"], gen_conf["max_tokens"],
if stream: if stream:
answer = "" answer = ""
for ans in chat_mdl.chat_streamly(prompt_config["system"].format(**kwargs), msg, gen_conf):
for ans in chat_mdl.chat_streamly(msg[0]["content"], msg[1:], gen_conf):
answer = ans answer = ans
yield {"answer": answer, "reference": {}} yield {"answer": answer, "reference": {}}
yield decorate_answer(answer) yield decorate_answer(answer)
else: else:
answer = chat_mdl.chat( answer = chat_mdl.chat(
prompt_config["system"].format(
**kwargs), msg, gen_conf)
msg[0]["content"], msg[1:], gen_conf)
chat_logger.info("User: {}|Assistant: {}".format( chat_logger.info("User: {}|Assistant: {}".format(
msg[-1]["content"], answer)) msg[-1]["content"], answer))
yield decorate_answer(answer) yield decorate_answer(answer)

Ładowanie…
Anuluj
Zapisz