ソースを参照

Optimize prompt. (#5541)

### What problem does this PR solve?

#5526

### Type of change

- [x] Performance Improvement
tags/v0.17.1
Kevin Hu 8ヶ月前
コミット
7a81fa00e9
コミッターのメールアドレスに関連付けられたアカウントが存在しません
1個のファイルの変更2行の追加2行の削除
  1. 2
    2
      api/db/services/dialog_service.py

+ 2
- 2
api/db/services/dialog_service.py ファイルの表示

used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97)) used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
assert len(msg) >= 2, f"message_fit_in has bug: {msg}" assert len(msg) >= 2, f"message_fit_in has bug: {msg}"
prompt = msg[0]["content"] prompt = msg[0]["content"]
prompt += "\n\n### Query:\n%s" % " ".join(questions)


if "max_tokens" in gen_conf: if "max_tokens" in gen_conf:
gen_conf["max_tokens"] = min( gen_conf["max_tokens"] = min(
max_tokens - used_token_count) max_tokens - used_token_count)


def decorate_answer(answer): def decorate_answer(answer):
nonlocal prompt_config, knowledges, kwargs, kbinfos, prompt, retrieval_ts
nonlocal prompt_config, knowledges, kwargs, kbinfos, prompt, retrieval_ts, questions


refs = [] refs = []
ans = answer.split("</think>") ans = answer.split("</think>")
retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000 retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000
generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000 generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000


prompt += "\n\n### Query:\n%s" % " ".join(questions)
prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms" prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
return {"answer": think+answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()} return {"answer": think+answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()}



読み込み中…
キャンセル
保存