Browse Source

feat: optimize generation of conversation title (#1075)

tags/0.3.20
takatost 2 years ago
parent
commit
df6604a734
No account linked to committer's email address

+ 2
- 1
.github/workflows/check_no_chinese_comments.py View File

def main(): def main():
has_chinese = False has_chinese = False
excluded_files = ["model_template.py", 'stopwords.py', 'commands.py', excluded_files = ["model_template.py", 'stopwords.py', 'commands.py',
'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py']
'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py',
'prompts.py']


for root, _, files in os.walk("."): for root, _, files in os.walk("."):
for file in files: for file in files:

+ 10
- 2
api/core/generator/llm_generator.py View File

import json
import logging import logging


from langchain.schema import OutputParserException from langchain.schema import OutputParserException
if len(query) > 2000: if len(query) > 2000:
query = query[:300] + "...[TRUNCATED]..." + query[-300:] query = query[:300] + "...[TRUNCATED]..." + query[-300:]


prompt = prompt.format(query=query)
query = query.replace("\n", " ")

prompt += query + "\n"


model_instance = ModelFactory.get_text_generation_model( model_instance = ModelFactory.get_text_generation_model(
tenant_id=tenant_id, tenant_id=tenant_id,
model_kwargs=ModelKwargs( model_kwargs=ModelKwargs(
max_tokens=50
temperature=1,
max_tokens=100
) )
) )


prompts = [PromptMessage(content=prompt)] prompts = [PromptMessage(content=prompt)]
response = model_instance.run(prompts) response = model_instance.run(prompts)
answer = response.content answer = response.content

result_dict = json.loads(answer)
answer = result_dict['Your Output']

return answer.strip() return answer.strip()


@classmethod @classmethod

+ 61
- 7
api/core/prompt/prompts.py View File

# Title-generation template. It carries a single ``{query}`` placeholder for
# the user's message; the lines below are joined with newlines and the text
# ends on the ``title:`` cue, with no trailing newline.
CONVERSATION_TITLE_PROMPT = "\n".join([
    "Human:{query}",
    "-----",
    "Help me summarize the intent of what the human said and provide a title, the title should not exceed 20 words.",
    "If what the human said is conducted in English, you should only return an English title.",
    "If what the human said is conducted in Chinese, you should only return a Chinese title.",
    "title:",
])
# Written by YORKI MINAKO🤡
# Few-shot prompt used to generate a conversation title.
#
# How it is consumed (see api/core/generator/llm_generator.py):
#   * the user's query is appended after the trailing "User Input:" line by
#     plain concatenation;
#   * the model's reply is parsed with ``json.loads`` and the value under the
#     "Your Output" key becomes the title.
#
# Because the reply is parsed as JSON, every example object below must itself
# be valid JSON -- the model imitates what it is shown. Inner double quotes in
# an example value are therefore written as ``\\"`` in this (non-raw) Python
# string, so the rendered prompt contains the JSON escape ``\"`` rather than a
# bare quote that would terminate the JSON string early.
#
# NOTE: the text contains literal ``{``/``}`` characters, so it must not be
# passed through ``str.format``.
CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what the user's input language actually is.
Notice: the language type user using is abundant, can be English, Chinese, Español, Arabic, Japanese, and etc.
MAKE SURE your output is the SAME language as the user's input!
Your output is restricted only to: (Input language) Intention + Subject(short as possible)

Tip: When the user's question is directed at you (the language model), you can add an emoji to make it more fun.


example 1:
User Input: hi, yesterday i had some burgers.
{
"Language Type": "The user's input is pure English",
"Your Reasoning": "The language of my output must be pure English.",
"Your Output": "sharing yesterday's food"
}

example 2:
User Input: hello
{
"Language Type": "The user's input is written in pure English",
"Your Reasoning": "The language of my output must be pure English.",
"Your Output": "Greeting myself☺️"
}


example 3:
User Input: why mmap file: oom
{
"Language Type": "The user's input is written in pure English",
"Your Reasoning": "The language of my output must be pure English.",
"Your Output": "Asking about the reason for mmap file: oom"
}


example 4:
User Input: www.convinceme.yesterday-you-ate-seafood.tv讲了什么?
{
"Language Type": "The user's input English-Chinese mixed",
"Your Reasoning": "The English-part is an URL, the main intention is still written in Chinese, so the language of my output must be using Chinese.",
"Your Output": "询问网站www.convinceme.yesterday-you-ate-seafood.tv"
}

example 5:
User Input: why小红的年龄is老than小明?
{
"Language Type": "The user's input is English-Chinese mixed",
"Your Reasoning": "The English parts are subjective particles, the main intention is written in Chinese, besides, Chinese occupies a greater \\"actual meaning\\" than English, so the language of my output must be using Chinese.",
"Your Output": "询问小红和小明的年龄"
}

example 6:
User Input: yo, 你今天咋样?
{
"Language Type": "The user's input is English-Chinese mixed",
"Your Reasoning": "The English-part is a subjective particle, the main intention is written in Chinese, so the language of my output must be using Chinese.",
"Your Output": "查询今日我的状态☺️"
}

User Input:
"""


CONVERSATION_SUMMARY_PROMPT = ( CONVERSATION_SUMMARY_PROMPT = (
"Please generate a short summary of the following conversation.\n" "Please generate a short summary of the following conversation.\n"

Loading…
Cancel
Save