
Fix: migrate deprecated Langfuse API from v2 to v3 (#9204)

### What problem does this PR solve?

Fixes the following error:

```bash
'Langfuse' object has no attribute 'trace'
```
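
Under the hood: the Langfuse v2 Python SDK created traces with `langfuse.trace(...)` and hung generations off the returned object; the v3 SDK removed that attribute, so any code still calling it raises the error above. The replacement pattern applied throughout this PR, as a minimal sketch (client arguments and the model name are illustrative):

```python
from langfuse import Langfuse

langfuse = Langfuse(public_key="pk-...", secret_key="sk-...", host="https://cloud.langfuse.com")

# v2 (removed in v3): raises AttributeError
# trace = langfuse.trace(name="my-trace")
# generation = trace.generation(name="chat", model="my-model", input={"prompt": "..."})

# v3: create a trace id up front and pass it as trace_context
trace_id = langfuse.create_trace_id()
generation = langfuse.start_generation(
    trace_context={"trace_id": trace_id},
    name="chat",
    model="my-model",
    input={"prompt": "..."},
)
generation.update(output={"answer": "..."})  # output now goes through update()
generation.end()  # end() closes the observation and takes no output
```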

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.20.1
Yongteng Lei, 3 months ago
Commit 52a349349d

2 files changed, 42 insertions(+), 30 deletions(-)
1. api/db/services/dialog_service.py (+10, -6)
2. api/db/services/llm_service.py (+32, -24)

### api/db/services/dialog_service.py (+10, -6)

```diff
 check_llm_ts = timer()

 langfuse_tracer = None
+trace_context = {}
 langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=dialog.tenant_id)
 if langfuse_keys:
     langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
     if langfuse.auth_check():
         langfuse_tracer = langfuse
-        langfuse.trace = langfuse_tracer.trace(name=f"{dialog.name}-{llm_model_config['llm_name']}")
+        trace_id = langfuse_tracer.create_trace_id()
+        trace_context = {"trace_id": trace_id}

 check_langfuse_tracer_ts = timer()
 kbs, embd_mdl, rerank_mdl, chat_mdl, tts_mdl = get_models(dialog)
```
f" - Token speed: {int(tk_num / (generate_result_time_cost / 1000.0))}/s" f" - Token speed: {int(tk_num / (generate_result_time_cost / 1000.0))}/s"
) )


langfuse_output = "\n" + re.sub(r"^.*?(### Query:.*)", r"\1", prompt, flags=re.DOTALL)
langfuse_output = {"time_elapsed:": re.sub(r"\n", " \n", langfuse_output), "created_at": time.time()}

# Add a condition check to call the end method only if langfuse_tracer exists # Add a condition check to call the end method only if langfuse_tracer exists
if langfuse_tracer and "langfuse_generation" in locals(): if langfuse_tracer and "langfuse_generation" in locals():
langfuse_generation.end(output=langfuse_output)
langfuse_output = "\n" + re.sub(r"^.*?(### Query:.*)", r"\1", prompt, flags=re.DOTALL)
langfuse_output = {"time_elapsed:": re.sub(r"\n", " \n", langfuse_output), "created_at": time.time()}
langfuse_generation.update(output=langfuse_output)
langfuse_generation.end()


return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()} return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()}


```diff
 if langfuse_tracer:
-    langfuse_generation = langfuse_tracer.trace.generation(name="chat", model=llm_model_config["llm_name"], input={"prompt": prompt, "prompt4citation": prompt4citation, "messages": msg})
+    langfuse_generation = langfuse_tracer.start_generation(
+        trace_context=trace_context, name="chat", model=llm_model_config["llm_name"], input={"prompt": prompt, "prompt4citation": prompt4citation, "messages": msg}
+    )

 if stream:
     last_ans = ""
```

### api/db/services/llm_service.py (+32, -24)

```diff
     return list(objs)

 @staticmethod
-def llm_id2llm_type(llm_id: str) ->str|None:
+def llm_id2llm_type(llm_id: str) -> str | None:
     llm_id, *_ = TenantLLMService.split_model_name_and_factory(llm_id)
     llm_factories = settings.FACTORY_LLM_INFOS
     for llm_factory in llm_factories:
```
```diff
     self.verbose_tool_use = kwargs.get("verbose_tool_use")

     langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
+    self.langfuse = None
     if langfuse_keys:
         langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
         if langfuse.auth_check():
             self.langfuse = langfuse
-            self.trace = self.langfuse.trace(name=f"{self.llm_type}-{self.llm_name}")
-    else:
-        self.langfuse = None
+            trace_id = self.langfuse.create_trace_id()
+            self.trace_context = {"trace_id": trace_id}

 def bind_tools(self, toolcall_session, tools):
     if not self.is_tools:
```
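
A second fix hides in this hunk: `self.langfuse = None` now runs unconditionally before the checks, so the attribute exists even when `auth_check()` fails, a path the old `else` (paired only with `if langfuse_keys:`) never covered. The default-then-override shape in isolation, as a hypothetical helper:

```python
def _init_langfuse(self, langfuse_keys) -> None:
    # hypothetical helper mirroring the __init__ logic above
    self.langfuse = None  # attribute exists on every path, even if auth fails
    if langfuse_keys:
        client = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
        if client.auth_check():
            self.langfuse = client
            self.trace_context = {"trace_id": client.create_trace_id()}
```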


```diff
 def encode(self, texts: list):
     if self.langfuse:
-        generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})

     embeddings, used_tokens = self.mdl.encode(texts)
     llm_name = getattr(self, "llm_name", None)
     ...
         logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))

     if self.langfuse:
-        generation.end(usage_details={"total_tokens": used_tokens})
+        generation.update(usage_details={"total_tokens": used_tokens})
+        generation.end()

     return embeddings, used_tokens
```
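
Every `LLMBundle` method below repeats the same v3 lifecycle: open a generation against the bundle's stored trace context, run the model, attach token usage via `update()`, then close with `end()`. A minimal sketch of that shared shape, where `run_model` is a hypothetical stand-in for calls like `self.mdl.encode`:

```python
def traced_call(langfuse, trace_context, name, model, run_model, **inputs):
    """Wrap a model call in a Langfuse v3 generation; a no-op when tracing is off."""
    generation = None
    if langfuse:
        generation = langfuse.start_generation(trace_context=trace_context, name=name, model=model, input=inputs)

    result, used_tokens = run_model(**inputs)

    if generation is not None:
        generation.update(usage_details={"total_tokens": used_tokens})
        generation.end()

    return result, used_tokens
```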


```diff
 def encode_queries(self, query: str):
     if self.langfuse:
-        generation = self.trace.generation(name="encode_queries", model=self.llm_name, input={"query": query})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode_queries", model=self.llm_name, input={"query": query})

     emd, used_tokens = self.mdl.encode_queries(query)
     llm_name = getattr(self, "llm_name", None)
     ...
         logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))

     if self.langfuse:
-        generation.end(usage_details={"total_tokens": used_tokens})
+        generation.update(usage_details={"total_tokens": used_tokens})
+        generation.end()

     return emd, used_tokens
```


```diff
 def similarity(self, query: str, texts: list):
     if self.langfuse:
-        generation = self.trace.generation(name="similarity", model=self.llm_name, input={"query": query, "texts": texts})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="similarity", model=self.llm_name, input={"query": query, "texts": texts})

     sim, used_tokens = self.mdl.similarity(query, texts)
     if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
         logging.error("LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))

     if self.langfuse:
-        generation.end(usage_details={"total_tokens": used_tokens})
+        generation.update(usage_details={"total_tokens": used_tokens})
+        generation.end()

     return sim, used_tokens
```


```diff
 def describe(self, image, max_tokens=300):
     if self.langfuse:
-        generation = self.trace.generation(name="describe", metadata={"model": self.llm_name})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="describe", metadata={"model": self.llm_name})

     txt, used_tokens = self.mdl.describe(image)
     if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
         logging.error("LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))

     if self.langfuse:
-        generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.end()

     return txt
```


```diff
 def describe_with_prompt(self, image, prompt):
     if self.langfuse:
-        generation = self.trace.generation(name="describe_with_prompt", metadata={"model": self.llm_name, "prompt": prompt})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="describe_with_prompt", metadata={"model": self.llm_name, "prompt": prompt})

     txt, used_tokens = self.mdl.describe_with_prompt(image, prompt)
     if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
         logging.error("LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))

     if self.langfuse:
-        generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.end()

     return txt
```


```diff
 def transcription(self, audio):
     if self.langfuse:
-        generation = self.trace.generation(name="transcription", metadata={"model": self.llm_name})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="transcription", metadata={"model": self.llm_name})

     txt, used_tokens = self.mdl.transcription(audio)
     if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
         logging.error("LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))

     if self.langfuse:
-        generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.end()

     return txt
```


```diff
 def tts(self, text: str) -> Generator[bytes, None, None]:
     if self.langfuse:
-        span = self.trace.span(name="tts", input={"text": text})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="tts", input={"text": text})

     for chunk in self.mdl.tts(text):
         if isinstance(chunk, int):
             ...
         yield chunk

     if self.langfuse:
-        span.end()
+        generation.end()
```
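
Streaming methods need a slightly different shape: the generation must outlive the generator body, so `end()` runs only after the stream is exhausted (note `tts` also moves from a v2 `trace.span` to a v3 generation here). A sketch of the pattern, where `synthesize` is a hypothetical stand-in for `self.mdl.tts`:

```python
from typing import Generator

def traced_tts(langfuse, trace_context, text: str, synthesize) -> Generator[bytes, None, None]:
    generation = None
    if langfuse:
        generation = langfuse.start_generation(trace_context=trace_context, name="tts", input={"text": text})

    for chunk in synthesize(text):  # forward audio chunks as they arrive
        yield chunk

    if generation is not None:
        generation.end()  # close only once the stream is exhausted
```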


```diff
 def _remove_reasoning_content(self, txt: str) -> str:
     first_think_start = txt.find("<think>")
     ...
     return txt[last_think_end + len("</think>") :]

-def chat(self, system: str, history: list, gen_conf: dict={}, **kwargs) -> str:
+def chat(self, system: str, history: list, gen_conf: dict = {}, **kwargs) -> str:
     if self.langfuse:
-        generation = self.trace.generation(name="chat", model=self.llm_name, input={"system": system, "history": history})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat", model=self.llm_name, input={"system": system, "history": history})

     chat_partial = partial(self.mdl.chat, system, history, gen_conf)
     if self.is_tools and self.mdl.is_tools:
     ...
         logging.error("LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))

     if self.langfuse:
-        generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.update(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+        generation.end()

     return txt
```


```diff
-def chat_streamly(self, system: str, history: list, gen_conf: dict={}, **kwargs):
+def chat_streamly(self, system: str, history: list, gen_conf: dict = {}, **kwargs):
     if self.langfuse:
-        generation = self.trace.generation(name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})
+        generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})

     ans = ""
     chat_partial = partial(self.mdl.chat_streamly, system, history, gen_conf)
     ...
         if isinstance(txt, int):
             total_tokens = txt
             if self.langfuse:
-                generation.end(output={"output": ans})
+                generation.update(output={"output": ans})
+                generation.end()
             break
     ...
         if txt.endswith("</think>"):
```
