
Refa: ollama keep alive issue. (#8216)

### What problem does this PR solve?

#8122

### Type of change

- [x] Refactoring
tags/v0.19.1
Kevin Hu, 4 months ago
commit d5236b71f4
2 changed files with 6 additions and 4 deletions
1. rag/llm/chat_model.py (+2, -2)
2. rag/llm/cv_model.py (+4, -2)

rag/llm/chat_model.py (+2, -2)

@@ -871,7 +871,7 @@ class OllamaChat(Base):
             ctx_size = self._calculate_dynamic_ctx(history)
 
             gen_conf["num_ctx"] = ctx_size
-            response = self.client.chat(model=self.model_name, messages=history, options=gen_conf)
+            response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=-1)
             ans = response["message"]["content"].strip()
             token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
             return ans, token_count
@@ -898,7 +898,7 @@ class OllamaChat(Base):
 
             ans = ""
             try:
-                response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options)
+                response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=-1)
                 for resp in response:
                     if resp["done"]:
                         token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
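For context: Ollama unloads a model after it sits idle (5 minutes by default), and `keep_alive=-1` keeps it resident indefinitely, so consecutive requests skip the reload. A minimal sketch of the patched non-streaming call, assuming a local Ollama server; the model name and prompt are placeholders, not values from this PR:

```python
# Sketch only: "llama3" and the prompt are placeholders.
from ollama import Client

client = Client(host="http://localhost:11434")
response = client.chat(
    model="llama3",
    messages=[{"role": "user", "content": "Hello"}],
    options={"num_ctx": 4096},  # stands in for gen_conf in the patched code
    keep_alive=-1,              # keep the model loaded in memory indefinitely
)
print(response["message"]["content"].strip())
```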

rag/llm/cv_model.py (+4, -2)

@@ -506,7 +506,8 @@ class OllamaCV(Base):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options
+                options=options,
+                keep_alive=-1
             )
 
             ans = response["message"]["content"].strip()
@@ -536,7 +537,8 @@ class OllamaCV(Base):
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options
+                options=options,
+                keep_alive=-1
             )
             for resp in response:
                 if resp["done"]:
