|
|
|
@@ -663,6 +663,7 @@ class OllamaChat(Base): |
|
|
|
|
|
|
|
self.client = Client(host=base_url) if not key or key == "x" else Client(host=base_url, headers={"Authorization": f"Bearer {key}"}) |
|
|
|
self.model_name = model_name |
|
|
|
+        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
|
|
|
|
|
|
|
def _clean_conf(self, gen_conf): |
|
|
|
options = {} |
|
|
|
@@ -679,7 +680,7 @@ class OllamaChat(Base): |
|
|
|
ctx_size = self._calculate_dynamic_ctx(history) |
|
|
|
|
|
|
|
gen_conf["num_ctx"] = ctx_size |
|
|
|
-        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=-1)
|
|
|
+        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=self.keep_alive)
|
|
|
ans = response["message"]["content"].strip() |
|
|
|
token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0) |
|
|
|
return ans, token_count |
|
|
|
@@ -706,7 +707,7 @@ class OllamaChat(Base): |
|
|
|
|
|
|
|
ans = "" |
|
|
|
try: |
|
|
|
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=-1)
|
|
|
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=self.keep_alive)
|
|
|
for resp in response: |
|
|
|
if resp["done"]: |
|
|
|
token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0) |