
Fix: error 429 API rate limit when building knowledge graph for all chat models and the Mistral embedding model (#9106)

### What problem does this PR solve?

Fixes HTTP 429 (API rate limit) errors when building a knowledge graph with any chat model or with the Mistral embedding model.
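
In outline, both fixes apply the same pattern: catch the failed request, sleep for a randomized interval so concurrent workers do not retry in lockstep, and try again a bounded number of times. A minimal, self-contained sketch of that pattern (`call_with_retry` and `call_api` are placeholder names, not RAGFlow identifiers):

```python
import random
import time

def call_with_retry(call_api, retry_max=5):
    """Retry a rate-limited API call with a randomized (jittered) delay."""
    while retry_max > 0:
        try:
            return call_api()  # success: return the provider's response
        except Exception:
            if retry_max == 1:
                raise  # retries exhausted; surface the error to the caller
            # Sleeping a random 20-60 s spreads retries from concurrent
            # workers across the provider's rate-limit window.
            time.sleep(random.uniform(20, 60))
            retry_max -= 1
```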

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Tag: tags/v0.20.0
Author: 謝富祥, 3 months ago
Parent commit: 021e8b57ae
2 changed files, 29 insertions(+) and 12 deletions(-):

  1. rag/llm/chat_model.py (+1, -1)
  2. rag/llm/embedding_model.py (+28, -11)

rag/llm/chat_model.py (+1, -1)

```diff
@@ -73,7 +73,7 @@ class Base(ABC):
 
     def _get_delay(self):
         """Calculate retry delay time"""
-        return self.base_delay + random.uniform(10, 150)
+        return self.base_delay + random.uniform(60, 150)
 
     def _classify_error(self, error):
         """Classify error based on error message content"""
```

rag/llm/embedding_model.py (+28, -11)

```diff
@@ -463,25 +463,42 @@ class MistralEmbed(Base):
         self.model_name = model_name
 
     def encode(self, texts: list):
+        import time
+        import random
         texts = [truncate(t, 8196) for t in texts]
         batch_size = 16
         ress = []
         token_count = 0
         for i in range(0, len(texts), batch_size):
-            res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
-            try:
-                ress.extend([d.embedding for d in res.data])
-                token_count += self.total_token_count(res)
-            except Exception as _e:
-                log_exception(_e, res)
+            retry_max = 5
+            while retry_max > 0:
+                try:
+                    res = self.client.embeddings(input=texts[i : i + batch_size], model=self.model_name)
+                    ress.extend([d.embedding for d in res.data])
+                    token_count += self.total_token_count(res)
+                    break
+                except Exception as _e:
+                    if retry_max == 1:
+                        log_exception(_e)
+                    delay = random.uniform(20, 60)
+                    time.sleep(delay)
+                    retry_max -= 1
         return np.array(ress), token_count
 
     def encode_queries(self, text):
-        res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
-        try:
-            return np.array(res.data[0].embedding), self.total_token_count(res)
-        except Exception as _e:
-            log_exception(_e, res)
+        import time
+        import random
+        retry_max = 5
+        while retry_max > 0:
+            try:
+                res = self.client.embeddings(input=[truncate(text, 8196)], model=self.model_name)
+                return np.array(res.data[0].embedding), self.total_token_count(res)
+            except Exception as _e:
+                if retry_max == 1:
+                    log_exception(_e)
+                delay = random.randint(20, 60)
+                time.sleep(delay)
+                retry_max -= 1
 
 
 class BedrockEmbed(Base):
```
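
From the caller's side nothing changes; a usage sketch, assuming the constructor takes an API key and model name (its exact signature is not shown in this diff):

```python
from rag.llm.embedding_model import MistralEmbed

embedder = MistralEmbed(key="sk-...", model_name="mistral-embed")  # hypothetical args

# encode() now retries each 16-text batch up to 5 times, sleeping a random
# 20-60 s between attempts, so a transient 429 no longer aborts the whole
# knowledge-graph build.
vectors, token_count = embedder.encode(["chunk one", "chunk two"])
print(vectors.shape, token_count)
```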
