|
|
|
|
|
|
|
|
|
|
|
|
|
|
def encode(self, texts: list): |
|
|
def encode(self, texts: list): |
|
|
import dashscope |
|
|
import dashscope |
|
|
|
|
|
import time |
|
|
|
|
|
|
|
|
batch_size = 4 |
|
|
batch_size = 4 |
|
|
res = [] |
|
|
res = [] |
|
|
token_count = 0 |
|
|
token_count = 0 |
|
|
texts = [truncate(t, 2048) for t in texts] |
|
|
texts = [truncate(t, 2048) for t in texts] |
|
|
for i in range(0, len(texts), batch_size): |
|
|
for i in range(0, len(texts), batch_size): |
|
|
|
|
|
retry_max = 5 |
|
|
resp = dashscope.TextEmbedding.call(model=self.model_name, input=texts[i : i + batch_size], api_key=self.key, text_type="document") |
|
|
resp = dashscope.TextEmbedding.call(model=self.model_name, input=texts[i : i + batch_size], api_key=self.key, text_type="document") |
|
|
|
|
|
while resp["output"] is None and retry_max > 0: |
|
|
|
|
|
time.sleep(10) |
|
|
|
|
|
resp = dashscope.TextEmbedding.call(model=self.model_name, input=texts[i : i + batch_size], api_key=self.key, text_type="document") |
|
|
|
|
|
retry_max -= 1 |
|
|
|
|
|
if retry_max == 0 and resp["output"] is None: |
|
|
|
|
|
log_exception(ValueError("Retry_max reached, calling embedding model failed")) |
|
|
|
|
|
raise |
|
|
try: |
|
|
try: |
|
|
embds = [[] for _ in range(len(resp["output"]["embeddings"]))] |
|
|
embds = [[] for _ in range(len(resp["output"]["embeddings"]))] |
|
|
for e in resp["output"]["embeddings"]: |
|
|
for e in resp["output"]["embeddings"]: |