Ver código fonte

fix: split chunks return empty strings (#2197)

tags/0.5.1
takatost 1 ano atrás
pai
commit
6cf93379b3
Nenhuma conta vinculada ao e-mail do autor do commit

+ 3
- 1
api/core/indexing_runner.py Ver arquivo

@@ -655,7 +655,9 @@ class IndexingRunner:
else:
page_content = page_content
document_node.page_content = page_content
-split_documents.append(document_node)
+
+if document_node.page_content:
+    split_documents.append(document_node)
all_documents.extend(split_documents)
# processing qa document
if document_form == 'qa_model':

+ 3
- 3
api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py Ver arquivo

@@ -1,7 +1,7 @@
import base64
import copy
import time
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union

import numpy as np
import tiktoken
@@ -76,7 +76,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
client=client,
-texts=[""],
+texts="",
extra_model_kwargs=extra_model_kwargs
)

@@ -147,7 +147,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
return ai_model_entity.entity

@staticmethod
-def _embedding_invoke(model: str, client: AzureOpenAI, texts: list[str],
+def _embedding_invoke(model: str, client: AzureOpenAI, texts: Union[list[str], str],
extra_model_kwargs: dict) -> Tuple[list[list[float]], int]:
response = client.embeddings.create(
input=texts,

+ 4
- 1
api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py Ver arquivo

@@ -76,7 +76,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
credentials=credentials,
-texts=[""]
+texts=[" "]
)

used_tokens += embedding_used_tokens
@@ -131,6 +131,9 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param text: text to tokenize
:return:
"""
+if not text:
+    return Tokens([], [], {})
+
# initialize client
client = cohere.Client(credentials.get('api_key'))


+ 3
- 3
api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py Ver arquivo

@@ -1,6 +1,6 @@
import base64
import time
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union

import numpy as np
import tiktoken
@@ -89,7 +89,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
client=client,
-texts=[""],
+texts="",
extra_model_kwargs=extra_model_kwargs
)

@@ -160,7 +160,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))

-def _embedding_invoke(self, model: str, client: OpenAI, texts: list[str],
+def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str],
extra_model_kwargs: dict) -> Tuple[list[list[float]], int]:
"""
Invoke embedding model

Carregando…
Cancelar
Salvar