| @@ -655,7 +655,9 @@ class IndexingRunner: | |||
| else: | |||
| page_content = page_content | |||
| document_node.page_content = page_content | |||
| split_documents.append(document_node) | |||
| if document_node.page_content: | |||
| split_documents.append(document_node) | |||
| all_documents.extend(split_documents) | |||
| # processing qa document | |||
| if document_form == 'qa_model': | |||
| @@ -1,7 +1,7 @@ | |||
| import base64 | |||
| import copy | |||
| import time | |||
| from typing import Optional, Tuple | |||
| from typing import Optional, Tuple, Union | |||
| import numpy as np | |||
| import tiktoken | |||
| @@ -76,7 +76,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel): | |||
| embeddings_batch, embedding_used_tokens = self._embedding_invoke( | |||
| model=model, | |||
| client=client, | |||
| texts=[""], | |||
| texts="", | |||
| extra_model_kwargs=extra_model_kwargs | |||
| ) | |||
| @@ -147,7 +147,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel): | |||
| return ai_model_entity.entity | |||
| @staticmethod | |||
| def _embedding_invoke(model: str, client: AzureOpenAI, texts: list[str], | |||
| def _embedding_invoke(model: str, client: AzureOpenAI, texts: Union[list[str], str], | |||
| extra_model_kwargs: dict) -> Tuple[list[list[float]], int]: | |||
| response = client.embeddings.create( | |||
| input=texts, | |||
| @@ -76,7 +76,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel): | |||
| embeddings_batch, embedding_used_tokens = self._embedding_invoke( | |||
| model=model, | |||
| credentials=credentials, | |||
| texts=[""] | |||
| texts=[" "] | |||
| ) | |||
| used_tokens += embedding_used_tokens | |||
| @@ -131,6 +131,9 @@ class CohereTextEmbeddingModel(TextEmbeddingModel): | |||
| :param text: text to tokenize | |||
| :return: | |||
| """ | |||
| if not text: | |||
| return Tokens([], [], {}) | |||
| # initialize client | |||
| client = cohere.Client(credentials.get('api_key')) | |||
| @@ -1,6 +1,6 @@ | |||
| import base64 | |||
| import time | |||
| from typing import Optional, Tuple | |||
| from typing import Optional, Tuple, Union | |||
| import numpy as np | |||
| import tiktoken | |||
| @@ -89,7 +89,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel): | |||
| embeddings_batch, embedding_used_tokens = self._embedding_invoke( | |||
| model=model, | |||
| client=client, | |||
| texts=[""], | |||
| texts="", | |||
| extra_model_kwargs=extra_model_kwargs | |||
| ) | |||
| @@ -160,7 +160,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel): | |||
| except Exception as ex: | |||
| raise CredentialsValidateFailedError(str(ex)) | |||
| def _embedding_invoke(self, model: str, client: OpenAI, texts: list[str], | |||
| def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], | |||
| extra_model_kwargs: dict) -> Tuple[list[list[float]], int]: | |||
| """ | |||
| Invoke embedding model | |||