| else: | else: | ||||
| page_content = page_content | page_content = page_content | ||||
| document_node.page_content = page_content | document_node.page_content = page_content | ||||
| split_documents.append(document_node) | |||||
| if document_node.page_content: | |||||
| split_documents.append(document_node) | |||||
| all_documents.extend(split_documents) | all_documents.extend(split_documents) | ||||
| # processing qa document | # processing qa document | ||||
| if document_form == 'qa_model': | if document_form == 'qa_model': |
| import base64 | import base64 | ||||
| import copy | import copy | ||||
| import time | import time | ||||
| from typing import Optional, Tuple | |||||
| from typing import Optional, Tuple, Union | |||||
| import numpy as np | import numpy as np | ||||
| import tiktoken | import tiktoken | ||||
| embeddings_batch, embedding_used_tokens = self._embedding_invoke( | embeddings_batch, embedding_used_tokens = self._embedding_invoke( | ||||
| model=model, | model=model, | ||||
| client=client, | client=client, | ||||
| texts=[""], | |||||
| texts="", | |||||
| extra_model_kwargs=extra_model_kwargs | extra_model_kwargs=extra_model_kwargs | ||||
| ) | ) | ||||
| return ai_model_entity.entity | return ai_model_entity.entity | ||||
| @staticmethod | @staticmethod | ||||
| def _embedding_invoke(model: str, client: AzureOpenAI, texts: list[str], | |||||
| def _embedding_invoke(model: str, client: AzureOpenAI, texts: Union[list[str], str], | |||||
| extra_model_kwargs: dict) -> Tuple[list[list[float]], int]: | extra_model_kwargs: dict) -> Tuple[list[list[float]], int]: | ||||
| response = client.embeddings.create( | response = client.embeddings.create( | ||||
| input=texts, | input=texts, |
| embeddings_batch, embedding_used_tokens = self._embedding_invoke( | embeddings_batch, embedding_used_tokens = self._embedding_invoke( | ||||
| model=model, | model=model, | ||||
| credentials=credentials, | credentials=credentials, | ||||
| texts=[""] | |||||
| texts=[" "] | |||||
| ) | ) | ||||
| used_tokens += embedding_used_tokens | used_tokens += embedding_used_tokens | ||||
| :param text: text to tokenize | :param text: text to tokenize | ||||
| :return: | :return: | ||||
| """ | """ | ||||
| if not text: | |||||
| return Tokens([], [], {}) | |||||
| # initialize client | # initialize client | ||||
| client = cohere.Client(credentials.get('api_key')) | client = cohere.Client(credentials.get('api_key')) | ||||
| import base64 | import base64 | ||||
| import time | import time | ||||
| from typing import Optional, Tuple | |||||
| from typing import Optional, Tuple, Union | |||||
| import numpy as np | import numpy as np | ||||
| import tiktoken | import tiktoken | ||||
| embeddings_batch, embedding_used_tokens = self._embedding_invoke( | embeddings_batch, embedding_used_tokens = self._embedding_invoke( | ||||
| model=model, | model=model, | ||||
| client=client, | client=client, | ||||
| texts=[""], | |||||
| texts="", | |||||
| extra_model_kwargs=extra_model_kwargs | extra_model_kwargs=extra_model_kwargs | ||||
| ) | ) | ||||
| except Exception as ex: | except Exception as ex: | ||||
| raise CredentialsValidateFailedError(str(ex)) | raise CredentialsValidateFailedError(str(ex)) | ||||
| def _embedding_invoke(self, model: str, client: OpenAI, texts: list[str], | |||||
| def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], | |||||
| extra_model_kwargs: dict) -> Tuple[list[list[float]], int]: | extra_model_kwargs: dict) -> Tuple[list[list[float]], int]: | ||||
| """ | """ | ||||
| Invoke embedding model | Invoke embedding model |