| from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer | from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer | ||||
| from core.splitter.text_splitter import ( | from core.splitter.text_splitter import ( | ||||
| TS, | TS, | ||||
| AbstractSet, | |||||
| Collection, | Collection, | ||||
| Literal, | Literal, | ||||
| RecursiveCharacterTextSplitter, | RecursiveCharacterTextSplitter, | ||||
| Set, | |||||
| TokenTextSplitter, | TokenTextSplitter, | ||||
| Type, | |||||
| Union, | Union, | ||||
| ) | ) | ||||
| @classmethod | @classmethod | ||||
| def from_encoder( | def from_encoder( | ||||
| cls: Type[TS], | |||||
| cls: type[TS], | |||||
| embedding_model_instance: Optional[ModelInstance], | embedding_model_instance: Optional[ModelInstance], | ||||
| allowed_special: Union[Literal[all], AbstractSet[str]] = set(), | |||||
| allowed_special: Union[Literal[all], Set[str]] = set(), | |||||
| disallowed_special: Union[Literal[all], Collection[str]] = "all", | disallowed_special: Union[Literal[all], Collection[str]] = "all", | ||||
| **kwargs: Any, | **kwargs: Any, | ||||
| ): | ): |