|  |  | @@ -38,7 +38,7 @@ class Base(ABC): | 
		
	
		
			
			|  |  |  | def __init__(self, key, model_name): | 
		
	
		
			
			|  |  |  | pass | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed a list of texts; the base class provides no implementation.

				Args:
					texts: The strings to embed.

				Raises:
					NotImplementedError: Always, in the base class.
				"""
				raise NotImplementedError("Please implement encode method!")
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text: str): | 
		
	
	
		
			
			|  |  | @@ -78,15 +78,16 @@ class DefaultEmbedding(Base): | 
		
	
		
			
			|  |  |  | use_fp16=torch.cuda.is_available()) | 
		
	
		
			
			|  |  |  | self._model = DefaultEmbedding._model | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` with the shared local model, 16 texts per call.

				Args:
					texts: The strings to embed; each is passed through
						``truncate(t, 2048)`` first.

				Returns:
					A tuple ``(embeddings, token_count)`` where ``embeddings`` is a
					NumPy array with one row per text and ``token_count`` is the sum
					of ``num_tokens_from_string`` over the truncated texts.
				"""
				batch_size = 16
				texts = [truncate(t, 2048) for t in texts]
				token_count = sum(num_tokens_from_string(t) for t in texts)
				ress = []
				for i in range(0, len(texts), batch_size):
					ress.extend(self._model.encode(texts[i:i + batch_size]).tolist())
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text: str): | 
		
	
		
			
			|  |  |  | token_count = num_tokens_from_string(text) | 
		
	
	
		
			
			|  |  | @@ -101,12 +102,18 @@ class OpenAIEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = OpenAI(api_key=key, base_url=base_url) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` through the embeddings endpoint in batches of 16.

				Args:
					texts: The strings to embed; each is passed through
						``truncate(t, 8191)`` first.

				Returns:
					A tuple ``(embeddings, total_tokens)``: a NumPy array with one row
					per text and the sum of ``usage.total_tokens`` over all requests.
				"""
				# OpenAI requires batch size <=16
				batch_size = 16
				texts = [truncate(t, 8191) for t in texts]
				ress = []
				total_tokens = 0
				for i in range(0, len(texts), batch_size):
					res = self.client.embeddings.create(input=texts[i:i + batch_size],
														model=self.model_name)
					ress.extend([d.embedding for d in res.data])
					total_tokens += res.usage.total_tokens
				return np.array(ress), total_tokens
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | res = self.client.embeddings.create(input=[truncate(text, 8191)], | 
		
	
	
		
			
			|  |  | @@ -123,12 +130,14 @@ class LocalAIEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = OpenAI(api_key="empty", base_url=base_url) | 
		
	
		
			
			|  |  |  | self.model_name = model_name.split("___")[0] | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` through the local endpoint in batches of 16.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, 1024)``. The second element is a fixed
					placeholder, not a measured token count.
				"""
				batch_size = 16
				ress = []
				for i in range(0, len(texts), batch_size):
					res = self.client.embeddings.create(input=texts[i:i + batch_size], model=self.model_name)
					ress.extend([d.embedding for d in res.data])
				# Local embedding for LmStudio does not count tokens.
				return np.array(ress), 1024
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | embds, cnt = self.encode([text]) | 
		
	
	
		
			
			|  |  | @@ -155,12 +164,12 @@ class BaiChuanEmbed(OpenAIEmbed): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | class QWenEmbed(Base): | 
		
	
		
			
			|  |  |  | def __init__(self, key, model_name="text_embedding_v2", **kwargs): | 
		
	
		
			
			|  |  |  | dashscope.api_key = key | 
		
	
		
			
			|  |  |  | self.key = key | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode(self, texts: list, batch_size=10): | 
		
	
		
			
			|  |  |  | def encode(self, texts: list): | 
		
	
		
			
			|  |  |  | import dashscope | 
		
	
		
			
			|  |  |  | batch_size = min(batch_size, 4) | 
		
	
		
			
			|  |  |  | batch_size = 4 | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | res = [] | 
		
	
		
			
			|  |  |  | token_count = 0 | 
		
	
	
		
			
			|  |  | @@ -169,6 +178,7 @@ class QWenEmbed(Base): | 
		
	
		
			
			|  |  |  | resp = dashscope.TextEmbedding.call( | 
		
	
		
			
			|  |  |  | model=self.model_name, | 
		
	
		
			
			|  |  |  | input=texts[i:i + batch_size], | 
		
	
		
			
			|  |  |  | api_key=self.key, | 
		
	
		
			
			|  |  |  | text_type="document" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | embds = [[] for _ in range(len(resp["output"]["embeddings"]))] | 
		
	
	
		
			
			|  |  | @@ -186,6 +196,7 @@ class QWenEmbed(Base): | 
		
	
		
			
			|  |  |  | resp = dashscope.TextEmbedding.call( | 
		
	
		
			
			|  |  |  | model=self.model_name, | 
		
	
		
			
			|  |  |  | input=text[:2048], | 
		
	
		
			
			|  |  |  | api_key=self.key, | 
		
	
		
			
			|  |  |  | text_type="query" | 
		
	
		
			
			|  |  |  | ) | 
		
	
		
			
			|  |  |  | return np.array(resp["output"]["embeddings"][0] | 
		
	
	
		
			
			|  |  | @@ -200,7 +211,7 @@ class ZhipuEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = ZhipuAI(api_key=key) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode(self, texts: list, batch_size=16): | 
		
	
		
			
			|  |  |  | def encode(self, texts: list): | 
		
	
		
			
			|  |  |  | arr = [] | 
		
	
		
			
			|  |  |  | tks_num = 0 | 
		
	
		
			
			|  |  |  | for txt in texts: | 
		
	
	
		
			
			|  |  | @@ -221,7 +232,7 @@ class OllamaEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = Client(host=kwargs["base_url"]) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode(self, texts: list, batch_size=16): | 
		
	
		
			
			|  |  |  | def encode(self, texts: list): | 
		
	
		
			
			|  |  |  | arr = [] | 
		
	
		
			
			|  |  |  | tks_num = 0 | 
		
	
		
			
			|  |  |  | for txt in texts: | 
		
	
	
		
			
			|  |  | @@ -252,13 +263,13 @@ class FastEmbed(Base): | 
		
	
		
			
			|  |  |  | from fastembed import TextEmbedding | 
		
	
		
			
			|  |  |  | self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` with the wrapped model and count their tokens.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, total_tokens)``: a NumPy array with one row
					per text and the summed length of the tokenizer encodings.
				"""
				# Using the internal tokenizer to encode the texts and get the total
				# number of tokens
				encodings = self._model.model.tokenizer.encode_batch(texts)
				total_tokens = sum(len(e) for e in encodings)

				embeddings = [e.tolist() for e in self._model.embed(texts, batch_size=16)]

				return np.array(embeddings), total_tokens
		
	
		
			
			|  |  |  | 
 | 
		
	
	
		
			
			|  |  | @@ -278,11 +289,15 @@ class XinferenceEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = OpenAI(api_key=key, base_url=base_url) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` through the embeddings endpoint in batches of 16.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, total_tokens)``: a NumPy array with one row
					per text and the sum of ``usage.total_tokens`` over all requests.
				"""
				batch_size = 16
				ress = []
				total_tokens = 0
				for i in range(0, len(texts), batch_size):
					res = self.client.embeddings.create(input=texts[i:i + batch_size], model=self.model_name)
					ress.extend([d.embedding for d in res.data])
					total_tokens += res.usage.total_tokens
				return np.array(ress), total_tokens
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | res = self.client.embeddings.create(input=[text], | 
		
	
	
		
			
			|  |  | @@ -306,7 +321,8 @@ class YoudaoEmbed(Base): | 
		
	
		
			
			|  |  |  | model_name_or_path=model_name.replace( | 
		
	
		
			
			|  |  |  | "maidalun1020", "InfiniFlow")) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode(self, texts: list, batch_size=10): | 
		
	
		
			
			|  |  |  | def encode(self, texts: list): | 
		
	
		
			
			|  |  |  | batch_size = 10 | 
		
	
		
			
			|  |  |  | res = [] | 
		
	
		
			
			|  |  |  | token_count = 0 | 
		
	
		
			
			|  |  |  | for t in texts: | 
		
	
	
		
			
			|  |  | @@ -332,15 +348,21 @@ class JinaEmbed(Base): | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` by POSTing them to ``self.base_url`` in batches of 16.

				Args:
					texts: The strings to embed; each is passed through
						``truncate(t, 8196)`` first.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array with one row
					per text and the sum of ``usage.total_tokens`` over all responses.
				"""
				texts = [truncate(t, 8196) for t in texts]
				batch_size = 16
				ress = []
				token_count = 0
				for i in range(0, len(texts), batch_size):
					data = {
						"model": self.model_name,
						"input": texts[i:i + batch_size],
						'encoding_type': 'float'
					}
					res = requests.post(self.base_url, headers=self.headers, json=data).json()
					ress.extend([d["embedding"] for d in res["data"]])
					token_count += res["usage"]["total_tokens"]
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | embds, cnt = self.encode([text]) | 
		
	
	
		
			
			|  |  | @@ -394,12 +416,17 @@ class MistralEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = MistralClient(api_key=key) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` with the Mistral client in batches of 16.

				Args:
					texts: The strings to embed; each is passed through
						``truncate(t, 8196)`` first.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array with one row
					per text and the sum of ``usage.total_tokens`` over all requests.
				"""
				texts = [truncate(t, 8196) for t in texts]
				batch_size = 16
				ress = []
				token_count = 0
				for i in range(0, len(texts), batch_size):
					res = self.client.embeddings(input=texts[i:i + batch_size],
												 model=self.model_name)
					ress.extend([d.embedding for d in res.data])
					token_count += res.usage.total_tokens
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | res = self.client.embeddings(input=[truncate(text, 8196)], | 
		
	
	
		
			
			|  |  | @@ -418,7 +445,7 @@ class BedrockEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = boto3.client(service_name='bedrock-runtime', region_name=self.bedrock_region, | 
		
	
		
			
			|  |  |  | aws_access_key_id=self.bedrock_ak, aws_secret_access_key=self.bedrock_sk) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode(self, texts: list, batch_size=16): | 
		
	
		
			
			|  |  |  | def encode(self, texts: list): | 
		
	
		
			
			|  |  |  | texts = [truncate(t, 8196) for t in texts] | 
		
	
		
			
			|  |  |  | embeddings = [] | 
		
	
		
			
			|  |  |  | token_count = 0 | 
		
	
	
		
			
			|  |  | @@ -436,7 +463,6 @@ class BedrockEmbed(Base): | 
		
	
		
			
			|  |  |  | return np.array(embeddings), token_count | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | embeddings = [] | 
		
	
		
			
			|  |  |  | token_count = num_tokens_from_string(text) | 
		
	
		
			
			|  |  |  | if self.model_name.split('.')[0] == 'amazon': | 
		
	
	
		
			
			|  |  | @@ -453,20 +479,26 @@ class BedrockEmbed(Base): | 
		
	
		
			
			|  |  |  | class GeminiEmbed(Base): | 
		
	
		
			
			|  |  |  | def __init__(self, key, model_name='models/text-embedding-004', | 
		
	
		
			
			|  |  |  | **kwargs): | 
		
	
		
			
			|  |  |  | genai.configure(api_key=key) | 
		
	
		
			
			|  |  |  | self.key = key | 
		
	
		
			
			|  |  |  | self.model_name = 'models/' + model_name | 
		
	
		
			
			|  |  |  |  | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` with ``genai.embed_content`` in batches of 16.

				Args:
					texts: The strings to embed; each is passed through
						``truncate(t, 2048)`` first.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array built from
					the ``'embedding'`` entries of every response, and the sum of
					``num_tokens_from_string`` over the truncated texts.
				"""
				texts = [truncate(t, 2048) for t in texts]
				token_count = sum(num_tokens_from_string(text) for text in texts)
				genai.configure(api_key=self.key)
				batch_size = 16
				ress = []
				for i in range(0, len(texts), batch_size):
					result = genai.embed_content(
						model=self.model_name,
						# Slice, not texts[i, i + batch_size]: indexing a list with
						# a tuple raises TypeError.
						content=texts[i:i + batch_size],
						task_type="retrieval_document",
						title="Embedding of single string")
					ress.extend(result['embedding'])
				return np.array(ress), token_count
		
	
		
			
			|  |  |  |  | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | genai.configure(api_key=self.key) | 
		
	
		
			
			|  |  |  | result = genai.embed_content( | 
		
	
		
			
			|  |  |  | model=self.model_name, | 
		
	
		
			
			|  |  |  | content=truncate(text,2048), | 
		
	
	
		
			
			|  |  | @@ -495,19 +527,22 @@ class NvidiaEmbed(Base): | 
		
	
		
			
			|  |  |  | if model_name == "snowflake/arctic-embed-l": | 
		
	
		
			
			|  |  |  | self.base_url = "https://ai.api.nvidia.com/v1/retrieval/snowflake/arctic-embed-l/embeddings" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` by POSTing them to ``self.base_url`` in batches of 16.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array with one row
					per text and the sum of ``usage.total_tokens`` over all responses.
				"""
				batch_size = 16
				ress = []
				token_count = 0
				for i in range(0, len(texts), batch_size):
					payload = {
						"input": texts[i : i + batch_size],
						"input_type": "query",
						"model": self.model_name,
						"encoding_format": "float",
						"truncate": "END",
					}
					res = requests.post(self.base_url, headers=self.headers, json=payload).json()
					ress.extend([d["embedding"] for d in res["data"]])
					token_count += res["usage"]["total_tokens"]
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | embds, cnt = self.encode([text]) | 
		
	
	
		
			
			|  |  | @@ -541,16 +576,20 @@ class CoHereEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = Client(api_key=key) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` as documents with the Cohere client, 16 per call.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array with one row
					per text and the sum of ``meta.billed_units.input_tokens`` over
					all requests.
				"""
				batch_size = 16
				ress = []
				token_count = 0
				for i in range(0, len(texts), batch_size):
					res = self.client.embed(
						texts=texts[i : i + batch_size],
						model=self.model_name,
						input_type="search_document",
						embedding_types=["float"],
					)
					ress.extend([d for d in res.embeddings.float])
					token_count += res.meta.billed_units.input_tokens
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | res = self.client.embed( | 
		
	
	
		
			
			|  |  | @@ -599,19 +638,23 @@ class SILICONFLOWEmbed(Base): | 
		
	
		
			
			|  |  |  | self.base_url = base_url | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` by POSTing them to ``self.base_url`` in batches of 16.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array with one row
					per text and the sum of ``usage.total_tokens`` over all responses.

				Raises:
					ValueError: If a response has no ``"data"`` list or its length
						differs from the size of the batch that was sent.
				"""
				batch_size = 16
				ress = []
				token_count = 0
				for i in range(0, len(texts), batch_size):
					texts_batch = texts[i : i + batch_size]
					payload = {
						"model": self.model_name,
						"input": texts_batch,
						"encoding_format": "float",
					}
					res = requests.post(self.base_url, json=payload, headers=self.headers).json()
					# Validate per batch so a short or malformed reply cannot
					# silently misalign embeddings with their texts.
					if "data" not in res or not isinstance(res["data"], list) or len(res["data"]) != len(texts_batch):
						raise ValueError(f"SILICONFLOWEmbed.encode got invalid response from {self.base_url}")
					ress.extend([d["embedding"] for d in res["data"]])
					token_count += res["usage"]["total_tokens"]
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | payload = { | 
		
	
	
		
			
			|  |  | @@ -632,9 +675,14 @@ class ReplicateEmbed(Base): | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | self.client = Client(api_token=key) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` by running the model through the client, 16 per call.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array built from
					the concatenated results of every ``client.run`` call, and the
					sum of ``num_tokens_from_string`` over ``texts``.
				"""
				batch_size = 16
				token_count = sum([num_tokens_from_string(text) for text in texts])
				ress = []
				for i in range(0, len(texts), batch_size):
					res = self.client.run(self.model_name, input={"texts": texts[i : i + batch_size]})
					ress.extend(res)
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | res = self.client.embed(self.model_name, input={"texts": [text]}) | 
		
	
	
		
			
			|  |  | @@ -673,11 +721,17 @@ class VoyageEmbed(Base): | 
		
	
		
			
			|  |  |  | self.client = voyageai.Client(api_key=key) | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			def encode(self, texts: list):
				"""Embed ``texts`` as documents with the Voyage client, 16 per call.

				Args:
					texts: The strings to embed.

				Returns:
					A tuple ``(embeddings, token_count)``: a NumPy array with one row
					per text and the sum of ``total_tokens`` over all requests.
				"""
				batch_size = 16
				ress = []
				token_count = 0
				for i in range(0, len(texts), batch_size):
					res = self.client.embed(
						texts=texts[i : i + batch_size], model=self.model_name, input_type="document"
					)
					ress.extend(res.embeddings)
					token_count += res.total_tokens
				return np.array(ress), token_count
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode_queries(self, text): | 
		
	
		
			
			|  |  |  | res = self.client.embed( | 
		
	
	
		
			
			|  |  | @@ -694,7 +748,7 @@ class HuggingFaceEmbed(Base): | 
		
	
		
			
			|  |  |  | self.model_name = model_name | 
		
	
		
			
			|  |  |  | self.base_url = base_url or "http://127.0.0.1:8080" | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def encode(self, texts: list, batch_size=16): | 
		
	
		
			
			|  |  |  | def encode(self, texts: list): | 
		
	
		
			
			|  |  |  | embeddings = [] | 
		
	
		
			
			|  |  |  | for text in texts: | 
		
	
		
			
			|  |  |  | response = requests.post( |