|  |  | @@ -16,13 +16,15 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, | 
		
	
		
			
			|  |  |  | FusionExpr | 
		
	
		
			
			|  |  |  | from rag.nlp import is_english, rag_tokenizer | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | ATTEMPT_TIME = 2 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | @singleton | 
		
	
		
			
			|  |  |  | class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | def __init__(self): | 
		
	
		
			
			|  |  |  | self.info = {} | 
		
	
		
			
			|  |  |  | logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.") | 
		
	
		
			
			|  |  |  | for _ in range(24): | 
		
	
		
			
			|  |  |  | for _ in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | self.es = Elasticsearch( | 
		
	
		
			
			|  |  |  | settings.ES["hosts"].split(","), | 
		
	
	
		
			
			|  |  | @@ -92,7 +94,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: | 
		
	
		
			
			|  |  |  | s = Index(indexName, self.es) | 
		
	
		
			
			|  |  |  | for i in range(3): | 
		
	
		
			
			|  |  |  | for i in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | return s.exists() | 
		
	
		
			
			|  |  |  | except Exception as e: | 
		
	
	
		
			
			|  |  | @@ -144,9 +146,9 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | if "minimum_should_match" in m.extra_options: | 
		
	
		
			
			|  |  |  | minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%" | 
		
	
		
			
			|  |  |  | bqry.must.append(Q("query_string", fields=m.fields, | 
		
	
		
			
			|  |  |  | type="best_fields", query=m.matching_text, | 
		
	
		
			
			|  |  |  | minimum_should_match=minimum_should_match, | 
		
	
		
			
			|  |  |  | boost=1)) | 
		
	
		
			
			|  |  |  | type="best_fields", query=m.matching_text, | 
		
	
		
			
			|  |  |  | minimum_should_match=minimum_should_match, | 
		
	
		
			
			|  |  |  | boost=1)) | 
		
	
		
			
			|  |  |  | bqry.boost = 1.0 - vector_similarity_weight | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | elif isinstance(m, MatchDenseExpr): | 
		
	
	
		
			
			|  |  | @@ -180,7 +182,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | q = s.to_dict() | 
		
	
		
			
			|  |  |  | logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q)) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for i in range(3): | 
		
	
		
			
			|  |  |  | for i in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | res = self.es.search(index=indexNames, | 
		
	
		
			
			|  |  |  | body=q, | 
		
	
	
		
			
			|  |  | @@ -201,7 +203,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | raise Exception("ESConnection.search timeout.") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None: | 
		
	
		
			
			|  |  |  | for i in range(3): | 
		
	
		
			
			|  |  |  | for i in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | res = self.es.get(index=(indexName), | 
		
	
		
			
			|  |  |  | id=chunkId, source=True, ) | 
		
	
	
		
			
			|  |  | @@ -233,7 +235,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | operations.append(d_copy) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | res = [] | 
		
	
		
			
			|  |  |  | for _ in range(100): | 
		
	
		
			
			|  |  |  | for _ in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | r = self.es.bulk(index=(indexName), operations=operations, | 
		
	
		
			
			|  |  |  | refresh=False, timeout="600s") | 
		
	
	
		
			
			|  |  | @@ -258,7 +260,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | if "id" in condition and isinstance(condition["id"], str): | 
		
	
		
			
			|  |  |  | # update specific single document | 
		
	
		
			
			|  |  |  | chunkId = condition["id"] | 
		
	
		
			
			|  |  |  | for i in range(3): | 
		
	
		
			
			|  |  |  | for i in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | self.es.update(index=indexName, id=chunkId, doc=doc) | 
		
	
		
			
			|  |  |  | return True | 
		
	
	
		
			
			|  |  | @@ -326,7 +328,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | raise Exception("Condition value must be int, str or list.") | 
		
	
		
			
			|  |  |  | logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict())) | 
		
	
		
			
			|  |  |  | for _ in range(10): | 
		
	
		
			
			|  |  |  | for _ in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | res = self.es.delete_by_query( | 
		
	
		
			
			|  |  |  | index=indexName, | 
		
	
	
		
			
			|  |  | @@ -437,7 +439,7 @@ class ESConnection(DocStoreConnection): | 
		
	
		
			
			|  |  |  | sql = sql.replace(p, r, 1) | 
		
	
		
			
			|  |  |  | logging.debug(f"ESConnection.sql to es: {sql}") | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | for i in range(3): | 
		
	
		
			
			|  |  |  | for i in range(ATTEMPT_TIME): | 
		
	
		
			
			|  |  |  | try: | 
		
	
		
			
			|  |  |  | res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, | 
		
	
		
			
			|  |  |  | request_timeout="2s") |