|  |  |  |  |  |  | 
													
												
													
														|  |  | FusionExpr |  |  | FusionExpr | 
													
												
													
														|  |  | from rag.nlp import is_english, rag_tokenizer |  |  | from rag.nlp import is_english, rag_tokenizer | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  |  |  |  | ATTEMPT_TIME = 2 | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | @singleton |  |  | @singleton | 
													
												
													
														|  |  | class ESConnection(DocStoreConnection): |  |  | class ESConnection(DocStoreConnection): | 
													
												
													
														|  |  | def __init__(self): |  |  | def __init__(self): | 
													
												
													
														|  |  | self.info = {} |  |  | self.info = {} | 
													
												
													
														|  |  | logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.") |  |  | logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.") | 
													
												
													
														|  |  | for _ in range(24): |  |  |  | 
													
												
													
														|  |  |  |  |  | for _ in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | self.es = Elasticsearch( |  |  | self.es = Elasticsearch( | 
													
												
													
														|  |  | settings.ES["hosts"].split(","), |  |  | settings.ES["hosts"].split(","), | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: |  |  | def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: | 
													
												
													
														|  |  | s = Index(indexName, self.es) |  |  | s = Index(indexName, self.es) | 
													
												
													
														|  |  | for i in range(3): |  |  |  | 
													
												
													
														|  |  |  |  |  | for i in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | return s.exists() |  |  | return s.exists() | 
													
												
													
														|  |  | except Exception as e: |  |  | except Exception as e: | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | if "minimum_should_match" in m.extra_options: |  |  | if "minimum_should_match" in m.extra_options: | 
													
												
													
														|  |  | minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%" |  |  | minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%" | 
													
												
													
														|  |  | bqry.must.append(Q("query_string", fields=m.fields, |  |  | bqry.must.append(Q("query_string", fields=m.fields, | 
													
												
													
														|  |  | type="best_fields", query=m.matching_text, |  |  |  | 
													
												
													
														|  |  | minimum_should_match=minimum_should_match, |  |  |  | 
													
												
													
														|  |  | boost=1)) |  |  |  | 
													
												
													
														|  |  |  |  |  | type="best_fields", query=m.matching_text, | 
													
												
													
														|  |  |  |  |  | minimum_should_match=minimum_should_match, | 
													
												
													
														|  |  |  |  |  | boost=1)) | 
													
												
													
														|  |  | bqry.boost = 1.0 - vector_similarity_weight |  |  | bqry.boost = 1.0 - vector_similarity_weight | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | elif isinstance(m, MatchDenseExpr): |  |  | elif isinstance(m, MatchDenseExpr): | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | q = s.to_dict() |  |  | q = s.to_dict() | 
													
												
													
														|  |  | logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q)) |  |  | logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q)) | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | for i in range(3): |  |  |  | 
													
												
													
														|  |  |  |  |  | for i in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | res = self.es.search(index=indexNames, |  |  | res = self.es.search(index=indexNames, | 
													
												
													
														|  |  | body=q, |  |  | body=q, | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | raise Exception("ESConnection.search timeout.") |  |  | raise Exception("ESConnection.search timeout.") | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None: |  |  | def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None: | 
													
												
													
														|  |  | for i in range(3): |  |  |  | 
													
												
													
														|  |  |  |  |  | for i in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | res = self.es.get(index=(indexName), |  |  | res = self.es.get(index=(indexName), | 
													
												
													
														|  |  | id=chunkId, source=True, ) |  |  | id=chunkId, source=True, ) | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | operations.append(d_copy) |  |  | operations.append(d_copy) | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | res = [] |  |  | res = [] | 
													
												
													
														|  |  | for _ in range(100): |  |  |  | 
													
												
													
														|  |  |  |  |  | for _ in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | r = self.es.bulk(index=(indexName), operations=operations, |  |  | r = self.es.bulk(index=(indexName), operations=operations, | 
													
												
													
														|  |  | refresh=False, timeout="600s") |  |  | refresh=False, timeout="600s") | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | if "id" in condition and isinstance(condition["id"], str): |  |  | if "id" in condition and isinstance(condition["id"], str): | 
													
												
													
														|  |  | # update specific single document |  |  | # update specific single document | 
													
												
													
														|  |  | chunkId = condition["id"] |  |  | chunkId = condition["id"] | 
													
												
													
														|  |  | for i in range(3): |  |  |  | 
													
												
													
														|  |  |  |  |  | for i in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | self.es.update(index=indexName, id=chunkId, doc=doc) |  |  | self.es.update(index=indexName, id=chunkId, doc=doc) | 
													
												
													
														|  |  | return True |  |  | return True | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | else: |  |  | else: | 
													
												
													
														|  |  | raise Exception("Condition value must be int, str or list.") |  |  | raise Exception("Condition value must be int, str or list.") | 
													
												
													
														|  |  | logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict())) |  |  | logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict())) | 
													
												
													
														|  |  | for _ in range(10): |  |  |  | 
													
												
													
														|  |  |  |  |  | for _ in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | res = self.es.delete_by_query( |  |  | res = self.es.delete_by_query( | 
													
												
													
														|  |  | index=indexName, |  |  | index=indexName, | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | sql = sql.replace(p, r, 1) |  |  | sql = sql.replace(p, r, 1) | 
													
												
													
														|  |  | logging.debug(f"ESConnection.sql to es: {sql}") |  |  | logging.debug(f"ESConnection.sql to es: {sql}") | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | for i in range(3): |  |  |  | 
													
												
													
														|  |  |  |  |  | for i in range(ATTEMPT_TIME): | 
													
												
													
														|  |  | try: |  |  | try: | 
													
												
													
														|  |  | res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, |  |  | res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, | 
													
												
													
														|  |  | request_timeout="2s") |  |  | request_timeout="2s") |