|  |  | @@ -224,6 +224,8 @@ class Dealer: | 
		
	
		
			
			|  |  |  | def insert_citations(self, answer, chunks, chunk_v, | 
		
	
		
			
			|  |  |  | embd_mdl, tkweight=0.1, vtweight=0.9): | 
		
	
		
			
			|  |  |  | assert len(chunks) == len(chunk_v) | 
		
	
		
			
			|  |  |  | if not chunks: | 
		
	
		
			
			|  |  |  | return answer, set([]) | 
		
	
		
			
			|  |  |  | pieces = re.split(r"(```)", answer) | 
		
	
		
			
			|  |  |  | if len(pieces) >= 3: | 
		
	
		
			
			|  |  |  | i = 0 | 
		
	
	
		
			
			|  |  | @@ -263,7 +265,7 @@ class Dealer: | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | ans_v, _ = embd_mdl.encode(pieces_) | 
		
	
		
			
			|  |  |  | assert len(ans_v[0]) == len(chunk_v[0]), "The dimension of query and chunk do not match: {} vs. {}".format( | 
		
	
		
			
			|  |  |  | len(ans_v[0]), len(chunk_v[0])) | 
		
	
		
			
			|  |  |  | len(ans_v[0]), len(chunk_v[0])) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | chunks_tks = [rag_tokenizer.tokenize(self.qryr.rmWWW(ck)).split(" ") | 
		
	
		
			
			|  |  |  | for ck in chunks] | 
		
	
	
		
			
			|  |  | @@ -360,29 +362,33 @@ class Dealer: | 
		
	
		
			
			|  |  |  | ranks = {"total": 0, "chunks": [], "doc_aggs": {}} | 
		
	
		
			
			|  |  |  | if not question: | 
		
	
		
			
			|  |  |  | return ranks | 
		
	
		
			
			|  |  |  | req = {"kb_ids": kb_ids, "doc_ids": doc_ids, "size": page_size, | 
		
	
		
			
			|  |  |  | RERANK_PAGE_LIMIT = 3 | 
		
	
		
			
			|  |  |  | req = {"kb_ids": kb_ids, "doc_ids": doc_ids, "size": page_size*RERANK_PAGE_LIMIT, | 
		
	
		
			
			|  |  |  | "question": question, "vector": True, "topk": top, | 
		
	
		
			
			|  |  |  | "similarity": similarity_threshold, | 
		
	
		
			
			|  |  |  | "available_int": 1} | 
		
	
		
			
			|  |  |  | if page > RERANK_PAGE_LIMIT: | 
		
	
		
			
			|  |  |  | req["page"] = page | 
		
	
		
			
			|  |  |  | req["size"] = page_size | 
		
	
		
			
			|  |  |  | sres = self.search(req, index_name(tenant_id), embd_mdl, highlight) | 
		
	
		
			
			|  |  |  | ranks["total"] = sres.total | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | if rerank_mdl: | 
		
	
		
			
			|  |  |  | sim, tsim, vsim = self.rerank_by_model(rerank_mdl, | 
		
	
		
			
			|  |  |  | sres, question, 1 - vector_similarity_weight, vector_similarity_weight) | 
		
	
		
			
			|  |  |  | if page <= RERANK_PAGE_LIMIT: | 
		
	
		
			
			|  |  |  | if rerank_mdl: | 
		
	
		
			
			|  |  |  | sim, tsim, vsim = self.rerank_by_model(rerank_mdl, | 
		
	
		
			
			|  |  |  | sres, question, 1 - vector_similarity_weight, vector_similarity_weight) | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | sim, tsim, vsim = self.rerank( | 
		
	
		
			
			|  |  |  | sres, question, 1 - vector_similarity_weight, vector_similarity_weight) | 
		
	
		
			
			|  |  |  | idx = np.argsort(sim * -1)[(page-1)*page_size:page*page_size] | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | sim, tsim, vsim = self.rerank( | 
		
	
		
			
			|  |  |  | sres, question, 1 - vector_similarity_weight, vector_similarity_weight) | 
		
	
		
			
			|  |  |  | idx = np.argsort(sim * -1) | 
		
	
		
			
			|  |  |  | sim = tsim = vsim = [1]*len(sres.ids) | 
		
	
		
			
			|  |  |  | idx = list(range(len(sres.ids))) | 
		
	
		
			
			|  |  |  | 
 | 
		
	
		
			
			|  |  |  | dim = len(sres.query_vector) | 
		
	
		
			
			|  |  |  | start_idx = (page - 1) * page_size | 
		
	
		
			
			|  |  |  | for i in idx: | 
		
	
		
			
			|  |  |  | if sim[i] < similarity_threshold: | 
		
	
		
			
			|  |  |  | break | 
		
	
		
			
			|  |  |  | ranks["total"] += 1 | 
		
	
		
			
			|  |  |  | start_idx -= 1 | 
		
	
		
			
			|  |  |  | if start_idx >= 0: | 
		
	
		
			
			|  |  |  | continue | 
		
	
		
			
			|  |  |  | if len(ranks["chunks"]) >= page_size: | 
		
	
		
			
			|  |  |  | if aggs: | 
		
	
		
			
			|  |  |  | continue | 
		
	
	
		
			
			|  |  | @@ -406,7 +412,10 @@ class Dealer: | 
		
	
		
			
			|  |  |  | "positions": sres.field[id].get("position_int", "").split("\t") | 
		
	
		
			
			|  |  |  | } | 
		
	
		
			
			|  |  |  | if highlight: | 
		
	
		
			
			|  |  |  | d["highlight"] = rmSpace(sres.highlight[id]) | 
		
	
		
			
			|  |  |  | if id in sres.highlight: | 
		
	
		
			
			|  |  |  | d["highlight"] = rmSpace(sres.highlight[id]) | 
		
	
		
			
			|  |  |  | else: | 
		
	
		
			
			|  |  |  | d["highlight"] = d["content_with_weight"] | 
		
	
		
			
			|  |  |  | if len(d["positions"]) % 5 == 0: | 
		
	
		
			
			|  |  |  | poss = [] | 
		
	
		
			
			|  |  |  | for i in range(0, len(d["positions"]), 5): |