Sfoglia il codice sorgente

fix: synonym bug (#3423)

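### What problem does this PR solve?

Synonym expansion in `rag/nlp/query.py` built each query term with `" %s)".format()`, which takes no arguments and leaves the literal `%s` in the query instead of the synonym terms; the term is now built with `" {})".format(syn)`. The PR also skips parameters that have no `component_id` in `agent/component/generate.py`, and in `rag/benchmark.py` wraps `qrels`/`run` in ranx `Qrels`/`Run` and reports `mrr@10` instead of `mrr`.
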
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
tags/v0.14.0
Kevin Hu 11 mesi fa
parent
commit
220aaddc62
Nessun account collegato all'indirizzo email del committer
3 file modificati con 6 aggiunte e 4 eliminazioni
  1. 1
    0
      agent/component/generate.py
  2. 4
    3
      rag/benchmark.py
  3. 1
    1
      rag/nlp/query.py

+ 1
- 0
agent/component/generate.py Vedi File

retrieval_res = [] retrieval_res = []
self._param.inputs = [] self._param.inputs = []
for para in self._param.parameters: for para in self._param.parameters:
if not para.get("component_id"): continue
if para["component_id"].split("@")[0].lower().find("begin") > 0: if para["component_id"].split("@")[0].lower().find("begin") > 0:
cpn_id, key = para["component_id"].split("@") cpn_id, key = para["component_id"].split("@")
for p in self._canvas.get_component(cpn_id)["obj"]._param.query: for p in self._canvas.get_component(cpn_id)["obj"]._param.query:

+ 4
- 3
rag/benchmark.py Vedi File

from api.utils import get_uuid from api.utils import get_uuid
from rag.nlp import tokenize, search from rag.nlp import tokenize, search
from ranx import evaluate from ranx import evaluate
from ranx import Qrels, Run
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm


self.index_name = search.index_name(self.tenant_id) self.index_name = search.index_name(self.tenant_id)
qrels, texts = self.ms_marco_index(file_path, "benchmark_ms_marco_v1.1") qrels, texts = self.ms_marco_index(file_path, "benchmark_ms_marco_v1.1")
run = self._get_retrieval(qrels) run = self._get_retrieval(qrels)
print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"]))
print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr@10"]))
self.save_results(qrels, run, texts, dataset, file_path) self.save_results(qrels, run, texts, dataset, file_path)
if dataset == "trivia_qa": if dataset == "trivia_qa":
self.tenant_id = "benchmark_trivia_qa" self.tenant_id = "benchmark_trivia_qa"
self.index_name = search.index_name(self.tenant_id) self.index_name = search.index_name(self.tenant_id)
qrels, texts = self.trivia_qa_index(file_path, "benchmark_trivia_qa") qrels, texts = self.trivia_qa_index(file_path, "benchmark_trivia_qa")
run = self._get_retrieval(qrels) run = self._get_retrieval(qrels)
print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"]))
print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr@10"]))
self.save_results(qrels, run, texts, dataset, file_path) self.save_results(qrels, run, texts, dataset, file_path)
if dataset == "miracl": if dataset == "miracl":
for lang in ['ar', 'bn', 'de', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th', for lang in ['ar', 'bn', 'de', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th',
os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang), os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang),
"benchmark_miracl_" + lang) "benchmark_miracl_" + lang)
run = self._get_retrieval(qrels) run = self._get_retrieval(qrels)
print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"]))
print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr@10"]))
self.save_results(qrels, run, texts, dataset, file_path) self.save_results(qrels, run, texts, dataset, file_path)





+ 1
- 1
rag/nlp/query.py Vedi File

syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn] syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn]
syns.append(" ".join(syn)) syns.append(" ".join(syn))


q = ["({}^{:.4f}".format(tk, w) + " %s)".format() for (tk, w), syn in zip(tks_w, syns)]
q = ["({}^{:.4f}".format(tk, w) + " {})".format(syn) for (tk, w), syn in zip(tks_w, syns)]
for i in range(1, len(tks_w)): for i in range(1, len(tks_w)):
q.append( q.append(
'"%s %s"^%.4f' '"%s %s"^%.4f'

Loading…
Annulla
Salva