소스 검색

feat: add pgvector full_text_search (#7396)

tags/0.7.2
Byeongjin Kang 1 년 전
부모
커밋
0223fc6fd5
No account linked to committer's email address
2개의 파일 변경, 21줄 추가 및 6줄 삭제
  1. 21
    2
      api/core/rag/datasource/vdb/pgvector/pgvector.py
  2. 0
    4
      api/tests/integration_tests/vdb/pgvector/test_pgvector.py

+ 21
- 2
api/core/rag/datasource/vdb/pgvector/pgvector.py 파일 보기

@@ -152,8 +152,27 @@ class PGVector(BaseVector):
return docs

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
    """Search the stored rows with PostgreSQL full-text search.

    Rows whose ``text`` column matches ``query`` are scored with
    ``ts_rank`` and returned best match first.

    Args:
        query: Free-form search text. It is handed to ``plainto_tsquery``
            as a bound parameter, so whitespace, quotes and tsquery
            operator characters need no manual quoting or escaping.
        **kwargs: ``top_k`` (default 5) caps the number of rows returned.

    Returns:
        One ``Document`` per matching row; the rank is stored in the
        row's metadata under the ``"score"`` key.
    """
    top_k = kwargs.get("top_k", 5)

    with self._get_cursor() as cur:
        # Rank and filter with the SAME plainto_tsquery() expression.
        # to_tsquery() expects tsquery syntax and raises on ordinary text
        # (e.g. a query containing a single quote), and it would also score
        # rows against a different query than the one used to select them.
        # The table name is an identifier and cannot be bound, so it stays
        # interpolated; every value (query text and LIMIT) is bound.
        cur.execute(
            f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score
            FROM {self.table_name}
            WHERE to_tsvector(text) @@ plainto_tsquery(%s)
            ORDER BY score DESC
            LIMIT %s""",
            (query, query, top_k),
        )

        docs = []
        # Consume the cursor while it is still open.
        for metadata, text, score in cur:
            metadata["score"] = score
            docs.append(Document(page_content=text, metadata=metadata))

    return docs

def delete(self) -> None:
with self._get_cursor() as cur:

+ 0
- 4
api/tests/integration_tests/vdb/pgvector/test_pgvector.py 파일 보기

@@ -21,10 +21,6 @@ class PGVectorTest(AbstractVectorTest):
),
)

# Test-class override of the full-text search check: runs a full-text query for the
# example text and expects the vector store to return no hits.
# NOTE(review): the diff summary for this file reads "+0 / -4", so these appear to be
# the lines removed by the commit (the zero-hit expectation matches the old stub that
# returned []) -- confirm before relying on this assertion.
def search_by_full_text(self):
hits_by_full_text: list[Document] = self.vector.search_by_full_text(query=get_example_text())
assert len(hits_by_full_text) == 0


def test_pgvector(setup_mock_redis):
    """Instantiate ``PGVectorTest`` and run every test it defines.

    ``setup_mock_redis`` is requested only as a fixture; its value is unused.
    """
    suite = PGVectorTest()
    suite.run_all_tests()

Loading…
취소
저장