Просмотр исходного кода

Fix: Ensure unique index names for pgvector knowledge tables (#19672)

Co-authored-by: crazywoola <427733928@qq.com>
tags/1.4.0
k-kayashima 5 месяцев назад
Родитель
Коммит
b292990075
Аккаунт пользователя с таким Email не найден
1 изменённый файл: 6 добавлений и 5 удалений
  1. 6
    5
      api/core/rag/datasource/vdb/pgvector/pgvector.py

+ 6
- 5
api/core/rag/datasource/vdb/pgvector/pgvector.py Просмотреть файл

import hashlib
import json import json
import logging import logging
import uuid import uuid
""" """


SQL_CREATE_INDEX = """ SQL_CREATE_INDEX = """
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name}
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx_{index_hash} ON {table_name}
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64);
""" """


SQL_CREATE_INDEX_PG_BIGM = """ SQL_CREATE_INDEX_PG_BIGM = """
CREATE INDEX IF NOT EXISTS bigm_idx ON {table_name}
CREATE INDEX IF NOT EXISTS bigm_idx_{index_hash} ON {table_name}
USING gin (text gin_bigm_ops); USING gin (text gin_bigm_ops);
""" """


super().__init__(collection_name) super().__init__(collection_name)
self.pool = self._create_connection_pool(config) self.pool = self._create_connection_pool(config)
self.table_name = f"embedding_{collection_name}" self.table_name = f"embedding_{collection_name}"
self.index_hash = hashlib.md5(self.table_name.encode()).hexdigest()[:8]
self.pg_bigm = config.pg_bigm self.pg_bigm = config.pg_bigm


def get_type(self) -> str: def get_type(self) -> str:
# PG hnsw index only support 2000 dimension or less # PG hnsw index only support 2000 dimension or less
# ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing # ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
if dimension <= 2000: if dimension <= 2000:
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name))
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name, index_hash=self.index_hash))
if self.pg_bigm: if self.pg_bigm:
cur.execute("CREATE EXTENSION IF NOT EXISTS pg_bigm")
cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name))
cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name, index_hash=self.index_hash))
redis_client.set(collection_exist_cache_key, 1, ex=3600) redis_client.set(collection_exist_cache_key, 1, ex=3600)





Загрузка…
Отмена
Сохранить