|
|
|
@@ -1,3 +1,4 @@ |
|
|
|
import hashlib |
|
|
|
import json |
|
|
|
import logging |
|
|
|
import uuid |
|
|
|
@@ -61,12 +62,12 @@ CREATE TABLE IF NOT EXISTS {table_name} ( |
|
|
|
""" |
|
|
|
|
|
|
|
SQL_CREATE_INDEX = """ |
|
|
|
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} |
|
|
|
CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx_{index_hash} ON {table_name} |
|
|
|
USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); |
|
|
|
""" |
|
|
|
|
|
|
|
SQL_CREATE_INDEX_PG_BIGM = """ |
|
|
|
CREATE INDEX IF NOT EXISTS bigm_idx ON {table_name} |
|
|
|
CREATE INDEX IF NOT EXISTS bigm_idx_{index_hash} ON {table_name} |
|
|
|
USING gin (text gin_bigm_ops); |
|
|
|
""" |
|
|
|
|
|
|
|
@@ -76,6 +77,7 @@ class PGVector(BaseVector): |
|
|
|
super().__init__(collection_name) |
|
|
|
self.pool = self._create_connection_pool(config) |
|
|
|
self.table_name = f"embedding_{collection_name}" |
|
|
|
self.index_hash = hashlib.md5(self.table_name.encode()).hexdigest()[:8] |
|
|
|
self.pg_bigm = config.pg_bigm |
|
|
|
|
|
|
|
def get_type(self) -> str: |
|
|
|
@@ -256,10 +258,9 @@ class PGVector(BaseVector): |
|
|
|
# PG hnsw index only supports 2000 dimensions or fewer |
|
|
|
# ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing |
|
|
|
if dimension <= 2000: |
|
|
|
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name)) |
|
|
|
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name, index_hash=self.index_hash)) |
|
|
|
if self.pg_bigm: |
|
|
|
cur.execute("CREATE EXTENSION IF NOT EXISTS pg_bigm") |
|
|
|
cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name)) |
|
|
|
cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name, index_hash=self.index_hash)) |
|
|
|
redis_client.set(collection_exist_cache_key, 1, ex=3600) |
|
|
|
|
|
|
|
|