| def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: | def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: | ||||
| top_k = kwargs.get("top_k", 4) | top_k = kwargs.get("top_k", 4) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| score_threshold = float(kwargs.get("score_threshold") or 0.0) | score_threshold = float(kwargs.get("score_threshold") or 0.0) | ||||
| with self._get_cursor() as cur: | with self._get_cursor() as cur: | ||||
| query_vector_str = json.dumps(query_vector) | query_vector_str = json.dumps(query_vector) | ||||
| def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: | def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: | ||||
| top_k = kwargs.get("top_k", 4) | top_k = kwargs.get("top_k", 4) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| with self._get_cursor() as cur: | with self._get_cursor() as cur: | ||||
| cur.execute( | cur.execute( | ||||
| f"""SELECT id, vector, page_content, metadata_, | f"""SELECT id, vector, page_content, metadata_, |
| def _search(self, dist: str, order: SortOrder, **kwargs: Any) -> list[Document]: | def _search(self, dist: str, order: SortOrder, **kwargs: Any) -> list[Document]: | ||||
| top_k = kwargs.get("top_k", 4) | top_k = kwargs.get("top_k", 4) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| score_threshold = float(kwargs.get("score_threshold") or 0.0) | score_threshold = float(kwargs.get("score_threshold") or 0.0) | ||||
| where_str = ( | where_str = ( | ||||
| f"WHERE dist < {1 - score_threshold}" | f"WHERE dist < {1 - score_threshold}" |
| :return: List of Documents that are nearest to the query vector. | :return: List of Documents that are nearest to the query vector. | ||||
| """ | """ | ||||
| top_k = kwargs.get("top_k", 4) | top_k = kwargs.get("top_k", 4) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| with self._get_cursor() as cur: | with self._get_cursor() as cur: | ||||
| cur.execute( | cur.execute( | ||||
| f"SELECT meta, text, embedding <=> %s AS distance FROM {self.table_name}" | f"SELECT meta, text, embedding <=> %s AS distance FROM {self.table_name}" | ||||
| def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: | def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: | ||||
| top_k = kwargs.get("top_k", 5) | top_k = kwargs.get("top_k", 5) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| with self._get_cursor() as cur: | with self._get_cursor() as cur: | ||||
| cur.execute( | cur.execute( | ||||
| f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score | f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score |
| :return: List of Documents that are nearest to the query vector. | :return: List of Documents that are nearest to the query vector. | ||||
| """ | """ | ||||
| top_k = kwargs.get("top_k", 4) | top_k = kwargs.get("top_k", 4) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| with self._get_cursor() as cur: | with self._get_cursor() as cur: | ||||
| cur.execute( | cur.execute( | ||||
| def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: | def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: | ||||
| top_k = kwargs.get("top_k", 5) | top_k = kwargs.get("top_k", 5) | ||||
| if not isinstance(top_k, int) or top_k <= 0: | |||||
| raise ValueError("top_k must be a positive integer") | |||||
| with self._get_cursor() as cur: | with self._get_cursor() as cur: | ||||
| if self.pg_bigm: | if self.pg_bigm: | ||||
| cur.execute("SET pg_bigm.similarity_limit TO 0.000001") | cur.execute("SET pg_bigm.similarity_limit TO 0.000001") |