Browse Source

fix multiple metadata filter's confusing setting (#16771)

tags/1.2.0
Jyong 7 months ago
parent
commit
6a857e01f6
No account linked to committer's email address

+ 24
- 9
api/core/rag/retrieval/dataset_retrieval.py View File

) )
if automatic_metadata_filters: if automatic_metadata_filters:
conditions = [] conditions = []
for filter in automatic_metadata_filters:
for sequence, filter in enumerate(automatic_metadata_filters):
self._process_metadata_filter_func( self._process_metadata_filter_func(
sequence,
filter.get("condition"), # type: ignore filter.get("condition"), # type: ignore
filter.get("metadata_name"), # type: ignore filter.get("metadata_name"), # type: ignore
filter.get("value"), filter.get("value"),
elif metadata_filtering_mode == "manual": elif metadata_filtering_mode == "manual":
if metadata_filtering_conditions: if metadata_filtering_conditions:
metadata_condition = MetadataCondition(**metadata_filtering_conditions.model_dump()) metadata_condition = MetadataCondition(**metadata_filtering_conditions.model_dump())
for condition in metadata_filtering_conditions.conditions: # type: ignore
for sequence, condition in enumerate(metadata_filtering_conditions.conditions): # type: ignore
metadata_name = condition.name metadata_name = condition.name
expected_value = condition.value expected_value = condition.value
if expected_value is not None or condition.comparison_operator in ("empty", "not empty"): if expected_value is not None or condition.comparison_operator in ("empty", "not empty"):
if isinstance(expected_value, str): if isinstance(expected_value, str):
expected_value = self._replace_metadata_filter_value(expected_value, inputs) expected_value = self._replace_metadata_filter_value(expected_value, inputs)
filters = self._process_metadata_filter_func( filters = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, filters
sequence,
condition.comparison_operator,
metadata_name,
expected_value,
filters,
) )
else: else:
raise ValueError("Invalid metadata filtering mode") raise ValueError("Invalid metadata filtering mode")
return None return None
return automatic_metadata_filters return automatic_metadata_filters


def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: Optional[Any], filters: list):
def _process_metadata_filter_func(
self, sequence: int, condition: str, metadata_name: str, value: Optional[Any], filters: list
):
key = f"{metadata_name}_{sequence}"
key_value = f"{metadata_name}_{sequence}_value"
match condition: match condition:
case "contains": case "contains":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}%")
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
)
) )
case "not contains": case "not contains":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key NOT LIKE :value")).params(
key=metadata_name, value=f"%{value}%"
(text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
) )
) )
case "start with": case "start with":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"{value}%")
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"{value}%"}
)
) )


case "end with": case "end with":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}")
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}"}
)
) )
case "is" | "=": case "is" | "=":
if isinstance(value, str): if isinstance(value, str):

+ 28
- 13
api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py View File

automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data) automatic_metadata_filters = self._automatic_metadata_filter_func(dataset_ids, query, node_data)
if automatic_metadata_filters: if automatic_metadata_filters:
conditions = [] conditions = []
for filter in automatic_metadata_filters:
for sequence, filter in enumerate(automatic_metadata_filters):
self._process_metadata_filter_func( self._process_metadata_filter_func(
sequence,
filter.get("condition", ""), filter.get("condition", ""),
filter.get("metadata_name", ""), filter.get("metadata_name", ""),
filter.get("value"), filter.get("value"),
if node_data.metadata_filtering_conditions: if node_data.metadata_filtering_conditions:
metadata_condition = MetadataCondition(**node_data.metadata_filtering_conditions.model_dump()) metadata_condition = MetadataCondition(**node_data.metadata_filtering_conditions.model_dump())
if node_data.metadata_filtering_conditions: if node_data.metadata_filtering_conditions:
for condition in node_data.metadata_filtering_conditions.conditions: # type: ignore
for sequence, condition in enumerate(node_data.metadata_filtering_conditions.conditions): # type: ignore
metadata_name = condition.name metadata_name = condition.name
expected_value = condition.value expected_value = condition.value
if expected_value is not None or condition.comparison_operator in ("empty", "not empty"): if expected_value is not None or condition.comparison_operator in ("empty", "not empty"):
expected_value = self.graph_runtime_state.variable_pool.convert_template( expected_value = self.graph_runtime_state.variable_pool.convert_template(
expected_value expected_value
).value[0] ).value[0]
if expected_value.value_type == "number":
expected_value = expected_value.value
elif expected_value.value_type == "string":
expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip()
if expected_value.value_type == "number": # type: ignore
expected_value = expected_value.value # type: ignore
elif expected_value.value_type == "string": # type: ignore
expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore
else: else:
raise ValueError("Invalid expected metadata value type") raise ValueError("Invalid expected metadata value type")
filters = self._process_metadata_filter_func( filters = self._process_metadata_filter_func(
condition.comparison_operator, metadata_name, expected_value, filters
sequence,
condition.comparison_operator,
metadata_name,
expected_value,
filters,
) )
else: else:
raise ValueError("Invalid metadata filtering mode") raise ValueError("Invalid metadata filtering mode")
return [] return []
return automatic_metadata_filters return automatic_metadata_filters


def _process_metadata_filter_func(self, condition: str, metadata_name: str, value: Optional[str], filters: list):
def _process_metadata_filter_func(
self, sequence: int, condition: str, metadata_name: str, value: Optional[Any], filters: list
):
key = f"{metadata_name}_{sequence}"
key_value = f"{metadata_name}_{sequence}_value"
match condition: match condition:
case "contains": case "contains":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}%")
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
)
) )
case "not contains": case "not contains":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key NOT LIKE :value")).params(
key=metadata_name, value=f"%{value}%"
(text(f"documents.doc_metadata ->> :{key} NOT LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}%"}
) )
) )
case "start with": case "start with":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"{value}%")
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"{value}%"}
)
) )
case "end with": case "end with":
filters.append( filters.append(
(text("documents.doc_metadata ->> :key LIKE :value")).params(key=metadata_name, value=f"%{value}")
(text(f"documents.doc_metadata ->> :{key} LIKE :{key_value}")).params(
**{key: metadata_name, key_value: f"%{value}"}
)
) )
case "=" | "is": case "=" | "is":
if isinstance(value, str): if isinstance(value, str):

Loading…
Cancel
Save