| 
                        123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 | 
                        - import json
 - from collections.abc import Mapping
 - from typing import Any
 - 
 - from opentelemetry.trace import Link, Status, StatusCode
 - 
 - from core.ops.aliyun_trace.entities.semconv import (
 -     GEN_AI_FRAMEWORK,
 -     GEN_AI_SESSION_ID,
 -     GEN_AI_SPAN_KIND,
 -     GEN_AI_USER_ID,
 -     INPUT_VALUE,
 -     OUTPUT_VALUE,
 -     GenAISpanKind,
 - )
 - from core.rag.models.document import Document
 - from core.workflow.entities import WorkflowNodeExecution
 - from core.workflow.enums import WorkflowNodeExecutionStatus
 - from extensions.ext_database import db
 - from models import EndUser
 - 
 - # Constants
 - DEFAULT_JSON_ENSURE_ASCII = False
 - DEFAULT_FRAMEWORK_NAME = "dify"
 - 
 - 
 - def get_user_id_from_message_data(message_data) -> str:
 -     user_id = message_data.from_account_id
 -     if message_data.from_end_user_id:
 -         end_user_data: EndUser | None = (
 -             db.session.query(EndUser).where(EndUser.id == message_data.from_end_user_id).first()
 -         )
 -         if end_user_data is not None:
 -             user_id = end_user_data.session_id
 -     return user_id
 - 
 - 
 - def create_status_from_error(error: str | None) -> Status:
 -     if error:
 -         return Status(StatusCode.ERROR, error)
 -     return Status(StatusCode.OK)
 - 
 - 
 - def get_workflow_node_status(node_execution: WorkflowNodeExecution) -> Status:
 -     if node_execution.status == WorkflowNodeExecutionStatus.SUCCEEDED:
 -         return Status(StatusCode.OK)
 -     if node_execution.status in [WorkflowNodeExecutionStatus.FAILED, WorkflowNodeExecutionStatus.EXCEPTION]:
 -         return Status(StatusCode.ERROR, str(node_execution.error))
 -     return Status(StatusCode.UNSET)
 - 
 - 
 - def create_links_from_trace_id(trace_id: str | None) -> list[Link]:
 -     from core.ops.aliyun_trace.data_exporter.traceclient import create_link
 - 
 -     links = []
 -     if trace_id:
 -         links.append(create_link(trace_id_str=trace_id))
 -     return links
 - 
 - 
 - def extract_retrieval_documents(documents: list[Document]) -> list[dict[str, Any]]:
 -     documents_data = []
 -     for document in documents:
 -         document_data = {
 -             "content": document.page_content,
 -             "metadata": {
 -                 "dataset_id": document.metadata.get("dataset_id"),
 -                 "doc_id": document.metadata.get("doc_id"),
 -                 "document_id": document.metadata.get("document_id"),
 -             },
 -             "score": document.metadata.get("score"),
 -         }
 -         documents_data.append(document_data)
 -     return documents_data
 - 
 - 
 - def serialize_json_data(data: Any, ensure_ascii: bool = DEFAULT_JSON_ENSURE_ASCII) -> str:
 -     return json.dumps(data, ensure_ascii=ensure_ascii)
 - 
 - 
 - def create_common_span_attributes(
 -     session_id: str = "",
 -     user_id: str = "",
 -     span_kind: str = GenAISpanKind.CHAIN,
 -     framework: str = DEFAULT_FRAMEWORK_NAME,
 -     inputs: str = "",
 -     outputs: str = "",
 - ) -> dict[str, Any]:
 -     return {
 -         GEN_AI_SESSION_ID: session_id,
 -         GEN_AI_USER_ID: user_id,
 -         GEN_AI_SPAN_KIND: span_kind,
 -         GEN_AI_FRAMEWORK: framework,
 -         INPUT_VALUE: inputs,
 -         OUTPUT_VALUE: outputs,
 -     }
 - 
 - 
 - def format_retrieval_documents(retrieval_documents: list) -> list:
 -     try:
 -         if not isinstance(retrieval_documents, list):
 -             return []
 - 
 -         semantic_documents = []
 -         for doc in retrieval_documents:
 -             if not isinstance(doc, dict):
 -                 continue
 - 
 -             metadata = doc.get("metadata", {})
 -             content = doc.get("content", "")
 -             title = doc.get("title", "")
 -             score = metadata.get("score", 0.0)
 -             document_id = metadata.get("document_id", "")
 - 
 -             semantic_metadata = {}
 -             if title:
 -                 semantic_metadata["title"] = title
 -             if metadata.get("source"):
 -                 semantic_metadata["source"] = metadata["source"]
 -             elif metadata.get("_source"):
 -                 semantic_metadata["source"] = metadata["_source"]
 -             if metadata.get("doc_metadata"):
 -                 doc_metadata = metadata["doc_metadata"]
 -                 if isinstance(doc_metadata, dict):
 -                     semantic_metadata.update(doc_metadata)
 - 
 -             semantic_doc = {
 -                 "document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
 -             }
 -             semantic_documents.append(semantic_doc)
 - 
 -         return semantic_documents
 -     except Exception:
 -         return []
 - 
 - 
 - def format_input_messages(process_data: Mapping[str, Any]) -> str:
 -     try:
 -         if not isinstance(process_data, dict):
 -             return serialize_json_data([])
 - 
 -         prompts = process_data.get("prompts", [])
 -         if not prompts:
 -             return serialize_json_data([])
 - 
 -         valid_roles = {"system", "user", "assistant", "tool"}
 -         input_messages = []
 -         for prompt in prompts:
 -             if not isinstance(prompt, dict):
 -                 continue
 - 
 -             role = prompt.get("role", "")
 -             text = prompt.get("text", "")
 - 
 -             if not role or role not in valid_roles:
 -                 continue
 - 
 -             if text:
 -                 message = {"role": role, "parts": [{"type": "text", "content": text}]}
 -                 input_messages.append(message)
 - 
 -         return serialize_json_data(input_messages)
 -     except Exception:
 -         return serialize_json_data([])
 - 
 - 
 - def format_output_messages(outputs: Mapping[str, Any]) -> str:
 -     try:
 -         if not isinstance(outputs, dict):
 -             return serialize_json_data([])
 - 
 -         text = outputs.get("text", "")
 -         finish_reason = outputs.get("finish_reason", "")
 - 
 -         if not text:
 -             return serialize_json_data([])
 - 
 -         valid_finish_reasons = {"stop", "length", "content_filter", "tool_call", "error"}
 -         if finish_reason not in valid_finish_reasons:
 -             finish_reason = "stop"
 - 
 -         output_message = {
 -             "role": "assistant",
 -             "parts": [{"type": "text", "content": text}],
 -             "finish_reason": finish_reason,
 -         }
 - 
 -         return serialize_json_data([output_message])
 -     except Exception:
 -         return serialize_json_data([])
 
 
  |