Procházet zdrojové kódy

update gen_ai semconv for aliyun trace (#26288)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
tags/1.9.1
heyszt před 1 měsícem
rodič
revize
4da93ba579
Žádný účet není propojen s e-mailovou adresou tvůrce revize

+ 23
- 17
api/core/ops/aliyun_trace/aliyun_trace.py Zobrazit soubor

from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData, TraceMetadata from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData, TraceMetadata
from core.ops.aliyun_trace.entities.semconv import ( from core.ops.aliyun_trace.entities.semconv import (
GEN_AI_COMPLETION, GEN_AI_COMPLETION,
GEN_AI_MODEL_NAME,
GEN_AI_INPUT_MESSAGE,
GEN_AI_OUTPUT_MESSAGE,
GEN_AI_PROMPT, GEN_AI_PROMPT,
GEN_AI_PROMPT_TEMPLATE_TEMPLATE,
GEN_AI_PROMPT_TEMPLATE_VARIABLE,
GEN_AI_PROVIDER_NAME,
GEN_AI_REQUEST_MODEL,
GEN_AI_RESPONSE_FINISH_REASON, GEN_AI_RESPONSE_FINISH_REASON,
GEN_AI_SYSTEM,
GEN_AI_USAGE_INPUT_TOKENS, GEN_AI_USAGE_INPUT_TOKENS,
GEN_AI_USAGE_OUTPUT_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS,
GEN_AI_USAGE_TOTAL_TOKENS, GEN_AI_USAGE_TOTAL_TOKENS,
create_links_from_trace_id, create_links_from_trace_id,
create_status_from_error, create_status_from_error,
extract_retrieval_documents, extract_retrieval_documents,
format_input_messages,
format_output_messages,
format_retrieval_documents,
get_user_id_from_message_data, get_user_id_from_message_data,
get_workflow_node_status, get_workflow_node_status,
serialize_json_data, serialize_json_data,
) )
self.trace_client.add_span(message_span) self.trace_client.add_span(message_span)


app_model_config = getattr(message_data, "app_model_config", {})
pre_prompt = getattr(app_model_config, "pre_prompt", "")
inputs_data = getattr(message_data, "inputs", {})

llm_span = SpanData( llm_span = SpanData(
trace_id=trace_metadata.trace_id, trace_id=trace_metadata.trace_id,
parent_span_id=message_span_id, parent_span_id=message_span_id,
inputs=inputs_json, inputs=inputs_json,
outputs=outputs_str, outputs=outputs_str,
), ),
GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "",
GEN_AI_REQUEST_MODEL: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_PROVIDER_NAME: trace_info.metadata.get("ls_provider") or "",
GEN_AI_USAGE_INPUT_TOKENS: str(trace_info.message_tokens), GEN_AI_USAGE_INPUT_TOKENS: str(trace_info.message_tokens),
GEN_AI_USAGE_OUTPUT_TOKENS: str(trace_info.answer_tokens), GEN_AI_USAGE_OUTPUT_TOKENS: str(trace_info.answer_tokens),
GEN_AI_USAGE_TOTAL_TOKENS: str(trace_info.total_tokens), GEN_AI_USAGE_TOTAL_TOKENS: str(trace_info.total_tokens),
GEN_AI_PROMPT_TEMPLATE_VARIABLE: serialize_json_data(inputs_data),
GEN_AI_PROMPT_TEMPLATE_TEMPLATE: pre_prompt,
GEN_AI_PROMPT: inputs_json, GEN_AI_PROMPT: inputs_json,
GEN_AI_COMPLETION: outputs_str, GEN_AI_COMPLETION: outputs_str,
}, },
input_value = str(node_execution.inputs.get("query", "")) if node_execution.inputs else "" input_value = str(node_execution.inputs.get("query", "")) if node_execution.inputs else ""
output_value = serialize_json_data(node_execution.outputs.get("result", [])) if node_execution.outputs else "" output_value = serialize_json_data(node_execution.outputs.get("result", [])) if node_execution.outputs else ""


retrieval_documents = node_execution.outputs.get("result", []) if node_execution.outputs else []
semantic_retrieval_documents = format_retrieval_documents(retrieval_documents)
semantic_retrieval_documents_json = serialize_json_data(semantic_retrieval_documents)

return SpanData( return SpanData(
trace_id=trace_metadata.trace_id, trace_id=trace_metadata.trace_id,
parent_span_id=trace_metadata.workflow_span_id, parent_span_id=trace_metadata.workflow_span_id,
outputs=output_value, outputs=output_value,
), ),
RETRIEVAL_QUERY: input_value, RETRIEVAL_QUERY: input_value,
RETRIEVAL_DOCUMENT: output_value,
RETRIEVAL_DOCUMENT: semantic_retrieval_documents_json,
}, },
status=get_workflow_node_status(node_execution), status=get_workflow_node_status(node_execution),
links=trace_metadata.links, links=trace_metadata.links,
prompts_json = serialize_json_data(process_data.get("prompts", [])) prompts_json = serialize_json_data(process_data.get("prompts", []))
text_output = str(outputs.get("text", "")) text_output = str(outputs.get("text", ""))


gen_ai_input_message = format_input_messages(process_data)
gen_ai_output_message = format_output_messages(outputs)

return SpanData( return SpanData(
trace_id=trace_metadata.trace_id, trace_id=trace_metadata.trace_id,
parent_span_id=trace_metadata.workflow_span_id, parent_span_id=trace_metadata.workflow_span_id,
inputs=prompts_json, inputs=prompts_json,
outputs=text_output, outputs=text_output,
), ),
GEN_AI_MODEL_NAME: process_data.get("model_name") or "",
GEN_AI_SYSTEM: process_data.get("model_provider") or "",
GEN_AI_REQUEST_MODEL: process_data.get("model_name") or "",
GEN_AI_PROVIDER_NAME: process_data.get("model_provider") or "",
GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)), GEN_AI_USAGE_INPUT_TOKENS: str(usage_data.get("prompt_tokens", 0)),
GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)), GEN_AI_USAGE_OUTPUT_TOKENS: str(usage_data.get("completion_tokens", 0)),
GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)), GEN_AI_USAGE_TOTAL_TOKENS: str(usage_data.get("total_tokens", 0)),
GEN_AI_PROMPT: prompts_json, GEN_AI_PROMPT: prompts_json,
GEN_AI_COMPLETION: text_output, GEN_AI_COMPLETION: text_output,
GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason") or "", GEN_AI_RESPONSE_FINISH_REASON: outputs.get("finish_reason") or "",
GEN_AI_INPUT_MESSAGE: gen_ai_input_message,
GEN_AI_OUTPUT_MESSAGE: gen_ai_output_message,
}, },
status=get_workflow_node_status(node_execution), status=get_workflow_node_status(node_execution),
links=trace_metadata.links, links=trace_metadata.links,
inputs=inputs_json, inputs=inputs_json,
outputs=suggested_question_json, outputs=suggested_question_json,
), ),
GEN_AI_MODEL_NAME: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_SYSTEM: trace_info.metadata.get("ls_provider") or "",
GEN_AI_REQUEST_MODEL: trace_info.metadata.get("ls_model_name") or "",
GEN_AI_PROVIDER_NAME: trace_info.metadata.get("ls_provider") or "",
GEN_AI_PROMPT: inputs_json, GEN_AI_PROMPT: inputs_json,
GEN_AI_COMPLETION: suggested_question_json, GEN_AI_COMPLETION: suggested_question_json,
}, },

+ 5
- 4
api/core/ops/aliyun_trace/entities/semconv.py Zobrazit soubor

RETRIEVAL_DOCUMENT: Final[str] = "retrieval.document" RETRIEVAL_DOCUMENT: Final[str] = "retrieval.document"


# LLM attributes # LLM attributes
GEN_AI_MODEL_NAME: Final[str] = "gen_ai.model_name"
GEN_AI_SYSTEM: Final[str] = "gen_ai.system"
GEN_AI_REQUEST_MODEL: Final[str] = "gen_ai.request.model"
GEN_AI_PROVIDER_NAME: Final[str] = "gen_ai.provider.name"
GEN_AI_USAGE_INPUT_TOKENS: Final[str] = "gen_ai.usage.input_tokens" GEN_AI_USAGE_INPUT_TOKENS: Final[str] = "gen_ai.usage.input_tokens"
GEN_AI_USAGE_OUTPUT_TOKENS: Final[str] = "gen_ai.usage.output_tokens" GEN_AI_USAGE_OUTPUT_TOKENS: Final[str] = "gen_ai.usage.output_tokens"
GEN_AI_USAGE_TOTAL_TOKENS: Final[str] = "gen_ai.usage.total_tokens" GEN_AI_USAGE_TOTAL_TOKENS: Final[str] = "gen_ai.usage.total_tokens"
GEN_AI_PROMPT_TEMPLATE_TEMPLATE: Final[str] = "gen_ai.prompt_template.template"
GEN_AI_PROMPT_TEMPLATE_VARIABLE: Final[str] = "gen_ai.prompt_template.variable"
GEN_AI_PROMPT: Final[str] = "gen_ai.prompt" GEN_AI_PROMPT: Final[str] = "gen_ai.prompt"
GEN_AI_COMPLETION: Final[str] = "gen_ai.completion" GEN_AI_COMPLETION: Final[str] = "gen_ai.completion"
GEN_AI_RESPONSE_FINISH_REASON: Final[str] = "gen_ai.response.finish_reason" GEN_AI_RESPONSE_FINISH_REASON: Final[str] = "gen_ai.response.finish_reason"


GEN_AI_INPUT_MESSAGE: Final[str] = "gen_ai.input.messages"
GEN_AI_OUTPUT_MESSAGE: Final[str] = "gen_ai.output.messages"

# Tool attributes # Tool attributes
TOOL_NAME: Final[str] = "tool.name" TOOL_NAME: Final[str] = "tool.name"
TOOL_DESCRIPTION: Final[str] = "tool.description" TOOL_DESCRIPTION: Final[str] = "tool.description"

+ 95
- 0
api/core/ops/aliyun_trace/utils.py Zobrazit soubor

import json import json
from collections.abc import Mapping
from typing import Any from typing import Any


from opentelemetry.trace import Link, Status, StatusCode from opentelemetry.trace import Link, Status, StatusCode
INPUT_VALUE: inputs, INPUT_VALUE: inputs,
OUTPUT_VALUE: outputs, OUTPUT_VALUE: outputs,
} }


def format_retrieval_documents(retrieval_documents: list) -> list:
try:
if not isinstance(retrieval_documents, list):
return []

semantic_documents = []
for doc in retrieval_documents:
if not isinstance(doc, dict):
continue

metadata = doc.get("metadata", {})
content = doc.get("content", "")
title = doc.get("title", "")
score = metadata.get("score", 0.0)
document_id = metadata.get("document_id", "")

semantic_metadata = {}
if title:
semantic_metadata["title"] = title
if metadata.get("source"):
semantic_metadata["source"] = metadata["source"]
elif metadata.get("_source"):
semantic_metadata["source"] = metadata["_source"]
if metadata.get("doc_metadata"):
doc_metadata = metadata["doc_metadata"]
if isinstance(doc_metadata, dict):
semantic_metadata.update(doc_metadata)

semantic_doc = {
"document": {"content": content, "metadata": semantic_metadata, "score": score, "id": document_id}
}
semantic_documents.append(semantic_doc)

return semantic_documents
except Exception:
return []


def format_input_messages(process_data: Mapping[str, Any]) -> str:
try:
if not isinstance(process_data, dict):
return serialize_json_data([])

prompts = process_data.get("prompts", [])
if not prompts:
return serialize_json_data([])

valid_roles = {"system", "user", "assistant", "tool"}
input_messages = []
for prompt in prompts:
if not isinstance(prompt, dict):
continue

role = prompt.get("role", "")
text = prompt.get("text", "")

if not role or role not in valid_roles:
continue

if text:
message = {"role": role, "parts": [{"type": "text", "content": text}]}
input_messages.append(message)

return serialize_json_data(input_messages)
except Exception:
return serialize_json_data([])


def format_output_messages(outputs: Mapping[str, Any]) -> str:
try:
if not isinstance(outputs, dict):
return serialize_json_data([])

text = outputs.get("text", "")
finish_reason = outputs.get("finish_reason", "")

if not text:
return serialize_json_data([])

valid_finish_reasons = {"stop", "length", "content_filter", "tool_call", "error"}
if finish_reason not in valid_finish_reasons:
finish_reason = "stop"

output_message = {
"role": "assistant",
"parts": [{"type": "text", "content": text}],
"finish_reason": finish_reason,
}

return serialize_json_data([output_message])
except Exception:
return serialize_json_data([])

Načítá se…
Zrušit
Uložit