Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

workflow_draft_variable_service.py 42KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056
  1. import dataclasses
  2. import json
  3. import logging
  4. from collections.abc import Mapping, Sequence
  5. from concurrent.futures import ThreadPoolExecutor
  6. from enum import StrEnum
  7. from typing import Any, ClassVar
  8. from sqlalchemy import Engine, orm, select
  9. from sqlalchemy.dialects.postgresql import insert
  10. from sqlalchemy.orm import Session, sessionmaker
  11. from sqlalchemy.sql.expression import and_, or_
  12. from configs import dify_config
  13. from core.app.entities.app_invoke_entities import InvokeFrom
  14. from core.file.models import File
  15. from core.variables import Segment, StringSegment, Variable
  16. from core.variables.consts import SELECTORS_LENGTH
  17. from core.variables.segments import (
  18. ArrayFileSegment,
  19. FileSegment,
  20. )
  21. from core.variables.types import SegmentType
  22. from core.variables.utils import dumps_with_segments
  23. from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID
  24. from core.workflow.enums import SystemVariableKey
  25. from core.workflow.nodes import NodeType
  26. from core.workflow.nodes.variable_assigner.common.helpers import get_updated_variables
  27. from core.workflow.variable_loader import VariableLoader
  28. from extensions.ext_storage import storage
  29. from factories.file_factory import StorageKeyLoader
  30. from factories.variable_factory import build_segment, segment_to_variable
  31. from libs.datetime_utils import naive_utc_now
  32. from libs.uuid_utils import uuidv7
  33. from models import App, Conversation
  34. from models.account import Account
  35. from models.enums import DraftVariableType
  36. from models.workflow import Workflow, WorkflowDraftVariable, WorkflowDraftVariableFile, is_system_variable_editable
  37. from repositories.factory import DifyAPIRepositoryFactory
  38. from services.file_service import FileService
  39. from services.variable_truncator import VariableTruncator
  40. logger = logging.getLogger(__name__)
  41. @dataclasses.dataclass(frozen=True)
  42. class WorkflowDraftVariableList:
  43. variables: list[WorkflowDraftVariable]
  44. total: int | None = None
  45. @dataclasses.dataclass(frozen=True)
  46. class DraftVarFileDeletion:
  47. draft_var_id: str
  48. draft_var_file_id: str
  49. class WorkflowDraftVariableError(Exception):
  50. pass
  51. class VariableResetError(WorkflowDraftVariableError):
  52. pass
  53. class UpdateNotSupportedError(WorkflowDraftVariableError):
  54. pass
  55. class DraftVarLoader(VariableLoader):
  56. # This implements the VariableLoader interface for loading draft variables.
  57. #
  58. # ref: core.workflow.variable_loader.VariableLoader
  59. # Database engine used for loading variables.
  60. _engine: Engine
  61. # Application ID for which variables are being loaded.
  62. _app_id: str
  63. _tenant_id: str
  64. _fallback_variables: Sequence[Variable]
  65. def __init__(
  66. self,
  67. engine: Engine,
  68. app_id: str,
  69. tenant_id: str,
  70. fallback_variables: Sequence[Variable] | None = None,
  71. ):
  72. self._engine = engine
  73. self._app_id = app_id
  74. self._tenant_id = tenant_id
  75. self._fallback_variables = fallback_variables or []
  76. def _selector_to_tuple(self, selector: Sequence[str]) -> tuple[str, str]:
  77. return (selector[0], selector[1])
  78. def load_variables(self, selectors: list[list[str]]) -> list[Variable]:
  79. if not selectors:
  80. return []
  81. # Map each selector (as a tuple via `_selector_to_tuple`) to its corresponding Variable instance.
  82. variable_by_selector: dict[tuple[str, str], Variable] = {}
  83. with Session(bind=self._engine, expire_on_commit=False) as session:
  84. srv = WorkflowDraftVariableService(session)
  85. draft_vars = srv.get_draft_variables_by_selectors(self._app_id, selectors)
  86. # Important:
  87. files: list[File] = []
  88. # FileSegment and ArrayFileSegment are not subject to offloading, so their values
  89. # can be safely accessed before any offloading logic is applied.
  90. for draft_var in draft_vars:
  91. value = draft_var.get_value()
  92. if isinstance(value, FileSegment):
  93. files.append(value.value)
  94. elif isinstance(value, ArrayFileSegment):
  95. files.extend(value.value)
  96. with Session(bind=self._engine) as session:
  97. storage_key_loader = StorageKeyLoader(session, tenant_id=self._tenant_id)
  98. storage_key_loader.load_storage_keys(files)
  99. offloaded_draft_vars = []
  100. for draft_var in draft_vars:
  101. if draft_var.is_truncated():
  102. offloaded_draft_vars.append(draft_var)
  103. continue
  104. segment = draft_var.get_value()
  105. variable = segment_to_variable(
  106. segment=segment,
  107. selector=draft_var.get_selector(),
  108. id=draft_var.id,
  109. name=draft_var.name,
  110. description=draft_var.description,
  111. )
  112. selector_tuple = self._selector_to_tuple(variable.selector)
  113. variable_by_selector[selector_tuple] = variable
  114. # Load offloaded variables using multithreading.
  115. # This approach reduces loading time by querying external systems concurrently.
  116. with ThreadPoolExecutor(max_workers=10) as executor:
  117. offloaded_variables = executor.map(self._load_offloaded_variable, offloaded_draft_vars)
  118. for selector, variable in offloaded_variables:
  119. variable_by_selector[selector] = variable
  120. return list(variable_by_selector.values())
  121. def _load_offloaded_variable(self, draft_var: WorkflowDraftVariable) -> tuple[tuple[str, str], Variable]:
  122. # This logic is closely tied to `WorkflowDraftVaribleService._try_offload_large_variable`
  123. # and must remain synchronized with it.
  124. # Ideally, these should be co-located for better maintainability.
  125. # However, due to the current code structure, this is not straightforward.
  126. variable_file = draft_var.variable_file
  127. assert variable_file is not None
  128. upload_file = variable_file.upload_file
  129. assert upload_file is not None
  130. content = storage.load(upload_file.key)
  131. if variable_file.value_type == SegmentType.STRING:
  132. # The inferenced type is StringSegment, which is not correct inside this function.
  133. segment: Segment = StringSegment(value=content.decode())
  134. variable = segment_to_variable(
  135. segment=segment,
  136. selector=draft_var.get_selector(),
  137. id=draft_var.id,
  138. name=draft_var.name,
  139. description=draft_var.description,
  140. )
  141. return (draft_var.node_id, draft_var.name), variable
  142. deserialized = json.loads(content)
  143. segment = WorkflowDraftVariable.build_segment_with_type(variable_file.value_type, deserialized)
  144. variable = segment_to_variable(
  145. segment=segment,
  146. selector=draft_var.get_selector(),
  147. id=draft_var.id,
  148. name=draft_var.name,
  149. description=draft_var.description,
  150. )
  151. # No special handling needed for ArrayFileSegment, as we do not offload ArrayFileSegment
  152. return (draft_var.node_id, draft_var.name), variable
  153. class WorkflowDraftVariableService:
  154. _session: Session
  155. def __init__(self, session: Session):
  156. """
  157. Initialize the WorkflowDraftVariableService with a SQLAlchemy session.
  158. Args:
  159. session (Session): The SQLAlchemy session used to execute database queries.
  160. The provided session must be bound to an `Engine` object, not a specific `Connection`.
  161. Raises:
  162. AssertionError: If the provided session is not bound to an `Engine` object.
  163. """
  164. self._session = session
  165. engine = session.get_bind()
  166. # Ensure the session is bound to a engine.
  167. assert isinstance(engine, Engine)
  168. session_maker = sessionmaker(bind=engine, expire_on_commit=False)
  169. self._api_node_execution_repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(
  170. session_maker
  171. )
  172. def get_variable(self, variable_id: str) -> WorkflowDraftVariable | None:
  173. return (
  174. self._session.query(WorkflowDraftVariable)
  175. .options(orm.selectinload(WorkflowDraftVariable.variable_file))
  176. .where(WorkflowDraftVariable.id == variable_id)
  177. .first()
  178. )
  179. def get_draft_variables_by_selectors(
  180. self,
  181. app_id: str,
  182. selectors: Sequence[list[str]],
  183. ) -> list[WorkflowDraftVariable]:
  184. """
  185. Retrieve WorkflowDraftVariable instances based on app_id and selectors.
  186. The returned WorkflowDraftVariable objects are guaranteed to have their
  187. associated variable_file and variable_file.upload_file relationships preloaded.
  188. """
  189. ors = []
  190. for selector in selectors:
  191. assert len(selector) >= SELECTORS_LENGTH, f"Invalid selector to get: {selector}"
  192. node_id, name = selector[:2]
  193. ors.append(and_(WorkflowDraftVariable.node_id == node_id, WorkflowDraftVariable.name == name))
  194. # NOTE(QuantumGhost): Although the number of `or` expressions may be large, as long as
  195. # each expression includes conditions on both `node_id` and `name` (which are covered by the unique index),
  196. # PostgreSQL can efficiently retrieve the results using a bitmap index scan.
  197. #
  198. # Alternatively, a `SELECT` statement could be constructed for each selector and
  199. # combined using `UNION` to fetch all rows.
  200. # Benchmarking indicates that both approaches yield comparable performance.
  201. variables = (
  202. self._session.query(WorkflowDraftVariable)
  203. .options(
  204. orm.selectinload(WorkflowDraftVariable.variable_file).selectinload(
  205. WorkflowDraftVariableFile.upload_file
  206. )
  207. )
  208. .where(WorkflowDraftVariable.app_id == app_id, or_(*ors))
  209. .all()
  210. )
  211. return variables
  212. def list_variables_without_values(self, app_id: str, page: int, limit: int) -> WorkflowDraftVariableList:
  213. criteria = WorkflowDraftVariable.app_id == app_id
  214. total = None
  215. query = self._session.query(WorkflowDraftVariable).where(criteria)
  216. if page == 1:
  217. total = query.count()
  218. variables = (
  219. # Do not load the `value` field
  220. query.options(
  221. orm.defer(WorkflowDraftVariable.value, raiseload=True),
  222. )
  223. .order_by(WorkflowDraftVariable.created_at.desc())
  224. .limit(limit)
  225. .offset((page - 1) * limit)
  226. .all()
  227. )
  228. return WorkflowDraftVariableList(variables=variables, total=total)
  229. def _list_node_variables(self, app_id: str, node_id: str) -> WorkflowDraftVariableList:
  230. criteria = (
  231. WorkflowDraftVariable.app_id == app_id,
  232. WorkflowDraftVariable.node_id == node_id,
  233. )
  234. query = self._session.query(WorkflowDraftVariable).where(*criteria)
  235. variables = (
  236. query.options(orm.selectinload(WorkflowDraftVariable.variable_file))
  237. .order_by(WorkflowDraftVariable.created_at.desc())
  238. .all()
  239. )
  240. return WorkflowDraftVariableList(variables=variables)
  241. def list_node_variables(self, app_id: str, node_id: str) -> WorkflowDraftVariableList:
  242. return self._list_node_variables(app_id, node_id)
  243. def list_conversation_variables(self, app_id: str) -> WorkflowDraftVariableList:
  244. return self._list_node_variables(app_id, CONVERSATION_VARIABLE_NODE_ID)
  245. def list_system_variables(self, app_id: str) -> WorkflowDraftVariableList:
  246. return self._list_node_variables(app_id, SYSTEM_VARIABLE_NODE_ID)
  247. def get_conversation_variable(self, app_id: str, name: str) -> WorkflowDraftVariable | None:
  248. return self._get_variable(app_id=app_id, node_id=CONVERSATION_VARIABLE_NODE_ID, name=name)
  249. def get_system_variable(self, app_id: str, name: str) -> WorkflowDraftVariable | None:
  250. return self._get_variable(app_id=app_id, node_id=SYSTEM_VARIABLE_NODE_ID, name=name)
  251. def get_node_variable(self, app_id: str, node_id: str, name: str) -> WorkflowDraftVariable | None:
  252. return self._get_variable(app_id, node_id, name)
  253. def _get_variable(self, app_id: str, node_id: str, name: str) -> WorkflowDraftVariable | None:
  254. variable = (
  255. self._session.query(WorkflowDraftVariable)
  256. .options(orm.selectinload(WorkflowDraftVariable.variable_file))
  257. .where(
  258. WorkflowDraftVariable.app_id == app_id,
  259. WorkflowDraftVariable.node_id == node_id,
  260. WorkflowDraftVariable.name == name,
  261. )
  262. .first()
  263. )
  264. return variable
  265. def update_variable(
  266. self,
  267. variable: WorkflowDraftVariable,
  268. name: str | None = None,
  269. value: Segment | None = None,
  270. ) -> WorkflowDraftVariable:
  271. if not variable.editable:
  272. raise UpdateNotSupportedError(f"variable not support updating, id={variable.id}")
  273. if name is not None:
  274. variable.set_name(name)
  275. if value is not None:
  276. variable.set_value(value)
  277. variable.last_edited_at = naive_utc_now()
  278. self._session.flush()
  279. return variable
  280. def _reset_conv_var(self, workflow: Workflow, variable: WorkflowDraftVariable) -> WorkflowDraftVariable | None:
  281. conv_var_by_name = {i.name: i for i in workflow.conversation_variables}
  282. conv_var = conv_var_by_name.get(variable.name)
  283. if conv_var is None:
  284. self._session.delete(instance=variable)
  285. self._session.flush()
  286. logger.warning(
  287. "Conversation variable not found for draft variable, id=%s, name=%s", variable.id, variable.name
  288. )
  289. return None
  290. variable.set_value(conv_var)
  291. variable.last_edited_at = None
  292. self._session.add(variable)
  293. self._session.flush()
  294. return variable
  295. def _reset_node_var_or_sys_var(
  296. self, workflow: Workflow, variable: WorkflowDraftVariable
  297. ) -> WorkflowDraftVariable | None:
  298. # If a variable does not allow updating, it makes no sense to reset it.
  299. if not variable.editable:
  300. return variable
  301. # No execution record for this variable, delete the variable instead.
  302. if variable.node_execution_id is None:
  303. self._session.delete(instance=variable)
  304. self._session.flush()
  305. logger.warning("draft variable has no node_execution_id, id=%s, name=%s", variable.id, variable.name)
  306. return None
  307. node_exec = self._api_node_execution_repo.get_execution_by_id(variable.node_execution_id)
  308. if node_exec is None:
  309. logger.warning(
  310. "Node exectution not found for draft variable, id=%s, name=%s, node_execution_id=%s",
  311. variable.id,
  312. variable.name,
  313. variable.node_execution_id,
  314. )
  315. self._session.delete(instance=variable)
  316. self._session.flush()
  317. return None
  318. outputs_dict = node_exec.load_full_outputs(self._session, storage) or {}
  319. # a sentinel value used to check the absent of the output variable key.
  320. absent = object()
  321. if variable.get_variable_type() == DraftVariableType.NODE:
  322. # Get node type for proper value extraction
  323. node_config = workflow.get_node_config_by_id(variable.node_id)
  324. node_type = workflow.get_node_type_from_node_config(node_config)
  325. # Note: Based on the implementation in `_build_from_variable_assigner_mapping`,
  326. # VariableAssignerNode (both v1 and v2) can only create conversation draft variables.
  327. # For consistency, we should simply return when processing VARIABLE_ASSIGNER nodes.
  328. #
  329. # This implementation must remain synchronized with the `_build_from_variable_assigner_mapping`
  330. # and `save` methods.
  331. if node_type == NodeType.VARIABLE_ASSIGNER:
  332. return variable
  333. output_value = outputs_dict.get(variable.name, absent)
  334. else:
  335. output_value = outputs_dict.get(f"sys.{variable.name}", absent)
  336. # We cannot use `is None` to check the existence of an output variable here as
  337. # the value of the output may be `None`.
  338. if output_value is absent:
  339. # If variable not found in execution data, delete the variable
  340. self._session.delete(instance=variable)
  341. self._session.flush()
  342. return None
  343. value_seg = WorkflowDraftVariable.build_segment_with_type(variable.value_type, output_value)
  344. # Extract variable value using unified logic
  345. variable.set_value(value_seg)
  346. variable.last_edited_at = None # Reset to indicate this is a reset operation
  347. self._session.flush()
  348. return variable
  349. def reset_variable(self, workflow: Workflow, variable: WorkflowDraftVariable) -> WorkflowDraftVariable | None:
  350. variable_type = variable.get_variable_type()
  351. if variable_type == DraftVariableType.SYS and not is_system_variable_editable(variable.name):
  352. raise VariableResetError(f"cannot reset system variable, variable_id={variable.id}")
  353. if variable_type == DraftVariableType.CONVERSATION:
  354. return self._reset_conv_var(workflow, variable)
  355. else:
  356. return self._reset_node_var_or_sys_var(workflow, variable)
  357. def delete_variable(self, variable: WorkflowDraftVariable):
  358. if not variable.is_truncated():
  359. self._session.delete(variable)
  360. return
  361. variable_query = (
  362. select(WorkflowDraftVariable)
  363. .options(
  364. orm.selectinload(WorkflowDraftVariable.variable_file).selectinload(
  365. WorkflowDraftVariableFile.upload_file
  366. ),
  367. )
  368. .where(WorkflowDraftVariable.id == variable.id)
  369. )
  370. variable_reloaded = self._session.execute(variable_query).scalars().first()
  371. if variable_reloaded is None:
  372. logger.warning("Associated WorkflowDraftVariable not found, draft_var_id=%s", variable.id)
  373. self._session.delete(variable)
  374. return
  375. variable_file = variable_reloaded.variable_file
  376. if variable_file is None:
  377. logger.warning(
  378. "Associated WorkflowDraftVariableFile not found, draft_var_id=%s, file_id=%s",
  379. variable_reloaded.id,
  380. variable_reloaded.file_id,
  381. )
  382. self._session.delete(variable)
  383. return
  384. upload_file = variable_file.upload_file
  385. if upload_file is None:
  386. logger.warning(
  387. "Associated UploadFile not found, draft_var_id=%s, file_id=%s, upload_file_id=%s",
  388. variable_reloaded.id,
  389. variable_reloaded.file_id,
  390. variable_file.upload_file_id,
  391. )
  392. self._session.delete(variable)
  393. self._session.delete(variable_file)
  394. return
  395. storage.delete(upload_file.key)
  396. self._session.delete(upload_file)
  397. self._session.delete(upload_file)
  398. self._session.delete(variable)
  399. def delete_workflow_variables(self, app_id: str):
  400. (
  401. self._session.query(WorkflowDraftVariable)
  402. .where(WorkflowDraftVariable.app_id == app_id)
  403. .delete(synchronize_session=False)
  404. )
  405. def delete_workflow_draft_variable_file(self, deletions: list[DraftVarFileDeletion]):
  406. variable_files_query = (
  407. select(WorkflowDraftVariableFile)
  408. .options(orm.selectinload(WorkflowDraftVariableFile.upload_file))
  409. .where(WorkflowDraftVariableFile.id.in_([i.draft_var_file_id for i in deletions]))
  410. )
  411. variable_files = self._session.execute(variable_files_query).scalars().all()
  412. variable_files_by_id = {i.id: i for i in variable_files}
  413. for i in deletions:
  414. variable_file = variable_files_by_id.get(i.draft_var_file_id)
  415. if variable_file is None:
  416. logger.warning(
  417. "Associated WorkflowDraftVariableFile not found, draft_var_id=%s, file_id=%s",
  418. i.draft_var_id,
  419. i.draft_var_file_id,
  420. )
  421. continue
  422. upload_file = variable_file.upload_file
  423. if upload_file is None:
  424. logger.warning(
  425. "Associated UploadFile not found, draft_var_id=%s, file_id=%s, upload_file_id=%s",
  426. i.draft_var_id,
  427. i.draft_var_file_id,
  428. variable_file.upload_file_id,
  429. )
  430. self._session.delete(variable_file)
  431. else:
  432. storage.delete(upload_file.key)
  433. self._session.delete(upload_file)
  434. self._session.delete(variable_file)
  435. def delete_node_variables(self, app_id: str, node_id: str):
  436. return self._delete_node_variables(app_id, node_id)
  437. def _delete_node_variables(self, app_id: str, node_id: str):
  438. self._session.query(WorkflowDraftVariable).where(
  439. WorkflowDraftVariable.app_id == app_id,
  440. WorkflowDraftVariable.node_id == node_id,
  441. ).delete()
  442. def _get_conversation_id_from_draft_variable(self, app_id: str) -> str | None:
  443. draft_var = self._get_variable(
  444. app_id=app_id,
  445. node_id=SYSTEM_VARIABLE_NODE_ID,
  446. name=str(SystemVariableKey.CONVERSATION_ID),
  447. )
  448. if draft_var is None:
  449. return None
  450. segment = draft_var.get_value()
  451. if not isinstance(segment, StringSegment):
  452. logger.warning(
  453. "sys.conversation_id variable is not a string: app_id=%s, id=%s",
  454. app_id,
  455. draft_var.id,
  456. )
  457. return None
  458. return segment.value
  459. def get_or_create_conversation(
  460. self,
  461. account_id: str,
  462. app: App,
  463. workflow: Workflow,
  464. ) -> str:
  465. """
  466. get_or_create_conversation creates and returns the ID of a conversation for debugging.
  467. If a conversation already exists, as determined by the following criteria, its ID is returned:
  468. - The system variable `sys.conversation_id` exists in the draft variable table, and
  469. - A corresponding conversation record is found in the database.
  470. If no such conversation exists, a new conversation is created and its ID is returned.
  471. """
  472. conv_id = self._get_conversation_id_from_draft_variable(workflow.app_id)
  473. if conv_id is not None:
  474. conversation = (
  475. self._session.query(Conversation)
  476. .where(
  477. Conversation.id == conv_id,
  478. Conversation.app_id == workflow.app_id,
  479. )
  480. .first()
  481. )
  482. # Only return the conversation ID if it exists and is valid (has a correspond conversation record in DB).
  483. if conversation is not None:
  484. return conv_id
  485. conversation = Conversation(
  486. app_id=workflow.app_id,
  487. app_model_config_id=app.app_model_config_id,
  488. model_provider=None,
  489. model_id="",
  490. override_model_configs=None,
  491. mode=app.mode,
  492. name="Draft Debugging Conversation",
  493. inputs={},
  494. introduction="",
  495. system_instruction="",
  496. system_instruction_tokens=0,
  497. status="normal",
  498. invoke_from=InvokeFrom.DEBUGGER.value,
  499. from_source="console",
  500. from_end_user_id=None,
  501. from_account_id=account_id,
  502. )
  503. self._session.add(conversation)
  504. self._session.flush()
  505. return conversation.id
  506. def prefill_conversation_variable_default_values(self, workflow: Workflow):
  507. """"""
  508. draft_conv_vars: list[WorkflowDraftVariable] = []
  509. for conv_var in workflow.conversation_variables:
  510. draft_var = WorkflowDraftVariable.new_conversation_variable(
  511. app_id=workflow.app_id,
  512. name=conv_var.name,
  513. value=conv_var,
  514. description=conv_var.description,
  515. )
  516. draft_conv_vars.append(draft_var)
  517. _batch_upsert_draft_variable(
  518. self._session,
  519. draft_conv_vars,
  520. policy=_UpsertPolicy.IGNORE,
  521. )
  522. class _UpsertPolicy(StrEnum):
  523. IGNORE = "ignore"
  524. OVERWRITE = "overwrite"
  525. def _batch_upsert_draft_variable(
  526. session: Session,
  527. draft_vars: Sequence[WorkflowDraftVariable],
  528. policy: _UpsertPolicy = _UpsertPolicy.OVERWRITE,
  529. ):
  530. if not draft_vars:
  531. return None
  532. # Although we could use SQLAlchemy ORM operations here, we choose not to for several reasons:
  533. #
  534. # 1. The variable saving process involves writing multiple rows to the
  535. # `workflow_draft_variables` table. Batch insertion significantly improves performance.
  536. # 2. Using the ORM would require either:
  537. #
  538. # a. Checking for the existence of each variable before insertion,
  539. # resulting in 2n SQL statements for n variables and potential concurrency issues.
  540. # b. Attempting insertion first, then updating if a unique index violation occurs,
  541. # which still results in n to 2n SQL statements.
  542. #
  543. # Both approaches are inefficient and suboptimal.
  544. # 3. We do not need to retrieve the results of the SQL execution or populate ORM
  545. # model instances with the returned values.
  546. # 4. Batch insertion with `ON CONFLICT DO UPDATE` allows us to insert or update all
  547. # variables in a single SQL statement, avoiding the issues above.
  548. #
  549. # For these reasons, we use the SQLAlchemy query builder and rely on dialect-specific
  550. # insert operations instead of the ORM layer.
  551. stmt = insert(WorkflowDraftVariable).values([_model_to_insertion_dict(v) for v in draft_vars])
  552. if policy == _UpsertPolicy.OVERWRITE:
  553. stmt = stmt.on_conflict_do_update(
  554. index_elements=WorkflowDraftVariable.unique_app_id_node_id_name(),
  555. set_={
  556. # Refresh creation timestamp to ensure updated variables
  557. # appear first in chronologically sorted result sets.
  558. "created_at": stmt.excluded.created_at,
  559. "updated_at": stmt.excluded.updated_at,
  560. "last_edited_at": stmt.excluded.last_edited_at,
  561. "description": stmt.excluded.description,
  562. "value_type": stmt.excluded.value_type,
  563. "value": stmt.excluded.value,
  564. "visible": stmt.excluded.visible,
  565. "editable": stmt.excluded.editable,
  566. "node_execution_id": stmt.excluded.node_execution_id,
  567. "file_id": stmt.excluded.file_id,
  568. },
  569. )
  570. elif policy == _UpsertPolicy.IGNORE:
  571. stmt = stmt.on_conflict_do_nothing(index_elements=WorkflowDraftVariable.unique_app_id_node_id_name())
  572. else:
  573. raise Exception("Invalid value for update policy.")
  574. session.execute(stmt)
  575. def _model_to_insertion_dict(model: WorkflowDraftVariable) -> dict[str, Any]:
  576. d: dict[str, Any] = {
  577. "app_id": model.app_id,
  578. "last_edited_at": None,
  579. "node_id": model.node_id,
  580. "name": model.name,
  581. "selector": model.selector,
  582. "value_type": model.value_type,
  583. "value": model.value,
  584. "node_execution_id": model.node_execution_id,
  585. "file_id": model.file_id,
  586. }
  587. if model.visible is not None:
  588. d["visible"] = model.visible
  589. if model.editable is not None:
  590. d["editable"] = model.editable
  591. if model.created_at is not None:
  592. d["created_at"] = model.created_at
  593. if model.updated_at is not None:
  594. d["updated_at"] = model.updated_at
  595. if model.description is not None:
  596. d["description"] = model.description
  597. return d
  598. def _build_segment_for_serialized_values(v: Any) -> Segment:
  599. """
  600. Reconstructs Segment objects from serialized values, with special handling
  601. for FileSegment and ArrayFileSegment types.
  602. This function should only be used when:
  603. 1. No explicit type information is available
  604. 2. The input value is in serialized form (dict or list)
  605. It detects potential file objects in the serialized data and properly rebuilds the
  606. appropriate segment type.
  607. """
  608. return build_segment(WorkflowDraftVariable.rebuild_file_types(v))
  609. def _make_filename_trans_table() -> dict[int, str]:
  610. linux_chars = ["/", "\x00"]
  611. windows_chars = [
  612. "<",
  613. ">",
  614. ":",
  615. '"',
  616. "/",
  617. "\\",
  618. "|",
  619. "?",
  620. "*",
  621. ]
  622. windows_chars.extend(chr(i) for i in range(32))
  623. trans_table = dict.fromkeys(linux_chars + windows_chars, "_")
  624. return str.maketrans(trans_table)
  625. _FILENAME_TRANS_TABLE = _make_filename_trans_table()
  626. class DraftVariableSaver:
  627. # _DUMMY_OUTPUT_IDENTITY is a placeholder output for workflow nodes.
  628. # Its sole possible value is `None`.
  629. #
  630. # This is used to signal the execution of a workflow node when it has no other outputs.
  631. _DUMMY_OUTPUT_IDENTITY: ClassVar[str] = "__dummy__"
  632. _DUMMY_OUTPUT_VALUE: ClassVar[None] = None
  633. # _EXCLUDE_VARIABLE_NAMES_MAPPING maps node types and versions to variable names that
  634. # should be excluded when saving draft variables. This prevents certain internal or
  635. # technical variables from being exposed in the draft environment, particularly those
  636. # that aren't meant to be directly edited or viewed by users.
  637. _EXCLUDE_VARIABLE_NAMES_MAPPING: dict[NodeType, frozenset[str]] = {
  638. NodeType.LLM: frozenset(["finish_reason"]),
  639. NodeType.LOOP: frozenset(["loop_round"]),
  640. }
  641. # Database session used for persisting draft variables.
  642. _session: Session
  643. # The application ID associated with the draft variables.
  644. # This should match the `Workflow.app_id` of the workflow to which the current node belongs.
  645. _app_id: str
  646. # The ID of the node for which DraftVariableSaver is saving output variables.
  647. _node_id: str
  648. # The type of the current node (see NodeType).
  649. _node_type: NodeType
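  650. # Primary key (`id`) of the WorkflowNodeExecutionModel/WorkflowNodeExecution row, not its `node_execution_id` field.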
  651. _node_execution_id: str
  652. # _enclosing_node_id identifies the container node that the current node belongs to.
  653. # For example, if the current node is an LLM node inside an Iteration node
  654. # or Loop node, then `_enclosing_node_id` refers to the ID of
  655. # the containing Iteration or Loop node.
  656. #
  657. # If the current node is not nested within another node, `_enclosing_node_id` is
  658. # `None`.
  659. _enclosing_node_id: str | None
  def __init__(
      self,
      session: Session,
      app_id: str,
      node_id: str,
      node_type: NodeType,
      node_execution_id: str,
      user: Account,
      enclosing_node_id: str | None = None,
  ):
      """Bind the saver to a single node execution.

      Args:
          session: Database session used for persisting draft variables.
          app_id: ID of the application the draft variables belong to.
          node_id: ID of the node whose output variables are being saved.
          node_type: Type of that node (see ``NodeType``).
          node_execution_id: Primary key (``id``) of the
              WorkflowNodeExecutionModel / WorkflowNodeExecution record. This is
              NOT that record's own ``node_execution_id`` field; the two are
              distinct database fields with different purposes.
          user: Account used as the uploader and owner (tenant / user id) of
              files created when a large variable is offloaded.
          enclosing_node_id: ID of the Iteration or Loop node that contains the
              current node, or ``None`` when the node is not nested.
      """
      # Persistence context.
      self._session = session
      self._user = user
      self._app_id = app_id

      # Identity of the node (and of the execution) being saved.
      self._node_type = node_type
      self._node_id = node_id
      self._enclosing_node_id = enclosing_node_id
      self._node_execution_id = node_execution_id
  def _create_dummy_output_variable(self):
      """Build the hidden, read-only placeholder variable for this node.

      The placeholder is named ``_DUMMY_OUTPUT_IDENTITY`` and carries
      ``_DUMMY_OUTPUT_VALUE``; it is appended by the builders when a node
      needs a record of having executed without any other output.
      """
      placeholder_segment = build_segment(self._DUMMY_OUTPUT_VALUE)
      return WorkflowDraftVariable.new_node_variable(
          app_id=self._app_id,
          node_id=self._node_id,
          name=self._DUMMY_OUTPUT_IDENTITY,
          node_execution_id=self._node_execution_id,
          value=placeholder_segment,
          visible=False,
          editable=False,
      )
  def _should_save_output_variables_for_draft(self) -> bool:
      """Tell whether this node's outputs should be persisted as draft variables.

      Output variables of nodes nested inside a loop or iteration are currently
      not saved; the only nested node type that is still saved is
      ``NodeType.VARIABLE_ASSIGNER``.
      """
      is_top_level = self._enclosing_node_id is None
      return is_top_level or self._node_type == NodeType.VARIABLE_ASSIGNER
  def _build_from_variable_assigner_mapping(self, process_data: Mapping[str, Any]) -> list[WorkflowDraftVariable]:
      """Build draft variables for a VariableAssigner node from its process data.

      Each updated variable reported by ``get_updated_variables`` is inspected;
      only those whose selector starts with ``CONVERSATION_VARIABLE_NODE_ID``
      are turned into conversation draft variables. A dummy output variable is
      always appended so that the node is recorded as executed.

      Args:
          process_data: Process data of the VariableAssigner node.

      Returns:
          The conversation draft variables followed by the dummy output variable.

      Raises:
          ValueError: If an updated variable's selector has fewer than
              ``SELECTORS_LENGTH`` elements.
      """
      draft_vars: list[WorkflowDraftVariable] = []
      for item in get_updated_variables(process_data) or []:
          selector = item.selector
          if len(selector) < SELECTORS_LENGTH:
              # A specific exception type instead of a bare `Exception`; it is still
              # an `Exception` subclass, so existing broad handlers keep working.
              raise ValueError("selector too short")
          # NOTE(QuantumGhost): only the following two kinds of variable could be updated by
          # VariableAssigner: ConversationVariable and iteration variable.
          # We only save conversation variable here.
          if selector[0] != CONVERSATION_VARIABLE_NODE_ID:
              continue
          segment = WorkflowDraftVariable.build_segment_with_type(segment_type=item.value_type, value=item.new_value)
          draft_vars.append(
              WorkflowDraftVariable.new_conversation_variable(
                  app_id=self._app_id,
                  name=item.name,
                  value=segment,
              )
          )
      # Add a dummy output variable to indicate that this node is executed.
      draft_vars.append(self._create_dummy_output_variable())
      return draft_vars
  def _build_variables_from_start_mapping(self, output: Mapping[str, Any]) -> list[WorkflowDraftVariable]:
      """Build draft variables from the outputs of a Start node.

      Outputs whose name carries the system-variable prefix become system draft
      variables; every other output is a user-defined input field and becomes a
      visible, editable node variable. When no user-defined field was seen, a
      dummy output variable is appended.
      """
      collected = []
      saw_user_input = False
      for raw_name, raw_value in output.items():
          segment = _build_segment_for_serialized_values(raw_value)
          owner_id, var_name = self._normalize_variable_for_start_node(raw_name)

          if owner_id != SYSTEM_VARIABLE_NODE_ID:
              # Not a `sys` variable: a user-defined input field of the `Start` node.
              collected.append(
                  WorkflowDraftVariable.new_node_variable(
                      app_id=self._app_id,
                      node_id=self._node_id,
                      name=var_name,
                      node_execution_id=self._node_execution_id,
                      value=segment,
                      visible=True,
                      editable=True,
                  )
              )
              saw_user_input = True
              continue

          if var_name == SystemVariableKey.FILES:
              # The type is known to be `array[file]` here, so the files are
              # built directly from the raw value.
              files = [File.model_validate(entry) for entry in raw_value]
              segment = (
                  WorkflowDraftVariable.build_segment_with_type(SegmentType.ARRAY_FILE, files)
                  if files
                  else ArrayFileSegment(value=[])
              )
          collected.append(
              WorkflowDraftVariable.new_sys_variable(
                  app_id=self._app_id,
                  name=var_name,
                  node_execution_id=self._node_execution_id,
                  value=segment,
                  editable=self._should_variable_be_editable(owner_id, var_name),
              )
          )

      if not saw_user_input:
          collected.append(self._create_dummy_output_variable())
      return collected
  def _normalize_variable_for_start_node(self, name: str) -> tuple[str, str]:
      """Split a Start-node output name into ``(owning node id, variable name)``.

      A name starting with ``"<SYSTEM_VARIABLE_NODE_ID>."`` yields
      ``(SYSTEM_VARIABLE_NODE_ID, <text after the first dot>)``; any other name
      is returned unchanged, paired with this node's own ID.
      """
      sys_prefix = f"{SYSTEM_VARIABLE_NODE_ID}."
      if name.startswith(sys_prefix):
          return SYSTEM_VARIABLE_NODE_ID, name.split(".", maxsplit=1)[1]
      return self._node_id, name
  def _build_variables_from_mapping(self, output: Mapping[str, Any]) -> list[WorkflowDraftVariable]:
      """Build draft variables from the outputs of a regular node.

      Outputs excluded for this node type are skipped (with a debug log). A value
      that already is a ``Segment`` is used as-is; anything else is rebuilt from
      its serialized form. Each variable is created as visible and editable via
      ``_create_draft_variable``.
      """
      results = []
      for var_name, raw_value in output.items():
          if not self._should_variable_be_saved(var_name):
              logger.debug(
                  "Skip saving variable as it has been excluded by its node_type, name=%s, node_type=%s",
                  var_name,
                  self._node_type,
              )
              continue

          segment = raw_value if isinstance(raw_value, Segment) else _build_segment_for_serialized_values(raw_value)
          results.append(
              self._create_draft_variable(
                  name=var_name,
                  value=segment,
                  visible=True,
                  editable=True,
              )
          )
      return results
  def _generate_filename(self, name: str) -> str:
      """Return the extension-less file name ``"<node_id>-<name>"`` for an offloaded variable.

      Both components are passed through ``_FILENAME_TRANS_TABLE`` so that
      characters such as path separators or control characters are replaced by
      underscores. Previously only the node ID was sanitized, which let a
      variable name containing one of those characters leak into the file name.

      Args:
          name: Name of the variable being offloaded.

      Returns:
          The sanitized base name; the caller appends the extension.
      """
      node_id_escaped = self._node_id.translate(_FILENAME_TRANS_TABLE)
      name_escaped = name.translate(_FILENAME_TRANS_TABLE)
      return f"{node_id_escaped}-{name_escaped}"
  def _try_offload_large_variable(
      self,
      name: str,
      value_seg: Segment,
  ) -> tuple[Segment, WorkflowDraftVariableFile] | None:
      """Offload a variable to file storage when the truncator reports it as truncated.

      The full value is uploaded through ``FileService`` (plain text for string
      segments, JSON otherwise) and a ``WorkflowDraftVariableFile`` record
      pointing at the upload is added and committed in a session of its own,
      bound to the same engine as ``self._session``.

      Args:
          name: Name of the variable; used to derive the uploaded file's name.
          value_seg: The full, untruncated segment.

      Returns:
          ``None`` when the value was not truncated; otherwise a tuple of the
          truncated segment and the committed ``WorkflowDraftVariableFile``.
      """
      # This logic is closely tied to `DraftVarLoader._load_offloaded_variable` and must remain
      # synchronized with it.
      # Ideally, these should be co-located for better maintainability.
      # However, due to the current code structure, this is not straightforward.
      truncator = VariableTruncator(
          max_size_bytes=dify_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE,
          array_element_limit=dify_config.WORKFLOW_VARIABLE_TRUNCATION_ARRAY_LENGTH,
          string_length_limit=dify_config.WORKFLOW_VARIABLE_TRUNCATION_STRING_LENGTH,
      )
      truncation_result = truncator.truncate(value_seg)
      if not truncation_result.truncated:
          return None

      # Element count is only recorded for container values.
      original_length = None
      if isinstance(value_seg.value, (list, dict)):
          original_length = len(value_seg.value)

      # Prepare content for storage.
      if isinstance(value_seg, StringSegment):
          # For string types, store as plain text
          original_content_serialized = value_seg.value
          content_type = "text/plain"
          filename = f"{self._generate_filename(name)}.txt"
      else:
          # For other types, store as JSON
          original_content_serialized = dumps_with_segments(value_seg.value, ensure_ascii=False)
          content_type = "application/json"
          filename = f"{self._generate_filename(name)}.json"

      # Encode exactly once: the same bytes are uploaded and measured, so the
      # recorded size always matches the stored payload and a large value is
      # not encoded twice.
      content = original_content_serialized.encode("utf-8")
      original_size = len(content)

      # Resolve the engine once; it serves both the upload and the record commit.
      engine = self._session.get_bind()
      assert isinstance(engine, Engine)

      upload_file = FileService(engine).upload_file(
          filename=filename,
          content=content,
          mimetype=content_type,
          user=self._user,
      )

      # Create WorkflowDraftVariableFile record
      variable_file = WorkflowDraftVariableFile(
          id=uuidv7(),
          upload_file_id=upload_file.id,
          size=original_size,
          length=original_length,
          value_type=value_seg.value_type,
          app_id=self._app_id,
          tenant_id=self._user.current_tenant_id,
          user_id=self._user.id,
      )
      # The record is added and committed through a dedicated session rather
      # than through `self._session`.
      with Session(bind=engine, expire_on_commit=False) as session:
          session.add(variable_file)
          session.commit()

      return truncation_result.result, variable_file
  def _create_draft_variable(
      self,
      *,
      name: str,
      value: Segment,
      visible: bool = True,
      editable: bool = True,
  ) -> WorkflowDraftVariable:
      """Create a node draft variable, offloading and truncating large values.

      When the value is offloaded, the draft variable stores the truncated
      segment, references the offload file through ``file_id`` and is always
      created as non-editable, regardless of ``editable``. Otherwise the value
      is stored as given.
      """
      offloaded = self._try_offload_large_variable(name, value)

      if offloaded is not None:
          truncated_segment, variable_file = offloaded
          return WorkflowDraftVariable.new_node_variable(
              app_id=self._app_id,
              node_id=self._node_id,
              name=name,
              node_execution_id=self._node_execution_id,
              value=truncated_segment,
              visible=visible,
              editable=False,
              file_id=variable_file.id,
          )

      # Nothing was offloaded: keep the full value.
      return WorkflowDraftVariable.new_node_variable(
          app_id=self._app_id,
          node_id=self._node_id,
          name=name,
          node_execution_id=self._node_execution_id,
          value=value,
          visible=visible,
          editable=editable,
      )
  def save(
      self,
      process_data: Mapping[str, Any] | None = None,
      outputs: Mapping[str, Any] | None = None,
  ):
      """Persist the draft variables produced by this node execution.

      Does nothing when ``_should_save_output_variables_for_draft`` is false.
      Otherwise the variables are built according to the node type
      (VariableAssigner from ``process_data``, Start and all other nodes from
      ``outputs``) and upserted in one batch through ``self._session``.

      Args:
          process_data: Process data of the node; ``None`` is treated as empty.
          outputs: Outputs of the node; ``None`` is treated as empty.
      """
      if not self._should_save_output_variables_for_draft():
          return

      effective_outputs: Mapping[str, Any] = {} if outputs is None else outputs
      effective_process_data: Mapping[str, Any] = {} if process_data is None else process_data

      draft_vars: list[WorkflowDraftVariable]
      if self._node_type == NodeType.VARIABLE_ASSIGNER:
          draft_vars = self._build_from_variable_assigner_mapping(process_data=effective_process_data)
      elif self._node_type == NodeType.START:
          draft_vars = self._build_variables_from_start_mapping(effective_outputs)
      else:
          draft_vars = self._build_variables_from_mapping(effective_outputs)

      _batch_upsert_draft_variable(self._session, draft_vars)
  @staticmethod
  def _should_variable_be_editable(node_id: str, name: str) -> bool:
      """Tell whether the variable ``name`` owned by ``node_id`` may be edited.

      Conversation and environment variables are never editable here; system
      variables are editable only when ``is_system_variable_editable`` says so;
      everything else is editable.
      """
      read_only_owners = (CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID)
      return node_id not in read_only_owners and (
          node_id != SYSTEM_VARIABLE_NODE_ID or bool(is_system_variable_editable(name))
      )
  @staticmethod
  def _should_variable_be_visible(node_id: str, node_type: NodeType, name: str) -> bool:
      """Tell whether the variable ``name`` of the given node should be visible.

      Variables of ``NodeType.IF_ELSE`` nodes are hidden, as are system
      variables for which ``is_system_variable_editable`` returns false; all
      other variables are visible.

      Args:
          node_id: ID of the node that owns the variable.
          node_type: Type of that node.
          name: Name of the variable.
      """
      # Compare by equality. The previous `node_type in NodeType.IF_ELSE` applied
      # the membership operator to a single enum member, which is a substring
      # test for a string-valued enum and a TypeError for a plain one.
      if node_type == NodeType.IF_ELSE:
          return False
      if node_id == SYSTEM_VARIABLE_NODE_ID and not is_system_variable_editable(name):
          return False
      return True
  def _should_variable_be_saved(self, name: str) -> bool:
      """Tell whether the output ``name`` should be saved for this node's type.

      A name is skipped only when ``_EXCLUDE_VARIABLE_NAMES_MAPPING`` has an entry
      for the current node type and that entry contains the name.
      """
      excluded_names = self._EXCLUDE_VARIABLE_NAMES_MAPPING.get(self._node_type)
      return excluded_names is None or name not in excluded_names