
# rag_pipeline_dsl_service.py

import base64
import hashlib
import logging
import uuid
from collections.abc import Mapping
from enum import StrEnum
from typing import Optional
from urllib.parse import urlparse
from uuid import uuid4

import yaml  # type: ignore
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
from packaging import version
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.orm import Session

from core.helper import ssrf_proxy
from core.model_runtime.utils.encoders import jsonable_encoder
from core.plugin.entities.plugin import PluginDependency
from core.workflow.nodes.enums import NodeType
from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData
from core.workflow.nodes.llm.entities import LLMNodeData
from core.workflow.nodes.parameter_extractor.entities import ParameterExtractorNodeData
from core.workflow.nodes.question_classifier.entities import QuestionClassifierNodeData
from core.workflow.nodes.tool.entities import ToolNodeData
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from factories import variable_factory
from models import Account
from models.dataset import Dataset, DatasetCollectionBinding, Pipeline
from models.workflow import Workflow
from services.dataset_service import DatasetCollectionBindingService  # used by the high_quality indexing branches below
from services.entities.knowledge_entities.rag_pipeline_entities import KnowledgeConfiguration
from services.plugin.dependencies_analysis import DependenciesAnalysisService
from services.rag_pipeline.rag_pipeline import RagPipelineService

logger = logging.getLogger(__name__)

IMPORT_INFO_REDIS_KEY_PREFIX = "app_import_info:"
CHECK_DEPENDENCIES_REDIS_KEY_PREFIX = "app_check_dependencies:"
IMPORT_INFO_REDIS_EXPIRY = 10 * 60  # 10 minutes
DSL_MAX_SIZE = 10 * 1024 * 1024  # 10MB
CURRENT_DSL_VERSION = "0.1.0"

class ImportMode(StrEnum):
    YAML_CONTENT = "yaml-content"
    YAML_URL = "yaml-url"


class ImportStatus(StrEnum):
    COMPLETED = "completed"
    COMPLETED_WITH_WARNINGS = "completed-with-warnings"
    PENDING = "pending"
    FAILED = "failed"


class RagPipelineImportInfo(BaseModel):
    id: str
    status: ImportStatus
    pipeline_id: Optional[str] = None
    current_dsl_version: str = CURRENT_DSL_VERSION
    imported_dsl_version: str = ""
    error: str = ""
    dataset_id: Optional[str] = None


class CheckDependenciesResult(BaseModel):
    leaked_dependencies: list[PluginDependency] = Field(default_factory=list)

def _check_version_compatibility(imported_version: str) -> ImportStatus:
    """Determine the import status based on version comparison."""
    try:
        current_ver = version.parse(CURRENT_DSL_VERSION)
        imported_ver = version.parse(imported_version)
    except version.InvalidVersion:
        return ImportStatus.FAILED

    # If the imported version is newer than the current one, always return PENDING
    if imported_ver > current_ver:
        return ImportStatus.PENDING

    # If the imported major version is older than the current major, return PENDING
    if imported_ver.major < current_ver.major:
        return ImportStatus.PENDING

    # If the imported minor version is older than the current minor, return COMPLETED_WITH_WARNINGS
    if imported_ver.minor < current_ver.minor:
        return ImportStatus.COMPLETED_WITH_WARNINGS

    # Same major/minor with an equal or older micro version: return COMPLETED
    return ImportStatus.COMPLETED
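
# Illustrative mapping with CURRENT_DSL_VERSION = "0.1.0" (not part of the original file,
# just a sketch of the rules above):
#   _check_version_compatibility("0.2.0")  -> ImportStatus.PENDING                  (newer than current)
#   _check_version_compatibility("0.0.9")  -> ImportStatus.COMPLETED_WITH_WARNINGS  (older minor version)
#   _check_version_compatibility("0.1.0")  -> ImportStatus.COMPLETED                (same version)
#   _check_version_compatibility("abc")    -> ImportStatus.FAILED                   (unparseable version)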

class RagPipelinePendingData(BaseModel):
    import_mode: str
    yaml_content: str
    # Optional metadata; defaults to None so pending data can be stored even when only
    # the YAML content and pipeline_id are known (as in import_rag_pipeline below).
    name: str | None = None
    description: str | None = None
    icon_type: str | None = None
    icon: str | None = None
    icon_background: str | None = None
    pipeline_id: str | None = None


class CheckDependenciesPendingData(BaseModel):
    dependencies: list[PluginDependency]
    pipeline_id: str | None

class RagPipelineDslService:
    def __init__(self, session: Session):
        self._session = session

    def import_rag_pipeline(
        self,
        *,
        account: Account,
        import_mode: str,
        yaml_content: Optional[str] = None,
        yaml_url: Optional[str] = None,
        pipeline_id: Optional[str] = None,
        dataset: Optional[Dataset] = None,
    ) -> RagPipelineImportInfo:
        """Import a RAG pipeline from YAML content or from a URL."""
        import_id = str(uuid.uuid4())

        # Validate import mode
        try:
            mode = ImportMode(import_mode)
        except ValueError:
            raise ValueError(f"Invalid import_mode: {import_mode}")

        # Get YAML content
        content: str = ""
        if mode == ImportMode.YAML_URL:
            if not yaml_url:
                return RagPipelineImportInfo(
                    id=import_id,
                    status=ImportStatus.FAILED,
                    error="yaml_url is required when import_mode is yaml-url",
                )
            try:
                parsed_url = urlparse(yaml_url)
                if (
                    parsed_url.scheme == "https"
                    and parsed_url.netloc == "github.com"
                    and parsed_url.path.endswith((".yml", ".yaml"))
                ):
                    yaml_url = yaml_url.replace("https://github.com", "https://raw.githubusercontent.com")
                    yaml_url = yaml_url.replace("/blob/", "/")
                response = ssrf_proxy.get(yaml_url.strip(), follow_redirects=True, timeout=(10, 10))
                response.raise_for_status()
                content = response.content.decode()

                if len(content) > DSL_MAX_SIZE:
                    return RagPipelineImportInfo(
                        id=import_id,
                        status=ImportStatus.FAILED,
                        error="File size exceeds the limit of 10MB",
                    )
                if not content:
                    return RagPipelineImportInfo(
                        id=import_id,
                        status=ImportStatus.FAILED,
                        error="Empty content from url",
                    )
            except Exception as e:
                return RagPipelineImportInfo(
                    id=import_id,
                    status=ImportStatus.FAILED,
                    error=f"Error fetching YAML from URL: {str(e)}",
                )
        elif mode == ImportMode.YAML_CONTENT:
            if not yaml_content:
                return RagPipelineImportInfo(
                    id=import_id,
                    status=ImportStatus.FAILED,
                    error="yaml_content is required when import_mode is yaml-content",
                )
            content = yaml_content

        # Process YAML content
        try:
            # Parse YAML to validate format
            data = yaml.safe_load(content)
            if not isinstance(data, dict):
                return RagPipelineImportInfo(
                    id=import_id,
                    status=ImportStatus.FAILED,
                    error="Invalid YAML format: content must be a mapping",
                )

            # Validate and fix DSL version and kind
            if not data.get("version"):
                data["version"] = "0.1.0"
            if not data.get("kind") or data.get("kind") != "rag-pipeline":
                data["kind"] = "rag-pipeline"

            imported_version = data.get("version", "0.1.0")
            # The version must be a string; reject float-like YAML values such as 0.1
            if not isinstance(imported_version, str):
                raise ValueError(f"Invalid version type, expected str, got {type(imported_version)}")
            status = _check_version_compatibility(imported_version)

            # Extract pipeline data
            pipeline_data = data.get("pipeline")
            if not pipeline_data:
                return RagPipelineImportInfo(
                    id=import_id,
                    status=ImportStatus.FAILED,
                    error="Missing pipeline data in YAML content",
                )

            # If pipeline_id is provided, check that the pipeline exists
            pipeline = None
            if pipeline_id:
                stmt = select(Pipeline).where(
                    Pipeline.id == pipeline_id,
                    Pipeline.tenant_id == account.current_tenant_id,
                )
                pipeline = self._session.scalar(stmt)
                if not pipeline:
                    return RagPipelineImportInfo(
                        id=import_id,
                        status=ImportStatus.FAILED,
                        error="Pipeline not found",
                    )

            # If the import needs confirmation (e.g. version mismatch), store import info in Redis
            if status == ImportStatus.PENDING:
                pending_data = RagPipelinePendingData(
                    import_mode=import_mode,
                    yaml_content=content,
                    pipeline_id=pipeline_id,
                )
                redis_client.setex(
                    f"{IMPORT_INFO_REDIS_KEY_PREFIX}{import_id}",
                    IMPORT_INFO_REDIS_EXPIRY,
                    pending_data.model_dump_json(),
                )
                return RagPipelineImportInfo(
                    id=import_id,
                    status=status,
                    pipeline_id=pipeline_id,
                    imported_dsl_version=imported_version,
                )

            # Extract dependencies
            dependencies = data.get("dependencies", [])
            check_dependencies_pending_data = None
            if dependencies:
                check_dependencies_pending_data = [PluginDependency.model_validate(d) for d in dependencies]

            # Create or update the pipeline
            pipeline = self._create_or_update_pipeline(
                pipeline=pipeline,
                data=data,
                account=account,
                dependencies=check_dependencies_pending_data,
            )

            # Create or update the dataset bound to the pipeline
            name = pipeline.name
            description = pipeline.description
            icon_type = data.get("rag_pipeline", {}).get("icon_type")
            icon = data.get("rag_pipeline", {}).get("icon")
            icon_background = data.get("rag_pipeline", {}).get("icon_background")
            icon_url = data.get("rag_pipeline", {}).get("icon_url")
            workflow = data.get("workflow", {})
            graph = workflow.get("graph", {})
            nodes = graph.get("nodes", [])
            dataset_id = None
            for node in nodes:
                if node.get("data", {}).get("type") == "knowledge_index":
                    knowledge_configuration = node.get("data", {}).get("knowledge_configuration", {})
                    knowledge_configuration = KnowledgeConfiguration(**knowledge_configuration)
                    if not dataset:
                        dataset = Dataset(
                            tenant_id=account.current_tenant_id,
                            name=name,
                            description=description,
                            icon_info={
                                "type": icon_type,
                                "icon": icon,
                                "background": icon_background,
                                "url": icon_url,
                            },
                            indexing_technique=knowledge_configuration.index_method.indexing_technique,
                            created_by=account.id,
                            retrieval_model=knowledge_configuration.retrieval_setting.model_dump(),
                            runtime_mode="rag_pipeline",
                            chunk_structure=knowledge_configuration.chunk_structure,
                        )
                    else:
                        dataset.indexing_technique = knowledge_configuration.index_method.indexing_technique
                        dataset.retrieval_model = knowledge_configuration.retrieval_setting.model_dump()
                        dataset.runtime_mode = "rag_pipeline"
                        dataset.chunk_structure = knowledge_configuration.chunk_structure
                    if knowledge_configuration.index_method.indexing_technique == "high_quality":
                        dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
                            knowledge_configuration.index_method.embedding_setting.embedding_provider_name,  # type: ignore
                            knowledge_configuration.index_method.embedding_setting.embedding_model_name,  # type: ignore
                        )
                        dataset_collection_binding = (
                            db.session.query(DatasetCollectionBinding)
                            .filter(
                                DatasetCollectionBinding.provider_name
                                == knowledge_configuration.index_method.embedding_setting.embedding_provider_name,
                                DatasetCollectionBinding.model_name
                                == knowledge_configuration.index_method.embedding_setting.embedding_model_name,
                                DatasetCollectionBinding.type == "dataset",
                            )
                            .order_by(DatasetCollectionBinding.created_at)
                            .first()
                        )
                        if not dataset_collection_binding:
                            dataset_collection_binding = DatasetCollectionBinding(
                                provider_name=knowledge_configuration.index_method.embedding_setting.embedding_provider_name,
                                model_name=knowledge_configuration.index_method.embedding_setting.embedding_model_name,
                                collection_name=Dataset.gen_collection_name_by_id(str(uuid.uuid4())),
                                type="dataset",
                            )
                            db.session.add(dataset_collection_binding)
                            db.session.commit()
                        dataset_collection_binding_id = dataset_collection_binding.id
                        dataset.collection_binding_id = dataset_collection_binding_id
                        dataset.embedding_model = (
                            knowledge_configuration.index_method.embedding_setting.embedding_model_name
                        )
                        dataset.embedding_model_provider = (
                            knowledge_configuration.index_method.embedding_setting.embedding_provider_name
                        )
                    elif knowledge_configuration.index_method.indexing_technique == "economy":
                        dataset.keyword_number = knowledge_configuration.index_method.economy_setting.keyword_number
                    dataset.pipeline_id = pipeline.id
                    self._session.add(dataset)
                    self._session.commit()
                    dataset_id = dataset.id
            if not dataset_id:
                raise ValueError("DSL is not valid, please check the Knowledge Index node.")

            return RagPipelineImportInfo(
                id=import_id,
                status=status,
                pipeline_id=pipeline.id,
                dataset_id=dataset_id,
                imported_dsl_version=imported_version,
            )

        except yaml.YAMLError as e:
            return RagPipelineImportInfo(
                id=import_id,
                status=ImportStatus.FAILED,
                error=f"Invalid YAML format: {str(e)}",
            )
        except Exception as e:
            logger.exception("Failed to import RAG pipeline")
            return RagPipelineImportInfo(
                id=import_id,
                status=ImportStatus.FAILED,
                error=str(e),
            )
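
    # Illustrative usage (not part of the original file; names such as `session`,
    # `current_account`, and `dsl_yaml_text` are assumptions). The service is constructed
    # with a SQLAlchemy session and import_rag_pipeline takes keyword-only arguments:
    #
    #     service = RagPipelineDslService(session)
    #     info = service.import_rag_pipeline(
    #         account=current_account,
    #         import_mode="yaml-content",
    #         yaml_content=dsl_yaml_text,
    #     )
    #     if info.status == ImportStatus.PENDING:
    #         # the caller must later call confirm_import(import_id=info.id, account=current_account)
    #         ...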

    def confirm_import(self, *, import_id: str, account: Account) -> RagPipelineImportInfo:
        """
        Confirm an import that requires confirmation.
        """
        redis_key = f"{IMPORT_INFO_REDIS_KEY_PREFIX}{import_id}"
        pending_data = redis_client.get(redis_key)
        if not pending_data:
            return RagPipelineImportInfo(
                id=import_id,
                status=ImportStatus.FAILED,
                error="Import information expired or does not exist",
            )
        try:
            if not isinstance(pending_data, str | bytes):
                return RagPipelineImportInfo(
                    id=import_id,
                    status=ImportStatus.FAILED,
                    error="Invalid import information",
                )
            pending_data = RagPipelinePendingData.model_validate_json(pending_data)
            data = yaml.safe_load(pending_data.yaml_content)
            pipeline = None
            if pending_data.pipeline_id:
                stmt = select(Pipeline).where(
                    Pipeline.id == pending_data.pipeline_id,
                    Pipeline.tenant_id == account.current_tenant_id,
                )
                pipeline = self._session.scalar(stmt)

            # Create or update the pipeline
            pipeline = self._create_or_update_pipeline(
                pipeline=pipeline,
                data=data,
                account=account,
            )

            # Create the dataset bound to the pipeline
            name = pipeline.name
            description = pipeline.description
            icon_type = data.get("rag_pipeline", {}).get("icon_type")
            icon = data.get("rag_pipeline", {}).get("icon")
            icon_background = data.get("rag_pipeline", {}).get("icon_background")
            icon_url = data.get("rag_pipeline", {}).get("icon_url")
            workflow = data.get("workflow", {})
            graph = workflow.get("graph", {})
            nodes = graph.get("nodes", [])
            dataset_id = None
            dataset = None  # confirm_import always builds the dataset from the DSL
            for node in nodes:
                if node.get("data", {}).get("type") == "knowledge_index":
                    knowledge_configuration = node.get("data", {}).get("knowledge_configuration", {})
                    knowledge_configuration = KnowledgeConfiguration(**knowledge_configuration)
                    if not dataset:
                        dataset = Dataset(
                            tenant_id=account.current_tenant_id,
                            name=name,
                            description=description,
                            icon_info={
                                "type": icon_type,
                                "icon": icon,
                                "background": icon_background,
                                "url": icon_url,
                            },
                            indexing_technique=knowledge_configuration.index_method.indexing_technique,
                            created_by=account.id,
                            retrieval_model=knowledge_configuration.retrieval_setting.model_dump(),
                            runtime_mode="rag_pipeline",
                            chunk_structure=knowledge_configuration.chunk_structure,
                        )
                    else:
                        dataset.indexing_technique = knowledge_configuration.index_method.indexing_technique
                        dataset.retrieval_model = knowledge_configuration.retrieval_setting.model_dump()
                        dataset.runtime_mode = "rag_pipeline"
                        dataset.chunk_structure = knowledge_configuration.chunk_structure
                    if knowledge_configuration.index_method.indexing_technique == "high_quality":
                        dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
                            knowledge_configuration.index_method.embedding_setting.embedding_provider_name,  # type: ignore
                            knowledge_configuration.index_method.embedding_setting.embedding_model_name,  # type: ignore
                        )
                        dataset_collection_binding_id = dataset_collection_binding.id
                        dataset.collection_binding_id = dataset_collection_binding_id
                        dataset.embedding_model = (
                            knowledge_configuration.index_method.embedding_setting.embedding_model_name
                        )
                        dataset.embedding_model_provider = (
                            knowledge_configuration.index_method.embedding_setting.embedding_provider_name
                        )
                    elif knowledge_configuration.index_method.indexing_technique == "economy":
                        dataset.keyword_number = knowledge_configuration.index_method.economy_setting.keyword_number
                    dataset.pipeline_id = pipeline.id
                    self._session.add(dataset)
                    self._session.commit()
                    dataset_id = dataset.id
            if not dataset_id:
                raise ValueError("DSL is not valid, please check the Knowledge Index node.")

            # Delete import info from Redis
            redis_client.delete(redis_key)

            return RagPipelineImportInfo(
                id=import_id,
                status=ImportStatus.COMPLETED,
                pipeline_id=pipeline.id,
                dataset_id=dataset_id,
                current_dsl_version=CURRENT_DSL_VERSION,
                imported_dsl_version=data.get("version", "0.1.0"),
            )
        except Exception as e:
            logger.exception("Error confirming import")
            return RagPipelineImportInfo(
                id=import_id,
                status=ImportStatus.FAILED,
                error=str(e),
            )
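
    # Note: the pending dependency data consumed by check_dependencies below is written by
    # _create_or_update_pipeline(), which stores it under
    # CHECK_DEPENDENCIES_REDIS_KEY_PREFIX + pipeline.id with the IMPORT_INFO_REDIS_EXPIRY
    # TTL (10 minutes).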

    def check_dependencies(
        self,
        *,
        pipeline: Pipeline,
    ) -> CheckDependenciesResult:
        """Check dependencies"""
        # Get dependencies from Redis
        redis_key = f"{CHECK_DEPENDENCIES_REDIS_KEY_PREFIX}{pipeline.id}"
        dependencies = redis_client.get(redis_key)
        if not dependencies:
            return CheckDependenciesResult()

        # Extract dependencies
        dependencies = CheckDependenciesPendingData.model_validate_json(dependencies)

        # Get leaked dependencies
        leaked_dependencies = DependenciesAnalysisService.get_leaked_dependencies(
            tenant_id=pipeline.tenant_id, dependencies=dependencies.dependencies
        )
        return CheckDependenciesResult(
            leaked_dependencies=leaked_dependencies,
        )

    def _create_or_update_pipeline(
        self,
        *,
        pipeline: Optional[Pipeline],
        data: dict,
        account: Account,
        dependencies: Optional[list[PluginDependency]] = None,
    ) -> Pipeline:
        """Create a new pipeline or update an existing one."""
        pipeline_data = data.get("rag_pipeline", {})

        # Set icon type
        icon_type_value = pipeline_data.get("icon_type")
        if icon_type_value in ["emoji", "link"]:
            icon_type = icon_type_value
        else:
            icon_type = "emoji"
        icon = str(pipeline_data.get("icon", ""))

        if pipeline:
            # Update the existing pipeline
            pipeline.name = pipeline_data.get("name", pipeline.name)
            pipeline.description = pipeline_data.get("description", pipeline.description)
            pipeline.icon_type = icon_type
            pipeline.icon = icon
            pipeline.icon_background = pipeline_data.get("icon_background", pipeline.icon_background)
            pipeline.updated_by = account.id
        else:
            if account.current_tenant_id is None:
                raise ValueError("Current tenant is not set")

            # Create a new pipeline
            pipeline = Pipeline()
            pipeline.id = str(uuid4())
            pipeline.tenant_id = account.current_tenant_id
            pipeline.name = pipeline_data.get("name", "")
            pipeline.description = pipeline_data.get("description", "")
            pipeline.icon_type = icon_type
            pipeline.icon = icon
            pipeline.icon_background = pipeline_data.get("icon_background", "#FFFFFF")
            pipeline.enable_site = True
            pipeline.enable_api = True
            pipeline.use_icon_as_answer_icon = pipeline_data.get("use_icon_as_answer_icon", False)
            pipeline.created_by = account.id
            pipeline.updated_by = account.id
        self._session.add(pipeline)
        self._session.commit()

        # Save dependencies
        if dependencies:
            redis_client.setex(
                f"{CHECK_DEPENDENCIES_REDIS_KEY_PREFIX}{pipeline.id}",
                IMPORT_INFO_REDIS_EXPIRY,
                CheckDependenciesPendingData(pipeline_id=pipeline.id, dependencies=dependencies).model_dump_json(),
            )

        # Initialize the pipeline workflow
        workflow_data = data.get("workflow")
        if not workflow_data or not isinstance(workflow_data, dict):
            raise ValueError("Missing workflow data for rag pipeline")

        environment_variables_list = workflow_data.get("environment_variables", [])
        environment_variables = [
            variable_factory.build_environment_variable_from_mapping(obj) for obj in environment_variables_list
        ]
        conversation_variables_list = workflow_data.get("conversation_variables", [])
        conversation_variables = [
            variable_factory.build_conversation_variable_from_mapping(obj) for obj in conversation_variables_list
        ]
        rag_pipeline_variables_list = workflow_data.get("rag_pipeline_variables", [])
        rag_pipeline_variables = [
            variable_factory.build_pipeline_variable_from_mapping(obj) for obj in rag_pipeline_variables_list
        ]
        rag_pipeline_service = RagPipelineService()
        current_draft_workflow = rag_pipeline_service.get_draft_workflow(pipeline=pipeline)
        if current_draft_workflow:
            unique_hash = current_draft_workflow.unique_hash
        else:
            unique_hash = None

        # Decrypt dataset ids on knowledge-retrieval nodes, dropping any that cannot be decrypted
        graph = workflow_data.get("graph", {})
        for node in graph.get("nodes", []):
            if node.get("data", {}).get("type", "") == NodeType.KNOWLEDGE_RETRIEVAL.value:
                dataset_ids = node["data"].get("dataset_ids", [])
                node["data"]["dataset_ids"] = [
                    decrypted_id
                    for dataset_id in dataset_ids
                    if (
                        decrypted_id := self.decrypt_dataset_id(
                            encrypted_data=dataset_id,
                            tenant_id=pipeline.tenant_id,
                        )
                    )
                ]
        rag_pipeline_service.sync_draft_workflow(
            pipeline=pipeline,
            graph=workflow_data.get("graph", {}),
            features=workflow_data.get("features", {}),
            unique_hash=unique_hash,
            account=account,
            environment_variables=environment_variables,
            conversation_variables=conversation_variables,
        )
        return pipeline

    @classmethod
    def export_rag_pipeline_dsl(cls, pipeline: Pipeline, include_secret: bool = False) -> str:
        """
        Export pipeline
        :param pipeline: Pipeline instance
        :param include_secret: whether to include secret variables
        :return: DSL as a YAML string
        """
        export_data = {
            "version": CURRENT_DSL_VERSION,
            "kind": "rag_pipeline",
            "pipeline": {
                "name": pipeline.name,
                "mode": pipeline.mode,
                "icon": "🤖" if pipeline.icon_type == "image" else pipeline.icon,
                "icon_background": "#FFEAD5" if pipeline.icon_type == "image" else pipeline.icon_background,
                "description": pipeline.description,
                "use_icon_as_answer_icon": pipeline.use_icon_as_answer_icon,
            },
        }
        cls._append_workflow_export_data(export_data=export_data, pipeline=pipeline, include_secret=include_secret)
        return yaml.dump(export_data, allow_unicode=True)  # type: ignore
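
    # For reference, the exported DSL has roughly this shape (a sketch inferred from the
    # export methods above, not an authoritative schema; field values are examples only):
    #
    #     version: 0.1.0
    #     kind: rag_pipeline
    #     pipeline:
    #       name: my-pipeline
    #       mode: ...
    #       icon: "🤖"
    #       icon_background: "#FFEAD5"
    #       description: ...
    #       use_icon_as_answer_icon: false
    #     workflow:
    #       graph:
    #         nodes: [...]
    #       features: {...}
    #     dependencies:
    #       - ...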

    @classmethod
    def _append_workflow_export_data(cls, *, export_data: dict, pipeline: Pipeline, include_secret: bool) -> None:
        """
        Append workflow export data
        :param export_data: export data
        :param pipeline: Pipeline instance
        """
        rag_pipeline_service = RagPipelineService()
        workflow = rag_pipeline_service.get_draft_workflow(pipeline=pipeline)
        if not workflow:
            raise ValueError("Missing draft workflow configuration, please check.")

        workflow_dict = workflow.to_dict(include_secret=include_secret)
        for node in workflow_dict.get("graph", {}).get("nodes", []):
            if node.get("data", {}).get("type", "") == NodeType.KNOWLEDGE_RETRIEVAL.value:
                dataset_ids = node["data"].get("dataset_ids", [])
                node["data"]["dataset_ids"] = [
                    cls.encrypt_dataset_id(dataset_id=dataset_id, tenant_id=pipeline.tenant_id)
                    for dataset_id in dataset_ids
                ]
        export_data["workflow"] = workflow_dict
        dependencies = cls._extract_dependencies_from_workflow(workflow)
        export_data["dependencies"] = [
            jsonable_encoder(d.model_dump())
            for d in DependenciesAnalysisService.generate_dependencies(
                tenant_id=pipeline.tenant_id, dependencies=dependencies
            )
        ]

    @classmethod
    def _append_model_config_export_data(cls, export_data: dict, pipeline: Pipeline) -> None:
        """
        Append model config export data
        :param export_data: export data
        :param pipeline: Pipeline instance
        """
        app_model_config = pipeline.app_model_config
        if not app_model_config:
            raise ValueError("Missing app configuration, please check.")

        export_data["model_config"] = app_model_config.to_dict()
        dependencies = cls._extract_dependencies_from_model_config(app_model_config.to_dict())
        export_data["dependencies"] = [
            jsonable_encoder(d.model_dump())
            for d in DependenciesAnalysisService.generate_dependencies(
                tenant_id=pipeline.tenant_id, dependencies=dependencies
            )
        ]

    @classmethod
    def _extract_dependencies_from_workflow(cls, workflow: Workflow) -> list[str]:
        """
        Extract dependencies from workflow
        :param workflow: Workflow instance
        :return: dependencies list, formatted like ["langgenius/google"]
        """
        graph = workflow.graph_dict
        dependencies = cls._extract_dependencies_from_workflow_graph(graph)
        return dependencies

    @classmethod
    def _extract_dependencies_from_workflow_graph(cls, graph: Mapping) -> list[str]:
        """
        Extract dependencies from workflow graph
        :param graph: Workflow graph
        :return: dependencies list, formatted like ["langgenius/google"]
        """
        dependencies = []
        for node in graph.get("nodes", []):
            try:
                typ = node.get("data", {}).get("type")
                match typ:
                    case NodeType.TOOL.value:
                        tool_entity = ToolNodeData(**node["data"])
                        dependencies.append(
                            DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id),
                        )
                    case NodeType.LLM.value:
                        llm_entity = LLMNodeData(**node["data"])
                        dependencies.append(
                            DependenciesAnalysisService.analyze_model_provider_dependency(llm_entity.model.provider),
                        )
                    case NodeType.QUESTION_CLASSIFIER.value:
                        question_classifier_entity = QuestionClassifierNodeData(**node["data"])
                        dependencies.append(
                            DependenciesAnalysisService.analyze_model_provider_dependency(
                                question_classifier_entity.model.provider
                            ),
                        )
                    case NodeType.PARAMETER_EXTRACTOR.value:
                        parameter_extractor_entity = ParameterExtractorNodeData(**node["data"])
                        dependencies.append(
                            DependenciesAnalysisService.analyze_model_provider_dependency(
                                parameter_extractor_entity.model.provider
                            ),
                        )
                    case NodeType.KNOWLEDGE_RETRIEVAL.value:
                        knowledge_retrieval_entity = KnowledgeRetrievalNodeData(**node["data"])
                        if knowledge_retrieval_entity.retrieval_mode == "multiple":
                            if knowledge_retrieval_entity.multiple_retrieval_config:
                                if (
                                    knowledge_retrieval_entity.multiple_retrieval_config.reranking_mode
                                    == "reranking_model"
                                ):
                                    if knowledge_retrieval_entity.multiple_retrieval_config.reranking_model:
                                        dependencies.append(
                                            DependenciesAnalysisService.analyze_model_provider_dependency(
                                                knowledge_retrieval_entity.multiple_retrieval_config.reranking_model.provider
                                            ),
                                        )
                                elif (
                                    knowledge_retrieval_entity.multiple_retrieval_config.reranking_mode
                                    == "weighted_score"
                                ):
                                    if knowledge_retrieval_entity.multiple_retrieval_config.weights:
                                        vector_setting = (
                                            knowledge_retrieval_entity.multiple_retrieval_config.weights.vector_setting
                                        )
                                        dependencies.append(
                                            DependenciesAnalysisService.analyze_model_provider_dependency(
                                                vector_setting.embedding_provider_name
                                            ),
                                        )
                        elif knowledge_retrieval_entity.retrieval_mode == "single":
                            model_config = knowledge_retrieval_entity.single_retrieval_config
                            if model_config:
                                dependencies.append(
                                    DependenciesAnalysisService.analyze_model_provider_dependency(
                                        model_config.model.provider
                                    ),
                                )
                    case _:
                        # TODO: Handle default case or unknown node types
                        pass
            except Exception as e:
                logger.exception("Error extracting node dependency", exc_info=e)
        return dependencies

    @classmethod
    def _extract_dependencies_from_model_config(cls, model_config: Mapping) -> list[str]:
        """
        Extract dependencies from model config
        :param model_config: model config dict
        :return: dependencies list, formatted like ["langgenius/google"]
        """
        dependencies = []
        try:
            # completion model
            model_dict = model_config.get("model", {})
            if model_dict:
                dependencies.append(
                    DependenciesAnalysisService.analyze_model_provider_dependency(model_dict.get("provider", ""))
                )

            # reranking model
            dataset_configs = model_config.get("dataset_configs", {})
            if dataset_configs:
                for dataset_config in dataset_configs.get("datasets", {}).get("datasets", []):
                    if dataset_config.get("reranking_model"):
                        dependencies.append(
                            DependenciesAnalysisService.analyze_model_provider_dependency(
                                dataset_config.get("reranking_model", {})
                                .get("reranking_provider_name", {})
                                .get("provider")
                            )
                        )

            # tools
            agent_configs = model_config.get("agent_mode", {})
            if agent_configs:
                for agent_config in agent_configs.get("tools", []):
                    dependencies.append(
                        DependenciesAnalysisService.analyze_tool_dependency(agent_config.get("provider_id"))
                    )
        except Exception as e:
            logger.exception("Error extracting model config dependency", exc_info=e)
        return dependencies

    @classmethod
    def get_leaked_dependencies(cls, tenant_id: str, dsl_dependencies: list[dict]) -> list[PluginDependency]:
        """
        Returns the leaked dependencies in the current workspace
        """
        dependencies = [PluginDependency(**dep) for dep in dsl_dependencies]
        if not dependencies:
            return []
        return DependenciesAnalysisService.get_leaked_dependencies(tenant_id=tenant_id, dependencies=dependencies)

    @staticmethod
    def _generate_aes_key(tenant_id: str) -> bytes:
        """Generate AES key based on tenant_id"""
        return hashlib.sha256(tenant_id.encode()).digest()

    @classmethod
    def encrypt_dataset_id(cls, dataset_id: str, tenant_id: str) -> str:
        """Encrypt dataset_id using AES-CBC mode"""
        key = cls._generate_aes_key(tenant_id)
        iv = key[:16]
        cipher = AES.new(key, AES.MODE_CBC, iv)
        ct_bytes = cipher.encrypt(pad(dataset_id.encode(), AES.block_size))
        return base64.b64encode(ct_bytes).decode()

    @classmethod
    def decrypt_dataset_id(cls, encrypted_data: str, tenant_id: str) -> str | None:
        """AES decryption; returns None if the ciphertext cannot be decrypted"""
        try:
            key = cls._generate_aes_key(tenant_id)
            iv = key[:16]
            cipher = AES.new(key, AES.MODE_CBC, iv)
            pt = unpad(cipher.decrypt(base64.b64decode(encrypted_data)), AES.block_size)
            return pt.decode()
        except Exception:
            return None
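
# Illustrative round trip for the dataset_id encryption helpers above (a sketch, not part
# of the original file; the UUID and tenant ids are made-up values). Because the AES key
# and IV are both derived deterministically from the tenant_id, a dataset_id encrypted for
# one tenant decrypts only for that same tenant:
#
#     token = RagPipelineDslService.encrypt_dataset_id(
#         dataset_id="3f2b4f9e-0000-0000-0000-000000000000",
#         tenant_id="tenant-a",
#     )
#     RagPipelineDslService.decrypt_dataset_id(encrypted_data=token, tenant_id="tenant-a")
#     # -> "3f2b4f9e-0000-0000-0000-000000000000"
#     RagPipelineDslService.decrypt_dataset_id(encrypted_data=token, tenant_id="tenant-b")
#     # -> None in practice (the padding check fails for the wrong key and the exception is swallowed)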