@@ -283,7 +283,7 @@ class DatasetApi(Resource):
            location="json",
            help="Invalid external knowledge api id.",
        )
        parser.add_argument(
            "icon_info",
            type=dict,

@@ -52,6 +52,7 @@ from fields.document_fields import (
)
from libs.login import login_required
from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
from models.dataset import DocumentPipelineExecutionLog
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
from tasks.add_document_to_index_task import add_document_to_index_task

@@ -1092,6 +1093,35 @@ class WebsiteDocumentSyncApi(DocumentResource):
        return {"result": "success"}, 200


class DocumentPipelineExecutionLogApi(DocumentResource):
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id, document_id):
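        # Return the most recent pipeline execution log recorded for this document.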
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound("Dataset not found.")
        document = DocumentService.get_document(dataset.id, document_id)
        if not document:
            raise NotFound("Document not found.")
        log = (
            db.session.query(DocumentPipelineExecutionLog)
            .filter_by(document_id=document_id)
            .order_by(DocumentPipelineExecutionLog.created_at.desc())
            .first()
        )
        if not log:
            return {"datasource_info": None, "datasource_type": None, "input_data": None}, 200
        return {
            "datasource_info": log.datasource_info,
            "datasource_type": log.datasource_type,
            "input_data": log.input_data,
        }, 200


api.add_resource(GetProcessRuleApi, "/datasets/process-rule")
api.add_resource(DatasetDocumentListApi, "/datasets/<uuid:dataset_id>/documents")
api.add_resource(DatasetInitApi, "/datasets/init")

@@ -41,8 +41,9 @@ class DatasourcePluginOauthApi(Resource):
        if not plugin_oauth_config:
            raise NotFound()
        oauth_handler = OAuthHandler()
        redirect_url = (f"{dify_config.CONSOLE_WEB_URL}/oauth/datasource/callback?"
                        f"provider={provider}&plugin_id={plugin_id}")
        redirect_url = (
            f"{dify_config.CONSOLE_WEB_URL}/oauth/datasource/callback?provider={provider}&plugin_id={plugin_id}"
        )
        system_credentials = plugin_oauth_config.system_credentials
        if system_credentials:
            system_credentials["redirect_url"] = redirect_url

@@ -123,9 +124,7 @@ class DatasourceAuth(Resource):
        args = parser.parse_args()
        datasource_provider_service = DatasourceProviderService()
        datasources = datasource_provider_service.get_datasource_credentials(
            tenant_id=current_user.current_tenant_id,
            provider=args["provider"],
            plugin_id=args["plugin_id"]
            tenant_id=current_user.current_tenant_id, provider=args["provider"], plugin_id=args["plugin_id"]
        )
        return {"result": datasources}, 200

@@ -146,7 +145,7 @@ class DatasourceAuthUpdateDeleteApi(Resource):
            tenant_id=current_user.current_tenant_id,
            auth_id=auth_id,
            provider=args["provider"],
            plugin_id=args["plugin_id"]
            plugin_id=args["plugin_id"],
        )
        return {"result": "success"}, 200

@@ -384,6 +384,7 @@ class PublishedRagPipelineRunApi(Resource):
    #     return result
    #


class RagPipelinePublishedDatasourceNodeRunApi(Resource):
    @setup_required
    @login_required

@@ -419,7 +420,7 @@ class RagPipelinePublishedDatasourceNodeRunApi(Resource):
            user_inputs=inputs,
            account=current_user,
            datasource_type=datasource_type,
            is_published=True
            is_published=True,
        )
        return result

@@ -458,12 +459,12 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource):
        return helper.compact_generate_response(
            PipelineGenerator.convert_to_event_stream(
                rag_pipeline_service.run_datasource_workflow_node(
                    pipeline=pipeline,
                    node_id=node_id,
                    user_inputs=inputs,
                    account=current_user,
                    datasource_type=datasource_type,
                    is_published=False
                    pipeline=pipeline,
                    node_id=node_id,
                    user_inputs=inputs,
                    account=current_user,
                    datasource_type=datasource_type,
                    is_published=False,
                )
            )
        )

@@ -188,7 +188,7 @@ class WorkflowResponseConverter:
            manager = PluginDatasourceManager()
            provider_entity = manager.fetch_datasource_provider(
                self._application_generate_entity.app_config.tenant_id,
                f"{node_data.plugin_id}/{node_data.provider_name}"
                f"{node_data.plugin_id}/{node_data.provider_name}",
            )
            response.data.extras["icon"] = provider_entity.declaration.identity.icon

@@ -33,7 +33,7 @@ from core.workflow.repositories.workflow_execution_repository import WorkflowExe
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
from extensions.ext_database import db
from models import Account, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom
from models.dataset import Document, Pipeline
from models.dataset import Document, DocumentPipelineExecutionLog, Pipeline
from models.enums import WorkflowRunTriggeredFrom
from models.model import AppMode
from services.dataset_service import DocumentService

@@ -136,6 +136,16 @@ class PipelineGenerator(BaseAppGenerator):
            document_id = None
            if invoke_from == InvokeFrom.PUBLISHED:
                document_id = documents[i].id
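                # Persist which datasource and inputs produced this document so it can be inspected later.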
                document_pipeline_execution_log = DocumentPipelineExecutionLog(
                    document_id=document_id,
                    datasource_type=datasource_type,
                    datasource_info=datasource_info,
                    input_data=inputs,
                    pipeline_id=pipeline.id,
                    created_by=user.id,
                )
                db.session.add(document_pipeline_execution_log)
                db.session.commit()
            application_generate_entity = RagPipelineGenerateEntity(
                task_id=str(uuid.uuid4()),
                app_config=pipeline_config,

@@ -284,17 +284,20 @@ class WebSiteInfo(BaseModel):
    """
    Website info
    """

    status: Optional[str] = Field(..., description="crawl job status")
    web_info_list: Optional[list[WebSiteInfoDetail]] = []
    total: Optional[int] = Field(default=0, description="The total number of websites")
    completed: Optional[int] = Field(default=0, description="The number of completed websites")


class WebsiteCrawlMessage(BaseModel):
    """
    Get website crawl response
    """

    result: WebSiteInfo = WebSiteInfo(status="", web_info_list=[], total=0, completed=0)


class DatasourceMessage(ToolInvokeMessage):
    pass

@@ -43,7 +43,6 @@ class WebsiteCrawlDatasourcePluginProviderController(DatasourcePluginProviderCon
        if not datasource_entity:
            raise ValueError(f"Datasource with name {datasource_name} not found")

        return WebsiteCrawlDatasourcePlugin(
            entity=datasource_entity,
            runtime=DatasourceRuntime(tenant_id=self.tenant_id),

@@ -277,8 +277,7 @@ InNodeEvent = BaseNodeEvent | BaseParallelBranchEvent | BaseIterationEvent | Bas


class DatasourceRunEvent(BaseModel):
    status: str = Field(..., description="status")
    data: Mapping[str,Any] | list = Field(..., description="result")
    data: Mapping[str, Any] | list = Field(..., description="result")
    total: Optional[int] = Field(..., description="total")
    completed: Optional[int] = Field(..., description="completed")
    time_consuming: Optional[float] = Field(..., description="time consuming")

@@ -74,12 +74,12 @@ class DatasourceNode(BaseNode[DatasourceNodeData]):
        except DatasourceNodeError as e:
            yield RunCompletedEvent(
                run_result=NodeRunResult(
                status=WorkflowNodeExecutionStatus.FAILED,
                inputs={},
                metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info},
                error=f"Failed to get datasource runtime: {str(e)}",
                error_type=type(e).__name__,
                )
                    status=WorkflowNodeExecutionStatus.FAILED,
                    inputs={},
                    metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info},
                    error=f"Failed to get datasource runtime: {str(e)}",
                    error_type=type(e).__name__,
                )
            )
        # get parameters

@@ -114,16 +114,17 @@ class DatasourceNode(BaseNode[DatasourceNodeData]):
                    )
                case DatasourceProviderType.WEBSITE_CRAWL:
                    yield RunCompletedEvent(run_result=NodeRunResult(
                        status=WorkflowNodeExecutionStatus.SUCCEEDED,
                        inputs=parameters_for_log,
                        metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info},
                        outputs={
                            **datasource_info,
                            "datasource_type": datasource_type,
                        },
                    ))
                    yield RunCompletedEvent(
                        run_result=NodeRunResult(
                            status=WorkflowNodeExecutionStatus.SUCCEEDED,
                            inputs=parameters_for_log,
                            metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info},
                            outputs={
                                **datasource_info,
                                "datasource_type": datasource_type,
                            },
                        )
                    )
                case DatasourceProviderType.LOCAL_FILE:
                    related_id = datasource_info.get("related_id")
                    if not related_id:
| @@ -155,33 +156,39 @@ class DatasourceNode(BaseNode[DatasourceNodeData]): | |||
| variable_key_list=new_key_list, | |||
| variable_value=value, | |||
| ) | |||
| yield RunCompletedEvent(run_result=NodeRunResult( | |||
| status=WorkflowNodeExecutionStatus.SUCCEEDED, | |||
| inputs=parameters_for_log, | |||
| metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info}, | |||
| outputs={ | |||
| "file_info": datasource_info, | |||
| "datasource_type": datasource_type, | |||
| }, | |||
| )) | |||
| yield RunCompletedEvent( | |||
| run_result=NodeRunResult( | |||
| status=WorkflowNodeExecutionStatus.SUCCEEDED, | |||
| inputs=parameters_for_log, | |||
| metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info}, | |||
| outputs={ | |||
| "file_info": datasource_info, | |||
| "datasource_type": datasource_type, | |||
| }, | |||
| ) | |||
| ) | |||
| case _: | |||
| raise DatasourceNodeError(f"Unsupported datasource provider: {datasource_type}") | |||
| except PluginDaemonClientSideError as e: | |||
| yield RunCompletedEvent(run_result=NodeRunResult( | |||
| status=WorkflowNodeExecutionStatus.FAILED, | |||
| inputs=parameters_for_log, | |||
| metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info}, | |||
| error=f"Failed to transform datasource message: {str(e)}", | |||
| error_type=type(e).__name__, | |||
| )) | |||
| yield RunCompletedEvent( | |||
| run_result=NodeRunResult( | |||
| status=WorkflowNodeExecutionStatus.FAILED, | |||
| inputs=parameters_for_log, | |||
| metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info}, | |||
| error=f"Failed to transform datasource message: {str(e)}", | |||
| error_type=type(e).__name__, | |||
| ) | |||
| ) | |||
| except DatasourceNodeError as e: | |||
| yield RunCompletedEvent(run_result=NodeRunResult( | |||
| status=WorkflowNodeExecutionStatus.FAILED, | |||
| inputs=parameters_for_log, | |||
| metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info}, | |||
| error=f"Failed to invoke datasource: {str(e)}", | |||
| error_type=type(e).__name__, | |||
| )) | |||
| yield RunCompletedEvent( | |||
| run_result=NodeRunResult( | |||
| status=WorkflowNodeExecutionStatus.FAILED, | |||
| inputs=parameters_for_log, | |||
| metadata={WorkflowNodeExecutionMetadataKey.DATASOURCE_INFO: datasource_info}, | |||
| error=f"Failed to invoke datasource: {str(e)}", | |||
| error_type=type(e).__name__, | |||
| ) | |||
| ) | |||
| def _generate_parameters( | |||
| self, | |||
| @@ -286,8 +293,6 @@ class DatasourceNode(BaseNode[DatasourceNodeData]): | |||
| return result | |||
| def _transform_message( | |||
| self, | |||
| messages: Generator[DatasourceMessage, None, None], | |||

@@ -123,10 +123,14 @@ class KnowledgeIndexNode(BaseNode[KnowledgeIndexNodeData]):
        # update document status
        document.indexing_status = "completed"
        document.completed_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
        document.word_count = db.session.query(func.sum(DocumentSegment.word_count)).filter(
            DocumentSegment.document_id == document.id,
            DocumentSegment.dataset_id == dataset.id,
        ).scalar()
        document.word_count = (
            db.session.query(func.sum(DocumentSegment.word_count))
            .filter(
                DocumentSegment.document_id == document.id,
                DocumentSegment.dataset_id == dataset.id,
            )
            .scalar()
        )
        db.session.add(document)
        # update document segment status
        db.session.query(DocumentSegment).filter(

@@ -349,6 +349,7 @@ def _build_from_datasource_file(
        storage_key=datasource_file.key,
    )


def _is_file_valid_with_config(
    *,
    input_file_type: str,

@@ -12,7 +12,7 @@ from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'b35c3db83d09'
down_revision = '2adcbe1f5dfb'
down_revision = '4474872b0ee6'
branch_labels = None
depends_on = None

@@ -0,0 +1,45 @@
"""add_pipeline_info_7

Revision ID: 70a0fc0c013f
Revises: 224fba149d48
Create Date: 2025-06-17 19:05:39.920953

"""
from alembic import op
import models as models
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '70a0fc0c013f'
down_revision = '224fba149d48'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('document_pipeline_execution_logs',
    sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
    sa.Column('pipeline_id', models.types.StringUUID(), nullable=False),
    sa.Column('document_id', models.types.StringUUID(), nullable=False),
    sa.Column('datasource_type', sa.String(length=255), nullable=False),
    sa.Column('datasource_info', sa.Text(), nullable=False),
    sa.Column('input_data', sa.JSON(), nullable=False),
    sa.Column('created_by', models.types.StringUUID(), nullable=True),
    sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False),
    sa.PrimaryKeyConstraint('id', name='document_pipeline_execution_log_pkey')
    )
    with op.batch_alter_table('document_pipeline_execution_logs', schema=None) as batch_op:
        batch_op.create_index('document_pipeline_execution_logs_document_id_idx', ['document_id'], unique=False)

    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('document_pipeline_execution_logs', schema=None) as batch_op:
        batch_op.drop_index('document_pipeline_execution_logs_document_id_idx')

    op.drop_table('document_pipeline_execution_logs')
    # ### end Alembic commands ###

@@ -75,12 +75,16 @@ class Dataset(Base):

    @property
    def total_available_documents(self):
        return db.session.query(func.count(Document.id)).filter(
            Document.dataset_id == self.id,
            Document.indexing_status == "completed",
            Document.enabled == True,
            Document.archived == False,
        ).scalar()
        return (
            db.session.query(func.count(Document.id))
            .filter(
                Document.dataset_id == self.id,
                Document.indexing_status == "completed",
                Document.enabled == True,
                Document.archived == False,
            )
            .scalar()
        )

    @property
    def dataset_keyword_table(self):

@@ -325,6 +329,7 @@ class DatasetProcessRule(Base):
        except JSONDecodeError:
            return None


class Document(Base):
    __tablename__ = "documents"
    __table_args__ = (

@@ -1248,3 +1253,20 @@ class Pipeline(Base):  # type: ignore[name-defined]
    @property
    def dataset(self):
        return db.session.query(Dataset).filter(Dataset.pipeline_id == self.id).first()


class DocumentPipelineExecutionLog(Base):
    __tablename__ = "document_pipeline_execution_logs"
    __table_args__ = (
        db.PrimaryKeyConstraint("id", name="document_pipeline_execution_log_pkey"),
        db.Index("document_pipeline_execution_logs_document_id_idx", "document_id"),
    )
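
    # One row is written per document created by a published pipeline run (see PipelineGenerator).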
    id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
    pipeline_id = db.Column(StringUUID, nullable=False)
    document_id = db.Column(StringUUID, nullable=False)
    datasource_type = db.Column(db.String(255), nullable=False)
    datasource_info = db.Column(db.Text, nullable=False)
    input_data = db.Column(db.JSON, nullable=False)
    created_by = db.Column(StringUUID, nullable=True)
    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())

@@ -334,11 +334,15 @@ class DatasetService:
            dataset.retrieval_model = external_retrieval_model

        dataset.name = data.get("name", dataset.name)
        # check if dataset name is exists
        if db.session.query(Dataset).filter(
            Dataset.id != dataset_id,
            Dataset.name == dataset.name,
            Dataset.tenant_id == dataset.tenant_id,
        ).first():
        if (
            db.session.query(Dataset)
            .filter(
                Dataset.id != dataset_id,
                Dataset.name == dataset.name,
                Dataset.tenant_id == dataset.tenant_id,
            )
            .first()
        ):
            raise ValueError("Dataset name already exists")
        dataset.description = data.get("description", "")
        permission = data.get("permission")

@@ -36,7 +36,7 @@ class DatasourceProviderService:
            user_id=current_user.id,
            provider=provider,
            plugin_id=plugin_id,
            credentials=credentials
            credentials=credentials,
        )
        if credential_valid:
            # Get all provider configurations of the current workspace

@@ -47,9 +47,8 @@ class DatasourceProviderService:
            )
            provider_credential_secret_variables = self.extract_secret_variables(
                tenant_id=tenant_id,
                provider_id=f"{plugin_id}/{provider}"
            )
                tenant_id=tenant_id, provider_id=f"{plugin_id}/{provider}"
            )
            for key, value in credentials.items():
                if key in provider_credential_secret_variables:
                    # if send [__HIDDEN__] in secret input, it will be same as original value

@@ -73,9 +72,9 @@ class DatasourceProviderService:
        :param credential_form_schemas:
        :return:
        """
        datasource_provider = self.provider_manager.fetch_datasource_provider(tenant_id=tenant_id,
                                                                               provider_id=provider_id
                                                                               )
        datasource_provider = self.provider_manager.fetch_datasource_provider(
            tenant_id=tenant_id, provider_id=provider_id
        )
        credential_form_schemas = datasource_provider.declaration.credentials_schema
        secret_input_form_variables = []
        for credential_form_schema in credential_form_schemas:

@@ -108,8 +107,9 @@ class DatasourceProviderService:
        for datasource_provider in datasource_providers:
            encrypted_credentials = datasource_provider.encrypted_credentials
            # Get provider credential secret variables
            credential_secret_variables = self.extract_secret_variables(tenant_id=tenant_id,
                                                                        provider_id=f"{plugin_id}/{provider}")
            credential_secret_variables = self.extract_secret_variables(
                tenant_id=tenant_id, provider_id=f"{plugin_id}/{provider}"
            )

            # Obfuscate provider credentials
            copy_credentials = encrypted_credentials.copy()

@@ -149,8 +149,9 @@ class DatasourceProviderService:
        for datasource_provider in datasource_providers:
            encrypted_credentials = datasource_provider.encrypted_credentials
            # Get provider credential secret variables
            credential_secret_variables = self.extract_secret_variables(tenant_id=tenant_id,
                                                                        provider_id=f"{plugin_id}/{provider}")
            credential_secret_variables = self.extract_secret_variables(
                tenant_id=tenant_id, provider_id=f"{plugin_id}/{provider}"
            )

            # Obfuscate provider credentials
            copy_credentials = encrypted_credentials.copy()

@@ -166,18 +167,18 @@ class DatasourceProviderService:

        return copy_credentials_list

    def update_datasource_credentials(self,
                                      tenant_id: str,
                                      auth_id: str,
                                      provider: str,
                                      plugin_id: str,
                                      credentials: dict) -> None:
    def update_datasource_credentials(
        self, tenant_id: str, auth_id: str, provider: str, plugin_id: str, credentials: dict
    ) -> None:
        """
        update datasource credentials.
        """
        credential_valid = self.provider_manager.validate_provider_credentials(
            tenant_id=tenant_id, user_id=current_user.id, provider=provider,plugin_id=plugin_id, credentials=credentials
            tenant_id=tenant_id,
            user_id=current_user.id,
            provider=provider,
            plugin_id=plugin_id,
            credentials=credentials,
        )
        if credential_valid:
            # Get all provider configurations of the current workspace

@@ -188,9 +189,8 @@ class DatasourceProviderService:
            )
            provider_credential_secret_variables = self.extract_secret_variables(
                tenant_id=tenant_id,
                provider_id=f"{plugin_id}/{provider}"
            )
                tenant_id=tenant_id, provider_id=f"{plugin_id}/{provider}"
            )
            if not datasource_provider:
                raise ValueError("Datasource provider not found")
            else:

@@ -66,7 +66,7 @@ class CustomizedPipelineTemplateRetrieval(PipelineTemplateRetrievalBase):
        )
        if not pipeline_template:
            return None

        dsl_data = yaml.safe_load(pipeline_template.yaml_content)
        graph_data = dsl_data.get("workflow", {}).get("graph", {})

@@ -484,8 +484,13 @@ class RagPipelineService:
        # raise ValueError(f"Unsupported datasource provider: {datasource_runtime.datasource_provider_type}")

    def run_datasource_workflow_node(
        self, pipeline: Pipeline, node_id: str, user_inputs: dict, account: Account, datasource_type: str,
        is_published: bool
        self,
        pipeline: Pipeline,
        node_id: str,
        user_inputs: dict,
        account: Account,
        datasource_type: str,
        is_published: bool,
    ) -> Generator[str, None, None]:
        """
        Run published workflow datasource

@@ -525,27 +530,26 @@ class RagPipelineService:
        datasource_provider_service = DatasourceProviderService()
        credentials = datasource_provider_service.get_real_datasource_credentials(
            tenant_id=pipeline.tenant_id,
            provider=datasource_node_data.get('provider_name'),
            plugin_id=datasource_node_data.get('plugin_id'),
            provider=datasource_node_data.get("provider_name"),
            plugin_id=datasource_node_data.get("plugin_id"),
        )
        if credentials:
            datasource_runtime.runtime.credentials = credentials[0].get("credentials")
        match datasource_type:
            case DatasourceProviderType.ONLINE_DOCUMENT:
                datasource_runtime = cast(OnlineDocumentDatasourcePlugin, datasource_runtime)
                online_document_result: Generator[OnlineDocumentPagesMessage, None, None] =\
                online_document_result: Generator[OnlineDocumentPagesMessage, None, None] = (
                    datasource_runtime.get_online_document_pages(
                        user_id=account.id,
                        datasource_parameters=user_inputs,
                        provider_type=datasource_runtime.datasource_provider_type(),
                    )
                )
                start_time = time.time()
                for message in online_document_result:
                    end_time = time.time()
                    online_document_event = DatasourceRunEvent(
                        status="completed",
                        data=message.result,
                        time_consuming=round(end_time - start_time, 2)
                        status="completed", data=message.result, time_consuming=round(end_time - start_time, 2)
                    )
                    yield json.dumps(online_document_event.model_dump())

@@ -564,7 +568,7 @@ class RagPipelineService:
                        data=message.result.web_info_list,
                        total=message.result.total,
                        completed=message.result.completed,
                        time_consuming = round(end_time - start_time, 2)
                        time_consuming=round(end_time - start_time, 2),
                    )
                    yield json.dumps(crawl_event.model_dump())
            case _:

@@ -781,9 +785,7 @@ class RagPipelineService:
            raise ValueError("Datasource node data not found")

        variables = datasource_node_data.get("variables", {})
        if variables:
            variables_map = {
                item["variable"]: item for item in variables
            }
            variables_map = {item["variable"]: item for item in variables}
        else:
            return []
        datasource_parameters = datasource_node_data.get("datasource_parameters", {})

@@ -813,9 +815,7 @@ class RagPipelineService:
            raise ValueError("Datasource node data not found")

        variables = datasource_node_data.get("variables", {})
        if variables:
            variables_map = {
                item["variable"]: item for item in variables
            }
            variables_map = {item["variable"]: item for item in variables}
        else:
            return []
        datasource_parameters = datasource_node_data.get("datasource_parameters", {})

@@ -967,11 +967,14 @@ class RagPipelineService:
        if not dataset:
            raise ValueError("Dataset not found")

        max_position = db.session.query(
            func.max(PipelineCustomizedTemplate.position)).filter(
            PipelineCustomizedTemplate.tenant_id == pipeline.tenant_id).scalar()
        max_position = (
            db.session.query(func.max(PipelineCustomizedTemplate.position))
            .filter(PipelineCustomizedTemplate.tenant_id == pipeline.tenant_id)
            .scalar()
        )
        from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService

        dsl = RagPipelineDslService.export_rag_pipeline_dsl(pipeline=pipeline, include_secret=True)

        pipeline_customized_template = PipelineCustomizedTemplate(