Browse Source

Refactor: use logger = logging.getLogger(__name__) in logging (#24515)

Co-authored-by: Yongtao Huang <99629139+hyongtao-db@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
tags/1.8.0
Yongtao Huang 2 months ago
parent
commit
fa753239ad
No account is linked to the contributor's email address
100 changed files with 562 additions and 398 deletions
  1. +4 -2  api/commands.py
  2. +7 -5  api/controllers/console/app/audio.py
  3. +6 -4  api/controllers/console/app/completion.py
  4. +3 -1  api/controllers/console/app/message.py
  5. +5 -5  api/controllers/console/app/workflow.py
  6. +4 -2  api/controllers/console/auth/data_source_oauth.py
  7. +3 -1  api/controllers/console/auth/oauth.py
  8. +3 -1  api/controllers/console/datasets/datasets_document.py
  9. +3 -1  api/controllers/console/datasets/hit_testing_base.py
  10. +6 -4  api/controllers/console/explore/audio.py
  11. +6 -4  api/controllers/console/explore/completion.py
  12. +4 -2  api/controllers/console/explore/message.py
  13. +1 -1  api/controllers/console/explore/workflow.py
  14. +4 -2  api/controllers/console/version.py
  15. +4 -2  api/controllers/console/workspace/models.py
  16. +4 -1  api/controllers/console/workspace/workspace.py
  17. +6 -4  api/controllers/service_api/app/audio.py
  18. +7 -4  api/controllers/service_api/app/completion.py
  19. +4 -1  api/controllers/service_api/app/message.py
  20. +2 -2  api/controllers/service_api/app/workflow.py
  21. +3 -1  api/controllers/web/app.py
  22. +6 -4  api/controllers/web/audio.py
  23. +6 -4  api/controllers/web/completion.py
  24. +4 -2  api/controllers/web/message.py
  25. +1 -1  api/controllers/web/workflow.py
  26. +3 -1  api/core/app/apps/base_app_generate_response_converter.py
  27. +3 -1  api/core/app/task_pipeline/message_cycle_manager.py
  28. +6 -4  api/core/extension/extensible.py
  29. +3 -1  api/core/helper/module_import_helper.py
  30. +4 -2  api/core/helper/ssrf_proxy.py
  31. +7 -5  api/core/indexing_runner.py
  32. +9 -7  api/core/llm_generator/llm_generator.py
  33. +1 -1  api/core/mcp/mcp_client.py
  34. +5 -2  api/core/mcp/session/base_session.py
  35. +1 -1  api/core/ops/aliyun_trace/aliyun_trace.py
  36. +5 -3  api/core/ops/ops_trace_manager.py
  37. +3 -3  api/core/plugin/impl/base.py
  38. +4 -2  api/core/rag/datasource/vdb/myscale/myscale_vector.py
  39. +3 -1  api/core/rag/datasource/vdb/pgvector/pgvector.py
  40. +10 -8  api/core/rag/datasource/vdb/tablestore/tablestore_vector.py
  41. +3 -3  api/core/rag/embedding/cached_embedding.py
  42. +3 -1  api/core/rag/index_processor/processor/qa_index_processor.py
  43. +5 -3  api/events/event_handlers/create_document_index.py
  44. +3 -1  api/extensions/ext_mail.py
  45. +6 -4  api/extensions/ext_otel.py
  46. +9 -9  api/extensions/ext_request_logging.py
  47. +3 -1  api/libs/helper.py
  48. +9 -7  api/libs/sendgrid.py
  49. +5 -3  api/libs/smtp.py
  50. +3 -1  api/models/dataset.py
  51. +2 -2  api/models/workflow.py
  52. +2 -2  api/schedule/clean_messages.py
  53. +8 -8  api/schedule/clean_workflow_runlogs_precise.py
  54. +5 -5  api/schedule/mail_clean_document_notify_task.py
  55. +9 -7  api/schedule/queue_monitor_task.py
  56. +9 -7  api/services/account_service.py
  57. +3 -1  api/services/app_service.py
  58. +16 -14  api/services/dataset_service.py
  59. +4 -2  api/services/hit_testing_service.py
  60. +7 -5  api/services/metadata_service.py
  61. +2 -2  api/services/vector_service.py
  62. +6 -6  api/services/workflow_draft_variable_service.py
  63. +6 -4  api/tasks/add_document_to_index_task.py
  64. +5 -3  api/tasks/annotation/add_annotation_to_index_task.py
  65. +5 -3  api/tasks/annotation/batch_import_annotations_task.py
  66. +6 -4  api/tasks/annotation/delete_annotation_index_task.py
  67. +8 -6  api/tasks/annotation/disable_annotation_reply_task.py
  68. +8 -6  api/tasks/annotation/enable_annotation_reply_task.py
  69. +5 -3  api/tasks/annotation/update_annotation_to_index_task.py
  70. +7 -5  api/tasks/batch_clean_document_task.py
  71. +5 -3  api/tasks/batch_create_segment_to_index_task.py
  72. +14 -12  api/tasks/clean_dataset_task.py
  73. +7 -5  api/tasks/clean_document_task.py
  74. +5 -5  api/tasks/clean_notion_document_task.py
  75. +9 -7  api/tasks/create_segment_to_index_task.py
  76. +5 -3  api/tasks/deal_dataset_vector_index_task.py
  77. +5 -3  api/tasks/delete_conversation_task.py
  78. +5 -3  api/tasks/delete_segment_from_index_task.py
  79. +10 -8  api/tasks/disable_segment_from_index_task.py
  80. +6 -4  api/tasks/disable_segments_from_index_task.py
  81. +9 -7  api/tasks/document_indexing_sync_task.py
  82. +7 -5  api/tasks/document_indexing_task.py
  83. +9 -7  api/tasks/document_indexing_update_task.py
  84. +7 -5  api/tasks/duplicate_document_indexing_task.py
  85. +10 -8  api/tasks/enable_segment_to_index_task.py
  86. +8 -6  api/tasks/enable_segments_to_index_task.py
  87. +8 -6  api/tasks/mail_account_deletion_task.py
  88. +8 -6  api/tasks/mail_change_mail_task.py
  89. +5 -3  api/tasks/mail_email_code_login.py
  90. +5 -3  api/tasks/mail_inner_task.py
  91. +5 -5  api/tasks/mail_invite_member_task.py
  92. +11 -9  api/tasks/mail_owner_transfer_task.py
  93. +5 -3  api/tasks/mail_reset_password_task.py
  94. +5 -3  api/tasks/ops_trace_task.py
  95. +7 -5  api/tasks/recover_document_indexing_task.py
  96. +13 -13  api/tasks/remove_app_and_related_data_task.py
  97. +8 -8  api/tasks/remove_document_from_index_task.py
  98. +9 -7  api/tasks/retry_document_indexing_task.py
  99. +7 -5  api/tasks/sync_website_document_indexing_task.py
  100. +0 -0  api/tests/unit_tests/extensions/test_ext_request_logging.py
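
Every diff below applies the same mechanical change: module-level calls on the logging package (logging.exception, logging.warning, ...), which route through the root logger, are replaced by calls on a per-module logger obtained with logging.getLogger(__name__). A minimal runnable sketch of the before/after pattern (the function and the simulated failure are illustrative, not code from this commit):

import logging

logger = logging.getLogger(__name__)  # logger named after the module, e.g. "api.commands"


def run_migration() -> None:
    try:
        raise RuntimeError("simulated failure")  # stand-in for the real work
    except Exception:
        # Before: logging.exception(...) logged via the root logger, so records
        # from every module looked alike. After: the record (with traceback)
        # is attributed to __name__ and can be filtered per module.
        logger.exception("Failed to execute database migration")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    run_migration()

Because getLogger(__name__) places each logger in the dotted-name hierarchy, a deployment can tune verbosity for a single package, e.g. logging.getLogger("api.tasks").setLevel(logging.WARNING), without touching the root logger.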

+4 -2  api/commands.py

 from services.plugin.plugin_migration import PluginMigration
 from tasks.remove_app_and_related_data_task import delete_draft_variables_batch

+logger = logging.getLogger(__name__)
+

 @click.command("reset-password", help="Reset the account password.")
 @click.option("--email", prompt=True, help="Account email to reset password for")

     click.echo(click.style("Database migration successful!", fg="green"))
 except Exception:
-    logging.exception("Failed to execute database migration")
+    logger.exception("Failed to execute database migration")
 finally:
     lock.release()
 else:

 except Exception:
     failed_app_ids.append(app_id)
     click.echo(click.style(f"Failed to fix missing site for app {app_id}", fg="red"))
-    logging.exception("Failed to fix app related site missing issue, app_id: %s", app_id)
+    logger.exception("Failed to fix app related site missing issue, app_id: %s", app_id)
     continue

 if not processed_count:

+7 -5  api/controllers/console/app/audio.py

     UnsupportedAudioTypeServiceError,
 )

+logger = logging.getLogger(__name__)
+

 class ChatMessageAudioApi(Resource):
     @setup_required

     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("Failed to handle post request to ChatMessageAudioApi")
+    logger.exception("Failed to handle post request to ChatMessageAudioApi")
     raise InternalServerError()

     )
     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("Failed to handle post request to ChatMessageTextApi")
+    logger.exception("Failed to handle post request to ChatMessageTextApi")
     raise InternalServerError()

 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("Failed to handle get request to TextModesApi")
+    logger.exception("Failed to handle get request to TextModesApi")
     raise InternalServerError()





+6 -4  api/controllers/console/app/completion.py

 from services.app_generate_service import AppGenerateService
 from services.errors.llm import InvokeRateLimitError

+logger = logging.getLogger(__name__)
+

 # define completion message api for user
 class CompletionMessageApi(Resource):

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+3 -1  api/controllers/console/app/message.py

 from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
 from services.message_service import MessageService

+logger = logging.getLogger(__name__)
+

 class ChatMessageListApi(Resource):
     message_infinite_scroll_pagination_fields = {

 except SuggestedQuestionsAfterAnswerDisabledError:
     raise AppSuggestedQuestionsAfterAnswerDisabledError()
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 return {"data": questions}

+5 -5  api/controllers/console/app/workflow.py

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+4 -2  api/controllers/console/auth/data_source_oauth.py

 from ..wraps import account_initialization_required, setup_required

+logger = logging.getLogger(__name__)
+

 def get_oauth_providers():
     with current_app.app_context():

 try:
     oauth_provider.get_access_token(code)
 except requests.exceptions.HTTPError as e:
-    logging.exception(
+    logger.exception(
         "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text
     )
     return {"error": "OAuth data source process failed"}, 400

 try:
     oauth_provider.sync_data_source(binding_id)
 except requests.exceptions.HTTPError as e:
-    logging.exception(
+    logger.exception(
         "An error occurred during the OAuthCallback process with %s: %s", provider, e.response.text
     )
     return {"error": "OAuth data source process failed"}, 400

+3 -1  api/controllers/console/auth/oauth.py

 from .. import api

+logger = logging.getLogger(__name__)
+

 def get_oauth_providers():
     with current_app.app_context():

     user_info = oauth_provider.get_user_info(token)
 except requests.exceptions.RequestException as e:
     error_text = e.response.text if e.response else str(e)
-    logging.exception("An error occurred during the OAuth process with %s: %s", provider, error_text)
+    logger.exception("An error occurred during the OAuth process with %s: %s", provider, error_text)
     return {"error": "OAuth process failed"}, 400

 if invite_token and RegisterService.is_valid_invite_token(invite_token):

+3 -1  api/controllers/console/datasets/datasets_document.py

 from services.dataset_service import DatasetService, DocumentService
 from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig

+logger = logging.getLogger(__name__)
+

 class DocumentResource(Resource):
     def get_document(self, dataset_id: str, document_id: str) -> Document:

         raise DocumentAlreadyFinishedError()
     retry_documents.append(document)
 except Exception:
-    logging.exception("Failed to retry document, document id: %s", document_id)
+    logger.exception("Failed to retry document, document id: %s", document_id)
     continue
 # retry document
 DocumentService.retry_document(dataset_id, retry_documents)

+3 -1  api/controllers/console/datasets/hit_testing_base.py

 from services.dataset_service import DatasetService
 from services.hit_testing_service import HitTestingService

+logger = logging.getLogger(__name__)
+

 class DatasetsHitTestingBase:
     @staticmethod

 except ValueError as e:
     raise ValueError(str(e))
 except Exception as e:
-    logging.exception("Hit testing failed.")
+    logger.exception("Hit testing failed.")
     raise InternalServerError(str(e))

+6 -4  api/controllers/console/explore/audio.py

     UnsupportedAudioTypeServiceError,
 )

+logger = logging.getLogger(__name__)
+

 class ChatAudioApi(InstalledAppResource):
     def post(self, installed_app):

     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

     response = AudioService.transcript_tts(app_model=app_model, text=text, voice=voice, message_id=message_id)
     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

+6 -4  api/controllers/console/explore/completion.py

 from services.app_generate_service import AppGenerateService
 from services.errors.llm import InvokeRateLimitError

+logger = logging.getLogger(__name__)
+

 # define completion api for user
 class CompletionApi(InstalledAppResource):

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+4 -2  api/controllers/console/explore/message.py

 )
 from services.message_service import MessageService

+logger = logging.getLogger(__name__)
+

 class MessageListApi(InstalledAppResource):
     @marshal_with(message_infinite_scroll_pagination_fields)

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except InvokeError as e:
     raise CompletionRequestError(e.description)
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 return {"data": questions}

+1 -1  api/controllers/console/explore/workflow.py

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+4 -2  api/controllers/console/version.py

 from . import api

+logger = logging.getLogger(__name__)
+

 class VersionApi(Resource):
     def get(self):

 try:
     response = requests.get(check_update_url, {"current_version": args.get("current_version")}, timeout=(3, 10))
 except Exception as error:
-    logging.warning("Check update version error: %s.", str(error))
+    logger.warning("Check update version error: %s.", str(error))
     result["version"] = args.get("current_version")
     return result

 # Compare versions
     return latest > current
 except version.InvalidVersion:
-    logging.warning("Invalid version format: latest=%s, current=%s", latest_version, current_version)
+    logger.warning("Invalid version format: latest=%s, current=%s", latest_version, current_version)
     return False





+4 -2  api/controllers/console/workspace/models.py

 from services.model_load_balancing_service import ModelLoadBalancingService
 from services.model_provider_service import ModelProviderService

+logger = logging.getLogger(__name__)
+

 class DefaultModelApi(Resource):
     @setup_required

         model=model_setting["model"],
     )
 except Exception as ex:
-    logging.exception(
+    logger.exception(
         "Failed to update default model, model type: %s, model: %s",
         model_setting["model_type"],
         model_setting.get("model"),

     credential_name=args["name"],
 )
 except CredentialsValidateFailedError as ex:
-    logging.exception(
+    logger.exception(
         "Failed to save model credentials, tenant_id: %s, model: %s, model_type: %s",
         tenant_id,
         args.get("model"),

+4 -1  api/controllers/console/workspace/workspace.py

 from services.file_service import FileService
 from services.workspace_service import WorkspaceService

+logger = logging.getLogger(__name__)
+

 provider_fields = {
     "provider_name": fields.String,
     "provider_type": fields.String,

 @marshal_with(tenant_fields)
 def get(self):
     if request.path == "/info":
-        logging.warning("Deprecated URL /info was used.")
+        logger.warning("Deprecated URL /info was used.")

     tenant = current_user.current_tenant



+6 -4  api/controllers/service_api/app/audio.py

     UnsupportedAudioTypeServiceError,
 )

+logger = logging.getLogger(__name__)
+

 @service_api_ns.route("/audio-to-text")
 class AudioApi(Resource):

     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

+7 -4  api/controllers/service_api/app/completion.py

 from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
 from services.errors.llm import InvokeRateLimitError

+logger = logging.getLogger(__name__)
+

 # Define parser for completion API
 completion_parser = reqparse.RequestParser()
 completion_parser.add_argument(

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+4 -1  api/controllers/service_api/app/message.py

 )
 from services.message_service import MessageService

+logger = logging.getLogger(__name__)
+

 # Define parsers for message APIs
 message_list_parser = reqparse.RequestParser()
 message_list_parser.add_argument(

 except SuggestedQuestionsAfterAnswerDisabledError:
     raise BadRequest("Suggested Questions Is Disabled.")
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 return {"result": "success", "data": questions}

+2 -2  api/controllers/service_api/app/workflow.py

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+3 -1  api/controllers/web/app.py

 from services.feature_service import FeatureService
 from services.webapp_auth_service import WebAppAuthService

+logger = logging.getLogger(__name__)
+

 class AppParameterApi(WebApiResource):
     """Resource for app variables."""

 except Unauthorized:
     raise
 except Exception:
-    logging.exception("Unexpected error during auth verification")
+    logger.exception("Unexpected error during auth verification")
     raise

 features = FeatureService.get_system_features()

+6 -4  api/controllers/web/audio.py

     UnsupportedAudioTypeServiceError,
 )

+logger = logging.getLogger(__name__)
+

 class AudioApi(WebApiResource):
     def post(self, app_model: App, end_user):

     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("Failed to handle post request to AudioApi")
+    logger.exception("Failed to handle post request to AudioApi")
     raise InternalServerError()

     return response
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except NoAudioUploadedServiceError:
     raise NoAudioUploadedError()
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("Failed to handle post request to TextApi")
+    logger.exception("Failed to handle post request to TextApi")
     raise InternalServerError()





+6 -4  api/controllers/web/completion.py

 from services.app_generate_service import AppGenerateService
 from services.errors.llm import InvokeRateLimitError

+logger = logging.getLogger(__name__)
+

 # define completion api for user
 class CompletionApi(WebApiResource):

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except services.errors.conversation.ConversationCompletedError:
     raise ConversationCompletedError()
 except services.errors.app_model_config.AppModelConfigBrokenError:
-    logging.exception("App model config broken.")
+    logger.exception("App model config broken.")
     raise AppUnavailableError()
 except ProviderTokenNotInitError as ex:
     raise ProviderNotInitializeError(ex.description)
 except ValueError as e:
     raise e
 except Exception as e:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+4 -2  api/controllers/web/message.py

 )
 from services.message_service import MessageService

+logger = logging.getLogger(__name__)
+

 class MessageListApi(WebApiResource):
     message_fields = {

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 except InvokeError as e:
     raise CompletionRequestError(e.description)
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()

 return {"data": questions}

+1 -1  api/controllers/web/workflow.py

 except ValueError as e:
     raise e
 except Exception:
-    logging.exception("internal server error.")
+    logger.exception("internal server error.")
     raise InternalServerError()





+3 -1  api/core/app/apps/base_app_generate_response_converter.py

 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError

+logger = logging.getLogger(__name__)
+

 class AppGenerateResponseConverter(ABC):
     _blocking_response_type: type[AppBlockingResponse]

 if data:
     data.setdefault("message", getattr(e, "description", str(e)))
 else:
-    logging.error(e)
+    logger.error(e)
     data = {
         "code": "internal_server_error",
         "message": "Internal Server Error, please contact support.",

+3 -1  api/core/app/task_pipeline/message_cycle_manager.py

 from models.model import AppMode, Conversation, MessageAnnotation, MessageFile
 from services.annotation_service import AppAnnotationService

+logger = logging.getLogger(__name__)
+

 class MessageCycleManager:
     def __init__(

     conversation.name = name
 except Exception as e:
     if dify_config.DEBUG:
-        logging.exception("generate conversation name failed, conversation_id: %s", conversation_id)
+        logger.exception("generate conversation name failed, conversation_id: %s", conversation_id)
     pass

 db.session.merge(conversation)

+6 -4  api/core/extension/extensible.py

 from core.helper.position_helper import sort_to_dict_by_position_map

+logger = logging.getLogger(__name__)
+

 class ExtensionModule(enum.Enum):
     MODERATION = "moderation"

 # Check for extension module file
 if (extension_name + ".py") not in file_names:
-    logging.warning("Missing %s.py file in %s, Skip.", extension_name, subdir_path)
+    logger.warning("Missing %s.py file in %s, Skip.", extension_name, subdir_path)
     continue

 # Check for builtin flag and position
     break

 if not extension_class:
-    logging.warning("Missing subclass of %s in %s, Skip.", cls.__name__, module_name)
+    logger.warning("Missing subclass of %s in %s, Skip.", cls.__name__, module_name)
     continue

 # Load schema if not builtin
 if not builtin:
     json_path = os.path.join(subdir_path, "schema.json")
     if not os.path.exists(json_path):
-        logging.warning("Missing schema.json file in %s, Skip.", subdir_path)
+        logger.warning("Missing schema.json file in %s, Skip.", subdir_path)
         continue

     with open(json_path, encoding="utf-8") as f:

 )

 except Exception as e:
-    logging.exception("Error scanning extensions")
+    logger.exception("Error scanning extensions")
     raise

 # Sort extensions by position

+3 -1  api/core/helper/module_import_helper.py

 from types import ModuleType
 from typing import AnyStr

+logger = logging.getLogger(__name__)
+

 def import_module_from_source(*, module_name: str, py_file_path: AnyStr, use_lazy_loader: bool = False) -> ModuleType:
     """

     spec.loader.exec_module(module)
     return module
 except Exception as e:
-    logging.exception("Failed to load module %s from script file '%s'", module_name, repr(py_file_path))
+    logger.exception("Failed to load module %s from script file '%s'", module_name, repr(py_file_path))
     raise e





+4 -2  api/core/helper/ssrf_proxy.py

 from configs import dify_config

+logger = logging.getLogger(__name__)
+
 SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES

 HTTP_REQUEST_NODE_SSL_VERIFY = True  # Default value for HTTP_REQUEST_NODE_SSL_VERIFY is True

 if response.status_code not in STATUS_FORCELIST:
     return response
 else:
-    logging.warning(
+    logger.warning(
         "Received status code %s for URL %s which is in the force list", response.status_code, url
     )

 except httpx.RequestError as e:
-    logging.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e)
+    logger.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e)
     if max_retries == 0:
         raise



+7 -5  api/core/indexing_runner.py

 from models.model import UploadFile
 from services.feature_service import FeatureService

+logger = logging.getLogger(__name__)
+

 class IndexingRunner:
     def __init__(self):

     dataset_document.stopped_at = naive_utc_now()
     db.session.commit()
 except ObjectDeletedError:
-    logging.warning("Document deleted, document id: %s", dataset_document.id)
+    logger.warning("Document deleted, document id: %s", dataset_document.id)
 except Exception as e:
-    logging.exception("consume document failed")
+    logger.exception("consume document failed")
     dataset_document.indexing_status = "error"
     dataset_document.error = str(e)
     dataset_document.stopped_at = naive_utc_now()

     dataset_document.stopped_at = naive_utc_now()
     db.session.commit()
 except Exception as e:
-    logging.exception("consume document failed")
+    logger.exception("consume document failed")
     dataset_document.indexing_status = "error"
     dataset_document.error = str(e)
     dataset_document.stopped_at = naive_utc_now()

     dataset_document.stopped_at = naive_utc_now()
     db.session.commit()
 except Exception as e:
-    logging.exception("consume document failed")
+    logger.exception("consume document failed")
     dataset_document.indexing_status = "error"
     dataset_document.error = str(e)
     dataset_document.stopped_at = naive_utc_now()

 try:
     storage.delete(image_file.key)
 except Exception:
-    logging.exception(
+    logger.exception(
         "Delete image_files failed while indexing_estimate, \
         image_upload_file_is: %s",
         upload_file_id,

+9 -7  api/core/llm_generator/llm_generator.py

 from core.workflow.graph_engine.entities.event import AgentLogEvent
 from models import App, Message, WorkflowNodeExecutionModel, db

+logger = logging.getLogger(__name__)
+

 class LLMGenerator:
     @classmethod

     result_dict = json.loads(cleaned_answer)
     answer = result_dict["Your Output"]
 except json.JSONDecodeError as e:
-    logging.exception("Failed to generate name after answer, use query instead")
+    logger.exception("Failed to generate name after answer, use query instead")
     answer = query
 name = answer.strip()

 except InvokeError:
     questions = []
 except Exception:
-    logging.exception("Failed to generate suggested questions after answer")
+    logger.exception("Failed to generate suggested questions after answer")
     questions = []

 return questions

     error = str(e)
     error_step = "generate rule config"
 except Exception as e:
-    logging.exception("Failed to generate rule config, model: %s", model_config.get("name"))
+    logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
     rule_config["error"] = str(e)

 rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

     error_step = "generate conversation opener"

 except Exception as e:
-    logging.exception("Failed to generate rule config, model: %s", model_config.get("name"))
+    logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
     rule_config["error"] = str(e)

 rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

     error = str(e)
     return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"}
 except Exception as e:
-    logging.exception(
+    logger.exception(
         "Failed to invoke LLM model, model: %s, language: %s", model_config.get("name"), code_language
     )
     return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"}

     error = str(e)
     return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"}
 except Exception as e:
-    logging.exception("Failed to invoke LLM model, model: %s", model_config.get("name"))
+    logger.exception("Failed to invoke LLM model, model: %s", model_config.get("name"))
     return {"output": "", "error": f"An unexpected error occurred: {str(e)}"}

 @staticmethod

     error = str(e)
     return {"error": f"Failed to generate code. Error: {error}"}
 except Exception as e:
-    logging.exception("Failed to invoke LLM model, model: %s", model_config.get("name"), exc_info=e)
+    logger.exception("Failed to invoke LLM model, model: " + json.dumps(model_config.get("name")), exc_info=e)
     return {"error": f"An unexpected error occurred: {str(e)}"}

+1 -1  api/core/mcp/mcp_client.py

     # ExitStack will handle proper cleanup of all managed context managers
     self._exit_stack.close()
 except Exception as e:
-    logging.exception("Error during cleanup")
+    logger.exception("Error during cleanup")
     raise ValueError(f"Error during cleanup: {e}")
 finally:
     self._session = None

+5 -2  api/core/mcp/session/base_session.py

     SessionMessage,
 )

+logger = logging.getLogger(__name__)
+
 SendRequestT = TypeVar("SendRequestT", ClientRequest, ServerRequest)
 SendResultT = TypeVar("SendResultT", ClientResult, ServerResult)
 SendNotificationT = TypeVar("SendNotificationT", ClientNotification, ServerNotification)

     self._handle_incoming(notification)
 except Exception as e:
     # For other validation errors, log and continue
-    logging.warning("Failed to validate notification: %s. Message was: %s", e, message.message.root)
+    logger.warning("Failed to validate notification: %s. Message was: %s", e, message.message.root)
 else:  # Response or error
     response_queue = self._response_streams.get(message.message.root.id)
     if response_queue is not None:

 except queue.Empty:
     continue
 except Exception:
-    logging.exception("Error in message processing loop")
+    logger.exception("Error in message processing loop")
     raise

 def _received_request(self, responder: RequestResponder[ReceiveRequestT, SendResultT]) -> None:

+1 -1  api/core/ops/aliyun_trace/aliyun_trace.py

     node_span = self.build_workflow_task_span(trace_id, workflow_span_id, trace_info, node_execution)
     return node_span
 except Exception as e:
-    logging.debug("Error occurred in build_workflow_node_span: %s", e, exc_info=True)
+    logger.debug("Error occurred in build_workflow_node_span: %s", e, exc_info=True)
     return None

 def get_workflow_node_status(self, node_execution: WorkflowNodeExecution) -> Status:

+5 -3  api/core/ops/ops_trace_manager.py

 from models.workflow import WorkflowAppLog, WorkflowRun
 from tasks.ops_trace_task import process_trace_tasks

+logger = logging.getLogger(__name__)
+

 class OpsTraceProviderConfigMap(dict[str, dict[str, Any]]):
     def __getitem__(self, provider: str) -> dict[str, Any]:

 # create new tracing_instance and update the cache if it absent
 tracing_instance = trace_instance(config_class(**decrypt_trace_config))
 cls.ops_trace_instances_cache[decrypt_trace_config_key] = tracing_instance
-logging.info("new tracing_instance for app_id: %s", app_id)
+logger.info("new tracing_instance for app_id: %s", app_id)
 return tracing_instance

 @classmethod

     trace_task.app_id = self.app_id
     trace_manager_queue.put(trace_task)
 except Exception as e:
-    logging.exception("Error adding trace task, trace_type %s", trace_task.trace_type)
+    logger.exception("Error adding trace task, trace_type %s", trace_task.trace_type)
 finally:
     self.start_timer()

     if tasks:
         self.send_to_celery(tasks)
 except Exception as e:
-    logging.exception("Error processing trace tasks")
+    logger.exception("Error processing trace tasks")

 def start_timer(self):
     global trace_manager_timer

+ 3
- 3
api/core/plugin/impl/base.py Visa fil

            response.raise_for_status()
        except HTTPError as e:
            msg = f"Failed to request plugin daemon, status: {e.response.status_code}, url: {path}"
-           logging.exception(msg)
+           logger.exception(msg)
            raise e
        except Exception as e:
            msg = f"Failed to request plugin daemon, url: {path}"
-           logging.exception(msg)
+           logger.exception(msg)
            raise ValueError(msg) from e

        try:

                f"Failed to parse response from plugin daemon to PluginDaemonBasicResponse [{str(type.__name__)}],"
                f" url: {path}"
            )
-           logging.exception(msg)
+           logger.exception(msg)
            raise ValueError(msg)

        if rep.code != 0:
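The plugin-daemon client pairs each logger.exception call with a re-raise, recording the traceback once where the request context (status code, URL) is known and then surfacing a domain-level error. A sketch of that log-then-chain idiom under hypothetical names:

```python
import json
import logging

logger = logging.getLogger(__name__)


def load_config(path: str) -> dict:
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        msg = f"Failed to load config, path: {path}"
        logger.exception(msg)  # full traceback recorded here, once
        # `from e` keeps the original exception in the chain for callers
        raise ValueError(msg) from e
```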

+4 -2   api/core/rag/datasource/vdb/myscale/myscale_vector.py   View File

from core.rag.models.document import Document
from models.dataset import Dataset

+ logger = logging.getLogger(__name__)


class MyScaleConfig(BaseModel):
    host: str

        return self.add_texts(documents=texts, embeddings=embeddings, **kwargs)

    def _create_collection(self, dimension: int):
-       logging.info("create MyScale collection %s with dimension %s", self._collection_name, dimension)
+       logger.info("create MyScale collection %s with dimension %s", self._collection_name, dimension)
        self._client.command(f"CREATE DATABASE IF NOT EXISTS {self._config.database}")
        fts_params = f"('{self._config.fts_params}')" if self._config.fts_params else ""
        sql = f"""

            for r in self._client.query(sql).named_results()
        ]
    except Exception as e:
-       logging.exception("\033[91m\033[1m%s\033[0m \033[95m%s\033[0m", type(e), str(e))  # noqa:TRY401
+       logger.exception("\033[91m\033[1m%s\033[0m \033[95m%s\033[0m", type(e), str(e))  # noqa:TRY401
        return []

    def delete(self) -> None:

+3 -1   api/core/rag/datasource/vdb/pgvector/pgvector.py   View File

from extensions.ext_redis import redis_client
from models.dataset import Dataset

+ logger = logging.getLogger(__name__)


class PGVectorConfig(BaseModel):
    host: str

                cur.execute(f"DELETE FROM {self.table_name} WHERE id IN %s", (tuple(ids),))
        except psycopg2.errors.UndefinedTable:
            # table not exists
-           logging.warning("Table %s not found, skipping delete operation.", self.table_name)
+           logger.warning("Table %s not found, skipping delete operation.", self.table_name)
            return
        except Exception as e:
            raise e

+10 -8   api/core/rag/datasource/vdb/tablestore/tablestore_vector.py   View File

from extensions.ext_redis import redis_client
from models import Dataset

+ logger = logging.getLogger(__name__)


class TableStoreConfig(BaseModel):
    access_key_id: Optional[str] = None

        with redis_client.lock(lock_name, timeout=20):
            collection_exist_cache_key = f"vector_indexing_{self._collection_name}"
            if redis_client.get(collection_exist_cache_key):
-               logging.info("Collection %s already exists.", self._collection_name)
+               logger.info("Collection %s already exists.", self._collection_name)
                return

            self._create_table_if_not_exist()

    def _create_table_if_not_exist(self) -> None:
        table_list = self._tablestore_client.list_table()
        if self._table_name in table_list:
-           logging.info("Tablestore system table[%s] already exists", self._table_name)
+           logger.info("Tablestore system table[%s] already exists", self._table_name)
            return None

        schema_of_primary_key = [("id", "STRING")]

        table_options = tablestore.TableOptions()
        reserved_throughput = tablestore.ReservedThroughput(tablestore.CapacityUnit(0, 0))
        self._tablestore_client.create_table(table_meta, table_options, reserved_throughput)
-       logging.info("Tablestore create table[%s] successfully.", self._table_name)
+       logger.info("Tablestore create table[%s] successfully.", self._table_name)

    def _create_search_index_if_not_exist(self, dimension: int) -> None:
        search_index_list = self._tablestore_client.list_search_index(table_name=self._table_name)
        if self._index_name in [t[1] for t in search_index_list]:
-           logging.info("Tablestore system index[%s] already exists", self._index_name)
+           logger.info("Tablestore system index[%s] already exists", self._index_name)
            return None

        field_schemas = [

        index_meta = tablestore.SearchIndexMeta(field_schemas)
        self._tablestore_client.create_search_index(self._table_name, self._index_name, index_meta)
-       logging.info("Tablestore create system index[%s] successfully.", self._index_name)
+       logger.info("Tablestore create system index[%s] successfully.", self._index_name)

    def _delete_table_if_exist(self):
        search_index_list = self._tablestore_client.list_search_index(table_name=self._table_name)
        for resp_tuple in search_index_list:
            self._tablestore_client.delete_search_index(resp_tuple[0], resp_tuple[1])
-           logging.info("Tablestore delete index[%s] successfully.", self._index_name)
+           logger.info("Tablestore delete index[%s] successfully.", self._index_name)

        self._tablestore_client.delete_table(self._table_name)
-       logging.info("Tablestore delete system table[%s] successfully.", self._index_name)
+       logger.info("Tablestore delete system table[%s] successfully.", self._index_name)

    def _delete_search_index(self) -> None:
        self._tablestore_client.delete_search_index(self._table_name, self._index_name)
-       logging.info("Tablestore delete index[%s] successfully.", self._index_name)
+       logger.info("Tablestore delete index[%s] successfully.", self._index_name)

    def _write_row(self, primary_key: str, attributes: dict[str, Any]) -> None:
        pk = [("id", primary_key)]

+3 -3   api/core/rag/embedding/cached_embedding.py   View File

        except IntegrityError:
            db.session.rollback()
        except Exception:
-           logging.exception("Failed transform embedding")
+           logger.exception("Failed transform embedding")

        cache_embeddings = []
        try:
            for i, n_embedding in zip(embedding_queue_indices, embedding_queue_embeddings):

                raise ValueError("Normalized embedding is nan please try again")
        except Exception as ex:
            if dify_config.DEBUG:
-               logging.exception("Failed to embed query text '%s...(%s chars)'", text[:10], len(text))
+               logger.exception("Failed to embed query text '%s...(%s chars)'", text[:10], len(text))
            raise ex

        try:
            redis_client.setex(embedding_cache_key, 600, encoded_str)
        except Exception as ex:
            if dify_config.DEBUG:
-               logging.exception(
+               logger.exception(
                    "Failed to add embedding to redis for the text '%s...(%s chars)'", text[:10], len(text)
                )
            raise ex
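cached_embedding gates its tracebacks behind dify_config.DEBUG: in production the exception is simply re-raised for the caller to handle, while debug deployments also get the full stack trace. A sketch of the guard (DEBUG stands in for the real config flag, and the embedding body is a placeholder):

```python
import logging

logger = logging.getLogger(__name__)
DEBUG = True  # stand-in for dify_config.DEBUG


def embed(text: str) -> list[float]:
    try:
        return [float(text)]  # placeholder "embedding" that can fail
    except Exception:
        if DEBUG:
            # verbose traceback only in debug deployments
            logger.exception("Failed to embed query text '%s...(%s chars)'", text[:10], len(text))
        raise
```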

+3 -1   api/core/rag/index_processor/processor/qa_index_processor.py   View File

from models.dataset import Dataset
from services.entities.knowledge_entities.knowledge_entities import Rule

+ logger = logging.getLogger(__name__)


class QAIndexProcessor(BaseIndexProcessor):
    def extract(self, extract_setting: ExtractSetting, **kwargs) -> list[Document]:

                qa_documents.append(qa_document)
            format_documents.extend(qa_documents)
        except Exception as e:
-           logging.exception("Failed to format qa document")
+           logger.exception("Failed to format qa document")

        all_qa_documents.extend(format_documents)



+5 -3   api/events/event_handlers/create_document_index.py   View File

from libs.datetime_utils import naive_utc_now
from models.dataset import Document

+ logger = logging.getLogger(__name__)


@document_index_created.connect
def handle(sender, **kwargs):

    documents = []
    start_at = time.perf_counter()
    for document_id in document_ids:
-       logging.info(click.style(f"Start process document: {document_id}", fg="green"))
+       logger.info(click.style(f"Start process document: {document_id}", fg="green"))

        document = (
            db.session.query(Document)

        indexing_runner = IndexingRunner()
        indexing_runner.run(documents)
        end_at = time.perf_counter()
-       logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
+       logger.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
    except DocumentIsPausedError as ex:
-       logging.info(click.style(str(ex), fg="yellow"))
+       logger.info(click.style(str(ex), fg="yellow"))

+3 -1   api/extensions/ext_mail.py   View File

from configs import dify_config
from dify_app import DifyApp

+ logger = logging.getLogger(__name__)


class Mail:
    def __init__(self):

    def init_app(self, app: Flask):
        mail_type = dify_config.MAIL_TYPE
        if not mail_type:
-           logging.warning("MAIL_TYPE is not set")
+           logger.warning("MAIL_TYPE is not set")
            return

        if dify_config.MAIL_DEFAULT_SEND_FROM:

+6 -4   api/extensions/ext_otel.py   View File

from libs.helper import extract_tenant_id
from models import Account, EndUser

+ logger = logging.getLogger(__name__)


@user_logged_in.connect
@user_loaded_from_request.connect

            current_span.set_attribute("service.tenant.id", tenant_id)
            current_span.set_attribute("service.user.id", user.id)
        except Exception:
-           logging.exception("Error setting tenant and user attributes")
+           logger.exception("Error setting tenant and user attributes")
            pass

            attributes[SpanAttributes.HTTP_METHOD] = str(request.method)
            _http_response_counter.add(1, attributes)
        except Exception:
-           logging.exception("Error setting status and attributes")
+           logger.exception("Error setting status and attributes")
            pass

    instrumentor = FlaskInstrumentor()
    if dify_config.DEBUG:
-       logging.info("Initializing Flask instrumentor")
+       logger.info("Initializing Flask instrumentor")
    instrumentor.instrument_app(app, response_hook=response_hook)


def init_sqlalchemy_instrumentor(app: DifyApp):

    tracer_provider = get_tracer_provider()
    metric_provider = get_meter_provider()
    if dify_config.DEBUG:
-       logging.info("Initializing OpenTelemetry for Celery worker")
+       logger.info("Initializing OpenTelemetry for Celery worker")
    CeleryInstrumentor(tracer_provider=tracer_provider, meter_provider=metric_provider).instrument()

+9 -9   api/extensions/ext_request_logging.py   View File

from configs import dify_config

- _logger = logging.getLogger(__name__)
+ logger = logging.getLogger(__name__)


def _is_content_type_json(content_type: str) -> bool:


def _log_request_started(_sender, **_extra):
    """Log the start of a request."""
-   if not _logger.isEnabledFor(logging.DEBUG):
+   if not logger.isEnabledFor(logging.DEBUG):
        return

    request = flask.request
    if not (_is_content_type_json(request.content_type) and request.data):
-       _logger.debug("Received Request %s -> %s", request.method, request.path)
+       logger.debug("Received Request %s -> %s", request.method, request.path)
        return
    try:
        json_data = json.loads(request.data)
    except (TypeError, ValueError):
-       _logger.exception("Failed to parse JSON request")
+       logger.exception("Failed to parse JSON request")
        return
    formatted_json = json.dumps(json_data, ensure_ascii=False, indent=2)
-   _logger.debug(
+   logger.debug(
        "Received Request %s -> %s, Request Body:\n%s",
        request.method,
        request.path,


def _log_request_finished(_sender, response, **_extra):
    """Log the end of a request."""
-   if not _logger.isEnabledFor(logging.DEBUG) or response is None:
+   if not logger.isEnabledFor(logging.DEBUG) or response is None:
        return

    if not _is_content_type_json(response.content_type):
-       _logger.debug("Response %s %s", response.status, response.content_type)
+       logger.debug("Response %s %s", response.status, response.content_type)
        return

    response_data = response.get_data(as_text=True)
    try:
        json_data = json.loads(response_data)
    except (TypeError, ValueError):
-       _logger.exception("Failed to parse JSON response")
+       logger.exception("Failed to parse JSON response")
        return
    formatted_json = json.dumps(json_data, ensure_ascii=False, indent=2)
-   _logger.debug(
+   logger.debug(
        "Response %s %s, Response Body:\n%s",
        response.status,
        response.content_type,
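These request hooks return early via logger.isEnabledFor(logging.DEBUG), so the JSON parsing and pretty-printing below the guard are skipped entirely when debug logging is off; the rename from _logger to logger does not change that behavior. A condensed sketch:

```python
import json
import logging

logger = logging.getLogger(__name__)


def log_payload(raw: bytes) -> None:
    # isEnabledFor checks the effective level up front, so the
    # json.loads/json.dumps work below is never done at INFO and above
    if not logger.isEnabledFor(logging.DEBUG):
        return
    try:
        data = json.loads(raw)
    except (TypeError, ValueError):
        logger.exception("Failed to parse JSON request")
        return
    logger.debug("Request Body:\n%s", json.dumps(data, ensure_ascii=False, indent=2))
```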

+3 -1   api/libs/helper.py   View File

from models.account import Account
from models.model import EndUser

+ logger = logging.getLogger(__name__)


def extract_tenant_id(user: Union["Account", "EndUser"]) -> str | None:
    """

        key = cls._get_token_key(token, token_type)
        token_data_json = redis_client.get(key)
        if token_data_json is None:
-           logging.warning("%s token %s not found with key %s", token_type, token, key)
+           logger.warning("%s token %s not found with key %s", token_type, token, key)
            return None
        token_data: Optional[dict[str, Any]] = json.loads(token_data_json)
        return token_data

+9 -7   api/libs/sendgrid.py   View File

from python_http_client.exceptions import ForbiddenError, UnauthorizedError
from sendgrid.helpers.mail import Content, Email, Mail, To  # type: ignore

+ logger = logging.getLogger(__name__)


class SendGridClient:
    def __init__(self, sendgrid_api_key: str, _from: str):

        self._from = _from

    def send(self, mail: dict):
-       logging.debug("Sending email with SendGrid")
+       logger.debug("Sending email with SendGrid")

        try:
            _to = mail["to"]

            mail = Mail(from_email, to_email, subject, content)
            mail_json = mail.get()  # type: ignore
            response = sg.client.mail.send.post(request_body=mail_json)
-           logging.debug(response.status_code)
-           logging.debug(response.body)
-           logging.debug(response.headers)
+           logger.debug(response.status_code)
+           logger.debug(response.body)
+           logger.debug(response.headers)

        except TimeoutError as e:
-           logging.exception("SendGridClient Timeout occurred while sending email")
+           logger.exception("SendGridClient Timeout occurred while sending email")
            raise
        except (UnauthorizedError, ForbiddenError) as e:
-           logging.exception(
+           logger.exception(
                "SendGridClient Authentication failed. "
                "Verify that your credentials and the 'from' email address are correct"
            )
            raise
        except Exception as e:
-           logging.exception("SendGridClient Unexpected error occurred while sending email to %s", _to)
+           logger.exception("SendGridClient Unexpected error occurred while sending email to %s", _to)
            raise

+5 -3   api/libs/smtp.py   View File

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

+ logger = logging.getLogger(__name__)


class SMTPClient:
    def __init__(

            smtp.sendmail(self._from, mail["to"], msg.as_string())
        except smtplib.SMTPException as e:
-           logging.exception("SMTP error occurred")
+           logger.exception("SMTP error occurred")
            raise
        except TimeoutError as e:
-           logging.exception("Timeout occurred while sending email")
+           logger.exception("Timeout occurred while sending email")
            raise
        except Exception as e:
-           logging.exception("Unexpected error occurred while sending email to %s", mail["to"])
+           logger.exception("Unexpected error occurred while sending email to %s", mail["to"])
            raise
        finally:
            if smtp:

+3 -1   api/models/dataset.py   View File

from .model import App, Tag, TagBinding, UploadFile
from .types import StringUUID

+ logger = logging.getLogger(__name__)


class DatasetPermissionEnum(enum.StrEnum):
    ONLY_ME = "only_me"

                return json.loads(keyword_table_text.decode("utf-8"), cls=SetDecoder)
            return None
        except Exception as e:
-           logging.exception("Failed to load keyword table from file: %s", file_key)
+           logger.exception("Failed to load keyword table from file: %s", file_key)
            return None





+2 -2   api/models/workflow.py   View File

from .enums import CreatorUserRole, DraftVariableType
from .types import EnumText, StringUUID

- _logger = logging.getLogger(__name__)
+ logger = logging.getLogger(__name__)


class WorkflowType(Enum):

    def get_selector(self) -> list[str]:
        selector = json.loads(self.selector)
        if not isinstance(selector, list):
-           _logger.error(
+           logger.error(
                "invalid selector loaded from database, type=%s, value=%s",
                type(selector),
                self.selector,

+2 -2   api/schedule/clean_messages.py   View File

from models.web import SavedMessage
from services.feature_service import FeatureService

- _logger = logging.getLogger(__name__)
+ logger = logging.getLogger(__name__)


@app.celery.task(queue="dataset")

        plan_sandbox_clean_message_day = message.created_at
        app = db.session.query(App).filter_by(id=message.app_id).first()
        if not app:
-           _logger.warning(
+           logger.warning(
                "Expected App record to exist, but none was found, app_id=%s, message_id=%s",
                message.app_id,
                message.id,

+8 -8   api/schedule/clean_workflow_runlogs_precise.py   View File

)
from models.workflow import ConversationVariable, WorkflowAppLog, WorkflowNodeExecutionModel, WorkflowRun

- _logger = logging.getLogger(__name__)
+ logger = logging.getLogger(__name__)


MAX_RETRIES = 3

    try:
        total_workflow_runs = db.session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count()
        if total_workflow_runs == 0:
-           _logger.info("No expired workflow run logs found")
+           logger.info("No expired workflow run logs found")
            return
-       _logger.info("Found %s expired workflow run logs to clean", total_workflow_runs)
+       logger.info("Found %s expired workflow run logs to clean", total_workflow_runs)

        total_deleted = 0
        failed_batches = 0

            else:
                failed_batches += 1
                if failed_batches >= MAX_RETRIES:
-                   _logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
+                   logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
                    break
                else:
                    # Calculate incremental delay times: 5, 10, 15 minutes
                    retry_delay_minutes = failed_batches * 5
-                   _logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
+                   logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
                    time.sleep(retry_delay_minutes * 60)
                    continue

-       _logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted)
+       logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted)

    except Exception as e:
        db.session.rollback()
-       _logger.exception("Unexpected error in workflow log cleanup")
+       logger.exception("Unexpected error in workflow log cleanup")
        raise

    end_at = time.perf_counter()

    except Exception as e:
        db.session.rollback()
-       _logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1)
+       logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1)
        return False
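The cleanup job retries a failed batch with a linearly growing delay (5, 10, 15 minutes) and aborts after MAX_RETRIES consecutive failures. A compressed sketch of that loop, with seconds instead of minutes and delete_batch as a stand-in for the real deletion:

```python
import logging
import time

logger = logging.getLogger(__name__)

MAX_RETRIES = 3


def delete_batch(batch_no: int) -> bool:
    """Stand-in for the real batch delete; pretend it always fails."""
    return False


failed_batches = 0
batch_no = 0
while batch_no < 10:
    if delete_batch(batch_no):
        failed_batches = 0
        batch_no += 1
    else:
        failed_batches += 1
        if failed_batches >= MAX_RETRIES:
            logger.error("Failed to delete batch after %s retries, aborting", MAX_RETRIES)
            break
        retry_delay = failed_batches * 5  # linear backoff: 5, then 10
        logger.warning("Batch deletion failed, retrying in %s seconds...", retry_delay)
        time.sleep(retry_delay)
```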

+5 -5   api/schedule/mail_clean_document_notify_task.py   View File

from models.dataset import Dataset, DatasetAutoDisableLog
from services.feature_service import FeatureService

+ logger = logging.getLogger(__name__)


@app.celery.task(queue="dataset")
def mail_clean_document_notify_task():

    if not mail.is_inited():
        return

-   logging.info(click.style("Start send document clean notify mail", fg="green"))
+   logger.info(click.style("Start send document clean notify mail", fg="green"))
    start_at = time.perf_counter()

    # send document clean notify mail

        dataset_auto_disable_log.notified = True
        db.session.commit()
        end_at = time.perf_counter()
-       logging.info(
-           click.style(f"Send document clean notify mail succeeded: latency: {end_at - start_at}", fg="green")
-       )
+       logger.info(click.style(f"Send document clean notify mail succeeded: latency: {end_at - start_at}", fg="green"))
    except Exception:
-       logging.exception("Send document clean notify mail failed")
+       logger.exception("Send document clean notify mail failed")

+9 -7   api/schedule/queue_monitor_task.py   View File

    db=int(redis_config.get("virtual_host")) if redis_config.get("virtual_host") else 1,
)

+ logger = logging.getLogger(__name__)


@app.celery.task(queue="monitor")
def queue_monitor_task():
    threshold = dify_config.QUEUE_MONITOR_THRESHOLD

    if threshold is None:
-       logging.warning(click.style("QUEUE_MONITOR_THRESHOLD is not configured, skipping monitoring", fg="yellow"))
+       logger.warning(click.style("QUEUE_MONITOR_THRESHOLD is not configured, skipping monitoring", fg="yellow"))
        return

    try:
        queue_length = celery_redis.llen(f"{queue_name}")
-       logging.info(click.style(f"Start monitor {queue_name}", fg="green"))
+       logger.info(click.style(f"Start monitor {queue_name}", fg="green"))

        if queue_length is None:
-           logging.error(
+           logger.error(
                click.style(f"Failed to get queue length for {queue_name} - Redis may be unavailable", fg="red")
            )
            return

-       logging.info(click.style(f"Queue length: {queue_length}", fg="green"))
+       logger.info(click.style(f"Queue length: {queue_length}", fg="green"))

        if queue_length >= threshold:
            warning_msg = f"Queue {queue_name} task count exceeded the limit.: {queue_length}/{threshold}"
-           logging.warning(click.style(warning_msg, fg="red"))
+           logger.warning(click.style(warning_msg, fg="red"))
            alter_emails = dify_config.QUEUE_MONITOR_ALERT_EMAILS
            if alter_emails:
                to_list = alter_emails.split(",")

                    },
                )
            except Exception as e:
-               logging.exception(click.style("Exception occurred during sending email", fg="red"))
+               logger.exception(click.style("Exception occurred during sending email", fg="red"))

    except Exception as e:
-       logging.exception(click.style("Exception occurred during queue monitoring", fg="red"))
+       logger.exception(click.style("Exception occurred during queue monitoring", fg="red"))
    finally:
        if db.session.is_active:
            db.session.close()
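Several scheduled tasks wrap their messages in click.style, which embeds ANSI color escape codes so the lines stand out in terminal output (green for progress, red for failures). The styled string is built eagerly, which is acceptable for always-on INFO messages; a small sketch with sample values:

```python
import logging

import click  # click.style wraps text in ANSI color escape codes

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

queue_name, queue_length, threshold = "dataset", 120, 100  # sample values

logger.info(click.style(f"Start monitor {queue_name}", fg="green"))
if queue_length >= threshold:
    # red makes threshold breaches stand out in a terminal; note the
    # escape codes also end up verbatim in any file-based handlers
    logger.warning(click.style(f"Queue {queue_name}: {queue_length}/{threshold}", fg="red"))
```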

+9 -7   api/services/account_service.py   View File

)
from tasks.mail_reset_password_task import send_reset_password_mail_task

+ logger = logging.getLogger(__name__)


class TokenPair(BaseModel):
    access_token: str

            db.session.add(account_integrate)

            db.session.commit()
-           logging.info("Account %s linked %s account %s.", account.id, provider, open_id)
+           logger.info("Account %s linked %s account %s.", account.id, provider, open_id)
        except Exception as e:
-           logging.exception("Failed to link %s account %s to Account %s", provider, open_id, account.id)
+           logger.exception("Failed to link %s account %s to Account %s", provider, open_id, account.id)
            raise LinkAccountIntegrateError("Failed to link account.") from e

    @staticmethod

        """Create tenant member"""
        if role == TenantAccountRole.OWNER.value:
            if TenantService.has_roles(tenant, [TenantAccountRole.OWNER]):
-               logging.error("Tenant %s has already an owner.", tenant.id)
+               logger.error("Tenant %s has already an owner.", tenant.id)
                raise Exception("Tenant already has an owner.")

        ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first()

        db.session.query(Tenant).delete()
        db.session.commit()

-       logging.exception("Setup account failed, email: %s, name: %s", email, name)
+       logger.exception("Setup account failed, email: %s, name: %s", email, name)
        raise ValueError(f"Setup failed: {e}")

    @classmethod

        db.session.commit()
    except WorkSpaceNotAllowedCreateError:
        db.session.rollback()
-       logging.exception("Register failed")
+       logger.exception("Register failed")
        raise AccountRegisterError("Workspace is not allowed to create.")
    except AccountRegisterError as are:
        db.session.rollback()
-       logging.exception("Register failed")
+       logger.exception("Register failed")
        raise are
    except Exception as e:
        db.session.rollback()
-       logging.exception("Register failed")
+       logger.exception("Register failed")
        raise AccountRegisterError(f"Registration failed: {e}") from e

    return account

+3 -1   api/services/app_service.py   View File

from services.tag_service import TagService
from tasks.remove_app_and_related_data_task import remove_app_and_related_data_task

+ logger = logging.getLogger(__name__)


class AppService:
    def get_paginate_apps(self, user_id: str, tenant_id: str, args: dict) -> Pagination | None:

        except (ProviderTokenNotInitError, LLMBadRequestError):
            model_instance = None
        except Exception as e:
-           logging.exception("Get default model instance failed, tenant_id: %s", tenant_id)
+           logger.exception("Get default model instance failed, tenant_id: %s", tenant_id)
            model_instance = None

        if model_instance:

+16 -14   api/services/dataset_service.py   View File

from tasks.retry_document_indexing_task import retry_document_indexing_task
from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task

+ logger = logging.getLogger(__name__)


class DatasetService:
    @staticmethod

            )
        except ProviderTokenNotInitError:
            # If we can't get the embedding model, preserve existing settings
-           logging.warning(
+           logger.warning(
                "Failed to initialize embedding model %s/%s, preserving existing settings",
                data["embedding_model_provider"],
                data["embedding_model"],

    @staticmethod
    def check_dataset_permission(dataset, user):
        if dataset.tenant_id != user.current_tenant_id:
-           logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
+           logger.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
            raise NoPermissionError("You do not have permission to access this dataset.")
        if user.current_role != TenantAccountRole.OWNER:
            if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id:
-               logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
+               logger.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
                raise NoPermissionError("You do not have permission to access this dataset.")
            if dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM:
                # For partial team permission, user needs explicit permission or be the creator

                    db.session.query(DatasetPermission).filter_by(dataset_id=dataset.id, account_id=user.id).first()
                )
                if not user_permission:
-                   logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
+                   logger.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
                    raise NoPermissionError("You do not have permission to access this dataset.")

    @staticmethod

                created_by=account.id,
            )
        else:
-           logging.warning(
+           logger.warning(
                "Invalid process rule mode: %s, can not find dataset process rule",
                process_rule.mode,
            )

                task_func.delay(*task_args)
            except Exception as e:
                # Log the error but do not rollback the transaction
-               logging.exception("Error executing async task for document %s", update_info["document"].id)
+               logger.exception("Error executing async task for document %s", update_info["document"].id)
                # don't raise the error immediately, but capture it for later
                propagation_error = e
            try:
                redis_client.setex(indexing_cache_key, 600, 1)
            except Exception as e:
                # Log the error but do not rollback the transaction
-               logging.exception("Error setting cache for document %s", update_info["document"].id)
+               logger.exception("Error setting cache for document %s", update_info["document"].id)
        # Raise any propagation error after all updates
        if propagation_error:
            raise propagation_error

        try:
            VectorService.create_segments_vector([args["keywords"]], [segment_document], dataset, document.doc_form)
        except Exception as e:
-           logging.exception("create segment index failed")
+           logger.exception("create segment index failed")
            segment_document.enabled = False
            segment_document.disabled_at = naive_utc_now()
            segment_document.status = "error"

            # save vector index
            VectorService.create_segments_vector(keywords_list, pre_segment_data_list, dataset, document.doc_form)
        except Exception as e:
-           logging.exception("create segment index failed")
+           logger.exception("create segment index failed")
            for segment_document in segment_data_list:
                segment_document.enabled = False
                segment_document.disabled_at = naive_utc_now()

            VectorService.update_segment_vector(args.keywords, segment, dataset)

        except Exception as e:
-           logging.exception("update segment index failed")
+           logger.exception("update segment index failed")
            segment.enabled = False
            segment.disabled_at = naive_utc_now()
            segment.status = "error"

        try:
            VectorService.create_child_chunk_vector(child_chunk, dataset)
        except Exception as e:
-           logging.exception("create child chunk index failed")
+           logger.exception("create child chunk index failed")
            db.session.rollback()
            raise ChildChunkIndexingError(str(e))
        db.session.commit()

            VectorService.update_child_chunk_vector(new_child_chunks, update_child_chunks, delete_child_chunks, dataset)
            db.session.commit()
        except Exception as e:
-           logging.exception("update child chunk index failed")
+           logger.exception("update child chunk index failed")
            db.session.rollback()
            raise ChildChunkIndexingError(str(e))
        return sorted(new_child_chunks + update_child_chunks, key=lambda x: x.position)

            VectorService.update_child_chunk_vector([], [child_chunk], [], dataset)
            db.session.commit()
        except Exception as e:
-           logging.exception("update child chunk index failed")
+           logger.exception("update child chunk index failed")
            db.session.rollback()
            raise ChildChunkIndexingError(str(e))
        return child_chunk

        try:
            VectorService.delete_child_chunk_vector(child_chunk, dataset)
        except Exception as e:
-           logging.exception("delete child chunk index failed")
+           logger.exception("delete child chunk index failed")
            db.session.rollback()
            raise ChildChunkDeleteIndexError(str(e))
        db.session.commit()

+4 -2   api/services/hit_testing_service.py   View File

from models.account import Account
from models.dataset import Dataset, DatasetQuery

+ logger = logging.getLogger(__name__)

default_retrieval_model = {
    "search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
    "reranking_enable": False,

        )

        end = time.perf_counter()
-       logging.debug("Hit testing retrieve in %s seconds", end - start)
+       logger.debug("Hit testing retrieve in %s seconds", end - start)

        dataset_query = DatasetQuery(
            dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id
        )

        end = time.perf_counter()
-       logging.debug("External knowledge hit testing retrieve in %s seconds", end - start)
+       logger.debug("External knowledge hit testing retrieve in %s seconds", end - start)

        dataset_query = DatasetQuery(
            dataset_id=dataset.id, content=query, source="hit_testing", created_by_role="account", created_by=account.id

+7 -5   api/services/metadata_service.py   View File

    MetadataOperationData,
)

+ logger = logging.getLogger(__name__)


class MetadataService:
    @staticmethod

            db.session.commit()
            return metadata  # type: ignore
        except Exception:
-           logging.exception("Update metadata name failed")
+           logger.exception("Update metadata name failed")
        finally:
            redis_client.delete(lock_key)

            db.session.commit()
            return metadata
        except Exception:
-           logging.exception("Delete metadata failed")
+           logger.exception("Delete metadata failed")
        finally:
            redis_client.delete(lock_key)

            dataset.built_in_field_enabled = True
            db.session.commit()
        except Exception:
-           logging.exception("Enable built-in field failed")
+           logger.exception("Enable built-in field failed")
        finally:
            redis_client.delete(lock_key)

            dataset.built_in_field_enabled = False
            db.session.commit()
        except Exception:
-           logging.exception("Disable built-in field failed")
+           logger.exception("Disable built-in field failed")
        finally:
            redis_client.delete(lock_key)

            db.session.add(dataset_metadata_binding)
            db.session.commit()
        except Exception:
-           logging.exception("Update documents metadata failed")
+           logger.exception("Update documents metadata failed")
        finally:
            redis_client.delete(lock_key)



+2 -2   api/services/vector_service.py   View File

from models.dataset import Document as DatasetDocument
from services.entities.knowledge_entities.knowledge_entities import ParentMode

- _logger = logging.getLogger(__name__)
+ logger = logging.getLogger(__name__)


class VectorService:

        if doc_form == IndexType.PARENT_CHILD_INDEX:
            dataset_document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first()
            if not dataset_document:
-               _logger.warning(
+               logger.warning(
                    "Expected DatasetDocument record to exist, but none was found, document_id=%s, segment_id=%s",
                    segment.document_id,
                    segment.id,

+6 -6   api/services/workflow_draft_variable_service.py   View File

from models.workflow import Workflow, WorkflowDraftVariable, is_system_variable_editable
from repositories.factory import DifyAPIRepositoryFactory

- _logger = logging.getLogger(__name__)
+ logger = logging.getLogger(__name__)


@dataclasses.dataclass(frozen=True)

        if conv_var is None:
            self._session.delete(instance=variable)
            self._session.flush()
-           _logger.warning(
+           logger.warning(
                "Conversation variable not found for draft variable, id=%s, name=%s", variable.id, variable.name
            )
            return None

        if variable.node_execution_id is None:
            self._session.delete(instance=variable)
            self._session.flush()
-           _logger.warning("draft variable has no node_execution_id, id=%s, name=%s", variable.id, variable.name)
+           logger.warning("draft variable has no node_execution_id, id=%s, name=%s", variable.id, variable.name)
            return None

        node_exec = self._api_node_execution_repo.get_execution_by_id(variable.node_execution_id)
        if node_exec is None:
-           _logger.warning(
+           logger.warning(
                "Node exectution not found for draft variable, id=%s, name=%s, node_execution_id=%s",
                variable.id,
                variable.name,

            return None
        segment = draft_var.get_value()
        if not isinstance(segment, StringSegment):
-           _logger.warning(
+           logger.warning(
                "sys.conversation_id variable is not a string: app_id=%s, id=%s",
                app_id,
                draft_var.id,

        draft_vars = []
        for name, value in output.items():
            if not self._should_variable_be_saved(name):
-               _logger.debug(
+               logger.debug(
                    "Skip saving variable as it has been excluded by its node_type, name=%s, node_type=%s",
                    name,
                    self._node_type,

+6 -4   api/tasks/add_document_to_index_task.py   View File

from models.dataset import DatasetAutoDisableLog, DocumentSegment
from models.dataset import Document as DatasetDocument

+ logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def add_document_to_index_task(dataset_document_id: str):

    Usage: add_document_to_index_task.delay(dataset_document_id)
    """
-   logging.info(click.style(f"Start add document to index: {dataset_document_id}", fg="green"))
+   logger.info(click.style(f"Start add document to index: {dataset_document_id}", fg="green"))
    start_at = time.perf_counter()

    dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == dataset_document_id).first()
    if not dataset_document:
-       logging.info(click.style(f"Document not found: {dataset_document_id}", fg="red"))
+       logger.info(click.style(f"Document not found: {dataset_document_id}", fg="red"))
        db.session.close()
        return

        db.session.commit()

        end_at = time.perf_counter()
-       logging.info(
+       logger.info(
            click.style(f"Document added to index: {dataset_document.id} latency: {end_at - start_at}", fg="green")
        )
    except Exception as e:
-       logging.exception("add document to index failed")
+       logger.exception("add document to index failed")
        dataset_document.enabled = False
        dataset_document.disabled_at = naive_utc_now()
        dataset_document.indexing_status = "error"
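The Celery tasks above share one timing idiom: take time.perf_counter() at entry and log the elapsed latency on success. A trimmed sketch, with the task body replaced by a sleep:

```python
import logging
import time

logger = logging.getLogger(__name__)


def index_document(document_id: str) -> None:
    logger.info("Start add document to index: %s", document_id)
    start_at = time.perf_counter()
    try:
        time.sleep(0.1)  # placeholder for the real indexing work
        end_at = time.perf_counter()
        # perf_counter is monotonic, so the difference is a reliable duration
        logger.info("Document added to index: %s latency: %s", document_id, end_at - start_at)
    except Exception:
        logger.exception("add document to index failed")
```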

+5 -3   api/tasks/annotation/add_annotation_to_index_task.py   View File

from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService

+ logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def add_annotation_to_index_task(

    Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
    """
-   logging.info(click.style(f"Start build index for annotation: {annotation_id}", fg="green"))
+   logger.info(click.style(f"Start build index for annotation: {annotation_id}", fg="green"))
    start_at = time.perf_counter()

    try:

        vector.create([document], duplicate_check=True)

        end_at = time.perf_counter()
-       logging.info(
+       logger.info(
            click.style(
                f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}",
                fg="green",
            )
        )
    except Exception:
-       logging.exception("Build index for annotation failed")
+       logger.exception("Build index for annotation failed")
    finally:
        db.session.close()

+5 -3   api/tasks/annotation/batch_import_annotations_task.py   View File

from models.model import App, AppAnnotationSetting, MessageAnnotation
from services.dataset_service import DatasetCollectionBindingService

+ logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id: str, tenant_id: str, user_id: str):

    :param user_id: user_id

    """
-   logging.info(click.style(f"Start batch import annotation: {job_id}", fg="green"))
+   logger.info(click.style(f"Start batch import annotation: {job_id}", fg="green"))
    start_at = time.perf_counter()
    indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"
    # get app info

        db.session.commit()
        redis_client.setex(indexing_cache_key, 600, "completed")
        end_at = time.perf_counter()
-       logging.info(
+       logger.info(
            click.style(
                "Build index successful for batch import annotation: {} latency: {}".format(
                    job_id, end_at - start_at

        redis_client.setex(indexing_cache_key, 600, "error")
        indexing_error_msg_key = f"app_annotation_batch_import_error_msg_{str(job_id)}"
        redis_client.setex(indexing_error_msg_key, 600, str(e))
-       logging.exception("Build index for batch import annotations failed")
+       logger.exception("Build index for batch import annotations failed")
    finally:
        db.session.close()

+6 -4   api/tasks/annotation/delete_annotation_index_task.py   View File

from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService

+ logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str, collection_binding_id: str):
    """
    Async delete annotation index task
    """
-   logging.info(click.style(f"Start delete app annotation index: {app_id}", fg="green"))
+   logger.info(click.style(f"Start delete app annotation index: {app_id}", fg="green"))
    start_at = time.perf_counter()
    try:
        dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding_by_id_and_type(

            vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
            vector.delete_by_metadata_field("annotation_id", annotation_id)
        except Exception:
-           logging.exception("Delete annotation index failed when annotation deleted.")
+           logger.exception("Delete annotation index failed when annotation deleted.")
        end_at = time.perf_counter()
-       logging.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green"))
+       logger.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green"))
    except Exception as e:
-       logging.exception("Annotation deleted index failed")
+       logger.exception("Annotation deleted index failed")
    finally:
        db.session.close()

+8 -6   api/tasks/annotation/disable_annotation_reply_task.py   View File

from models.dataset import Dataset from models.dataset import Dataset
from models.model import App, AppAnnotationSetting, MessageAnnotation from models.model import App, AppAnnotationSetting, MessageAnnotation


logger = logging.getLogger(__name__)



@shared_task(queue="dataset") @shared_task(queue="dataset")
def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str): def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
""" """
Async enable annotation reply task Async enable annotation reply task
""" """
logging.info(click.style(f"Start delete app annotations index: {app_id}", fg="green"))
logger.info(click.style(f"Start delete app annotations index: {app_id}", fg="green"))
start_at = time.perf_counter() start_at = time.perf_counter()
# get app info # get app info
app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first() app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
annotations_count = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).count() annotations_count = db.session.query(MessageAnnotation).where(MessageAnnotation.app_id == app_id).count()
if not app: if not app:
logging.info(click.style(f"App not found: {app_id}", fg="red"))
logger.info(click.style(f"App not found: {app_id}", fg="red"))
db.session.close() db.session.close()
return return


app_annotation_setting = db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first() app_annotation_setting = db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app_id).first()


if not app_annotation_setting: if not app_annotation_setting:
logging.info(click.style(f"App annotation setting not found: {app_id}", fg="red"))
logger.info(click.style(f"App annotation setting not found: {app_id}", fg="red"))
db.session.close() db.session.close()
return return


vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"]) vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
vector.delete() vector.delete()
except Exception: except Exception:
logging.exception("Delete annotation index failed when annotation deleted.")
logger.exception("Delete annotation index failed when annotation deleted.")
redis_client.setex(disable_app_annotation_job_key, 600, "completed") redis_client.setex(disable_app_annotation_job_key, 600, "completed")


# delete annotation setting # delete annotation setting
db.session.commit() db.session.commit()


end_at = time.perf_counter() end_at = time.perf_counter()
logging.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green"))
logger.info(click.style(f"App annotations index deleted : {app_id} latency: {end_at - start_at}", fg="green"))
except Exception as e: except Exception as e:
logging.exception("Annotation batch deleted index failed")
logger.exception("Annotation batch deleted index failed")
redis_client.setex(disable_app_annotation_job_key, 600, "error") redis_client.setex(disable_app_annotation_job_key, 600, "error")
disable_app_annotation_error_key = f"disable_app_annotation_error_{str(job_id)}" disable_app_annotation_error_key = f"disable_app_annotation_error_{str(job_id)}"
redis_client.setex(disable_app_annotation_error_key, 600, str(e)) redis_client.setex(disable_app_annotation_error_key, 600, str(e))
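The job bookkeeping around these hunks is a small Redis convention: a status key set to "completed" or "error", plus a companion key holding the error message, both expiring after 600 seconds. A sketch of that convention, assuming redis-py's setex(name, time, value) signature (the Redis() stand-in replaces the project's redis_client):

from redis import Redis  # stand-in for the project's configured redis_client

redis_client = Redis()

JOB_TTL_SECONDS = 600  # status keys expire after ten minutes


def mark_job(job_id: str, error: Exception | None = None) -> None:
    # Key names mirror the ones used in the hunk above.
    status_key = f"disable_app_annotation_job_{job_id}"
    if error is None:
        redis_client.setex(status_key, JOB_TTL_SECONDS, "completed")
    else:
        redis_client.setex(status_key, JOB_TTL_SECONDS, "error")
        redis_client.setex(f"disable_app_annotation_error_{job_id}", JOB_TTL_SECONDS, str(error))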

api/tasks/annotation/enable_annotation_reply_task.py  (+8 -6)

 from models.model import App, AppAnnotationSetting, MessageAnnotation
 from services.dataset_service import DatasetCollectionBindingService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def enable_annotation_reply_task(
     """
     Async enable annotation reply task
     """
-    logging.info(click.style(f"Start add app annotation to index: {app_id}", fg="green"))
+    logger.info(click.style(f"Start add app annotation to index: {app_id}", fg="green"))
     start_at = time.perf_counter()
     # get app info
     app = db.session.query(App).where(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()

     if not app:
-        logging.info(click.style(f"App not found: {app_id}", fg="red"))
+        logger.info(click.style(f"App not found: {app_id}", fg="red"))
         db.session.close()
         return

                 try:
                     old_vector.delete()
                 except Exception as e:
-                    logging.info(click.style(f"Delete annotation index error: {str(e)}", fg="red"))
+                    logger.info(click.style(f"Delete annotation index error: {str(e)}", fg="red"))
             annotation_setting.score_threshold = score_threshold
             annotation_setting.collection_binding_id = dataset_collection_binding.id
             annotation_setting.updated_user_id = user_id
             try:
                 vector.delete_by_metadata_field("app_id", app_id)
             except Exception as e:
-                logging.info(click.style(f"Delete annotation index error: {str(e)}", fg="red"))
+                logger.info(click.style(f"Delete annotation index error: {str(e)}", fg="red"))
             vector.create(documents)
         db.session.commit()
         redis_client.setex(enable_app_annotation_job_key, 600, "completed")
         end_at = time.perf_counter()
-        logging.info(click.style(f"App annotations added to index: {app_id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"App annotations added to index: {app_id} latency: {end_at - start_at}", fg="green"))
     except Exception as e:
-        logging.exception("Annotation batch created index failed")
+        logger.exception("Annotation batch created index failed")
         redis_client.setex(enable_app_annotation_job_key, 600, "error")
         enable_app_annotation_error_key = f"enable_app_annotation_error_{str(job_id)}"
         redis_client.setex(enable_app_annotation_error_key, 600, str(e))
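Note the split between logger.info for expected cleanup errors and logger.exception for the outer failure path: exception() logs at ERROR level and attaches the active traceback automatically. A tiny demonstration:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    raise RuntimeError("index build failed")
except Exception:
    # Equivalent to logger.error(..., exc_info=True): the current traceback
    # is appended to the record without passing it explicitly.
    logger.exception("Annotation batch created index failed")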

api/tasks/annotation/update_annotation_to_index_task.py  (+5 -3)

 from models.dataset import Dataset
 from services.dataset_service import DatasetCollectionBindingService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def update_annotation_to_index_task(

     Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
     """
-    logging.info(click.style(f"Start update index for annotation: {annotation_id}", fg="green"))
+    logger.info(click.style(f"Start update index for annotation: {annotation_id}", fg="green"))
     start_at = time.perf_counter()

     try:
         vector.delete_by_metadata_field("annotation_id", annotation_id)
         vector.add_texts([document])
         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Build index successful for annotation: {annotation_id} latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("Build index for annotation failed")
+        logger.exception("Build index for annotation failed")
     finally:
         db.session.close()

api/tasks/batch_clean_document_task.py  (+7 -5)

 from models.dataset import Dataset, DocumentSegment
 from models.model import UploadFile

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form: str, file_ids: list[str]):

     Usage: batch_clean_document_task.delay(document_ids, dataset_id)
     """
-    logging.info(click.style("Start batch clean documents when documents deleted", fg="green"))
+    logger.info(click.style("Start batch clean documents when documents deleted", fg="green"))
     start_at = time.perf_counter()

     try:
                     if image_file and image_file.key:
                         storage.delete(image_file.key)
                 except Exception:
-                    logging.exception(
+                    logger.exception(
                         "Delete image_files failed when storage deleted, \
                         image_upload_file_is: %s",
                         upload_file_id,
             try:
                 storage.delete(file.key)
             except Exception:
-                logging.exception("Delete file failed when document deleted, file_id: %s", file.id)
+                logger.exception("Delete file failed when document deleted, file_id: %s", file.id)
             db.session.delete(file)
         db.session.commit()

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Cleaned documents when documents deleted latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("Cleaned documents when documents deleted failed")
+        logger.exception("Cleaned documents when documents deleted failed")
     finally:
         db.session.close()
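Two message styles coexist in these hunks: click.style(f"...") for eagerly formatted, colored progress lines, and %s placeholders with trailing arguments for error paths. The latter defers interpolation until a handler actually emits the record. A short illustration (the file_id value is made up):

import logging

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

file_id = "d41d8cd9"  # illustrative value

# Deferred %-formatting: the string is only interpolated if the record
# passes level and handler filtering, and the argument stays attached to
# the LogRecord for structured processing downstream.
logger.error("Delete file failed when document deleted, file_id: %s", file_id)

# An f-string would build the message eagerly even when it is filtered out:
logger.debug(f"Delete file failed when document deleted, file_id: {file_id}")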

api/tasks/batch_create_segment_to_index_task.py  (+5 -3)

 from models.model import UploadFile
 from services.vector_service import VectorService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def batch_create_segment_to_index_task(

     Usage: batch_create_segment_to_index_task.delay(job_id, upload_file_id, dataset_id, document_id, tenant_id, user_id)
     """
-    logging.info(click.style(f"Start batch create segment jobId: {job_id}", fg="green"))
+    logger.info(click.style(f"Start batch create segment jobId: {job_id}", fg="green"))
     start_at = time.perf_counter()

     indexing_cache_key = f"segment_batch_import_{job_id}"
         db.session.commit()
         redis_client.setex(indexing_cache_key, 600, "completed")
         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Segment batch created job: {job_id} latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("Segments batch created index failed")
+        logger.exception("Segments batch created index failed")
         redis_client.setex(indexing_cache_key, 600, "error")
     finally:
         db.session.close()

api/tasks/clean_dataset_task.py  (+14 -12)

 )
 from models.model import UploadFile

+logger = logging.getLogger(__name__)


 # Add import statement for ValueError
 @shared_task(queue="dataset")

     Usage: clean_dataset_task.delay(dataset_id, tenant_id, indexing_technique, index_struct)
     """
-    logging.info(click.style(f"Start clean dataset when dataset deleted: {dataset_id}", fg="green"))
+    logger.info(click.style(f"Start clean dataset when dataset deleted: {dataset_id}", fg="green"))
     start_at = time.perf_counter()

     try:
             from core.rag.index_processor.constant.index_type import IndexType

             doc_form = IndexType.PARAGRAPH_INDEX
-            logging.info(
+            logger.info(
                 click.style(f"Invalid doc_form detected, using default index type for cleanup: {doc_form}", fg="yellow")
             )

         try:
             index_processor = IndexProcessorFactory(doc_form).init_index_processor()
             index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True)
-            logging.info(click.style(f"Successfully cleaned vector database for dataset: {dataset_id}", fg="green"))
+            logger.info(click.style(f"Successfully cleaned vector database for dataset: {dataset_id}", fg="green"))
         except Exception as index_cleanup_error:
-            logging.exception(click.style(f"Failed to clean vector database for dataset {dataset_id}", fg="red"))
+            logger.exception(click.style(f"Failed to clean vector database for dataset {dataset_id}", fg="red"))
             # Continue with document and segment deletion even if vector cleanup fails
-            logging.info(
+            logger.info(
                 click.style(f"Continuing with document and segment deletion for dataset: {dataset_id}", fg="yellow")
             )

         if documents is None or len(documents) == 0:
-            logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green"))
+            logger.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green"))
         else:
-            logging.info(click.style(f"Cleaning documents for dataset: {dataset_id}", fg="green"))
+            logger.info(click.style(f"Cleaning documents for dataset: {dataset_id}", fg="green"))

             for document in documents:
                 db.session.delete(document)
                     try:
                         storage.delete(image_file.key)
                     except Exception:
-                        logging.exception(
+                        logger.exception(
                             "Delete image_files failed when storage deleted, \
                             image_upload_file_is: %s",
                             upload_file_id,

         db.session.commit()
         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", fg="green")
         )
     except Exception:
         # This ensures the database session is properly cleaned up
         try:
             db.session.rollback()
-            logging.info(click.style(f"Rolled back database session for dataset: {dataset_id}", fg="yellow"))
+            logger.info(click.style(f"Rolled back database session for dataset: {dataset_id}", fg="yellow"))
         except Exception as rollback_error:
-            logging.exception("Failed to rollback database session")
+            logger.exception("Failed to rollback database session")

-        logging.exception("Cleaned dataset when dataset deleted failed")
+        logger.exception("Cleaned dataset when dataset deleted failed")
     finally:
         db.session.close()
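The error path in this task layers three safeguards: roll back the failed transaction (itself guarded, since rollback can fail on a broken connection), log the original failure with its traceback, and close the session in finally regardless of outcome. A stripped-down sketch, with db standing in for the project's SQLAlchemy handle and the work itself elided:

import logging

logger = logging.getLogger(__name__)


def clean_dataset(db, dataset_id: str) -> None:
    try:
        ...  # delete documents and segments, then db.session.commit()
    except Exception:
        try:
            db.session.rollback()  # undo the partially applied transaction
            logger.info("Rolled back database session for dataset: %s", dataset_id)
        except Exception:
            logger.exception("Failed to rollback database session")
        logger.exception("Cleaned dataset when dataset deleted failed")
    finally:
        db.session.close()  # always release the connection back to the pool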

api/tasks/clean_document_task.py  (+7 -5)

 from models.dataset import Dataset, DatasetMetadataBinding, DocumentSegment
 from models.model import UploadFile

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_id: Optional[str]):

     Usage: clean_document_task.delay(document_id, dataset_id)
     """
-    logging.info(click.style(f"Start clean document when document deleted: {document_id}", fg="green"))
+    logger.info(click.style(f"Start clean document when document deleted: {document_id}", fg="green"))
     start_at = time.perf_counter()

     try:
                 try:
                     storage.delete(image_file.key)
                 except Exception:
-                    logging.exception(
+                    logger.exception(
                         "Delete image_files failed when storage deleted, \
                         image_upload_file_is: %s",
                         upload_file_id,
             try:
                 storage.delete(file.key)
             except Exception:
-                logging.exception("Delete file failed when document deleted, file_id: %s", file_id)
+                logger.exception("Delete file failed when document deleted, file_id: %s", file_id)
             db.session.delete(file)
             db.session.commit()

         db.session.commit()

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Cleaned document when document deleted: {document_id} latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("Cleaned document when document deleted failed")
+        logger.exception("Cleaned document when document deleted failed")
     finally:
         db.session.close()

api/tasks/clean_notion_document_task.py  (+5 -5)

 from extensions.ext_database import db
 from models.dataset import Dataset, Document, DocumentSegment

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def clean_notion_document_task(document_ids: list[str], dataset_id: str):

     Usage: clean_notion_document_task.delay(document_ids, dataset_id)
     """
-    logging.info(
-        click.style(f"Start clean document when import form notion document deleted: {dataset_id}", fg="green")
-    )
+    logger.info(click.style(f"Start clean document when import form notion document deleted: {dataset_id}", fg="green"))
     start_at = time.perf_counter()

     try:
             db.session.delete(segment)
         db.session.commit()
         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 "Clean document when import form notion document deleted end :: {} latency: {}".format(
                     dataset_id, end_at - start_at
                 )
             )
     except Exception:
-        logging.exception("Cleaned document when import form notion document deleted failed")
+        logger.exception("Cleaned document when import form notion document deleted failed")
     finally:
         db.session.close()
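The payoff of naming loggers after their modules is hierarchical configuration: dotted logger names form a tree, so verbosity can be tuned per package or per task file without touching the task code. A sketch, assuming these modules resolve to names under a "tasks" package (the exact dotted names depend on how the application imports them):

import logging

logging.basicConfig(level=logging.INFO)

# Quiet every task module at once...
logging.getLogger("tasks").setLevel(logging.WARNING)

# ...while turning one noisy module back up for debugging. The name below is
# an assumption about how api/tasks/clean_notion_document_task.py imports.
logging.getLogger("tasks.clean_notion_document_task").setLevel(logging.DEBUG)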

api/tasks/create_segment_to_index_task.py  (+9 -7)

 from libs.datetime_utils import naive_utc_now
 from models.dataset import DocumentSegment

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] = None):
     :param keywords:
     Usage: create_segment_to_index_task.delay(segment_id)
     """
-    logging.info(click.style(f"Start create segment to index: {segment_id}", fg="green"))
+    logger.info(click.style(f"Start create segment to index: {segment_id}", fg="green"))
     start_at = time.perf_counter()

     segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first()
     if not segment:
-        logging.info(click.style(f"Segment not found: {segment_id}", fg="red"))
+        logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
         db.session.close()
         return

         dataset = segment.dataset

         if not dataset:
-            logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
             return

         dataset_document = segment.document

         if not dataset_document:
-            logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
             return

         if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
-            logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
             return

         index_type = dataset.doc_form
         db.session.commit()

         end_at = time.perf_counter()
-        logging.info(click.style(f"Segment created to index: {segment.id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Segment created to index: {segment.id} latency: {end_at - start_at}", fg="green"))
     except Exception as e:
-        logging.exception("create segment to index failed")
+        logger.exception("create segment to index failed")
         segment.enabled = False
         segment.disabled_at = naive_utc_now()
         segment.status = "error"
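The segment tasks validate their input with a ladder of guard clauses, each logging a colored reason before returning early; only a fully valid segment reaches the indexing work. A condensed sketch of that ladder (the segment object and helper name are illustrative):

import logging

import click

logger = logging.getLogger(__name__)


def segment_is_indexable(segment) -> bool:
    if not segment:
        logger.info(click.style("Segment not found", fg="red"))
        return False
    if segment.status != "completed":
        logger.info(click.style("Segment is not completed, pass.", fg="red"))
        return False
    if not segment.dataset:
        logger.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
        return False
    doc = segment.document
    if not doc or not doc.enabled or doc.archived or doc.indexing_status != "completed":
        logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
        return False
    return True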

api/tasks/deal_dataset_vector_index_task.py  (+5 -3)

 from models.dataset import Dataset, DocumentSegment
 from models.dataset import Document as DatasetDocument

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def deal_dataset_vector_index_task(dataset_id: str, action: Literal["remove", "add", "update"]):
     :param action: action
     Usage: deal_dataset_vector_index_task.delay(dataset_id, action)
     """
-    logging.info(click.style(f"Start deal dataset vector index: {dataset_id}", fg="green"))
+    logger.info(click.style(f"Start deal dataset vector index: {dataset_id}", fg="green"))
     start_at = time.perf_counter()

     try:
             index_processor.clean(dataset, None, with_keywords=False, delete_child_chunks=False)

         end_at = time.perf_counter()
-        logging.info(click.style(f"Deal dataset vector index: {dataset_id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Deal dataset vector index: {dataset_id} latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("Deal dataset vector index failed")
+        logger.exception("Deal dataset vector index failed")
     finally:
         db.session.close()

api/tasks/delete_conversation_task.py  (+5 -3)

 from models.tools import ToolConversationVariables, ToolFile
 from models.web import PinnedConversation

+logger = logging.getLogger(__name__)


 @shared_task(queue="conversation")
 def delete_conversation_related_data(conversation_id: str) -> None:
     conversation_id: conversation Id
     """

-    logging.info(
+    logger.info(
         click.style(f"Starting to delete conversation data from db for conversation_id {conversation_id}", fg="green")
     )
     start_at = time.perf_counter()
         db.session.commit()

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Succeeded cleaning data from db for conversation_id {conversation_id} latency: {end_at - start_at}",
                 fg="green",
             )

     except Exception as e:
-        logging.exception("Failed to delete data from db for conversation_id: %s failed", conversation_id)
+        logger.exception("Failed to delete data from db for conversation_id: %s failed", conversation_id)
         db.session.rollback()
         raise e
     finally:

api/tasks/delete_segment_from_index_task.py  (+5 -3)

 from extensions.ext_database import db
 from models.dataset import Dataset, Document

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def delete_segment_from_index_task(index_node_ids: list, dataset_id: str, document_id: str):

     Usage: delete_segment_from_index_task.delay(index_node_ids, dataset_id, document_id)
     """
-    logging.info(click.style("Start delete segment from index", fg="green"))
+    logger.info(click.style("Start delete segment from index", fg="green"))
     start_at = time.perf_counter()
     try:
         dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
         index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=True)

         end_at = time.perf_counter()
-        logging.info(click.style(f"Segment deleted from index latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Segment deleted from index latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("delete segment from index failed")
+        logger.exception("delete segment from index failed")
     finally:
         db.session.close()

api/tasks/disable_segment_from_index_task.py  (+10 -8)

 from extensions.ext_redis import redis_client
 from models.dataset import DocumentSegment

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def disable_segment_from_index_task(segment_id: str):

     Usage: disable_segment_from_index_task.delay(segment_id)
     """
-    logging.info(click.style(f"Start disable segment from index: {segment_id}", fg="green"))
+    logger.info(click.style(f"Start disable segment from index: {segment_id}", fg="green"))
     start_at = time.perf_counter()

     segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first()
     if not segment:
-        logging.info(click.style(f"Segment not found: {segment_id}", fg="red"))
+        logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
         db.session.close()
         return

     if segment.status != "completed":
-        logging.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red"))
+        logger.info(click.style(f"Segment is not completed, disable is not allowed: {segment_id}", fg="red"))
         db.session.close()
         return

         dataset = segment.dataset

         if not dataset:
-            logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
             return

         dataset_document = segment.document

         if not dataset_document:
-            logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
             return

         if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
-            logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
             return

         index_type = dataset_document.doc_form
         index_processor.clean(dataset, [segment.index_node_id])

         end_at = time.perf_counter()
-        logging.info(click.style(f"Segment removed from index: {segment.id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Segment removed from index: {segment.id} latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("remove segment from index failed")
+        logger.exception("remove segment from index failed")
         segment.enabled = True
         db.session.commit()
     finally:

api/tasks/disable_segments_from_index_task.py  (+6 -4)

 from models.dataset import Dataset, DocumentSegment
 from models.dataset import Document as DatasetDocument

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def disable_segments_from_index_task(segment_ids: list, dataset_id: str, document_id: str):

     dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
     if not dataset:
-        logging.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan"))
+        logger.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan"))
         db.session.close()
         return

     dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first()

     if not dataset_document:
-        logging.info(click.style(f"Document {document_id} not found, pass.", fg="cyan"))
+        logger.info(click.style(f"Document {document_id} not found, pass.", fg="cyan"))
         db.session.close()
         return
     if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
-        logging.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan"))
+        logger.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan"))
         db.session.close()
         return
     # sync index processor
         index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False)

         end_at = time.perf_counter()
-        logging.info(click.style(f"Segments removed from index latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Segments removed from index latency: {end_at - start_at}", fg="green"))
     except Exception:
         # update segment error msg
         db.session.query(DocumentSegment).where(

api/tasks/document_indexing_sync_task.py  (+9 -7)

 from models.dataset import Dataset, Document, DocumentSegment
 from models.source import DataSourceOauthBinding

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def document_indexing_sync_task(dataset_id: str, document_id: str):

     Usage: document_indexing_sync_task.delay(dataset_id, document_id)
     """
-    logging.info(click.style(f"Start sync document: {document_id}", fg="green"))
+    logger.info(click.style(f"Start sync document: {document_id}", fg="green"))
     start_at = time.perf_counter()

     document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()

     if not document:
-        logging.info(click.style(f"Document not found: {document_id}", fg="red"))
+        logger.info(click.style(f"Document not found: {document_id}", fg="red"))
         db.session.close()
         return

                 db.session.delete(segment)

             end_at = time.perf_counter()
-            logging.info(
+            logger.info(
                 click.style(
                     "Cleaned document when document update data source or process rule: {} latency: {}".format(
                         document_id, end_at - start_at
                     )
                 )
         except Exception:
-            logging.exception("Cleaned document when document update data source or process rule failed")
+            logger.exception("Cleaned document when document update data source or process rule failed")

         try:
             indexing_runner = IndexingRunner()
             indexing_runner.run([document])
             end_at = time.perf_counter()
-            logging.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green"))
+            logger.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green"))
         except DocumentIsPausedError as ex:
-            logging.info(click.style(str(ex), fg="yellow"))
+            logger.info(click.style(str(ex), fg="yellow"))
         except Exception:
-            logging.exception("document_indexing_sync_task failed, document_id: %s", document_id)
+            logger.exception("document_indexing_sync_task failed, document_id: %s", document_id)
     finally:
         db.session.close()

api/tasks/document_indexing_task.py  (+7 -5)

 from models.dataset import Dataset, Document
 from services.feature_service import FeatureService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def document_indexing_task(dataset_id: str, document_ids: list):

     dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
     if not dataset:
-        logging.info(click.style(f"Dataset is not found: {dataset_id}", fg="yellow"))
+        logger.info(click.style(f"Dataset is not found: {dataset_id}", fg="yellow"))
         db.session.close()
         return
     # check document limit
             return

     for document_id in document_ids:
-        logging.info(click.style(f"Start process document: {document_id}", fg="green"))
+        logger.info(click.style(f"Start process document: {document_id}", fg="green"))

         document = (
             db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
         indexing_runner = IndexingRunner()
         indexing_runner.run(documents)
         end_at = time.perf_counter()
-        logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
     except DocumentIsPausedError as ex:
-        logging.info(click.style(str(ex), fg="yellow"))
+        logger.info(click.style(str(ex), fg="yellow"))
     except Exception:
-        logging.exception("Document indexing task failed, dataset_id: %s", dataset_id)
+        logger.exception("Document indexing task failed, dataset_id: %s", dataset_id)
     finally:
         db.session.close()
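All of these functions are Celery tasks bound to named queues and enqueued with .delay(...), as the Usage: docstrings note. A minimal, self-contained sketch of that wiring, assuming a standard Celery setup (the task name and body are illustrative, not this file's real task):

import logging

from celery import shared_task

logger = logging.getLogger(__name__)


@shared_task(queue="dataset")  # routed to workers consuming the "dataset" queue
def example_indexing_task(dataset_id: str, document_ids: list) -> None:
    logger.info("Start process documents for dataset: %s", dataset_id)


# Callers enqueue asynchronously; the broker delivers it to a worker:
# example_indexing_task.delay(dataset_id, ["doc-1", "doc-2"])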

api/tasks/document_indexing_update_task.py  (+9 -7)

 from libs.datetime_utils import naive_utc_now
 from models.dataset import Dataset, Document, DocumentSegment

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def document_indexing_update_task(dataset_id: str, document_id: str):

     Usage: document_indexing_update_task.delay(dataset_id, document_id)
     """
-    logging.info(click.style(f"Start update document: {document_id}", fg="green"))
+    logger.info(click.style(f"Start update document: {document_id}", fg="green"))
     start_at = time.perf_counter()

     document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()

     if not document:
-        logging.info(click.style(f"Document not found: {document_id}", fg="red"))
+        logger.info(click.style(f"Document not found: {document_id}", fg="red"))
         db.session.close()
         return

             db.session.delete(segment)
         db.session.commit()
         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 "Cleaned document when document update data source or process rule: {} latency: {}".format(
                     document_id, end_at - start_at
                 )
             )
     except Exception:
-        logging.exception("Cleaned document when document update data source or process rule failed")
+        logger.exception("Cleaned document when document update data source or process rule failed")

     try:
         indexing_runner = IndexingRunner()
         indexing_runner.run([document])
         end_at = time.perf_counter()
-        logging.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"update document: {document.id} latency: {end_at - start_at}", fg="green"))
     except DocumentIsPausedError as ex:
-        logging.info(click.style(str(ex), fg="yellow"))
+        logger.info(click.style(str(ex), fg="yellow"))
     except Exception:
-        logging.exception("document_indexing_update_task failed, document_id: %s", document_id)
+        logger.exception("document_indexing_update_task failed, document_id: %s", document_id)
     finally:
         db.session.close()

api/tasks/duplicate_document_indexing_task.py  (+7 -5)

 from models.dataset import Dataset, Document, DocumentSegment
 from services.feature_service import FeatureService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def duplicate_document_indexing_task(dataset_id: str, document_ids: list):

     dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
     if dataset is None:
-        logging.info(click.style(f"Dataset not found: {dataset_id}", fg="red"))
+        logger.info(click.style(f"Dataset not found: {dataset_id}", fg="red"))
         db.session.close()
         return

         db.session.close()

     for document_id in document_ids:
-        logging.info(click.style(f"Start process document: {document_id}", fg="green"))
+        logger.info(click.style(f"Start process document: {document_id}", fg="green"))

         document = (
             db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
         indexing_runner = IndexingRunner()
         indexing_runner.run(documents)
         end_at = time.perf_counter()
-        logging.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Processed dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
     except DocumentIsPausedError as ex:
-        logging.info(click.style(str(ex), fg="yellow"))
+        logger.info(click.style(str(ex), fg="yellow"))
     except Exception:
-        logging.exception("duplicate_document_indexing_task failed, dataset_id: %s", dataset_id)
+        logger.exception("duplicate_document_indexing_task failed, dataset_id: %s", dataset_id)
     finally:
         db.session.close()

api/tasks/enable_segment_to_index_task.py  (+10 -8)

 from libs.datetime_utils import naive_utc_now
 from models.dataset import DocumentSegment

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def enable_segment_to_index_task(segment_id: str):

     Usage: enable_segment_to_index_task.delay(segment_id)
     """
-    logging.info(click.style(f"Start enable segment to index: {segment_id}", fg="green"))
+    logger.info(click.style(f"Start enable segment to index: {segment_id}", fg="green"))
     start_at = time.perf_counter()

     segment = db.session.query(DocumentSegment).where(DocumentSegment.id == segment_id).first()
     if not segment:
-        logging.info(click.style(f"Segment not found: {segment_id}", fg="red"))
+        logger.info(click.style(f"Segment not found: {segment_id}", fg="red"))
         db.session.close()
         return

     if segment.status != "completed":
-        logging.info(click.style(f"Segment is not completed, enable is not allowed: {segment_id}", fg="red"))
+        logger.info(click.style(f"Segment is not completed, enable is not allowed: {segment_id}", fg="red"))
         db.session.close()
         return

         dataset = segment.dataset

         if not dataset:
-            logging.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} has no dataset, pass.", fg="cyan"))
             return

         dataset_document = segment.document

         if not dataset_document:
-            logging.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} has no document, pass.", fg="cyan"))
             return

         if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
-            logging.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
+            logger.info(click.style(f"Segment {segment.id} document status is invalid, pass.", fg="cyan"))
             return

         index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()
         index_processor.load(dataset, [document])

         end_at = time.perf_counter()
-        logging.info(click.style(f"Segment enabled to index: {segment.id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Segment enabled to index: {segment.id} latency: {end_at - start_at}", fg="green"))
     except Exception as e:
-        logging.exception("enable segment to index failed")
+        logger.exception("enable segment to index failed")
         segment.enabled = False
         segment.disabled_at = naive_utc_now()
         segment.status = "error"

api/tasks/enable_segments_to_index_task.py  (+8 -6)

 from models.dataset import Dataset, DocumentSegment
 from models.dataset import Document as DatasetDocument

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_id: str):
     start_at = time.perf_counter()
     dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
     if not dataset:
-        logging.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan"))
+        logger.info(click.style(f"Dataset {dataset_id} not found, pass.", fg="cyan"))
         return

     dataset_document = db.session.query(DatasetDocument).where(DatasetDocument.id == document_id).first()

     if not dataset_document:
-        logging.info(click.style(f"Document {document_id} not found, pass.", fg="cyan"))
+        logger.info(click.style(f"Document {document_id} not found, pass.", fg="cyan"))
         db.session.close()
         return
     if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
-        logging.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan"))
+        logger.info(click.style(f"Document {document_id} status is invalid, pass.", fg="cyan"))
         db.session.close()
         return
     # sync index processor
             .all()
         )
         if not segments:
-            logging.info(click.style(f"Segments not found: {segment_ids}", fg="cyan"))
+            logger.info(click.style(f"Segments not found: {segment_ids}", fg="cyan"))
             db.session.close()
             return

         index_processor.load(dataset, documents)

         end_at = time.perf_counter()
-        logging.info(click.style(f"Segments enabled to index latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Segments enabled to index latency: {end_at - start_at}", fg="green"))
     except Exception as e:
-        logging.exception("enable segments to index failed")
+        logger.exception("enable segments to index failed")
         # update segment error msg
         db.session.query(DocumentSegment).where(
             DocumentSegment.id.in_(segment_ids),

api/tasks/mail_account_deletion_task.py  (+8 -6)

 from extensions.ext_mail import mail
 from libs.email_i18n import EmailType, get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_deletion_success_task(to: str, language: str = "en-US") -> None:
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start send account deletion success email to {to}", fg="green"))
+    logger.info(click.style(f"Start send account deletion success email to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(f"Send account deletion success email to {to}: latency: {end_at - start_at}", fg="green")
         )
     except Exception:
-        logging.exception("Send account deletion success email to %s failed", to)
+        logger.exception("Send account deletion success email to %s failed", to)


 @shared_task(queue="mail")
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start send account deletion verification code email to {to}", fg="green"))
+    logger.info(click.style(f"Start send account deletion verification code email to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 "Send account deletion verification code email to {} succeeded: latency: {}".format(
                     to, end_at - start_at
                 )
             )
     except Exception:
-        logging.exception("Send account deletion verification code email to %s failed", to)
+        logger.exception("Send account deletion verification code email to %s failed", to)

api/tasks/mail_change_mail_task.py  (+8 -6)

 from extensions.ext_mail import mail
 from libs.email_i18n import EmailType, get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_change_mail_task(language: str, to: str, code: str, phase: str) -> None:
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start change email mail to {to}", fg="green"))
+    logger.info(click.style(f"Start change email mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         )

         end_at = time.perf_counter()
-        logging.info(click.style(f"Send change email mail to {to} succeeded: latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Send change email mail to {to} succeeded: latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("Send change email mail to %s failed", to)
+        logger.exception("Send change email mail to %s failed", to)


 @shared_task(queue="mail")
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start change email completed notify mail to {to}", fg="green"))
+    logger.info(click.style(f"Start change email completed notify mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Send change email completed mail to {to} succeeded: latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("Send change email completed mail to %s failed", to)
+        logger.exception("Send change email completed mail to %s failed", to)

api/tasks/mail_email_code_login.py  (+5 -3)

 from extensions.ext_mail import mail
 from libs.email_i18n import EmailType, get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_email_code_login_mail_task(language: str, to: str, code: str) -> None:
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start email code login mail to {to}", fg="green"))
+    logger.info(click.style(f"Start email code login mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(f"Send email code login mail to {to} succeeded: latency: {end_at - start_at}", fg="green")
         )
     except Exception:
-        logging.exception("Send email code login mail to %s failed", to)
+        logger.exception("Send email code login mail to %s failed", to)

api/tasks/mail_inner_task.py  (+5 -3)

 from extensions.ext_mail import mail
 from libs.email_i18n import get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_inner_email_task(to: list[str], subject: str, body: str, substitutions: Mapping[str, str]):
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start enterprise mail to {to} with subject {subject}", fg="green"))
+    logger.info(click.style(f"Start enterprise mail to {to} with subject {subject}", fg="green"))
     start_at = time.perf_counter()

     try:
         email_service.send_raw_email(to=to, subject=subject, html_content=html_content)

         end_at = time.perf_counter()
-        logging.info(click.style(f"Send enterprise mail to {to} succeeded: latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Send enterprise mail to {to} succeeded: latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("Send enterprise mail to %s failed", to)
+        logger.exception("Send enterprise mail to %s failed", to)

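Worth noting in these hunks: the failure paths pass `to` as a separate argument (`logger.exception("... %s failed", to)`) rather than using an f-string. Printf-style arguments are only interpolated if a handler actually accepts the record, and `exception()` captures the active traceback without being handed the exception object. A standalone illustration:

```python
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Dropped at the level check, so the %s is never formatted;
# an f-string here would build the string regardless.
logger.info("expensive repr: %s", object())

try:
    1 / 0
except ZeroDivisionError:
    # Logs at ERROR and appends the traceback of the exception
    # currently being handled; no need to pass it explicitly.
    logger.exception("division failed for %s", "alice")
```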
api/tasks/mail_invite_member_task.py (+5 -5)

 from extensions.ext_mail import mail
 from libs.email_i18n import EmailType, get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_invite_member_mail_task(language: str, to: str, token: str, inviter_name: str, workspace_name: str) -> None:
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start send invite member mail to {to} in workspace {workspace_name}", fg="green"))
+    logger.info(click.style(f"Start send invite member mail to {to} in workspace {workspace_name}", fg="green"))
     start_at = time.perf_counter()

     try:
         ...
         )

         end_at = time.perf_counter()
-        logging.info(
-            click.style(f"Send invite member mail to {to} succeeded: latency: {end_at - start_at}", fg="green")
-        )
+        logger.info(click.style(f"Send invite member mail to {to} succeeded: latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("Send invite member mail to %s failed", to)
+        logger.exception("Send invite member mail to %s failed", to)

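The colored start/success strings come from `click.style`, which wraps text in ANSI escape codes so the worker log is scannable in a terminal, following the green/yellow/red convention used throughout these tasks. A standalone sketch:

```python
import click

# style() returns the text wrapped in ANSI color codes;
# echo() strips the codes again when stdout is not a terminal.
click.echo(click.style("Send invite member mail succeeded", fg="green"))
click.echo(click.style("Document not found", fg="yellow"))
click.echo(click.style("Database error while deleting app", fg="red"))
```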
api/tasks/mail_owner_transfer_task.py (+11 -9)

 from extensions.ext_mail import mail
 from libs.email_i18n import EmailType, get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_owner_transfer_confirm_task(language: str, to: str, code: str, workspace: str) -> None:
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start owner transfer confirm mail to {to}", fg="green"))
+    logger.info(click.style(f"Start owner transfer confirm mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         ...
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Send owner transfer confirm mail to {to} succeeded: latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("owner transfer confirm email mail to %s failed", to)
+        logger.exception("owner transfer confirm email mail to %s failed", to)


 @shared_task(queue="mail")
 ...
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start old owner transfer notify mail to {to}", fg="green"))
+    logger.info(click.style(f"Start old owner transfer notify mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         ...
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Send old owner transfer notify mail to {to} succeeded: latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("old owner transfer notify email mail to %s failed", to)
+        logger.exception("old owner transfer notify email mail to %s failed", to)


 @shared_task(queue="mail")
 ...
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start new owner transfer notify mail to {to}", fg="green"))
+    logger.info(click.style(f"Start new owner transfer notify mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         ...
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(
                 f"Send new owner transfer notify mail to {to} succeeded: latency: {end_at - start_at}",
                 fg="green",
             )
         )
     except Exception:
-        logging.exception("new owner transfer notify email mail to %s failed", to)
+        logger.exception("new owner transfer notify email mail to %s failed", to)

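Because logger names returned by `getLogger(__name__)` form a dot-separated hierarchy, the refactor makes per-package tuning possible: a level set on a parent name applies to every task module beneath it. A sketch of that effect (the `tasks.*` names are illustrative of how these modules would be named at runtime):

```python
import logging

logging.basicConfig(level=logging.INFO)

# Quiet every logger under "tasks" at once; other packages keep INFO.
logging.getLogger("tasks").setLevel(logging.WARNING)

logging.getLogger("tasks.mail_reset_password_task").info("suppressed")
logging.getLogger("controllers.console").info("still printed")
```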
api/tasks/mail_reset_password_task.py (+5 -3)

 from extensions.ext_mail import mail
 from libs.email_i18n import EmailType, get_email_i18n_service

+logger = logging.getLogger(__name__)


 @shared_task(queue="mail")
 def send_reset_password_mail_task(language: str, to: str, code: str) -> None:
     if not mail.is_inited():
         return

-    logging.info(click.style(f"Start password reset mail to {to}", fg="green"))
+    logger.info(click.style(f"Start password reset mail to {to}", fg="green"))
     start_at = time.perf_counter()

     try:
         ...
         )

         end_at = time.perf_counter()
-        logging.info(
+        logger.info(
             click.style(f"Send password reset mail to {to} succeeded: latency: {end_at - start_at}", fg="green")
         )
     except Exception:
-        logging.exception("Send password reset mail to %s failed", to)
+        logger.exception("Send password reset mail to %s failed", to)

api/tasks/ops_trace_task.py (+5 -3)

 from models.model import Message
 from models.workflow import WorkflowRun

+logger = logging.getLogger(__name__)


 @shared_task(queue="ops_trace")
 def process_trace_tasks(file_info):
     ...
         if trace_type:
             trace_info = trace_type(**trace_info)
             trace_instance.trace(trace_info)
-            logging.info("Processing trace tasks success, app_id: %s", app_id)
+            logger.info("Processing trace tasks success, app_id: %s", app_id)
     except Exception as e:
-        logging.info("error:\n\n\n%s\n\n\n\n", e)
+        logger.info("error:\n\n\n%s\n\n\n\n", e)
         failed_key = f"{OPS_TRACE_FAILED_KEY}_{app_id}"
         redis_client.incr(failed_key)
-        logging.info("Processing trace tasks failed, app_id: %s", app_id)
+        logger.info("Processing trace tasks failed, app_id: %s", app_id)
     finally:
         storage.delete(file_path)

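On failure, `process_trace_tasks` does not retry; it increments a per-app failure counter in Redis (keyed off `OPS_TRACE_FAILED_KEY`) so repeated failures can be observed elsewhere. A minimal redis-py sketch of that counting pattern; the key name and TTL are illustrative:

```python
import redis

r = redis.Redis(host="localhost", port=6379, db=0)

failed_key = "ops_trace_failed_app_123"  # illustrative per-app key
r.incr(failed_key)           # atomic; a missing key starts from 0
r.expire(failed_key, 86400)  # optional: let stale counters age out
print(int(r.get(failed_key) or 0))
```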
api/tasks/recover_document_indexing_task.py (+7 -5)

 from extensions.ext_database import db
 from models.dataset import Document

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def recover_document_indexing_task(dataset_id: str, document_id: str):
     ...

     Usage: recover_document_indexing_task.delay(dataset_id, document_id)
     """
-    logging.info(click.style(f"Recover document: {document_id}", fg="green"))
+    logger.info(click.style(f"Recover document: {document_id}", fg="green"))
     start_at = time.perf_counter()

     document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()

     if not document:
-        logging.info(click.style(f"Document not found: {document_id}", fg="red"))
+        logger.info(click.style(f"Document not found: {document_id}", fg="red"))
         db.session.close()
         return

     ...
         elif document.indexing_status == "indexing":
             indexing_runner.run_in_indexing_status(document)
         end_at = time.perf_counter()
-        logging.info(click.style(f"Processed document: {document.id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Processed document: {document.id} latency: {end_at - start_at}", fg="green"))
     except DocumentIsPausedError as ex:
-        logging.info(click.style(str(ex), fg="yellow"))
+        logger.info(click.style(str(ex), fg="yellow"))
     except Exception:
-        logging.exception("recover_document_indexing_task failed, document_id: %s", document_id)
+        logger.exception("recover_document_indexing_task failed, document_id: %s", document_id)
     finally:
         db.session.close()

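`recover_document_indexing_task` separates expected interruptions from real failures: a `DocumentIsPausedError` becomes a yellow INFO line, anything else goes through `logger.exception`, and the SQLAlchemy session is closed in `finally` on every path. A condensed sketch of that three-way shape; the error class and the `session`/`work` objects are stand-ins:

```python
import logging

logger = logging.getLogger(__name__)


class DocumentIsPausedError(Exception):
    """Stand-in: raised when a user pauses indexing mid-run."""


def run(session, work, document_id: str) -> None:
    try:
        work(document_id)
    except DocumentIsPausedError as ex:
        logger.info(str(ex))  # expected control flow: no traceback
    except Exception:
        logger.exception("task failed, document_id: %s", document_id)
    finally:
        session.close()  # runs on success, pause, and failure alike
```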
api/tasks/remove_app_and_related_data_task.py (+13 -13)

 )
 from repositories.factory import DifyAPIRepositoryFactory

+logger = logging.getLogger(__name__)


 @shared_task(queue="app_deletion", bind=True, max_retries=3)
 def remove_app_and_related_data_task(self, tenant_id: str, app_id: str):
-    logging.info(click.style(f"Start deleting app and related data: {tenant_id}:{app_id}", fg="green"))
+    logger.info(click.style(f"Start deleting app and related data: {tenant_id}:{app_id}", fg="green"))
     start_at = time.perf_counter()
     try:
         # Delete related data
         ...
         _delete_draft_variables(app_id)

         end_at = time.perf_counter()
-        logging.info(click.style(f"App and related data deleted: {app_id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"App and related data deleted: {app_id} latency: {end_at - start_at}", fg="green"))
     except SQLAlchemyError as e:
-        logging.exception(
-            click.style(f"Database error occurred while deleting app {app_id} and related data", fg="red")
-        )
+        logger.exception(click.style(f"Database error occurred while deleting app {app_id} and related data", fg="red"))
         raise self.retry(exc=e, countdown=60)  # Retry after 60 seconds
     except Exception as e:
-        logging.exception(click.style(f"Error occurred while deleting app {app_id} and related data", fg="red"))
+        logger.exception(click.style(f"Error occurred while deleting app {app_id} and related data", fg="red"))
         raise self.retry(exc=e, countdown=60)  # Retry after 60 seconds


 ...
         batch_size=1000,
     )

-    logging.info("Deleted %s workflow runs for app %s", deleted_count, app_id)
+    logger.info("Deleted %s workflow runs for app %s", deleted_count, app_id)


 def _delete_app_workflow_node_executions(tenant_id: str, app_id: str):
     ...
         batch_size=1000,
     )

-    logging.info("Deleted %s workflow node executions for app %s", deleted_count, app_id)
+    logger.info("Deleted %s workflow node executions for app %s", deleted_count, app_id)


 def _delete_app_workflow_app_logs(tenant_id: str, app_id: str):
     ...
     with db.engine.connect() as conn:
         conn.execute(stmt)
         conn.commit()
-    logging.info(click.style(f"Deleted conversation variables for app {app_id}", fg="green"))
+    logger.info(click.style(f"Deleted conversation variables for app {app_id}", fg="green"))


 def _delete_app_messages(tenant_id: str, app_id: str):
     ...
         batch_deleted = deleted_result.rowcount
         total_deleted += batch_deleted

-        logging.info(click.style(f"Deleted {batch_deleted} draft variables (batch) for app {app_id}", fg="green"))
+        logger.info(click.style(f"Deleted {batch_deleted} draft variables (batch) for app {app_id}", fg="green"))

-    logging.info(click.style(f"Deleted {total_deleted} total draft variables for app {app_id}", fg="green"))
+    logger.info(click.style(f"Deleted {total_deleted} total draft variables for app {app_id}", fg="green"))
     return total_deleted


 ...
     try:
         delete_func(record_id)
         db.session.commit()
-        logging.info(click.style(f"Deleted {name} {record_id}", fg="green"))
+        logger.info(click.style(f"Deleted {name} {record_id}", fg="green"))
     except Exception:
-        logging.exception("Error occurred while deleting %s %s", name, record_id)
+        logger.exception("Error occurred while deleting %s %s", name, record_id)
         continue
     rs.close()

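`remove_app_and_related_data_task` is declared with `bind=True, max_retries=3`, so the task instance is passed as `self` and can reschedule itself: `raise self.retry(exc=e, countdown=60)` re-enqueues the same invocation a minute later. A minimal Celery sketch of that pattern (broker URL and task body are illustrative):

```python
import logging

from celery import Celery

logger = logging.getLogger(__name__)
app = Celery("example", broker="redis://localhost:6379/0")  # illustrative broker


@app.task(bind=True, max_retries=3)
def delete_app(self, app_id: str) -> None:
    try:
        ...  # the real task deletes rows here
    except Exception as e:
        logger.exception("Error occurred while deleting app %s", app_id)
        # retry() raises celery.exceptions.Retry, so execution stops here;
        # once max_retries is exhausted, MaxRetriesExceededError is raised instead.
        raise self.retry(exc=e, countdown=60)
```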
api/tasks/remove_document_from_index_task.py (+8 -8)

 from libs.datetime_utils import naive_utc_now
 from models.dataset import Document, DocumentSegment

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def remove_document_from_index_task(document_id: str):
     ...

     Usage: remove_document_from_index.delay(document_id)
     """
-    logging.info(click.style(f"Start remove document segments from index: {document_id}", fg="green"))
+    logger.info(click.style(f"Start remove document segments from index: {document_id}", fg="green"))
     start_at = time.perf_counter()

     document = db.session.query(Document).where(Document.id == document_id).first()
     if not document:
-        logging.info(click.style(f"Document not found: {document_id}", fg="red"))
+        logger.info(click.style(f"Document not found: {document_id}", fg="red"))
         db.session.close()
         return

     if document.indexing_status != "completed":
-        logging.info(click.style(f"Document is not completed, remove is not allowed: {document_id}", fg="red"))
+        logger.info(click.style(f"Document is not completed, remove is not allowed: {document_id}", fg="red"))
         db.session.close()
         return

     try:
         ...
             index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False)
         except Exception:
-            logging.exception("clean dataset %s from index failed", dataset.id)
+            logger.exception("clean dataset %s from index failed", dataset.id)
         # update segment to disable
         db.session.query(DocumentSegment).where(DocumentSegment.document_id == document.id).update(
             {
                 ...
         db.session.commit()

         end_at = time.perf_counter()
-        logging.info(
-            click.style(f"Document removed from index: {document.id} latency: {end_at - start_at}", fg="green")
-        )
+        logger.info(click.style(f"Document removed from index: {document.id} latency: {end_at - start_at}", fg="green"))
     except Exception:
-        logging.exception("remove document from index failed")
+        logger.exception("remove document from index failed")
         if not document.archived:
             document.enabled = True
             db.session.commit()

api/tasks/retry_document_indexing_task.py (+9 -7)

 from models.dataset import Dataset, Document, DocumentSegment
 from services.feature_service import FeatureService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
     try:
         dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
         if not dataset:
-            logging.info(click.style(f"Dataset not found: {dataset_id}", fg="red"))
+            logger.info(click.style(f"Dataset not found: {dataset_id}", fg="red"))
             return
         tenant_id = dataset.tenant_id
         for document_id in document_ids:
             ...
                 redis_client.delete(retry_indexing_cache_key)
                 return

-            logging.info(click.style(f"Start retry document: {document_id}", fg="green"))
+            logger.info(click.style(f"Start retry document: {document_id}", fg="green"))
             document = (
                 db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
             )
             if not document:
-                logging.info(click.style(f"Document not found: {document_id}", fg="yellow"))
+                logger.info(click.style(f"Document not found: {document_id}", fg="yellow"))
                 return
             try:
                 # clean old data
                 ...
                 document.stopped_at = naive_utc_now()
                 db.session.add(document)
                 db.session.commit()
                 ...
-                logging.info(click.style(str(ex), fg="yellow"))
+                logger.info(click.style(str(ex), fg="yellow"))
                 redis_client.delete(retry_indexing_cache_key)
                 ...
-                logging.exception("retry_document_indexing_task failed, document_id: %s", document_id)
+                logger.exception("retry_document_indexing_task failed, document_id: %s", document_id)
         end_at = time.perf_counter()
-        logging.info(click.style(f"Retry dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
+        logger.info(click.style(f"Retry dataset: {dataset_id} latency: {end_at - start_at}", fg="green"))
     except Exception as e:
-        logging.exception(
+        logger.exception(
             "retry_document_indexing_task failed, dataset_id: %s, document_ids: %s", dataset_id, document_ids
         )
         raise e

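The retry and sync tasks also manage a Redis cache key (`retry_indexing_cache_key` / `sync_indexing_cache_key`) that is deleted on both the early-return and failure paths; the collapsed hunks hide how it is set, but it reads like an in-flight marker that prevents the same document being reprocessed twice. A sketch of one way such a guard can work, under that assumption (key name and TTL are hypothetical):

```python
import redis

r = redis.Redis()
cache_key = "document_is_retried_123"  # hypothetical per-document key

# SET with nx=True succeeds only if the key does not exist: one retry at a time.
if not r.set(cache_key, "1", nx=True, ex=600):
    print("retry already in flight, skipping")
else:
    try:
        ...  # re-run indexing here
    finally:
        r.delete(cache_key)  # always clear the marker when done
```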
api/tasks/sync_website_document_indexing_task.py (+7 -5)

 from models.dataset import Dataset, Document, DocumentSegment
 from services.feature_service import FeatureService

+logger = logging.getLogger(__name__)


 @shared_task(queue="dataset")
 def sync_website_document_indexing_task(dataset_id: str, document_id: str):
     ...
         redis_client.delete(sync_indexing_cache_key)
         return

-    logging.info(click.style(f"Start sync website document: {document_id}", fg="green"))
+    logger.info(click.style(f"Start sync website document: {document_id}", fg="green"))
     document = db.session.query(Document).where(Document.id == document_id, Document.dataset_id == dataset_id).first()
     if not document:
-        logging.info(click.style(f"Document not found: {document_id}", fg="yellow"))
+        logger.info(click.style(f"Document not found: {document_id}", fg="yellow"))
         return
     try:
         # clean old data
         ...
         document.stopped_at = naive_utc_now()
         db.session.add(document)
         db.session.commit()
         ...
-        logging.info(click.style(str(ex), fg="yellow"))
+        logger.info(click.style(str(ex), fg="yellow"))
         redis_client.delete(sync_indexing_cache_key)
         ...
-        logging.exception("sync_website_document_indexing_task failed, document_id: %s", document_id)
+        logger.exception("sync_website_document_indexing_task failed, document_id: %s", document_id)
     end_at = time.perf_counter()
-    logging.info(click.style(f"Sync document: {document_id} latency: {end_at - start_at}", fg="green"))
+    logger.info(click.style(f"Sync document: {document_id} latency: {end_at - start_at}", fg="green"))

api/tests/unit_tests/extensions/test_ext_request_logging.py (+0 -0)


Some files were not shown because too many files changed in this diff.
