Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

audio.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. import logging
  2. from flask import request
  3. from flask_restx import fields, marshal_with, reqparse
  4. from werkzeug.exceptions import InternalServerError
  5. import services
  6. from controllers.web import web_ns
  7. from controllers.web.error import (
  8. AppUnavailableError,
  9. AudioTooLargeError,
  10. CompletionRequestError,
  11. NoAudioUploadedError,
  12. ProviderModelCurrentlyNotSupportError,
  13. ProviderNotInitializeError,
  14. ProviderNotSupportSpeechToTextError,
  15. ProviderQuotaExceededError,
  16. UnsupportedAudioTypeError,
  17. )
  18. from controllers.web.wraps import WebApiResource
  19. from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
  20. from core.model_runtime.errors.invoke import InvokeError
  21. from models.model import App
  22. from services.audio_service import AudioService
  23. from services.errors.audio import (
  24. AudioTooLargeServiceError,
  25. NoAudioUploadedServiceError,
  26. ProviderNotSupportSpeechToTextServiceError,
  27. UnsupportedAudioTypeServiceError,
  28. )
  29. logger = logging.getLogger(__name__)
  @web_ns.route("/audio-to-text")
  class AudioApi(WebApiResource):
      """Web API resource exposing the ``/audio-to-text`` endpoint."""

      # Response schema applied by ``marshal_with`` to the value returned by ``post``.
      audio_to_text_response_fields = {
          "text": fields.String,
      }

      @marshal_with(audio_to_text_response_fields)
      @web_ns.doc("Audio to Text")
      @web_ns.doc(description="Convert audio file to text using speech-to-text service.")
      @web_ns.doc(
          responses={
              200: "Success",
              400: "Bad Request",
              401: "Unauthorized",
              403: "Forbidden",
              413: "Audio file too large",
              415: "Unsupported audio type",
              500: "Internal Server Error",
          }
      )
      def post(self, app_model: App, end_user):
          """Convert audio to text.

          Reads the multipart upload named ``file`` from the current request and
          hands it to ``AudioService.transcript_asr``. Service-layer and
          provider-layer errors are translated into the web-facing error types
          imported from ``controllers.web.error``.

          Args:
              app_model: Application the request targets; forwarded to the service.
              end_user: Caller identity; forwarded unchanged to the service.

          Returns:
              Whatever ``AudioService.transcript_asr`` returns, marshalled with
              ``audio_to_text_response_fields``.

          Raises:
              NoAudioUploadedError: No ``file`` part in the request, or the
                  service reported that no audio was uploaded.
              AppUnavailableError: The app model configuration is broken.
              AudioTooLargeError: The service rejected the file as too large.
              UnsupportedAudioTypeError: The service rejected the audio type.
              ProviderNotSupportSpeechToTextError: Provider lacks speech-to-text.
              ProviderNotInitializeError: Provider credentials are not initialised.
              ProviderQuotaExceededError: Provider quota is exhausted.
              ProviderModelCurrentlyNotSupportError: Model is currently unsupported.
              CompletionRequestError: The model invocation failed.
              ValueError: Propagated unchanged from the service.
              InternalServerError: Any other unexpected failure.
          """
          # Use .get() so a missing upload surfaces as the dedicated
          # NoAudioUploadedError instead of werkzeug's generic BadRequestKeyError.
          file = request.files.get("file")
          if file is None:
              raise NoAudioUploadedError()

          try:
              response = AudioService.transcript_asr(app_model=app_model, file=file, end_user=end_user)
              return response
          except services.errors.app_model_config.AppModelConfigBrokenError:
              logger.exception("App model config broken.")
              raise AppUnavailableError()
          except NoAudioUploadedServiceError:
              raise NoAudioUploadedError()
          except AudioTooLargeServiceError as e:
              raise AudioTooLargeError(str(e))
          except UnsupportedAudioTypeServiceError:
              raise UnsupportedAudioTypeError()
          except ProviderNotSupportSpeechToTextServiceError:
              raise ProviderNotSupportSpeechToTextError()
          except ProviderTokenNotInitError as ex:
              raise ProviderNotInitializeError(ex.description)
          except QuotaExceededError:
              raise ProviderQuotaExceededError()
          except ModelCurrentlyNotSupportError:
              raise ProviderModelCurrentlyNotSupportError()
          except InvokeError as e:
              raise CompletionRequestError(e.description)
          except ValueError:
              # Listed before the catch-all so ValueError is not turned into a 500;
              # bare ``raise`` keeps the original traceback intact.
              raise
          except Exception:
              logger.exception("Failed to handle post request to AudioApi")
              raise InternalServerError()
  @web_ns.route("/text-to-audio")
  class TextApi(WebApiResource):
      """Web API resource exposing the ``/text-to-audio`` endpoint."""

      # Response schema applied by ``marshal_with`` to the value returned by ``post``.
      text_to_audio_response_fields = {
          "audio_url": fields.String,
          "duration": fields.Float,
      }

      @marshal_with(text_to_audio_response_fields)
      @web_ns.doc("Text to Audio")
      @web_ns.doc(description="Convert text to audio using text-to-speech service.")
      @web_ns.doc(
          responses={
              200: "Success",
              400: "Bad Request",
              401: "Unauthorized",
              403: "Forbidden",
              500: "Internal Server Error",
          }
      )
      def post(self, app_model: App, end_user):
          """Convert text to audio.

          Parses ``message_id``, ``voice``, ``text`` and ``streaming`` from the
          JSON body and calls ``AudioService.transcript_tts``. Service-layer and
          provider-layer errors are translated into the web-facing error types
          imported from ``controllers.web.error``.

          Args:
              app_model: Application the request targets; forwarded to the service.
              end_user: Caller identity; its ``external_user_id`` attribute is
                  forwarded to the service as ``end_user``.

          Returns:
              Whatever ``AudioService.transcript_tts`` returns, marshalled with
              ``text_to_audio_response_fields``.

          Raises:
              AppUnavailableError: The app model configuration is broken.
              NoAudioUploadedError: The service reported that no audio was uploaded.
              AudioTooLargeError: The service rejected the audio as too large.
              UnsupportedAudioTypeError: The service rejected the audio type.
              ProviderNotSupportSpeechToTextError: Provider lacks the capability.
              ProviderNotInitializeError: Provider credentials are not initialised.
              ProviderQuotaExceededError: Provider quota is exhausted.
              ProviderModelCurrentlyNotSupportError: Model is currently unsupported.
              CompletionRequestError: The model invocation failed.
              ValueError: Propagated unchanged from the service.
              InternalServerError: Any other unexpected failure in the service call.
          """
          # Parse outside the ``try``: the parser reports client errors by raising
          # an HTTP exception, which the catch-all below would otherwise convert
          # into a 500 Internal Server Error.
          parser = reqparse.RequestParser()
          parser.add_argument("message_id", type=str, required=False, location="json")
          parser.add_argument("voice", type=str, location="json")
          parser.add_argument("text", type=str, location="json")
          # NOTE(review): "streaming" is accepted but never forwarded to the
          # service below — confirm whether it is still needed.
          parser.add_argument("streaming", type=bool, location="json")
          args = parser.parse_args()

          message_id = args.get("message_id")
          text = args.get("text")
          voice = args.get("voice")

          try:
              response = AudioService.transcript_tts(
                  app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
              )
              return response
          except services.errors.app_model_config.AppModelConfigBrokenError:
              logger.exception("App model config broken.")
              raise AppUnavailableError()
          except NoAudioUploadedServiceError:
              raise NoAudioUploadedError()
          except AudioTooLargeServiceError as e:
              raise AudioTooLargeError(str(e))
          except UnsupportedAudioTypeServiceError:
              raise UnsupportedAudioTypeError()
          except ProviderNotSupportSpeechToTextServiceError:
              raise ProviderNotSupportSpeechToTextError()
          except ProviderTokenNotInitError as ex:
              raise ProviderNotInitializeError(ex.description)
          except QuotaExceededError:
              raise ProviderQuotaExceededError()
          except ModelCurrentlyNotSupportError:
              raise ProviderModelCurrentlyNotSupportError()
          except InvokeError as e:
              raise CompletionRequestError(e.description)
          except ValueError:
              # Listed before the catch-all so ValueError is not turned into a 500;
              # bare ``raise`` keeps the original traceback intact.
              raise
          except Exception:
              logger.exception("Failed to handle post request to TextApi")
              raise InternalServerError()