You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

audio.py 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. import logging
  2. from flask import request
  3. from flask_restx import Resource, reqparse
  4. from werkzeug.exceptions import InternalServerError
  5. import services
  6. from controllers.service_api import service_api_ns
  7. from controllers.service_api.app.error import (
  8. AppUnavailableError,
  9. AudioTooLargeError,
  10. CompletionRequestError,
  11. NoAudioUploadedError,
  12. ProviderModelCurrentlyNotSupportError,
  13. ProviderNotInitializeError,
  14. ProviderNotSupportSpeechToTextError,
  15. ProviderQuotaExceededError,
  16. UnsupportedAudioTypeError,
  17. )
  18. from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
  19. from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
  20. from core.model_runtime.errors.invoke import InvokeError
  21. from models.model import App, EndUser
  22. from services.audio_service import AudioService
  23. from services.errors.audio import (
  24. AudioTooLargeServiceError,
  25. NoAudioUploadedServiceError,
  26. ProviderNotSupportSpeechToTextServiceError,
  27. UnsupportedAudioTypeServiceError,
  28. )
  29. logger = logging.getLogger(__name__)
  30. @service_api_ns.route("/audio-to-text")
  31. class AudioApi(Resource):
  32. @service_api_ns.doc("audio_to_text")
  33. @service_api_ns.doc(description="Convert audio to text using speech-to-text")
  34. @service_api_ns.doc(
  35. responses={
  36. 200: "Audio successfully transcribed",
  37. 400: "Bad request - no audio or invalid audio",
  38. 401: "Unauthorized - invalid API token",
  39. 413: "Audio file too large",
  40. 415: "Unsupported audio type",
  41. 500: "Internal server error",
  42. }
  43. )
  44. @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
  45. def post(self, app_model: App, end_user: EndUser):
  46. """Convert audio to text using speech-to-text.
  47. Accepts an audio file upload and returns the transcribed text.
  48. """
  49. file = request.files["file"]
  50. try:
  51. response = AudioService.transcript_asr(app_model=app_model, file=file, end_user=end_user)
  52. return response
  53. except services.errors.app_model_config.AppModelConfigBrokenError:
  54. logger.exception("App model config broken.")
  55. raise AppUnavailableError()
  56. except NoAudioUploadedServiceError:
  57. raise NoAudioUploadedError()
  58. except AudioTooLargeServiceError as e:
  59. raise AudioTooLargeError(str(e))
  60. except UnsupportedAudioTypeServiceError:
  61. raise UnsupportedAudioTypeError()
  62. except ProviderNotSupportSpeechToTextServiceError:
  63. raise ProviderNotSupportSpeechToTextError()
  64. except ProviderTokenNotInitError as ex:
  65. raise ProviderNotInitializeError(ex.description)
  66. except QuotaExceededError:
  67. raise ProviderQuotaExceededError()
  68. except ModelCurrentlyNotSupportError:
  69. raise ProviderModelCurrentlyNotSupportError()
  70. except InvokeError as e:
  71. raise CompletionRequestError(e.description)
  72. except ValueError as e:
  73. raise e
  74. except Exception as e:
  75. logger.exception("internal server error.")
  76. raise InternalServerError()
  77. # Define parser for text-to-audio API
  78. text_to_audio_parser = reqparse.RequestParser()
  79. text_to_audio_parser.add_argument("message_id", type=str, required=False, location="json", help="Message ID")
  80. text_to_audio_parser.add_argument("voice", type=str, location="json", help="Voice to use for TTS")
  81. text_to_audio_parser.add_argument("text", type=str, location="json", help="Text to convert to audio")
  82. text_to_audio_parser.add_argument("streaming", type=bool, location="json", help="Enable streaming response")
  83. @service_api_ns.route("/text-to-audio")
  84. class TextApi(Resource):
  85. @service_api_ns.expect(text_to_audio_parser)
  86. @service_api_ns.doc("text_to_audio")
  87. @service_api_ns.doc(description="Convert text to audio using text-to-speech")
  88. @service_api_ns.doc(
  89. responses={
  90. 200: "Text successfully converted to audio",
  91. 400: "Bad request - invalid parameters",
  92. 401: "Unauthorized - invalid API token",
  93. 500: "Internal server error",
  94. }
  95. )
  96. @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
  97. def post(self, app_model: App, end_user: EndUser):
  98. """Convert text to audio using text-to-speech.
  99. Converts the provided text to audio using the specified voice.
  100. """
  101. try:
  102. args = text_to_audio_parser.parse_args()
  103. message_id = args.get("message_id", None)
  104. text = args.get("text", None)
  105. voice = args.get("voice", None)
  106. response = AudioService.transcript_tts(
  107. app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
  108. )
  109. return response
  110. except services.errors.app_model_config.AppModelConfigBrokenError:
  111. logger.exception("App model config broken.")
  112. raise AppUnavailableError()
  113. except NoAudioUploadedServiceError:
  114. raise NoAudioUploadedError()
  115. except AudioTooLargeServiceError as e:
  116. raise AudioTooLargeError(str(e))
  117. except UnsupportedAudioTypeServiceError:
  118. raise UnsupportedAudioTypeError()
  119. except ProviderNotSupportSpeechToTextServiceError:
  120. raise ProviderNotSupportSpeechToTextError()
  121. except ProviderTokenNotInitError as ex:
  122. raise ProviderNotInitializeError(ex.description)
  123. except QuotaExceededError:
  124. raise ProviderQuotaExceededError()
  125. except ModelCurrentlyNotSupportError:
  126. raise ProviderModelCurrentlyNotSupportError()
  127. except InvokeError as e:
  128. raise CompletionRequestError(e.description)
  129. except ValueError as e:
  130. raise e
  131. except Exception as e:
  132. logger.exception("internal server error.")
  133. raise InternalServerError()