Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

audio.py 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. import logging
  2. from flask import request
  3. from flask_restx import fields, marshal_with, reqparse
  4. from werkzeug.exceptions import InternalServerError
  5. import services
  6. from controllers.web import api
  7. from controllers.web.error import (
  8. AppUnavailableError,
  9. AudioTooLargeError,
  10. CompletionRequestError,
  11. NoAudioUploadedError,
  12. ProviderModelCurrentlyNotSupportError,
  13. ProviderNotInitializeError,
  14. ProviderNotSupportSpeechToTextError,
  15. ProviderQuotaExceededError,
  16. UnsupportedAudioTypeError,
  17. )
  18. from controllers.web.wraps import WebApiResource
  19. from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
  20. from core.model_runtime.errors.invoke import InvokeError
  21. from models.model import App
  22. from services.audio_service import AudioService
  23. from services.errors.audio import (
  24. AudioTooLargeServiceError,
  25. NoAudioUploadedServiceError,
  26. ProviderNotSupportSpeechToTextServiceError,
  27. UnsupportedAudioTypeServiceError,
  28. )
# Module-level logger, named after this module via ``__name__``; used by the
# resource handlers below to record failures before they are re-raised.
logger = logging.getLogger(__name__)
  30. class AudioApi(WebApiResource):
  31. audio_to_text_response_fields = {
  32. "text": fields.String,
  33. }
  34. @marshal_with(audio_to_text_response_fields)
  35. @api.doc("Audio to Text")
  36. @api.doc(description="Convert audio file to text using speech-to-text service.")
  37. @api.doc(
  38. responses={
  39. 200: "Success",
  40. 400: "Bad Request",
  41. 401: "Unauthorized",
  42. 403: "Forbidden",
  43. 413: "Audio file too large",
  44. 415: "Unsupported audio type",
  45. 500: "Internal Server Error",
  46. }
  47. )
  48. def post(self, app_model: App, end_user):
  49. """Convert audio to text"""
  50. file = request.files["file"]
  51. try:
  52. response = AudioService.transcript_asr(app_model=app_model, file=file, end_user=end_user)
  53. return response
  54. except services.errors.app_model_config.AppModelConfigBrokenError:
  55. logger.exception("App model config broken.")
  56. raise AppUnavailableError()
  57. except NoAudioUploadedServiceError:
  58. raise NoAudioUploadedError()
  59. except AudioTooLargeServiceError as e:
  60. raise AudioTooLargeError(str(e))
  61. except UnsupportedAudioTypeServiceError:
  62. raise UnsupportedAudioTypeError()
  63. except ProviderNotSupportSpeechToTextServiceError:
  64. raise ProviderNotSupportSpeechToTextError()
  65. except ProviderTokenNotInitError as ex:
  66. raise ProviderNotInitializeError(ex.description)
  67. except QuotaExceededError:
  68. raise ProviderQuotaExceededError()
  69. except ModelCurrentlyNotSupportError:
  70. raise ProviderModelCurrentlyNotSupportError()
  71. except InvokeError as e:
  72. raise CompletionRequestError(e.description)
  73. except ValueError as e:
  74. raise e
  75. except Exception as e:
  76. logger.exception("Failed to handle post request to AudioApi")
  77. raise InternalServerError()
  78. class TextApi(WebApiResource):
  79. text_to_audio_response_fields = {
  80. "audio_url": fields.String,
  81. "duration": fields.Float,
  82. }
  83. @marshal_with(text_to_audio_response_fields)
  84. @api.doc("Text to Audio")
  85. @api.doc(description="Convert text to audio using text-to-speech service.")
  86. @api.doc(
  87. responses={
  88. 200: "Success",
  89. 400: "Bad Request",
  90. 401: "Unauthorized",
  91. 403: "Forbidden",
  92. 500: "Internal Server Error",
  93. }
  94. )
  95. def post(self, app_model: App, end_user):
  96. """Convert text to audio"""
  97. try:
  98. parser = reqparse.RequestParser()
  99. parser.add_argument("message_id", type=str, required=False, location="json")
  100. parser.add_argument("voice", type=str, location="json")
  101. parser.add_argument("text", type=str, location="json")
  102. parser.add_argument("streaming", type=bool, location="json")
  103. args = parser.parse_args()
  104. message_id = args.get("message_id", None)
  105. text = args.get("text", None)
  106. voice = args.get("voice", None)
  107. response = AudioService.transcript_tts(
  108. app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
  109. )
  110. return response
  111. except services.errors.app_model_config.AppModelConfigBrokenError:
  112. logger.exception("App model config broken.")
  113. raise AppUnavailableError()
  114. except NoAudioUploadedServiceError:
  115. raise NoAudioUploadedError()
  116. except AudioTooLargeServiceError as e:
  117. raise AudioTooLargeError(str(e))
  118. except UnsupportedAudioTypeServiceError:
  119. raise UnsupportedAudioTypeError()
  120. except ProviderNotSupportSpeechToTextServiceError:
  121. raise ProviderNotSupportSpeechToTextError()
  122. except ProviderTokenNotInitError as ex:
  123. raise ProviderNotInitializeError(ex.description)
  124. except QuotaExceededError:
  125. raise ProviderQuotaExceededError()
  126. except ModelCurrentlyNotSupportError:
  127. raise ProviderModelCurrentlyNotSupportError()
  128. except InvokeError as e:
  129. raise CompletionRequestError(e.description)
  130. except ValueError as e:
  131. raise e
  132. except Exception as e:
  133. logger.exception("Failed to handle post request to TextApi")
  134. raise InternalServerError()
# Register the two resources defined above on the imported ``api`` object,
# each under its own URL path.
api.add_resource(AudioApi, "/audio-to-text")
api.add_resource(TextApi, "/text-to-audio")