
tts models support (#2033)

Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com>
tags/0.5.0
Charlie.Wei 1 year ago
commit 6355e61eb8
86 changed files with 1645 additions and 133 deletions
1. api/Dockerfile (+1 -1)
2. api/app.py (+1 -0)
3. api/controllers/console/app/audio.py (+46 -3)
4. api/controllers/console/explore/audio.py (+45 -3)
5. api/controllers/console/explore/parameter.py (+2 -0)
6. api/controllers/service_api/app/app.py (+2 -0)
7. api/controllers/service_api/app/audio.py (+50 -4)
8. api/controllers/web/app.py (+2 -0)
9. api/controllers/web/audio.py (+41 -2)
10. api/core/application_manager.py (+6 -0)
11. api/core/entities/application_entities.py (+1 -1)
12. api/core/model_manager.py (+24 -2)
13. api/core/model_runtime/entities/model_entities.py (+9 -1)
14. api/core/model_runtime/model_providers/__base/tts_model.py (+42 -0)
15. api/core/model_runtime/model_providers/openai/openai.yaml (+1 -0)
16. api/core/model_runtime/model_providers/openai/tts/__init__.py (+0 -0)
17. api/core/model_runtime/model_providers/openai/tts/tts-1-hd.yaml (+7 -0)
18. api/core/model_runtime/model_providers/openai/tts/tts-1.yaml (+7 -0)
19. api/core/model_runtime/model_providers/openai/tts/tts.py (+235 -0)
20. api/fields/app_fields.py (+1 -0)
21. api/migrations/versions/b24be59fbb04_.py (+32 -0)
22. api/models/model.py (+10 -0)
23. api/requirements.txt (+1 -0)
24. api/services/account_service.py (+7 -7)
25. api/services/app_model_config_service.py (+16 -0)
26. api/services/audio_service.py (+28 -6)
27. api/services/errors/audio.py (+6 -1)
28. web/app/components/app/chat/answer/index.tsx (+10 -1)
29. web/app/components/app/chat/index.tsx (+3 -0)
30. web/app/components/app/chat/style.module.css (+2 -0)
31. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/citations-and-attributions-preview@2x.png (BIN)
32. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/conversation-opener-preview@2x.png (BIN)
33. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/more-like-this-preview@2x.png (BIN)
34. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/next-question-suggestion-preview@2x.png (BIN)
35. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/opening-suggestion-preview@2x.png (BIN)
36. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/speech-to-text-preview@2x.png (BIN)
37. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-assistant@2x.png (BIN)
38. web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-completion@2x.png (BIN)
39. web/app/components/app/configuration/config/feature/choose-feature/feature-item/style.module.css (+6 -1)
40. web/app/components/app/configuration/config/feature/choose-feature/index.tsx (+28 -1)
41. web/app/components/app/configuration/config/feature/use-feature.tsx (+8 -0)
42. web/app/components/app/configuration/config/index.tsx (+21 -6)
43. web/app/components/app/configuration/debug/index.tsx (+10 -0)
44. web/app/components/app/configuration/features/chat-group/index.tsx (+8 -0)
45. web/app/components/app/configuration/features/chat-group/speech-to-text/index.tsx (+2 -2)
46. web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx (+25 -0)
47. web/app/components/app/configuration/features/experience-enchance-group/index.tsx (+24 -3)
48. web/app/components/app/configuration/index.tsx (+15 -0)
49. web/app/components/app/log/list.tsx (+2 -0)
50. web/app/components/app/text-generate/item/index.tsx (+15 -2)
51. web/app/components/app/text-generate/saved-items/index.tsx (+13 -0)
52. web/app/components/base/audio-btn/index.tsx (+110 -0)
53. web/app/components/base/audio-btn/style.module.css (+16 -0)
54. web/app/components/base/icons/assets/vender/line/mediaAndDevices/speaker.svg (+15 -0)
55. web/app/components/base/icons/assets/vender/solid/mediaAndDevices/speaker.svg (+15 -0)
56. web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.json (+112 -0)
57. web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.tsx (+16 -0)
58. web/app/components/base/icons/src/vender/line/mediaAndDevices/index.ts (+1 -0)
59. web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.json (+112 -0)
60. web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.tsx (+16 -0)
61. web/app/components/base/icons/src/vender/solid/mediaAndDevices/index.ts (+1 -0)
62. web/app/components/develop/secret-key/assets/pause.svg (+10 -0)
63. web/app/components/develop/secret-key/assets/play.svg (+11 -0)
64. web/app/components/develop/secret-key/assets/stop.svg (+11 -0)
65. web/app/components/develop/template/template.en.mdx (+63 -15)
66. web/app/components/develop/template/template.zh.mdx (+62 -14)
67. web/app/components/develop/template/template_chat.en.mdx (+71 -23)
68. web/app/components/develop/template/template_chat.zh.mdx (+65 -17)
69. web/app/components/header/account-setting/model-provider-page/declarations.ts (+2 -0)
70. web/app/components/header/account-setting/model-provider-page/hooks.ts (+2 -1)
71. web/app/components/header/account-setting/model-provider-page/index.tsx (+3 -1)
72. web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx (+35 -5)
73. web/app/components/share/chat/index.tsx (+15 -4)
74. web/app/components/share/chatbot/index.tsx (+7 -1)
75. web/app/components/share/text-generation/index.tsx (+13 -4)
76. web/app/components/share/text-generation/result/index.tsx (+3 -0)
77. web/context/debug-configuration.ts (+8 -0)
78. web/i18n/lang/app-api.en.ts (+3 -0)
79. web/i18n/lang/app-api.zh.ts (+4 -1)
80. web/i18n/lang/app-debug.en.ts (+5 -0)
81. web/i18n/lang/app-debug.zh.ts (+5 -0)
82. web/i18n/lang/common.en.ts (+4 -0)
83. web/i18n/lang/common.zh.ts (+4 -0)
84. web/models/debug.ts (+3 -0)
85. web/service/share.ts (+4 -0)
86. web/types/app.ts (+3 -0)

api/Dockerfile (+1 -1)

WORKDIR /app/api

RUN apt-get update \
-    && apt-get install -y --no-install-recommends bash curl wget vim nodejs \
+    && apt-get install -y --no-install-recommends bash curl wget vim nodejs ffmpeg \
    && apt-get autoremove \
    && rm -rf /var/lib/apt/lists/*



api/app.py (+1 -0)

    else:
        return None


@login_manager.unauthorized_handler
def unauthorized_handler():
    """Handle unauthorized requests."""

api/controllers/console/app/audio.py (+46 -3)

        file = request.files['file']

        try:
-            response = AudioService.transcript(
+            response = AudioService.transcript_asr(
                tenant_id=app_model.tenant_id,
                file=file,
-                promot=app_model.app_model_config.pre_prompt
            )

            return response
        except Exception as e:
            logging.exception("internal server error.")
            raise InternalServerError()


-api.add_resource(ChatMessageAudioApi, '/apps/<uuid:app_id>/audio-to-text')

+class ChatMessageTextApi(Resource):
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self, app_id):
+        app_id = str(app_id)
+        app_model = _get_app(app_id, None)
+        try:
+            response = AudioService.transcript_tts(
+                tenant_id=app_model.tenant_id,
+                text=request.form['text'],
+                streaming=False
+            )
+
+            return {'data': response.data.decode('latin1')}
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+
+
+api.add_resource(ChatMessageAudioApi, '/apps/<uuid:app_id>/audio-to-text')
+api.add_resource(ChatMessageTextApi, '/apps/<uuid:app_id>/text-to-audio')

api/controllers/console/explore/audio.py (+45 -3)

        file = request.files['file']

        try:
-            response = AudioService.transcript(
+            response = AudioService.transcript_asr(
                tenant_id=app_model.tenant_id,
                file=file,
            )
        except Exception as e:
            logging.exception("internal server error.")
            raise InternalServerError()


-api.add_resource(ChatAudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text', endpoint='installed_app_audio')

+class ChatTextApi(InstalledAppResource):
+    def post(self, installed_app):
+        app_model = installed_app.app
+        app_model_config: AppModelConfig = app_model.app_model_config
+
+        if not app_model_config.text_to_speech_dict['enabled']:
+            raise AppUnavailableError()
+
+        try:
+            response = AudioService.transcript_tts(
+                tenant_id=app_model.tenant_id,
+                text=request.form['text'],
+                streaming=False
+            )
+            return {'data': response.data.decode('latin1')}
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+
+
+api.add_resource(ChatAudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text', endpoint='installed_app_audio')
+api.add_resource(ChatTextApi, '/installed-apps/<uuid:installed_app_id>/text-to-audio', endpoint='installed_app_text')

api/controllers/console/explore/parameter.py (+2 -0)

    'suggested_questions': fields.Raw,
    'suggested_questions_after_answer': fields.Raw,
    'speech_to_text': fields.Raw,
+    'text_to_speech': fields.Raw,
    'retriever_resource': fields.Raw,
    'annotation_reply': fields.Raw,
    'more_like_this': fields.Raw,

    'suggested_questions': app_model_config.suggested_questions_list,
    'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
    'speech_to_text': app_model_config.speech_to_text_dict,
+    'text_to_speech': app_model_config.text_to_speech_dict,
    'retriever_resource': app_model_config.retriever_resource_dict,
    'annotation_reply': app_model_config.annotation_reply_dict,
    'more_like_this': app_model_config.more_like_this_dict,

api/controllers/service_api/app/app.py (+2 -0)

    'suggested_questions': fields.Raw,
    'suggested_questions_after_answer': fields.Raw,
    'speech_to_text': fields.Raw,
+    'text_to_speech': fields.Raw,
    'retriever_resource': fields.Raw,
    'annotation_reply': fields.Raw,
    'more_like_this': fields.Raw,

    'suggested_questions': app_model_config.suggested_questions_list,
    'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
    'speech_to_text': app_model_config.speech_to_text_dict,
+    'text_to_speech': app_model_config.text_to_speech_dict,
    'retriever_resource': app_model_config.retriever_resource_dict,
    'annotation_reply': app_model_config.annotation_reply_dict,
    'more_like_this': app_model_config.more_like_this_dict,

api/controllers/service_api/app/audio.py (+50 -4)

from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from flask import request
+from flask_restful import reqparse
from models.model import App, AppModelConfig
from services.audio_service import AudioService
from services.errors.audio import (AudioTooLargeServiceError, NoAudioUploadedServiceError,

        app_model_config: AppModelConfig = app_model.app_model_config

        if not app_model_config.speech_to_text_dict['enabled']:
-            raise AppUnavailableError()
+            raise AppUnavailableError()

        file = request.files['file']

        try:
-            response = AudioService.transcript(
+            response = AudioService.transcript_asr(
                tenant_id=app_model.tenant_id,
                file=file,
+                end_user=end_user
            )

            return response
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+        except ValueError as e:
+            raise e
        except Exception as e:
            logging.exception("internal server error.")
            raise InternalServerError()


+class TextApi(AppApiResource):
+    def post(self, app_model: App, end_user):
+        parser = reqparse.RequestParser()
+        parser.add_argument('text', type=str, required=True, nullable=False, location='json')
+        parser.add_argument('user', type=str, required=True, nullable=False, location='json')
+        args = parser.parse_args()
+
+        try:
+            response = AudioService.transcript_tts(
+                tenant_id=app_model.tenant_id,
+                text=args['text'],
+                end_user=args['user'],
+                streaming=False
+            )
+
+            return response
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+

-api.add_resource(AudioApi, '/audio-to-text')
+api.add_resource(AudioApi, '/audio-to-text')
+api.add_resource(TextApi, '/text-to-audio')
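With TextApi registered, external callers can synthesize audio through the Service API. A minimal client sketch (the base URL and API key below are placeholders; the endpoint expects the JSON fields `text` and `user` parsed above, and returns the audio body):

import requests

# Placeholders: substitute your deployment's Service API base URL and app API key.
API_BASE = 'http://localhost:5001/v1'
API_KEY = 'app-xxxxxxxx'

resp = requests.post(
    f'{API_BASE}/text-to-audio',
    headers={'Authorization': f'Bearer {API_KEY}'},
    json={'text': 'Hello world!', 'user': 'end-user-123'},
)
resp.raise_for_status()
# The bundled OpenAI tts-1 / tts-1-hd model configs declare audio_type mp3.
with open('reply.mp3', 'wb') as f:
    f.write(resp.content)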

api/controllers/web/app.py (+2 -0)

    'suggested_questions': fields.Raw,
    'suggested_questions_after_answer': fields.Raw,
    'speech_to_text': fields.Raw,
+    'text_to_speech': fields.Raw,
    'retriever_resource': fields.Raw,
    'annotation_reply': fields.Raw,
    'more_like_this': fields.Raw,

    'suggested_questions': app_model_config.suggested_questions_list,
    'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
    'speech_to_text': app_model_config.speech_to_text_dict,
+    'text_to_speech': app_model_config.text_to_speech_dict,
    'retriever_resource': app_model_config.retriever_resource_dict,
    'annotation_reply': app_model_config.annotation_reply_dict,
    'more_like_this': app_model_config.more_like_this_dict,

api/controllers/web/audio.py (+41 -2)

        file = request.files['file']

        try:
-            response = AudioService.transcript(
+            response = AudioService.transcript_asr(
                tenant_id=app_model.tenant_id,
                file=file,
            )
            logging.exception("internal server error.")
            raise InternalServerError()


-api.add_resource(AudioApi, '/audio-to-text')

+class TextApi(WebApiResource):
+    def post(self, app_model: App, end_user):
+        try:
+            response = AudioService.transcript_tts(
+                tenant_id=app_model.tenant_id,
+                text=request.form['text'],
+                end_user=end_user.external_user_id,
+                streaming=False
+            )
+
+            return {'data': response.data.decode('latin1')}
+        except services.errors.app_model_config.AppModelConfigBrokenError:
+            logging.exception("App model config broken.")
+            raise AppUnavailableError()
+        except NoAudioUploadedServiceError:
+            raise NoAudioUploadedError()
+        except AudioTooLargeServiceError as e:
+            raise AudioTooLargeError(str(e))
+        except UnsupportedAudioTypeServiceError:
+            raise UnsupportedAudioTypeError()
+        except ProviderNotSupportSpeechToTextServiceError:
+            raise ProviderNotSupportSpeechToTextError()
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        except QuotaExceededError:
+            raise ProviderQuotaExceededError()
+        except ModelCurrentlyNotSupportError:
+            raise ProviderModelCurrentlyNotSupportError()
+        except InvokeError as e:
+            raise CompletionRequestError(e.description)
+        except ValueError as e:
+            raise e
+        except Exception as e:
+            logging.exception("internal server error.")
+            raise InternalServerError()
+
+
+api.add_resource(AudioApi, '/audio-to-text')
+api.add_resource(TextApi, '/text-to-audio')

api/core/application_manager.py (+6 -0)

        if 'enabled' in speech_to_text_dict and speech_to_text_dict['enabled']:
            properties['speech_to_text'] = True

+        # text to speech
+        text_to_speech_dict = copy_app_model_config_dict.get('text_to_speech')
+        if text_to_speech_dict:
+            if 'enabled' in text_to_speech_dict and text_to_speech_dict['enabled']:
+                properties['text_to_speech'] = True
+
        # sensitive word avoidance
        sensitive_word_avoidance_dict = copy_app_model_config_dict.get('sensitive_word_avoidance')
        if sensitive_word_avoidance_dict:

api/core/entities/application_entities.py (+1 -1)

    show_retrieve_source: bool = False
    more_like_this: bool = False
    speech_to_text: bool = False
+    text_to_speech: bool = False
    sensitive_word_avoidance: Optional[SensitiveWordAvoidanceEntity] = None

    query: Optional[str] = None
    files: list[FileObj] = []
    user_id: str

    # extras
    stream: bool
    invoke_from: InvokeFrom

api/core/model_manager.py (+24 -2)

from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
+from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.provider_manager import ProviderManager

            user=user
        )

-    def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None, **params) \
+    def invoke_speech2text(self, file: IO[bytes], user: Optional[str] = None) \
            -> str:
        """
        Invoke speech-to-text model

            model=self.model,
            credentials=self.credentials,
            file=file,
-            user=user,
-            **params
+            user=user
        )

+    def invoke_tts(self, content_text: str, streaming: bool, user: Optional[str] = None) \
+            -> str:
+        """
+        Invoke text-to-speech model
+
+        :param content_text: text content to be converted to speech
+        :param user: unique user id
+        :param streaming: whether to stream the output
+        :return: audio for the given text
+        """
+        if not isinstance(self.model_type_instance, TTSModel):
+            raise Exception("Model type instance is not TTSModel")
+
+        self.model_type_instance = cast(TTSModel, self.model_type_instance)
+        return self.model_type_instance.invoke(
+            model=self.model,
+            credentials=self.credentials,
+            content_text=content_text,
+            user=user,
+            streaming=streaming
+        )
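Downstream services obtain a TTS-capable ModelInstance from ModelManager and call invoke_tts; a minimal sketch mirroring AudioService.transcript_tts further down (the tenant id is a placeholder, and the call assumes an initialized application context):

from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType

model_manager = ModelManager()
model_instance = model_manager.get_default_model_instance(
    tenant_id='your-tenant-id',  # placeholder tenant id
    model_type=ModelType.TTS,
)
# With streaming=False the OpenAI implementation returns a Flask Response
# whose body is the synthesized audio (see tts.py below).
audio_response = model_instance.invoke_tts(
    content_text='Hello world!',
    streaming=False,
)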





api/core/model_runtime/entities/model_entities.py (+9 -1)

    RERANK = "rerank"
    SPEECH2TEXT = "speech2text"
    MODERATION = "moderation"
-    # TTS = "tts"
+    TTS = "tts"
    # TEXT2IMG = "text2img"

    @classmethod
            return cls.RERANK
        elif origin_model_type == 'speech2text' or origin_model_type == cls.SPEECH2TEXT.value:
            return cls.SPEECH2TEXT
+        elif origin_model_type == 'tts' or origin_model_type == cls.TTS.value:
+            return cls.TTS
        elif origin_model_type == cls.MODERATION.value:
            return cls.MODERATION
        else:

            return 'reranking'
        elif self == self.SPEECH2TEXT:
            return 'speech2text'
+        elif self == self.TTS:
+            return 'tts'
        elif self == self.MODERATION:
            return 'moderation'
        else:

    FILE_UPLOAD_LIMIT = "file_upload_limit"
    SUPPORTED_FILE_EXTENSIONS = "supported_file_extensions"
    MAX_CHARACTERS_PER_CHUNK = "max_characters_per_chunk"
+    DEFAULT_VOICE = "default_voice"
+    WORD_LIMIT = "word_limit"
+    AUDIO_TYPE = "audio_type"
+    MAX_WORKERS = "max_workers"


class ProviderModel(BaseModel):

api/core/model_runtime/model_providers/__base/tts_model.py (+42 -0)

from abc import abstractmethod
from typing import Optional

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.ai_model import AIModel


class TTSModel(AIModel):
    """
    Base model class for text-to-speech models.
    """
    model_type: ModelType = ModelType.TTS

    def invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None):
        """
        Invoke text-to-speech model

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param streaming: whether the output is streamed
        :param user: unique user id
        :return: synthesized audio
        """
        try:
            return self._invoke(model=model, credentials=credentials, user=user,
                                streaming=streaming, content_text=content_text)
        except Exception as e:
            raise self._transform_invoke_error(e)

    @abstractmethod
    def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool, user: Optional[str] = None):
        """
        Invoke text-to-speech model

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param streaming: whether the output is streamed
        :param user: unique user id
        :return: synthesized audio
        """
        raise NotImplementedError
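Concrete providers subclass TTSModel and implement only _invoke; error mapping comes from AIModel via _transform_invoke_error. A minimal sketch of such a subclass (the provider SDK call is stubbed out as a hypothetical fake_synthesize helper; the real OpenAI implementation follows below):

from typing import Optional

from core.model_runtime.model_providers.__base.tts_model import TTSModel


def fake_synthesize(text: str) -> bytes:
    # Hypothetical stand-in for a provider SDK call that returns audio bytes.
    return b'\x00'


class ExampleText2SpeechModel(TTSModel):
    def _invoke(self, model: str, credentials: dict, content_text: str,
                streaming: bool, user: Optional[str] = None):
        # A real provider would honor `model`, `credentials` and `streaming`;
        # this sketch just returns raw audio bytes for the given text.
        return fake_synthesize(content_text)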

api/core/model_runtime/model_providers/openai/openai.yaml (+1 -0)

  - text-embedding
  - speech2text
  - moderation
+  - tts
configurate_methods:
  - predefined-model
  - customizable-model

api/core/model_runtime/model_providers/openai/tts/__init__.py (+0 -0)


api/core/model_runtime/model_providers/openai/tts/tts-1-hd.yaml (+7 -0)

model: tts-1-hd
model_type: tts
model_properties:
default_voice: 'alloy'
word_limit: 120
audio_type: 'mp3'
max_workers: 5

api/core/model_runtime/model_providers/openai/tts/tts-1.yaml (+7 -0)

model: tts-1
model_type: tts
model_properties:
default_voice: 'alloy'
word_limit: 120
audio_type: 'mp3'
max_workers: 5

api/core/model_runtime/model_providers/openai/tts/tts.py (+235 -0)

import uuid
import hashlib
import subprocess
from io import BytesIO
from typing import Optional
from functools import reduce
from pydub import AudioSegment

from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.tts_model import TTSModel
from core.model_runtime.model_providers.openai._common import _CommonOpenAI

from typing_extensions import Literal
from flask import Response, stream_with_context
from openai import OpenAI
import concurrent.futures


class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
    """
    Model class for the OpenAI text-to-speech model.
    """

    def _invoke(self, model: str, credentials: dict, content_text: str, streaming: bool,
                user: Optional[str] = None) -> any:
        """
        _invoke text2speech model

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param streaming: whether the output is streamed
        :param user: unique user id
        :return: audio response for the given text
        """
        self._is_ffmpeg_installed()
        audio_type = self._get_model_audio_type(model, credentials)
        if streaming:
            return Response(stream_with_context(self._tts_invoke_streaming(model=model,
                                                                            credentials=credentials,
                                                                            content_text=content_text,
                                                                            user=user)),
                            status=200, mimetype=f'audio/{audio_type}')
        else:
            return self._tts_invoke(model=model, credentials=credentials, content_text=content_text, user=user)

    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
        """
        validate credentials for text2speech model

        :param model: model name
        :param credentials: model credentials
        :param user: unique user id
        :return: None, raises CredentialsValidateFailedError on failure
        """
        try:
            self._tts_invoke(
                model=model,
                credentials=credentials,
                content_text='Hello world!',
                user=user
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    def _tts_invoke(self, model: str, credentials: dict, content_text: str, user: Optional[str] = None) -> any:
        """
        _tts_invoke text2speech model

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param user: unique user id
        :return: audio response for the given text
        """
        audio_type = self._get_model_audio_type(model, credentials)
        word_limit = self._get_model_word_limit(model, credentials)
        max_workers = self._get_model_workers_limit(model, credentials)

        try:
            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
            audio_bytes_list = list()

            # Create a thread pool and map the function to the list of sentences
            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [executor.submit(self._process_sentence, sentence, model, credentials)
                           for sentence in sentences]
                for future in futures:
                    try:
                        audio_bytes_list.append(future.result())
                    except Exception as ex:
                        raise InvokeBadRequestError(str(ex))

            audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type)
                              for audio_bytes in audio_bytes_list if audio_bytes]
            combined_segment = reduce(lambda x, y: x + y, audio_segments)
            buffer: BytesIO = BytesIO()
            combined_segment.export(buffer, format=audio_type)
            buffer.seek(0)
            return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}")
        except Exception as ex:
            raise InvokeBadRequestError(str(ex))

    # TODO: improve the streaming function
    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
                              user: Optional[str] = None) -> any:
        """
        _tts_invoke_streaming text2speech model

        :param model: model name
        :param credentials: model credentials
        :param content_text: text content to be converted to speech
        :param user: unique user id
        :return: audio response for the given text
        """
        # transform credentials to kwargs for model instance
        credentials_kwargs = self._to_credential_kwargs(credentials)
        voice_name = self._get_model_voice(model, credentials)
        word_limit = self._get_model_word_limit(model, credentials)
        audio_type = self._get_model_audio_type(model, credentials)
        tts_file_id = self._get_file_name(content_text)
        file_path = f'storage/generate_files/{audio_type}/{tts_file_id}.{audio_type}'
        try:
            client = OpenAI(**credentials_kwargs)
            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
            for sentence in sentences:
                response = client.audio.speech.create(model=model, voice=voice_name, input=sentence.strip())
                response.stream_to_file(file_path)
        except Exception as ex:
            raise InvokeBadRequestError(str(ex))

    def _get_model_voice(self, model: str, credentials: dict) -> Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]:
        """
        Get voice for given tts model

        :param model: model name
        :param credentials: model credentials
        :return: voice
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.DEFAULT_VOICE in model_schema.model_properties:
            return model_schema.model_properties[ModelPropertyKey.DEFAULT_VOICE]

    def _get_model_audio_type(self, model: str, credentials: dict) -> str:
        """
        Get audio type for given tts model

        :param model: model name
        :param credentials: model credentials
        :return: audio type
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.AUDIO_TYPE in model_schema.model_properties:
            return model_schema.model_properties[ModelPropertyKey.AUDIO_TYPE]

    def _get_model_word_limit(self, model: str, credentials: dict) -> int:
        """
        Get per-chunk word limit for given tts model
        :return: word limit
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.WORD_LIMIT in model_schema.model_properties:
            return model_schema.model_properties[ModelPropertyKey.WORD_LIMIT]

    def _get_model_workers_limit(self, model: str, credentials: dict) -> int:
        """
        Get max concurrent workers for given tts model
        :return: max workers
        """
        model_schema = self.get_model_schema(model, credentials)

        if model_schema and ModelPropertyKey.MAX_WORKERS in model_schema.model_properties:
            return model_schema.model_properties[ModelPropertyKey.MAX_WORKERS]

    @staticmethod
    def _split_text_into_sentences(text: str, limit: int, delimiters=None):
        if delimiters is None:
            delimiters = set('。!?;\n')

        buf = []
        word_count = 0
        for char in text:
            buf.append(char)
            if char in delimiters:
                if word_count >= limit:
                    yield ''.join(buf)
                    buf = []
                    word_count = 0
                else:
                    word_count += 1
            else:
                word_count += 1

        if buf:
            yield ''.join(buf)

    @staticmethod
    def _get_file_name(file_content: str) -> str:
        hash_object = hashlib.sha256(file_content.encode())
        hex_digest = hash_object.hexdigest()

        namespace_uuid = uuid.UUID('a5da6ef9-b303-596f-8e88-bf8fa40f4b31')
        unique_uuid = uuid.uuid5(namespace_uuid, hex_digest)
        return str(unique_uuid)

    def _process_sentence(self, sentence: str, model: str, credentials: dict):
        """
        Synthesize a single sentence via the OpenAI text2speech API

        :param model: model name
        :param credentials: model credentials
        :param sentence: text content to be converted to speech
        :return: audio bytes for the sentence
        """
        # transform credentials to kwargs for model instance
        credentials_kwargs = self._to_credential_kwargs(credentials)
        voice_name = self._get_model_voice(model, credentials)

        client = OpenAI(**credentials_kwargs)
        response = client.audio.speech.create(model=model, voice=voice_name, input=sentence.strip())
        # read once: a second response.read() would return an empty byte string
        audio_bytes = response.read()
        if isinstance(audio_bytes, bytes):
            return audio_bytes

    @staticmethod
    def _is_ffmpeg_installed():
        try:
            output = subprocess.check_output("ffmpeg -version", shell=True)
            if "ffmpeg version" in output.decode("utf-8"):
                return True
            else:
                raise InvokeBadRequestError("ffmpeg is not installed")
        except Exception:
            raise InvokeBadRequestError("ffmpeg is not installed")

api/fields/app_fields.py (+1 -0)

    'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
    'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'),
    'speech_to_text': fields.Raw(attribute='speech_to_text_dict'),
+    'text_to_speech': fields.Raw(attribute='text_to_speech_dict'),
    'retriever_resource': fields.Raw(attribute='retriever_resource_dict'),
    'annotation_reply': fields.Raw(attribute='annotation_reply_dict'),
    'more_like_this': fields.Raw(attribute='more_like_this_dict'),

api/migrations/versions/b24be59fbb04_.py (+32 -0)

"""empty message

Revision ID: b24be59fbb04
Revises: 187385f442fc
Create Date: 2024-01-17 01:31:12.670556

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'b24be59fbb04'
down_revision = 'de95f5c77138'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('app_model_configs', schema=None) as batch_op:
        batch_op.add_column(sa.Column('text_to_speech', sa.Text(), nullable=True))

    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('app_model_configs', schema=None) as batch_op:
        batch_op.drop_column('text_to_speech')

    # ### end Alembic commands ###
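This revision only adds a nullable text_to_speech column, so it can be applied in place. A sketch assuming the project's Flask-Migrate setup (`app` is assumed to be the Flask application instance):

from flask_migrate import upgrade

# Programmatic equivalent of `flask db upgrade`: applies pending revisions,
# including b24be59fbb04 above.
with app.app_context():  # `app` is the Flask application instance (assumption)
    upgrade(revision='head')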

api/models/model.py (+10 -0)

    suggested_questions = db.Column(db.Text)
    suggested_questions_after_answer = db.Column(db.Text)
    speech_to_text = db.Column(db.Text)
+    text_to_speech = db.Column(db.Text)
    more_like_this = db.Column(db.Text)
    model = db.Column(db.Text)
    user_input_form = db.Column(db.Text)

        return json.loads(self.speech_to_text) if self.speech_to_text \
            else {"enabled": False}

+    @property
+    def text_to_speech_dict(self) -> dict:
+        return json.loads(self.text_to_speech) if self.text_to_speech \
+            else {"enabled": False}
+
    @property
    def retriever_resource_dict(self) -> dict:
        return json.loads(self.retriever_resource) if self.retriever_resource \

        "suggested_questions": self.suggested_questions_list,
        "suggested_questions_after_answer": self.suggested_questions_after_answer_dict,
        "speech_to_text": self.speech_to_text_dict,
+        "text_to_speech": self.text_to_speech_dict,
        "retriever_resource": self.retriever_resource_dict,
        "annotation_reply": self.annotation_reply_dict,
        "more_like_this": self.more_like_this_dict,

        self.suggested_questions_after_answer = json.dumps(model_config['suggested_questions_after_answer'])
        self.speech_to_text = json.dumps(model_config['speech_to_text']) \
            if model_config.get('speech_to_text') else None
+        self.text_to_speech = json.dumps(model_config['text_to_speech']) \
+            if model_config.get('text_to_speech') else None
        self.more_like_this = json.dumps(model_config['more_like_this'])
        self.sensitive_word_avoidance = json.dumps(model_config['sensitive_word_avoidance']) \
            if model_config.get('sensitive_word_avoidance') else None

        suggested_questions=self.suggested_questions,
        suggested_questions_after_answer=self.suggested_questions_after_answer,
        speech_to_text=self.speech_to_text,
+        text_to_speech=self.text_to_speech,
        more_like_this=self.more_like_this,
        sensitive_word_avoidance=self.sensitive_word_avoidance,
        external_data_tools=self.external_data_tools,
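The new column stores a small JSON blob, and text_to_speech_dict falls back to disabled when the column is NULL. A standalone sketch mirroring the property's logic:

import json

def text_to_speech_dict(raw):
    # Mirrors AppModelConfig.text_to_speech_dict: an unset column means disabled.
    return json.loads(raw) if raw else {"enabled": False}

print(text_to_speech_dict(None))                           # {'enabled': False}
print(text_to_speech_dict(json.dumps({'enabled': True})))  # {'enabled': True}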

api/requirements.txt (+1 -0)

matplotlib~=3.8.2
yfinance~=0.2.35
+pydub~=0.25.1

api/services/account_service.py (+7 -7)

        db.session.commit()

        return account

    @staticmethod
    def get_account_jwt_token(account):
        payload = {
            "user_id": account.id,
            "exp": datetime.utcnow() + timedelta(days=30),
-            "iss": current_app.config['EDITION'],
+            "iss": current_app.config['EDITION'],
            "sub": 'Console API Passport',
        }

        }
        if action not in ['add', 'remove', 'update']:
            raise InvalidActionError("Invalid action.")
        if member:
            if operator.id == member.id:
                raise CannotOperateSelfError("Cannot operate self.")

            return None

        return {
-            'account': account,
-            'data': invitation_data,
-            'tenant': tenant,
-        }
+            'account': account,
+            'data': invitation_data,
+            'tenant': tenant,
+        }

    @classmethod
    def _get_invitation_by_token(cls, token: str, workspace_id: str, email: str) -> Optional[Dict[str, str]]:

api/services/app_model_config_service.py (+16 -0)

        if not isinstance(config["speech_to_text"]["enabled"], bool):
            raise ValueError("enabled in speech_to_text must be of boolean type")

+        # text_to_speech
+        if 'text_to_speech' not in config or not config["text_to_speech"]:
+            config["text_to_speech"] = {
+                "enabled": False
+            }
+
+        if not isinstance(config["text_to_speech"], dict):
+            raise ValueError("text_to_speech must be of dict type")
+
+        if "enabled" not in config["text_to_speech"] or not config["text_to_speech"]["enabled"]:
+            config["text_to_speech"]["enabled"] = False
+
+        if not isinstance(config["text_to_speech"]["enabled"], bool):
+            raise ValueError("enabled in text_to_speech must be of boolean type")
+
        # return retriever resource
        if 'retriever_resource' not in config or not config["retriever_resource"]:
            config["retriever_resource"] = {

            "suggested_questions": config["suggested_questions"],
            "suggested_questions_after_answer": config["suggested_questions_after_answer"],
            "speech_to_text": config["speech_to_text"],
+            "text_to_speech": config["text_to_speech"],
            "retriever_resource": config["retriever_resource"],
            "more_like_this": config["more_like_this"],
            "sensitive_word_avoidance": config["sensitive_word_avoidance"],

api/services/audio_service.py (+28 -6)

import io
+from typing import Optional

from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
-from services.errors.audio import (AudioTooLargeServiceError, NoAudioUploadedServiceError,
-                                   ProviderNotSupportSpeechToTextServiceError, UnsupportedAudioTypeServiceError)
+from services.errors.audio import (AudioTooLargeServiceError,
+                                   NoAudioUploadedServiceError,
+                                   ProviderNotSupportTextToSpeechServiceError,
+                                   ProviderNotSupportSpeechToTextServiceError,
+                                   UnsupportedAudioTypeServiceError)
from werkzeug.datastructures import FileStorage

FILE_SIZE = 15
FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
-ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
+ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm', 'amr']


class AudioService:
    @classmethod
-    def transcript(cls, tenant_id: str, file: FileStorage):
+    def transcript_asr(cls, tenant_id: str, file: FileStorage, end_user: Optional[str] = None):
        if file is None:
            raise NoAudioUploadedServiceError()

        extension = file.mimetype
        if extension not in [f'audio/{ext}' for ext in ALLOWED_EXTENSIONS]:
            raise UnsupportedAudioTypeServiceError()

            tenant_id=tenant_id,
            model_type=ModelType.SPEECH2TEXT
        )
+        if model_instance is None:
+            raise ProviderNotSupportSpeechToTextServiceError()

        buffer = io.BytesIO(file_content)
        buffer.name = 'temp.mp3'

-        return {"text": model_instance.invoke_speech2text(buffer)}
+        return {"text": model_instance.invoke_speech2text(file=buffer, user=end_user)}

+    @classmethod
+    def transcript_tts(cls, tenant_id: str, text: str, streaming: bool, end_user: Optional[str] = None):
+        model_manager = ModelManager()
+        model_instance = model_manager.get_default_model_instance(
+            tenant_id=tenant_id,
+            model_type=ModelType.TTS
+        )
+        if model_instance is None:
+            raise ProviderNotSupportTextToSpeechServiceError()
+
+        try:
+            audio_response = model_instance.invoke_tts(content_text=text.strip(), user=end_user, streaming=streaming)
+            return audio_response
+        except Exception as e:
+            raise e

api/services/errors/audio.py (+6 -1)

class UnsupportedAudioTypeServiceError(Exception):
    pass


class ProviderNotSupportSpeechToTextServiceError(Exception):
-    pass
+    pass
+
+
+class ProviderNotSupportTextToSpeechServiceError(Exception):
+    pass

web/app/components/app/chat/answer/index.tsx (+10 -1)

import CopyBtn from '../copy-btn'
import Thought from '../thought'
import Citation from '../citation'
+import AudioBtn from '@/app/components/base/audio-btn'
import { randomString } from '@/utils'
import type { MessageRating } from '@/models/log'
import Tooltip from '@/app/components/base/tooltip'

  dataSets?: DataSet[]
  isShowCitation?: boolean
  isShowCitationHitInfo?: boolean
+  isShowTextToSpeech?: boolean
  // Annotation props
  supportAnnotation?: boolean
  appId?: string

  citation,
  isShowCitation,
  isShowCitationHitInfo = false,
+  isShowTextToSpeech,
  supportAnnotation,
  appId,
  question,

            className={cn(s.copyBtn, 'mr-1')}
          />
        )}
-        {(supportAnnotation && !item.isOpeningStatement) && (
+        {!item.isOpeningStatement && isShowTextToSpeech && (
+          <AudioBtn
+            value={content}
+            className={cn(s.playBtn, 'mr-1')}
+          />
+        )}
+        {(!item.isOpeningStatement && supportAnnotation) && (
          <AnnotationCtrlBtn
            appId={appId!}
            messageId={id}

web/app/components/app/chat/index.tsx (+3 -0)

  isShowSuggestion?: boolean
  suggestionList?: string[]
  isShowSpeechToText?: boolean
+  isShowTextToSpeech?: boolean
  isShowCitation?: boolean
  answerIcon?: ReactNode
  isShowConfigElem?: boolean

  isShowSuggestion,
  suggestionList,
  isShowSpeechToText,
+  isShowTextToSpeech,
  isShowCitation,
  answerIcon,
  isShowConfigElem,

          dataSets={dataSets}
          isShowCitation={isShowCitation}
          isShowCitationHitInfo={isShowCitationHitInfo}
+          isShowTextToSpeech={isShowTextToSpeech}
          supportAnnotation={supportAnnotation}
          appId={appId}
          question={chatList[index - 1]?.content}

web/app/components/app/chat/style.module.css (+2 -0)

}

.copyBtn,
+.playBtn,
.annotationBtn {
  display: none;
}
}

.answerWrap:hover .copyBtn,
+.answerWrap:hover .playBtn,
.answerWrap:hover .annotationBtn {
  display: block;
}

BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/citations-and-attributions-preview@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/conversation-opener-preview@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/more-like-this-preview@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/next-question-suggestion-preview@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/opening-suggestion-preview@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/speech-to-text-preview@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-assistant@2x.png
BIN  web/app/components/app/configuration/config/feature/choose-feature/feature-item/preview-imgs/text-to-audio-preview-completion@2x.png


web/app/components/app/configuration/config/feature/choose-feature/feature-item/style.module.css (+6 -1)

  background-image: url(./preview-imgs/speech-to-text.svg);
}

+.textToSpeechPreview {
+  @apply shadow-lg rounded-lg;
+  background-image: url(./preview-imgs/text-to-audio-preview-assistant@2x.png);
+}
+
.citationPreview {
  background-image: url(./preview-imgs/citation.svg);
-}
+}

web/app/components/app/configuration/config/feature/choose-feature/index.tsx (+28 -1)

import FeatureItem from './feature-item'
import Modal from '@/app/components/base/modal'
import SuggestedQuestionsAfterAnswerIcon from '@/app/components/app/configuration/base/icons/suggested-questions-after-answer-icon'
-import { Microphone01 } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
+import { Microphone01, Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import { Citations } from '@/app/components/base/icons/src/vender/solid/editor'
import { FileSearch02 } from '@/app/components/base/icons/src/vender/solid/files'
import { MessageFast } from '@/app/components/base/icons/src/vender/solid/communication'

  moreLikeThis: boolean
  suggestedQuestionsAfterAnswer: boolean
  speechToText: boolean
+  textToSpeech: boolean
  citation: boolean
  moderation: boolean
  annotation: boolean

  config: IConfig
  isChatApp: boolean
  onChange: (key: string, value: boolean) => void
+  showTextToSpeechItem?: boolean
  showSpeechToTextItem?: boolean
}

  isChatApp,
  config,
  onChange,
+  showTextToSpeechItem,
  showSpeechToTextItem,
}) => {
  const { t } = useTranslation()

          value={config.suggestedQuestionsAfterAnswer}
          onChange={value => onChange('suggestedQuestionsAfterAnswer', value)}
        />
+        {
+          showTextToSpeechItem && (
+            <FeatureItem
+              icon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
+              previewImgClassName='textToSpeechPreview'
+              title={t('appDebug.feature.textToSpeech.title')}
+              description={t('appDebug.feature.textToSpeech.description')}
+              value={config.textToSpeech}
+              onChange={value => onChange('textToSpeech', value)}
+            />
+          )
+        }
        {
          showSpeechToTextItem && (
            <FeatureItem

          value={config.moreLikeThis}
          onChange={value => onChange('moreLikeThis', value)}
        />
+        {
+          showTextToSpeechItem && (
+            <FeatureItem
+              icon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
+              previewImgClassName='textToSpeechPreview'
+              title={t('appDebug.feature.textToSpeech.title')}
+              description={t('appDebug.feature.textToSpeech.description')}
+              value={config.textToSpeech}
+              onChange={value => onChange('textToSpeech', value)}
+            />
+          )
+        }
      </>
    </FeatureGroup>
  )}

web/app/components/app/configuration/config/feature/use-feature.tsx (+8 -0)

  setSuggestedQuestionsAfterAnswer,
  speechToText,
  setSpeechToText,
+  textToSpeech,
+  setTextToSpeech,
  citation,
  setCitation,
  annotation,

  setSuggestedQuestionsAfterAnswer: (suggestedQuestionsAfterAnswer: boolean) => void
  speechToText: boolean
  setSpeechToText: (speechToText: boolean) => void
+  textToSpeech: boolean
+  setTextToSpeech: (textToSpeech: boolean) => void
  citation: boolean
  setCitation: (citation: boolean) => void
  annotation: boolean

  moreLikeThis,
  suggestedQuestionsAfterAnswer,
  speechToText,
+  textToSpeech,
  citation,
  annotation,
  moderation,

      case 'speechToText':
        setSpeechToText(value)
        break
+      case 'textToSpeech':
+        setTextToSpeech(value)
+        break
      case 'citation':
        setCitation(value)
        break

+ 21
- 6
web/app/components/app/configuration/config/index.tsx View File

import ConfigContext from '@/context/debug-configuration'
import ConfigPrompt from '@/app/components/app/configuration/config-prompt'
import ConfigVar from '@/app/components/app/configuration/config-var'
import { type CitationConfig, type ModelConfig, type ModerationConfig, type MoreLikeThisConfig, PromptMode, type PromptVariable, type SpeechToTextConfig, type SuggestedQuestionsAfterAnswerConfig } from '@/models/debug'
import { type CitationConfig, type ModelConfig, type ModerationConfig, type MoreLikeThisConfig, PromptMode, type PromptVariable, type SpeechToTextConfig, type SuggestedQuestionsAfterAnswerConfig, type TextToSpeechConfig } from '@/models/debug'
import { AppType, ModelModeType } from '@/types/app'
import { useModalContext } from '@/context/modal-context'
import ConfigParamModal from '@/app/components/app/configuration/toolbox/annotation/config-param-modal'
setSuggestedQuestionsAfterAnswerConfig,
speechToTextConfig,
setSpeechToTextConfig,
textToSpeechConfig,
setTextToSpeechConfig,
citationConfig,
setCitationConfig,
annotationConfig,
} = useContext(ConfigContext)
const isChatApp = mode === AppType.chat
const { data: speech2textDefaultModel } = useDefaultModel(4)
const { data: text2speechDefaultModel } = useDefaultModel(5)
const { setShowModerationSettingModal } = useModalContext()


const promptTemplate = modelConfig.configs.prompt_template
draft.enabled = value
}))
},
textToSpeech: textToSpeechConfig.enabled,
setTextToSpeech: (value) => {
setTextToSpeechConfig(produce(textToSpeechConfig, (draft: TextToSpeechConfig) => {
draft.enabled = value
}))
},
citation: citationConfig.enabled,
setCitation: (value) => {
setCitationConfig(produce(citationConfig, (draft: CitationConfig) => {
setAnnotationConfig,
})


const hasChatConfig = isChatApp && (featureConfig.openingStatement || featureConfig.suggestedQuestionsAfterAnswer || (featureConfig.speechToText && !!speech2textDefaultModel) || featureConfig.citation)
const hasChatConfig = isChatApp && (featureConfig.openingStatement || featureConfig.suggestedQuestionsAfterAnswer || (featureConfig.speechToText && !!speech2textDefaultModel) || (featureConfig.textToSpeech && !!text2speechDefaultModel) || featureConfig.citation)
const hasToolbox = moderationConfig.enabled || featureConfig.annotation


const wrapRef = useRef<HTMLDivElement>(null)
config={featureConfig}
onChange={handleFeatureChange}
showSpeechToTextItem={!!speech2textDefaultModel}
showTextToSpeechItem={!!text2speechDefaultModel}
/>
)}


}
}
isShowSuggestedQuestionsAfterAnswer={featureConfig.suggestedQuestionsAfterAnswer}
isShowTextToSpeech={featureConfig.textToSpeech && !!text2speechDefaultModel}
isShowSpeechText={featureConfig.speechToText && !!speech2textDefaultModel}
isShowCitation={featureConfig.citation}
/>
)
}


{/* TextnGeneration config */}
{moreLikeThisConfig.enabled && (
<ExperienceEnchanceGroup />
)}
{/* TextnGeneration config */}
{
!hasChatConfig && (
<ExperienceEnchanceGroup
isShowMoreLike={moreLikeThisConfig.enabled}
isShowTextToSpeech={featureConfig.textToSpeech && !!text2speechDefaultModel}
/>
)
}


{/* Toolbox */}
{

+ 10
- 0
web/app/components/app/configuration/debug/index.tsx

suggestedQuestions,
suggestedQuestionsAfterAnswerConfig,
speechToTextConfig,
textToSpeechConfig,
citationConfig,
moderationConfig,
moreLikeThisConfig,
annotationConfig,
} = useContext(ConfigContext)
const { data: speech2textDefaultModel } = useDefaultModel(4)
const { data: text2speechDefaultModel } = useDefaultModel(5)
const [chatList, setChatList, getChatList] = useGetState<IChatItem[]>([])
const chatListDomRef = useRef<HTMLDivElement>(null)
const { data: fileUploadConfigResponse } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig)
setChatList(newListWithAnswer)
}
const postModelConfig: BackendModelConfig = {
text_to_speech: {
enabled: false,
},
pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '',
prompt_type: promptMode,
chat_prompt_config: {},
const contextVar = modelConfig.configs.prompt_variables.find(item => item.is_context_var)?.key

const postModelConfig: BackendModelConfig = {
text_to_speech: {
enabled: false,
},
pre_prompt: !isAdvancedMode ? modelConfig.configs.prompt_template : '',
prompt_type: promptMode,
chat_prompt_config: {},
isShowSuggestion={doShowSuggestion}
suggestionList={suggestQuestions}
isShowSpeechToText={speechToTextConfig.enabled && !!speech2textDefaultModel}
isShowTextToSpeech={textToSpeechConfig.enabled && !!text2speechDefaultModel}
isShowCitation={citationConfig.enabled}
isShowCitationHitInfo
isShowPromptLog
className="mt-2"
content={completionRes}
isLoading={!completionRes && isResponsing}
isShowTextToSpeech={textToSpeechConfig.enabled && !!text2speechDefaultModel}
isResponsing={isResponsing}
isInstalledApp={false}
messageId={messageId}

+ 8
- 0
web/app/components/app/configuration/features/chat-group/index.tsx

import OpeningStatement from './opening-statement'
import SuggestedQuestionsAfterAnswer from './suggested-questions-after-answer'
import SpeechToText from './speech-to-text'
import TextToSpeech from './text-to-speech'
import Citation from './citation'
/*
* Include
openingStatementConfig: IOpeningStatementProps
isShowSuggestedQuestionsAfterAnswer: boolean
isShowSpeechText: boolean
isShowTextToSpeech: boolean
isShowCitation: boolean
}
const ChatGroup: FC<ChatGroupProps> = ({
openingStatementConfig,
isShowSuggestedQuestionsAfterAnswer,
isShowSpeechText,
isShowTextToSpeech,
isShowCitation,
}) => {
const { t } = useTranslation()
{isShowSuggestedQuestionsAfterAnswer && (
<SuggestedQuestionsAfterAnswer />
)}
{
isShowTextToSpeech && (
<TextToSpeech />
)
}
{
isShowSpeechText && (
<SpeechToText />

+ 2
- 2
web/app/components/app/configuration/features/chat-group/speech-to-text/index.tsx

import Panel from '@/app/components/app/configuration/base/feature-panel'
import { Microphone01 } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'


const SuggestedQuestionsAfterAnswer: FC = () => {
const SpeechToTextConfig: FC = () => {
const { t } = useTranslation()


return (
/>
)
}
export default React.memo(SuggestedQuestionsAfterAnswer)
export default React.memo(SpeechToTextConfig)

+ 25
- 0
web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx

'use client'
import React, { type FC } from 'react'
import { useTranslation } from 'react-i18next'
import Panel from '@/app/components/app/configuration/base/feature-panel'
import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'

const TextToSpeech: FC = () => {
const { t } = useTranslation()

return (
<Panel
title={
<div className='flex items-center gap-2'>
<div>{t('appDebug.feature.textToSpeech.title')}</div>
</div>
}
headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
headerRight={
<div className='text-xs text-gray-500'>{t('appDebug.feature.textToSpeech.resDes')}</div>
}
noBodySpacing
/>
)
}
export default React.memo(TextToSpeech)

+ 24
- 3
web/app/components/app/configuration/features/experience-enchance-group/index.tsx

import React from 'react'
import { useTranslation } from 'react-i18next'
import GroupName from '../../base/group-name'
import TextToSpeech from '../chat-group/text-to-speech'
import MoreLikeThis from './more-like-this'


/*
* Include
* 1. More like this
*/
const ExperienceEnchanceGroup: FC = () => {

type ExperienceGroupProps = {
isShowTextToSpeech: boolean
isShowMoreLike: boolean
}

const ExperienceEnchanceGroup: FC<ExperienceGroupProps> = ({
isShowTextToSpeech,
isShowMoreLike,
}) => {
const { t } = useTranslation()


return (
<div className='mt-7'>
<GroupName name={t('appDebug.feature.groupExperience.title')} />
<MoreLikeThis />
<GroupName name={t('appDebug.feature.groupExperience.title')}/>
<div className='space-y-3'>
{
isShowMoreLike && (
<MoreLikeThis/>
)
}
{
isShowTextToSpeech && (
<TextToSpeech/>
)
}
</div>
</div>
)
}

+ 15
- 0
web/app/components/app/configuration/index.tsx

const [speechToTextConfig, setSpeechToTextConfig] = useState<MoreLikeThisConfig>({
enabled: false,
})
const [textToSpeechConfig, setTextToSpeechConfig] = useState<MoreLikeThisConfig>({
enabled: false,
})
const [citationConfig, setCitationConfig] = useState<MoreLikeThisConfig>({
enabled: false,
})
more_like_this: null,
suggested_questions_after_answer: null,
speech_to_text: null,
text_to_speech: null,
retriever_resource: null,
sensitive_word_avoidance: null,
dataSets: [],
setSpeechToTextConfig(modelConfig.speech_to_text || {
enabled: false,
})
setTextToSpeechConfig(modelConfig.text_to_speech || {
enabled: false,
})
setCitationConfig(modelConfig.retriever_resource || {
enabled: false,
})
if (modelConfig.speech_to_text)
setSpeechToTextConfig(modelConfig.speech_to_text)

if (modelConfig.text_to_speech)
setTextToSpeechConfig(modelConfig.text_to_speech)

if (modelConfig.retriever_resource)
setCitationConfig(modelConfig.retriever_resource)

more_like_this: modelConfig.more_like_this,
suggested_questions_after_answer: modelConfig.suggested_questions_after_answer,
speech_to_text: modelConfig.speech_to_text,
text_to_speech: modelConfig.text_to_speech,
retriever_resource: modelConfig.retriever_resource,
sensitive_word_avoidance: modelConfig.sensitive_word_avoidance,
external_data_tools: modelConfig.external_data_tools,
more_like_this: moreLikeThisConfig,
suggested_questions_after_answer: suggestedQuestionsAfterAnswerConfig,
speech_to_text: speechToTextConfig,
text_to_speech: textToSpeechConfig,
retriever_resource: citationConfig,
sensitive_word_avoidance: moderationConfig,
agent_mode: {
draft.more_like_this = moreLikeThisConfig
draft.suggested_questions_after_answer = suggestedQuestionsAfterAnswerConfig
draft.speech_to_text = speechToTextConfig
draft.text_to_speech = textToSpeechConfig
draft.retriever_resource = citationConfig
draft.dataSets = dataSets
})
setSuggestedQuestionsAfterAnswerConfig,
speechToTextConfig,
setSpeechToTextConfig,
textToSpeechConfig,
setTextToSpeechConfig,
citationConfig,
setCitationConfig,
annotationConfig,
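Net effect of this file's hunks: the new toggle gets the same lifecycle as the existing feature flags (default off, hydrate from a saved app config, ship back in the persisted payload). A condensed TypeScript sketch of that round-trip, with the component state reduced to plain values for illustration only:

```ts
// Sketch only: mirrors the pattern in the diff, not the full component.
type TextToSpeechConfig = { enabled: boolean }

// A previously saved app config may or may not carry the new field.
const savedConfig: { text_to_speech?: TextToSpeechConfig } = {}

// Default off, then hydrate from the saved config when present.
const textToSpeechConfig: TextToSpeechConfig = savedConfig.text_to_speech ?? { enabled: false }

// Include the flag when posting the config back, alongside the other features.
const payload = { text_to_speech: textToSpeechConfig }
console.log(payload) // { text_to_speech: { enabled: false } }
```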

+ 2
- 0
web/app/components/app/log/list.tsx

feedback={detail.message.feedbacks.find((item: any) => item.from_source === 'admin')}
onFeedback={feedback => onFeedback(detail.message.id, feedback)}
supportAnnotation
isShowTextToSpeech
appId={appDetail?.id}
varList={varList}
/>
displayScene='console'
isShowPromptLog
supportAnnotation
isShowTextToSpeech
appId={appDetail?.id}
onChatListChange={setItems}
/>

+ 15
- 2
web/app/components/app/text-generate/item/index.tsx

import { Markdown } from '@/app/components/base/markdown'
import Loading from '@/app/components/base/loading'
import Toast from '@/app/components/base/toast'
import AudioBtn from '@/app/components/base/audio-btn'
import type { Feedbacktype } from '@/app/components/app/chat/type'
import { fetchMoreLikeThis, updateFeedback } from '@/service/share'
import { Clipboard, File02 } from '@/app/components/base/icons/src/vender/line/files'
controlClearMoreLikeThis?: number
supportFeedback?: boolean
supportAnnotation?: boolean
isShowTextToSpeech?: boolean
appId?: string
varList?: { label: string; value: string | number | object }[]
}
controlClearMoreLikeThis,
supportFeedback,
supportAnnotation,
isShowTextToSpeech,
appId,
varList,
}) => {
isLoading: isQuerying,
feedback: childFeedback,
onSave,
isShowTextToSpeech,
isMobile,
isInstalledApp,
installedAppId,
<div className='ml-1'>
{ratingContent}
</div>
)
}
)}

{isShowTextToSpeech && (
<>
<div className='ml-2 mr-2 h-[14px] w-[1px] bg-gray-200'></div>
<AudioBtn
value={content}
className={'mr-1'}
/>
</>
)}
</div>
<div className='text-xs text-gray-500'>{content?.length} {t('common.unit.char')}</div>
</div>

+ 13
- 0
web/app/components/app/text-generate/saved-items/index.tsx

import { Markdown } from '@/app/components/base/markdown'
import { SimpleBtn, copyIcon } from '@/app/components/app/text-generate/item'
import Toast from '@/app/components/base/toast'
import AudioBtn from '@/app/components/base/audio-btn'


export type ISavedItemsProps = {
className?: string
isShowTextToSpeech?: boolean
list: SavedMessage[]
onRemove: (id: string) => void
onStartCreateContent: () => void


const SavedItems: FC<ISavedItemsProps> = ({
className,
isShowTextToSpeech,
list,
onRemove,
onStartCreateContent,
{removeIcon}
<div>{t('common.operation.remove')}</div>
</SimpleBtn>

{isShowTextToSpeech && (
<>
<div className='ml-2 mr-2 h-[14px] w-[1px] bg-gray-200'></div>
<AudioBtn
value={answer}
className={'mr-1'}
/>
</>
)}
</div>
<div className='text-xs text-gray-500'>{answer?.length} {t('common.unit.char')}</div>
</div>

+ 110
- 0
web/app/components/base/audio-btn/index.tsx

'use client'
import { useRef, useState } from 'react'
import { t } from 'i18next'
import { useParams, usePathname } from 'next/navigation'
import s from './style.module.css'
import Tooltip from '@/app/components/base/tooltip'
import { randomString } from '@/utils'
import { textToAudio } from '@/service/share'

type AudioBtnProps = {
value: string
className?: string
}

const AudioBtn = ({
value,
className,
}: AudioBtnProps) => {
const audioRef = useRef<HTMLAudioElement | null>(null)
const [isPlaying, setIsPlaying] = useState(false)
const [isPause, setPause] = useState(false)
const [hasEnded, setHasEnded] = useState(false)
const selector = useRef(`play-tooltip-${randomString(4)}`)
const params = useParams()
const pathname = usePathname()
const removeCodeBlocks = (inputText: any) => {
const codeBlockRegex = /```[\s\S]*?```/g
return inputText.replace(codeBlockRegex, '')
}

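// Fetch synthesized speech for the current text and start playback in a fresh Audio element.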
const playAudio = async () => {
const formData = new FormData()
if (value !== '') {
formData.append('text', removeCodeBlocks(value))

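// Route the request to the endpoint matching the current page: universal chat by default,
// the public web-app route when a share token is present, otherwise the explore (installed app) or console app route.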
let url = '/universal-chat/text-to-audio'
let isPublic = false

if (params.token) {
url = '/text-to-audio'
isPublic = true
}
else if (params.appId) {
if (pathname.search('explore/installed') > -1)
url = `/installed-apps/${params.appId}/text-to-audio`
else
url = `/apps/${params.appId}/text-to-audio`
}

try {
const audioResponse = await textToAudio(url, isPublic, formData)
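// The service returns the audio body as latin1-encoded text; rebuild the raw bytes
// into an audio/wav Blob and play it through an object URL.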
const blob_bytes = Buffer.from(audioResponse.data, 'latin1')
const blob = new Blob([blob_bytes], { type: 'audio/wav' })
const audioUrl = URL.createObjectURL(blob)
const audio = new Audio(audioUrl)
audioRef.current = audio
audio.play().then(() => {
setIsPlaying(true)
}).catch(() => {
setIsPlaying(false)
URL.revokeObjectURL(audioUrl)
})
audio.onended = () => setHasEnded(true)
}
catch (error) {
setIsPlaying(false)
console.error('Error playing audio:', error)
}
}
}

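// Pause or resume the clip currently held in audioRef; once playback has ended
// (or nothing is loaded yet), start a fresh request via playAudio() instead.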
const togglePlayPause = () => {
if (audioRef.current) {
if (isPlaying) {
setPause(true)
audioRef.current.pause()
}
else if (!hasEnded) {
setPause(false)
audioRef.current.play()
}
else if (!isPlaying) {
playAudio().then()
}
setIsPlaying(prevIsPlaying => !prevIsPlaying)
}
else {
playAudio().then()
}
}

return (
<div className={`${(isPlaying && !hasEnded) ? 'mr-1' : className}`}>
<Tooltip
selector={selector.current}
content={(!isPause ? ((isPlaying && !hasEnded) ? t('appApi.playing') : t('appApi.play')) : t('appApi.pause')) as string}
className='z-10'
>
<div
className={'box-border p-0.5 flex items-center justify-center rounded-md bg-white cursor-pointer'}
style={{ boxShadow: '0px 4px 8px -2px rgba(16, 24, 40, 0.1), 0px 2px 4px -2px rgba(16, 24, 40, 0.06)' }}
onClick={togglePlayPause}>
<div className={`w-6 h-6 rounded-md hover:bg-gray-50 ${!isPause ? ((isPlaying && !hasEnded) ? s.playIcon : s.stopIcon) : s.pauseIcon}`}></div>
</div>
</Tooltip>
</div>
)
}

export default AudioBtn

+ 16
- 0
web/app/components/base/audio-btn/style.module.css

.playIcon {
background-image: url(~@/app/components/develop/secret-key/assets/play.svg);
background-position: center;
background-repeat: no-repeat;
}

.pauseIcon {
background-image: url(~@/app/components/develop/secret-key/assets/pause.svg);
background-position: center;
background-repeat: no-repeat;
}

.stopIcon {
background-image: url(~@/app/components/develop/secret-key/assets/stop.svg);
background-position: center;
background-repeat: no-repeat;
}

+ 15
- 0
web/app/components/base/icons/assets/vender/line/mediaAndDevices/speaker.svg

<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_109_6694)">
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 2.86666C0 2.05664 0.656649 1.39999 1.46667 1.39999H5.86667C6.67668 1.39999 7.33333 2.05664 7.33333 2.86666C7.33333 3.27167 7.00501 3.59999 6.6 3.59999C6.19499 3.59999 5.86667 3.27167 5.86667 2.86666H4.4V7.99999C4.80501 7.99999 5.13333 8.32831 5.13333 8.73332C5.13333 9.13833 4.80501 9.46666 4.4 9.46666H2.93333C2.52832 9.46666 2.2 9.13833 2.2 8.73332C2.2 8.32831 2.52832 7.99999 2.93333 7.99999V2.86666H1.46667C1.46667 3.27167 1.13834 3.59999 0.733333 3.59999C0.328324 3.59999 0 3.27167 0 2.86666Z" fill="#444CE7"/>
<path d="M13.8205 0.782296C13.7434 0.62811 13.5233 0.62811 13.4462 0.782296C12.9664 1.74206 12.8754 1.83302 11.9156 2.3129C11.7615 2.39 11.7615 2.61003 11.9156 2.68712C12.8754 3.167 12.9664 3.25797 13.4462 4.21773C13.5233 4.37191 13.7434 4.37191 13.8205 4.21773C14.3003 3.25797 14.3913 3.167 15.3511 2.68712C15.5053 2.61003 15.5053 2.39 15.3511 2.3129C14.3913 1.83302 14.3003 1.74206 13.8205 0.782296Z" fill="#444CE7"/>
<path d="M9.79394 2.25319C9.71404 2.09337 9.48596 2.09337 9.40605 2.25319C9.04994 2.96543 8.96544 3.04993 8.2532 3.40605C8.09338 3.48595 8.09338 3.71402 8.2532 3.79393C8.96544 4.15005 9.04994 4.23455 9.40606 4.94679C9.48596 5.10661 9.71404 5.10661 9.79394 4.94679C10.1501 4.23455 10.2346 4.15005 10.9468 3.79393C11.1066 3.71402 11.1066 3.48595 10.9468 3.40605C10.2346 3.04993 10.1501 2.96543 9.79394 2.25319Z" fill="#444CE7"/>
<path d="M2.75377 11.049C2.67668 10.8948 2.45665 10.8948 2.37956 11.049C1.89969 12.0087 1.80872 12.0997 0.848971 12.5796C0.694788 12.6566 0.694787 12.8767 0.848971 12.9538C1.80872 13.4336 1.89969 13.5246 2.37956 14.4844C2.45665 14.6385 2.67668 14.6385 2.75377 14.4844C3.23365 13.5246 3.32461 13.4336 4.28436 12.9538C4.43855 12.8767 4.43855 12.6566 4.28436 12.5796C3.32461 12.0997 3.23365 12.0087 2.75377 11.049Z" fill="#444CE7"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M14.6741 8.65106C14.8886 8.50146 15.1837 8.55405 15.3333 8.76853C15.7614 9.38226 16.0125 10.1292 16.0125 10.9333C16.0125 11.7375 15.7614 12.4844 15.3333 13.0981C15.1837 13.3126 14.8886 13.3652 14.6741 13.2156C14.4596 13.066 14.407 12.7708 14.5567 12.5564C14.8775 12.0964 15.0656 11.5375 15.0656 10.9333C15.0656 10.3291 14.8775 9.77025 14.5567 9.31028C14.407 9.09581 14.4596 8.80066 14.6741 8.65106Z" fill="#444CE7"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M12.5674 6.53771C12.794 6.51987 13.0155 6.61161 13.1632 6.78449C13.2954 6.93929 13.3164 7.12549 13.3244 7.21587C13.3334 7.31718 13.3334 7.44301 13.3333 7.57103C13.3333 7.57691 13.3333 7.58278 13.3333 7.58866L13.3333 14.3C13.3334 14.428 13.3334 14.5539 13.3244 14.6552C13.3164 14.7455 13.2954 14.9317 13.1632 15.0865C13.0155 15.2594 12.794 15.3512 12.5674 15.3333C12.3644 15.3173 12.2179 15.2005 12.1484 15.1423C12.0704 15.077 11.9814 14.988 11.8909 14.8975L10.3795 13.3861C10.3357 13.3423 10.3137 13.3205 10.2971 13.3053L10.2958 13.3041L10.2941 13.3041C10.2716 13.303 10.2407 13.3029 10.1787 13.3029L9.34101 13.3029C9.22151 13.3029 9.10513 13.3029 9.00657 13.2949C8.89833 13.286 8.77062 13.2652 8.6421 13.1997C8.46392 13.1089 8.31906 12.964 8.22827 12.7859C8.16279 12.6574 8.14192 12.5296 8.13308 12.4214C8.12503 12.3228 8.12504 12.2065 8.12505 12.087V9.79916C8.12505 9.79413 8.12505 9.78909 8.12505 9.78406C8.12504 9.66456 8.12503 9.54819 8.13308 9.44963C8.14192 9.34139 8.16279 9.21368 8.22827 9.08517C8.31906 8.90699 8.46392 8.76212 8.6421 8.67133C8.77062 8.60585 8.89833 8.58498 9.00657 8.57614C9.10512 8.56809 9.2215 8.5681 9.341 8.56812C9.34603 8.56812 9.35106 8.56812 9.3561 8.56812H10.1787C10.2407 8.56812 10.2716 8.56801 10.2941 8.56698L10.2958 8.5669L10.2971 8.56575C10.3137 8.55058 10.3357 8.52877 10.3795 8.48491L11.8784 6.98602C11.8826 6.98186 11.8867 6.97771 11.8909 6.97355C11.9814 6.88302 12.0704 6.79403 12.1484 6.72874C12.2179 6.67049 12.3644 6.55368 12.5674 6.53771Z" fill="#444CE7"/>
</g>
<defs>
<clipPath id="clip0_109_6694">
<rect width="16" height="16" fill="white"/>
</clipPath>
</defs>
</svg>

+ 15
- 0
web/app/components/base/icons/assets/vender/solid/mediaAndDevices/speaker.svg

<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_109_6694)">
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 2.86666C0 2.05664 0.656649 1.39999 1.46667 1.39999H5.86667C6.67668 1.39999 7.33333 2.05664 7.33333 2.86666C7.33333 3.27167 7.00501 3.59999 6.6 3.59999C6.19499 3.59999 5.86667 3.27167 5.86667 2.86666H4.4V7.99999C4.80501 7.99999 5.13333 8.32831 5.13333 8.73332C5.13333 9.13833 4.80501 9.46666 4.4 9.46666H2.93333C2.52832 9.46666 2.2 9.13833 2.2 8.73332C2.2 8.32831 2.52832 7.99999 2.93333 7.99999V2.86666H1.46667C1.46667 3.27167 1.13834 3.59999 0.733333 3.59999C0.328324 3.59999 0 3.27167 0 2.86666Z" fill="#444CE7"/>
<path d="M13.8205 0.782296C13.7434 0.62811 13.5233 0.62811 13.4462 0.782296C12.9664 1.74206 12.8754 1.83302 11.9156 2.3129C11.7615 2.39 11.7615 2.61003 11.9156 2.68712C12.8754 3.167 12.9664 3.25797 13.4462 4.21773C13.5233 4.37191 13.7434 4.37191 13.8205 4.21773C14.3003 3.25797 14.3913 3.167 15.3511 2.68712C15.5053 2.61003 15.5053 2.39 15.3511 2.3129C14.3913 1.83302 14.3003 1.74206 13.8205 0.782296Z" fill="#444CE7"/>
<path d="M9.79394 2.25319C9.71404 2.09337 9.48596 2.09337 9.40605 2.25319C9.04994 2.96543 8.96544 3.04993 8.2532 3.40605C8.09338 3.48595 8.09338 3.71402 8.2532 3.79393C8.96544 4.15005 9.04994 4.23455 9.40606 4.94679C9.48596 5.10661 9.71404 5.10661 9.79394 4.94679C10.1501 4.23455 10.2346 4.15005 10.9468 3.79393C11.1066 3.71402 11.1066 3.48595 10.9468 3.40605C10.2346 3.04993 10.1501 2.96543 9.79394 2.25319Z" fill="#444CE7"/>
<path d="M2.75377 11.049C2.67668 10.8948 2.45665 10.8948 2.37956 11.049C1.89969 12.0087 1.80872 12.0997 0.848971 12.5796C0.694788 12.6566 0.694787 12.8767 0.848971 12.9538C1.80872 13.4336 1.89969 13.5246 2.37956 14.4844C2.45665 14.6385 2.67668 14.6385 2.75377 14.4844C3.23365 13.5246 3.32461 13.4336 4.28436 12.9538C4.43855 12.8767 4.43855 12.6566 4.28436 12.5796C3.32461 12.0997 3.23365 12.0087 2.75377 11.049Z" fill="#444CE7"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M14.6741 8.65106C14.8886 8.50146 15.1837 8.55405 15.3333 8.76853C15.7614 9.38226 16.0125 10.1292 16.0125 10.9333C16.0125 11.7375 15.7614 12.4844 15.3333 13.0981C15.1837 13.3126 14.8886 13.3652 14.6741 13.2156C14.4596 13.066 14.407 12.7708 14.5567 12.5564C14.8775 12.0964 15.0656 11.5375 15.0656 10.9333C15.0656 10.3291 14.8775 9.77025 14.5567 9.31028C14.407 9.09581 14.4596 8.80066 14.6741 8.65106Z" fill="#444CE7"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M12.5674 6.53771C12.794 6.51987 13.0155 6.61161 13.1632 6.78449C13.2954 6.93929 13.3164 7.12549 13.3244 7.21587C13.3334 7.31718 13.3334 7.44301 13.3333 7.57103C13.3333 7.57691 13.3333 7.58278 13.3333 7.58866L13.3333 14.3C13.3334 14.428 13.3334 14.5539 13.3244 14.6552C13.3164 14.7455 13.2954 14.9317 13.1632 15.0865C13.0155 15.2594 12.794 15.3512 12.5674 15.3333C12.3644 15.3173 12.2179 15.2005 12.1484 15.1423C12.0704 15.077 11.9814 14.988 11.8909 14.8975L10.3795 13.3861C10.3357 13.3423 10.3137 13.3205 10.2971 13.3053L10.2958 13.3041L10.2941 13.3041C10.2716 13.303 10.2407 13.3029 10.1787 13.3029L9.34101 13.3029C9.22151 13.3029 9.10513 13.3029 9.00657 13.2949C8.89833 13.286 8.77062 13.2652 8.6421 13.1997C8.46392 13.1089 8.31906 12.964 8.22827 12.7859C8.16279 12.6574 8.14192 12.5296 8.13308 12.4214C8.12503 12.3228 8.12504 12.2065 8.12505 12.087V9.79916C8.12505 9.79413 8.12505 9.78909 8.12505 9.78406C8.12504 9.66456 8.12503 9.54819 8.13308 9.44963C8.14192 9.34139 8.16279 9.21368 8.22827 9.08517C8.31906 8.90699 8.46392 8.76212 8.6421 8.67133C8.77062 8.60585 8.89833 8.58498 9.00657 8.57614C9.10512 8.56809 9.2215 8.5681 9.341 8.56812C9.34603 8.56812 9.35106 8.56812 9.3561 8.56812H10.1787C10.2407 8.56812 10.2716 8.56801 10.2941 8.56698L10.2958 8.5669L10.2971 8.56575C10.3137 8.55058 10.3357 8.52877 10.3795 8.48491L11.8784 6.98602C11.8826 6.98186 11.8867 6.97771 11.8909 6.97355C11.9814 6.88302 12.0704 6.79403 12.1484 6.72874C12.2179 6.67049 12.3644 6.55368 12.5674 6.53771Z" fill="#444CE7"/>
</g>
<defs>
<clipPath id="clip0_109_6694">
<rect width="16" height="16" fill="white"/>
</clipPath>
</defs>
</svg>

+ 112
- 0
web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.json

{
"icon": {
"type": "element",
"isRootNode": true,
"name": "svg",
"attributes": {
"width": "16",
"height": "16",
"viewBox": "0 0 16 16",
"fill": "none",
"xmlns": "http://www.w3.org/2000/svg"
},
"children": [
{
"type": "element",
"name": "g",
"attributes": {
"clip-path": "url(#clip0_109_6694)"
},
"children": [
{
"type": "element",
"name": "path",
"attributes": {
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M0 2.86666C0 2.05664 0.656649 1.39999 1.46667 1.39999H5.86667C6.67668 1.39999 7.33333 2.05664 7.33333 2.86666C7.33333 3.27167 7.00501 3.59999 6.6 3.59999C6.19499 3.59999 5.86667 3.27167 5.86667 2.86666H4.4V7.99999C4.80501 7.99999 5.13333 8.32831 5.13333 8.73332C5.13333 9.13833 4.80501 9.46666 4.4 9.46666H2.93333C2.52832 9.46666 2.2 9.13833 2.2 8.73332C2.2 8.32831 2.52832 7.99999 2.93333 7.99999V2.86666H1.46667C1.46667 3.27167 1.13834 3.59999 0.733333 3.59999C0.328324 3.59999 0 3.27167 0 2.86666Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"d": "M13.8205 0.782296C13.7434 0.62811 13.5233 0.62811 13.4462 0.782296C12.9664 1.74206 12.8754 1.83302 11.9156 2.3129C11.7615 2.39 11.7615 2.61003 11.9156 2.68712C12.8754 3.167 12.9664 3.25797 13.4462 4.21773C13.5233 4.37191 13.7434 4.37191 13.8205 4.21773C14.3003 3.25797 14.3913 3.167 15.3511 2.68712C15.5053 2.61003 15.5053 2.39 15.3511 2.3129C14.3913 1.83302 14.3003 1.74206 13.8205 0.782296Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"d": "M9.79394 2.25319C9.71404 2.09337 9.48596 2.09337 9.40605 2.25319C9.04994 2.96543 8.96544 3.04993 8.2532 3.40605C8.09338 3.48595 8.09338 3.71402 8.2532 3.79393C8.96544 4.15005 9.04994 4.23455 9.40606 4.94679C9.48596 5.10661 9.71404 5.10661 9.79394 4.94679C10.1501 4.23455 10.2346 4.15005 10.9468 3.79393C11.1066 3.71402 11.1066 3.48595 10.9468 3.40605C10.2346 3.04993 10.1501 2.96543 9.79394 2.25319Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"d": "M2.75377 11.049C2.67668 10.8948 2.45665 10.8948 2.37956 11.049C1.89969 12.0087 1.80872 12.0997 0.848971 12.5796C0.694788 12.6566 0.694787 12.8767 0.848971 12.9538C1.80872 13.4336 1.89969 13.5246 2.37956 14.4844C2.45665 14.6385 2.67668 14.6385 2.75377 14.4844C3.23365 13.5246 3.32461 13.4336 4.28436 12.9538C4.43855 12.8767 4.43855 12.6566 4.28436 12.5796C3.32461 12.0997 3.23365 12.0087 2.75377 11.049Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M14.6741 8.65106C14.8886 8.50146 15.1837 8.55405 15.3333 8.76853C15.7614 9.38226 16.0125 10.1292 16.0125 10.9333C16.0125 11.7375 15.7614 12.4844 15.3333 13.0981C15.1837 13.3126 14.8886 13.3652 14.6741 13.2156C14.4596 13.066 14.407 12.7708 14.5567 12.5564C14.8775 12.0964 15.0656 11.5375 15.0656 10.9333C15.0656 10.3291 14.8775 9.77025 14.5567 9.31028C14.407 9.09581 14.4596 8.80066 14.6741 8.65106Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M12.5674 6.53771C12.794 6.51987 13.0155 6.61161 13.1632 6.78449C13.2954 6.93929 13.3164 7.12549 13.3244 7.21587C13.3334 7.31718 13.3334 7.44301 13.3333 7.57103C13.3333 7.57691 13.3333 7.58278 13.3333 7.58866L13.3333 14.3C13.3334 14.428 13.3334 14.5539 13.3244 14.6552C13.3164 14.7455 13.2954 14.9317 13.1632 15.0865C13.0155 15.2594 12.794 15.3512 12.5674 15.3333C12.3644 15.3173 12.2179 15.2005 12.1484 15.1423C12.0704 15.077 11.9814 14.988 11.8909 14.8975L10.3795 13.3861C10.3357 13.3423 10.3137 13.3205 10.2971 13.3053L10.2958 13.3041L10.2941 13.3041C10.2716 13.303 10.2407 13.3029 10.1787 13.3029L9.34101 13.3029C9.22151 13.3029 9.10513 13.3029 9.00657 13.2949C8.89833 13.286 8.77062 13.2652 8.6421 13.1997C8.46392 13.1089 8.31906 12.964 8.22827 12.7859C8.16279 12.6574 8.14192 12.5296 8.13308 12.4214C8.12503 12.3228 8.12504 12.2065 8.12505 12.087V9.79916C8.12505 9.79413 8.12505 9.78909 8.12505 9.78406C8.12504 9.66456 8.12503 9.54819 8.13308 9.44963C8.14192 9.34139 8.16279 9.21368 8.22827 9.08517C8.31906 8.90699 8.46392 8.76212 8.6421 8.67133C8.77062 8.60585 8.89833 8.58498 9.00657 8.57614C9.10512 8.56809 9.2215 8.5681 9.341 8.56812C9.34603 8.56812 9.35106 8.56812 9.3561 8.56812H10.1787C10.2407 8.56812 10.2716 8.56801 10.2941 8.56698L10.2958 8.5669L10.2971 8.56575C10.3137 8.55058 10.3357 8.52877 10.3795 8.48491L11.8784 6.98602C11.8826 6.98186 11.8867 6.97771 11.8909 6.97355C11.9814 6.88302 12.0704 6.79403 12.1484 6.72874C12.2179 6.67049 12.3644 6.55368 12.5674 6.53771Z",
"fill": "currentColor"
},
"children": []
}
]
},
{
"type": "element",
"name": "defs",
"attributes": {},
"children": [
{
"type": "element",
"name": "clipPath",
"attributes": {
"id": "clip0_109_6694"
},
"children": [
{
"type": "element",
"name": "rect",
"attributes": {
"width": "16",
"height": "16",
"fill": "white"
},
"children": []
}
]
}
]
}
]
},
"name": "Speaker"
}

+ 16
- 0
web/app/components/base/icons/src/vender/line/mediaAndDevices/Speaker.tsx

// GENERATED BY script
// DO NOT EDIT IT MANUALLY

import * as React from 'react'
import data from './Speaker.json'
import IconBase from '@/app/components/base/icons/IconBase'
import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'

const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
props,
ref,
) => <IconBase {...props} ref={ref} data={data as IconData} />)

Icon.displayName = 'Speaker'

export default Icon
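The generated wrapper renders the JSON icon data through `IconBase`, so it is consumed like any other icon component. A minimal usage sketch (the import path comes from the barrel export added below; the surrounding component is hypothetical):

```tsx
import { Speaker } from '@/app/components/base/icons/src/vender/line/mediaAndDevices'

// Hypothetical example component: the icon inherits its color via currentColor.
const PlayLabel = () => (
  <span className='flex items-center text-gray-500'>
    <Speaker className='w-4 h-4 mr-1' />
    Play
  </span>
)

export default PlayLabel
```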

+ 1
- 0
web/app/components/base/icons/src/vender/line/mediaAndDevices/index.ts

export { default as Microphone01 } from './Microphone01'
export { default as SlidersH } from './SlidersH'
export { default as Speaker } from './Speaker'

+ 112
- 0
web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.json View File

{
"icon": {
"type": "element",
"isRootNode": true,
"name": "svg",
"attributes": {
"width": "16",
"height": "16",
"viewBox": "0 0 16 16",
"fill": "none",
"xmlns": "http://www.w3.org/2000/svg"
},
"children": [
{
"type": "element",
"name": "g",
"attributes": {
"clip-path": "url(#clip0_109_6694)"
},
"children": [
{
"type": "element",
"name": "path",
"attributes": {
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M0 2.86666C0 2.05664 0.656649 1.39999 1.46667 1.39999H5.86667C6.67668 1.39999 7.33333 2.05664 7.33333 2.86666C7.33333 3.27167 7.00501 3.59999 6.6 3.59999C6.19499 3.59999 5.86667 3.27167 5.86667 2.86666H4.4V7.99999C4.80501 7.99999 5.13333 8.32831 5.13333 8.73332C5.13333 9.13833 4.80501 9.46666 4.4 9.46666H2.93333C2.52832 9.46666 2.2 9.13833 2.2 8.73332C2.2 8.32831 2.52832 7.99999 2.93333 7.99999V2.86666H1.46667C1.46667 3.27167 1.13834 3.59999 0.733333 3.59999C0.328324 3.59999 0 3.27167 0 2.86666Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"d": "M13.8205 0.782296C13.7434 0.62811 13.5233 0.62811 13.4462 0.782296C12.9664 1.74206 12.8754 1.83302 11.9156 2.3129C11.7615 2.39 11.7615 2.61003 11.9156 2.68712C12.8754 3.167 12.9664 3.25797 13.4462 4.21773C13.5233 4.37191 13.7434 4.37191 13.8205 4.21773C14.3003 3.25797 14.3913 3.167 15.3511 2.68712C15.5053 2.61003 15.5053 2.39 15.3511 2.3129C14.3913 1.83302 14.3003 1.74206 13.8205 0.782296Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"d": "M9.79394 2.25319C9.71404 2.09337 9.48596 2.09337 9.40605 2.25319C9.04994 2.96543 8.96544 3.04993 8.2532 3.40605C8.09338 3.48595 8.09338 3.71402 8.2532 3.79393C8.96544 4.15005 9.04994 4.23455 9.40606 4.94679C9.48596 5.10661 9.71404 5.10661 9.79394 4.94679C10.1501 4.23455 10.2346 4.15005 10.9468 3.79393C11.1066 3.71402 11.1066 3.48595 10.9468 3.40605C10.2346 3.04993 10.1501 2.96543 9.79394 2.25319Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"d": "M2.75377 11.049C2.67668 10.8948 2.45665 10.8948 2.37956 11.049C1.89969 12.0087 1.80872 12.0997 0.848971 12.5796C0.694788 12.6566 0.694787 12.8767 0.848971 12.9538C1.80872 13.4336 1.89969 13.5246 2.37956 14.4844C2.45665 14.6385 2.67668 14.6385 2.75377 14.4844C3.23365 13.5246 3.32461 13.4336 4.28436 12.9538C4.43855 12.8767 4.43855 12.6566 4.28436 12.5796C3.32461 12.0997 3.23365 12.0087 2.75377 11.049Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M14.6741 8.65106C14.8886 8.50146 15.1837 8.55405 15.3333 8.76853C15.7614 9.38226 16.0125 10.1292 16.0125 10.9333C16.0125 11.7375 15.7614 12.4844 15.3333 13.0981C15.1837 13.3126 14.8886 13.3652 14.6741 13.2156C14.4596 13.066 14.407 12.7708 14.5567 12.5564C14.8775 12.0964 15.0656 11.5375 15.0656 10.9333C15.0656 10.3291 14.8775 9.77025 14.5567 9.31028C14.407 9.09581 14.4596 8.80066 14.6741 8.65106Z",
"fill": "currentColor"
},
"children": []
},
{
"type": "element",
"name": "path",
"attributes": {
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M12.5674 6.53771C12.794 6.51987 13.0155 6.61161 13.1632 6.78449C13.2954 6.93929 13.3164 7.12549 13.3244 7.21587C13.3334 7.31718 13.3334 7.44301 13.3333 7.57103C13.3333 7.57691 13.3333 7.58278 13.3333 7.58866L13.3333 14.3C13.3334 14.428 13.3334 14.5539 13.3244 14.6552C13.3164 14.7455 13.2954 14.9317 13.1632 15.0865C13.0155 15.2594 12.794 15.3512 12.5674 15.3333C12.3644 15.3173 12.2179 15.2005 12.1484 15.1423C12.0704 15.077 11.9814 14.988 11.8909 14.8975L10.3795 13.3861C10.3357 13.3423 10.3137 13.3205 10.2971 13.3053L10.2958 13.3041L10.2941 13.3041C10.2716 13.303 10.2407 13.3029 10.1787 13.3029L9.34101 13.3029C9.22151 13.3029 9.10513 13.3029 9.00657 13.2949C8.89833 13.286 8.77062 13.2652 8.6421 13.1997C8.46392 13.1089 8.31906 12.964 8.22827 12.7859C8.16279 12.6574 8.14192 12.5296 8.13308 12.4214C8.12503 12.3228 8.12504 12.2065 8.12505 12.087V9.79916C8.12505 9.79413 8.12505 9.78909 8.12505 9.78406C8.12504 9.66456 8.12503 9.54819 8.13308 9.44963C8.14192 9.34139 8.16279 9.21368 8.22827 9.08517C8.31906 8.90699 8.46392 8.76212 8.6421 8.67133C8.77062 8.60585 8.89833 8.58498 9.00657 8.57614C9.10512 8.56809 9.2215 8.5681 9.341 8.56812C9.34603 8.56812 9.35106 8.56812 9.3561 8.56812H10.1787C10.2407 8.56812 10.2716 8.56801 10.2941 8.56698L10.2958 8.5669L10.2971 8.56575C10.3137 8.55058 10.3357 8.52877 10.3795 8.48491L11.8784 6.98602C11.8826 6.98186 11.8867 6.97771 11.8909 6.97355C11.9814 6.88302 12.0704 6.79403 12.1484 6.72874C12.2179 6.67049 12.3644 6.55368 12.5674 6.53771Z",
"fill": "currentColor"
},
"children": []
}
]
},
{
"type": "element",
"name": "defs",
"attributes": {},
"children": [
{
"type": "element",
"name": "clipPath",
"attributes": {
"id": "clip0_109_6694"
},
"children": [
{
"type": "element",
"name": "rect",
"attributes": {
"width": "16",
"height": "16",
"fill": "white"
},
"children": []
}
]
}
]
}
]
},
"name": "Speaker"
}

+ 16
- 0
web/app/components/base/icons/src/vender/solid/mediaAndDevices/Speaker.tsx

// GENERATED BY script
// DO NOT EDIT IT MANUALLY

import * as React from 'react'
import data from './Speaker.json'
import IconBase from '@/app/components/base/icons/IconBase'
import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'

const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
props,
ref,
) => <IconBase {...props} ref={ref} data={data as IconData} />)

Icon.displayName = 'Speaker'

export default Icon

+ 1
- 0
web/app/components/base/icons/src/vender/solid/mediaAndDevices/index.ts

export { default as Microphone01 } from './Microphone01'
export { default as Robot } from './Robot'
export { default as Sliders02 } from './Sliders02'
export { default as Speaker } from './Speaker'
export { default as StopCircle } from './StopCircle'

+ 10
- 0
web/app/components/develop/secret-key/assets/pause.svg

<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_129_2189)">
<path d="M10.6666 14V10M13.3333 14V10M18.6666 12C18.6666 15.6819 15.6819 18.6667 12 18.6667C8.31808 18.6667 5.33331 15.6819 5.33331 12C5.33331 8.3181 8.31808 5.33333 12 5.33333C15.6819 5.33333 18.6666 8.3181 18.6666 12Z" stroke="#155EEF" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
</g>
<defs>
<clipPath id="clip0_129_2189">
<rect width="16" height="16" fill="white" transform="translate(4 4)"/>
</clipPath>
</defs>
</svg>

+ 11
- 0
web/app/components/develop/secret-key/assets/play.svg

<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_129_107)">
<path d="M7.99991 14.6666C11.6819 14.6666 14.6666 11.6819 14.6666 7.99998C14.6666 4.31808 11.6819 1.33331 7.99998 1.33331C4.31808 1.33331 1.33331 4.31808 1.33331 7.99998C1.33331 11.6819 4.31808 14.6666 7.99998 14.6666Z" stroke="#155EEF" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M6.66665 5.33331L10.6666 7.99998L6.66665 10.6666V5.33331Z" stroke="#155EEF" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
</g>
<defs>
<clipPath id="clip0_129_107">
<rect width="16" height="16" fill="white"/>
</clipPath>
</defs>
</svg>

+ 11
- 0
web/app/components/develop/secret-key/assets/stop.svg

<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_129_107)">
<path d="M7.99998 14.6666C11.6819 14.6666 14.6666 11.6819 14.6666 7.99998C14.6666 4.31808 11.6819 1.33331 7.99998 1.33331C4.31808 1.33331 1.33331 4.31808 1.33331 7.99998C1.33331 11.6819 4.31808 14.6666 7.99998 14.6666Z" stroke="#667085" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M6.66665 5.33331L10.6666 7.99998L6.66665 10.6666V5.33331Z" stroke="#667085" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
</g>
<defs>
<clipPath id="clip0_129_107">
<rect width="16" height="16" fill="white"/>
</clipPath>
</defs>
</svg>

+ 63
- 15
web/app/components/develop/template/template.en.mdx

The text generation application offers non-session support and is ideal for translation, article writing, summarization AI, and more.


<div>
### Base URL
<CodeGroup title="Code" targetCode={props.appDetail.api_base_url}>
```javascript
```


### Authentication


The Service API uses `API-Key` authentication.
<i>**Strongly recommend storing your API Key on the server-side, not shared or stored on the client-side, to avoid possible API-Key leakage that can lead to serious consequences.**</i>


For all API requests, include your API Key in the `Authorization` HTTP Header, as shown below:


<CodeGroup title="Code">
```javascript
```
User Input/Question content
</Property>
<Property name='inputs' type='object' key='inputs'>
Allows the entry of various variable values defined by the App.
The `inputs` parameter contains multiple key/value pairs, with each key corresponding to a specific variable and each value being the specific value for that variable.
The text generation application requires at least one key/value pair to be inputted.
</Property>
<Property name='response_mode' type='string' key='response_mode'>
The mode of response return, supporting:
- `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)).
- `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long)
<i>Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds.</i>
</Property>
<Property name='user' type='string' key='user'>
User identifier, used to define the identity of the end-user for retrieval and statistics.
Should be uniquely defined by the developer within the application.
</Property>
<Property name='conversation_id' type='string' key='conversation_id'>
- `upload_file_id` (string) Uploaded file ID, which must be obtained by uploading through the File Upload API in advance (when the transfer method is `local_file`)
</Property>
</Properties>
### Response
When `response_mode` is `blocking`, return a CompletionResponse object.
When `response_mode` is `streaming`, return a ChunkCompletionResponse stream.


### ChatCompletionResponse
<Row>
<Col>
Upload a file (currently only images are supported) for use when sending messages, enabling multimodal understanding of images and text.
Supports png, jpg, jpeg, webp, gif formats.
<i>Uploaded files are for use by the current end-user only.</i>


### Request Body
The file to be uploaded.
- `user` (string) Required
User identifier, defined by the developer's rules, must be unique within the application.
### Response
After a successful upload, the server will return the file's ID and related information.
- `id` (uuid) ID
- 503, `s3_permission_denied`, no permission to upload files to S3
- 503, `s3_file_too_large`, file exceeds S3 size limit
- 500, internal server error


</Col>
<Col sticky>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"id": "72fa9618-8f89-4a37-9b33-7e1178a24a67",
"name": "example.png",
"size": 1024,
"extension": "png",
"mime_type": "image/png",
"created_by": "6ad1ab0a-73ff-4ac1-b9e4-cdb312f71f13",
"created_at": 1577836800,
}
```
<CodeGroup title="Request" tag="POST" label="/chat-messages/:task_id/stop" targetCode={`curl -X POST 'https://cloud.dify.ai/v1/chat-messages/:task_id/stop' \\\n-H 'Authorization: Bearer {api_key}' \\\n-H 'Content-Type: application/json' \\\n--data-raw '{ "user": "abc-123"}'`}>
```bash {{ title: 'cURL' }}
curl -X POST 'https://cloud.dify.ai/v1/chat-messages/:task_id/stop' \
-H 'Authorization: Bearer {api_key}' \
-H 'Content-Type: application/json' \
--data-raw '{
"user": "abc-123"
}'
```
</CodeGroup>
</Col>
</Row>

---

<Heading
url='/text-to-audio'
method='POST'
title='Text to Audio'
name='#audio'
/>
<Row>
<Col>
Text to speech. Currently only OpenAI models are supported.

### Request Body

<Properties>
<Property name='text' type='string' key='text'>
The text content to be converted into speech.
</Property>
<Property name='user' type='string' key='user'>
The user identifier, defined by the developer, must ensure uniqueness within the app.
</Property>
<Property name='streaming' type='bool' key='streaming'>
Whether to enable streaming output: `true` or `false`.
</Property>
</Properties>
</Col>
<Col sticky>

<CodeGroup title="Request" tag="POST" label="/text-to-audio" targetCode={`curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \\\n--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \\\n--form 'text=Hello Dify' \\\n--form 'user=abc-123' \\\n--form 'streaming=false'`}>

```bash {{ title: 'cURL' }}
curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \
--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \
--form 'text=Hello Dify' \
--form 'user=abc-123' \
--form 'streaming=false'
```

</CodeGroup>

<CodeGroup title="headers">
```json {{ title: 'headers' }}
{
"Content-Type": "audio/wav"
}
```
</CodeGroup>
</Col>
</Row>
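For callers not using cURL, any multipart-capable HTTP client works the same way. A minimal TypeScript sketch against the endpoint documented above (the base URL and API key are placeholders you substitute):

```ts
// Sketch: request synthesized speech and get back a playable audio/wav Blob.
async function textToAudio(baseUrl: string, apiKey: string, text: string): Promise<Blob> {
  const form = new FormData()
  form.append('text', text)
  form.append('user', 'abc-123')
  form.append('streaming', 'false')

  const res = await fetch(`${baseUrl}/text-to-audio`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${apiKey}` }, // fetch sets the multipart boundary itself
    body: form,
  })
  if (!res.ok)
    throw new Error(`text-to-audio request failed: ${res.status}`)
  return res.blob()
}
```

The returned Blob can be handed to `URL.createObjectURL` and an `Audio` element, which mirrors what the web UI's `AudioBtn` component does.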

+ 62
- 14
web/app/components/develop/template/template.zh.mdx



### 鉴权


Dify Service API 使用 `API-Key` 进行鉴权。
<i>**强烈建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。**</i>
所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示:


<CodeGroup title="Code">
</Property>
<Property name='inputs' type='object' key='inputs'>
(选填)允许传入 App 定义的各变量值。
inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。
文本生成型应用要求至少传入一组键值对。
</Property>
<Property name='response_mode' type='string' key='response_mode'>
- `streaming` 流式模式(推荐)。基于 SSE(**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**)实现类似打字机输出方式的流式返回。
- `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。
<i>由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。</i>
</Property>
<Property name='user' type='string' key='user'>
用户标识,用于定义终端用户的身份,方便检索、统计。
由开发者定义规则,需保证用户标识在应用内唯一。
</Property>
<Property name='conversation_id' type='string' key='conversation_id'>


### Response
<Properties>
当 `response_mode` 为 `blocking` 时,返回 ChatCompletionResponse object。
当 `response_mode` 为 `streaming` 时,返回 ChunkChatCompletionResponse object 流式序列。
### ChatCompletionResponse
返回完整的 App 结果,`Content-Type` 为 `application/json`。
- `message_id` (string) 消息唯一 ID
<Row>
<Col>
上传文件(目前仅支持图片)并在发送消息时使用,可实现图文多模态理解。
支持 png, jpg, jpeg, webp, gif 格式。
<i>上传的文件仅供当前终端用户使用。</i>


### Request Body ### Request Body
<CodeGroup title="Response"> <CodeGroup title="Response">
```json {{ title: 'Response' }} ```json {{ title: 'Response' }}
{ {
"id": "72fa9618-8f89-4a37-9b33-7e1178a24a67",
"id": "72fa9618-8f89-4a37-9b33-7e1178a24a67",
"name": "example.png", "name": "example.png",
"size": 1024, "size": 1024,
"extension": "png", "extension": "png",
"mime_type": "image/png", "mime_type": "image/png",
"created_by": 123,
"created_by": 123,
"created_at": 1577836800, "created_at": 1577836800,
} }
``` ```
仅支持流式模式。 仅支持流式模式。
### Path ### Path
- `task_id` (string) 任务 ID,可在流式返回 Chunk 中获取 - `task_id` (string) 任务 ID,可在流式返回 Chunk 中获取
### Request Body ### Request Body
- `user` (string) Required - `user` (string) Required
用户标识,用于定义终端用户的身份,必须和发送消息接口传入 user 保持一致。 用户标识,用于定义终端用户的身份,必须和发送消息接口传入 user 保持一致。
- `annotation_reply` (object) 标记回复 - `annotation_reply` (object) 标记回复
- `enabled` (bool) 是否开启 - `enabled` (bool) 是否开启
- `user_input_form` (array[object]) 用户输入表单配置 - `user_input_form` (array[object]) 用户输入表单配置
- `text-input` (object) 文本输入控件
- `text-input` (object) 文本输入控件
- `label` (string) 控件展示标签名 - `label` (string) 控件展示标签名
- `variable` (string) 控件 ID - `variable` (string) 控件 ID
- `required` (bool) 是否必填 - `required` (bool) 是否必填
- `variable` (string) 控件 ID - `variable` (string) 控件 ID
- `required` (bool) 是否必填 - `required` (bool) 是否必填
- `default` (string) 默认值 - `default` (string) 默认值
- `select` (object) 下拉控件
- `select` (object) 下拉控件
- `label` (string) 控件展示标签名 - `label` (string) 控件展示标签名
- `variable` (string) 控件 ID - `variable` (string) 控件 ID
- `required` (bool) 是否必填 - `required` (bool) 是否必填
</CodeGroup> </CodeGroup>
</Col> </Col>
</Row> </Row>

---

<Heading
url='/text-to-audio'
method='POST'
title='文字转语音'
name='#audio'
/>
<Row>
<Col>
文字转语音,仅支持 openai 模型。

### Request Body

<Properties>
<Property name='text' type='string' key='text'>
语音生成内容。
</Property>
<Property name='user' type='string' key='user'>
用户标识,由开发者定义规则,需保证用户标识在应用内唯一。
</Property>
<Property name='streaming' type='bool' key='streaming'>
是否启用流式输出,`true`、`false`。
</Property>
</Properties>
</Col>
<Col sticky>

<CodeGroup title="Request" tag="POST" label="/text-to-audio" targetCode={`curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \\\n--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \\\n--form 'text=你好Dify;user=abc-123;streaming=false`}>

```bash {{ title: 'cURL' }}
curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \
--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \
--form 'text=你好Dify' \
--form 'user=abc-123' \
--form 'streaming=false'
```

</CodeGroup>

<CodeGroup title="headers">
```json {{ title: 'headers' }}
{
"Content-Type": "audio/wav"
}
```
</CodeGroup>
</Col>
</Row>

+ 71  - 23  web/app/components/develop/template/template_chat.en.mdx

Chat applications support session persistence, allowing previous chat history to be used as context for responses. This can be applicable for chatbots, customer service AI, etc.


<div>
### Base URL
<CodeGroup title="Code" targetCode={props.appDetail.api_base_url}>
```javascript
```

### Authentication

The Service API uses `API-Key` authentication.
<i>**Strongly recommend storing your API Key on the server-side, not shared or stored on the client-side, to avoid possible API-Key leakage that can lead to serious consequences.**</i>

For all API requests, include your API Key in the `Authorization` HTTP Header, as shown below:

<CodeGroup title="Code">
```javascript
User Input/Question content
</Property>
<Property name='inputs' type='object' key='inputs'>
Allows the entry of various variable values defined by the App.
The `inputs` parameter contains multiple key/value pairs, with each key corresponding to a specific variable and each value being the specific value for that variable.
</Property>
<Property name='response_mode' type='string' key='response_mode'>
The mode of response return, supporting:
- `streaming` Streaming mode (recommended), implements a typewriter-like output through SSE ([Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)).
- `blocking` Blocking mode, returns result after execution is complete. (Requests may be interrupted if the process is long)
Due to Cloudflare restrictions, the request will be interrupted without a return after 100 seconds.
<i>Note: blocking mode is not supported in Agent Assistant mode</i>
</Property>
<Property name='user' type='string' key='user'>
User identifier, used to define the identity of the end-user for retrieval and statistics.
Should be uniquely defined by the developer within the application.
</Property>
<Property name='conversation_id' type='string' key='conversation_id'>
Can achieve async title generation by calling the conversation rename API and setting `auto_generate` to true.
</Property>
</Properties>
### Response
When response_mode is blocking, return a CompletionResponse object.
When response_mode is streaming, return a ChunkCompletionResponse stream.

### ChatCompletionResponse
- `tool` (string) A list of tools represents which tools are called, split by ;
- `tool_input` (string) Input of tools in JSON format. Like: `{"dalle3": {"prompt": "a cute cat"}}`.
- `created_at` (int) Creation timestamp, e.g., 1705395332
- `message_files` (array[string]) Refer to message_file event
- `file_id` (string) File ID
- `conversation_id` (string) Conversation ID
- `event: message_file` Message file event, a new file has been created by a tool
<Row>
<Col>
Upload a file (currently only images are supported) for use when sending messages, enabling multimodal understanding of images and text.
Supports png, jpg, jpeg, webp, gif formats.
Uploaded files are for use by the current end-user only.

### Request Body
The file to be uploaded.
- `user` (string) Required
User identifier, defined by the developer's rules, must be unique within the application.
### Response
After a successful upload, the server will return the file's ID and related information.
- `id` (uuid) ID
- 503, `s3_permission_denied`, no permission to upload files to S3
- 503, `s3_file_too_large`, file exceeds S3 size limit
- 500, internal server error

</Col>
<Col sticky>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
  "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67",
  "name": "example.png",
  "size": 1024,
  "extension": "png",
  "mime_type": "image/png",
  "created_by": "6ad1ab0a-73ff-4ac1-b9e4-cdb312f71f13",
  "created_at": 1577836800,
}
```
<CodeGroup title="Request" tag="POST" label="/chat-messages/:task_id/stop" targetCode={`curl -X POST 'https://cloud.dify.ai/v1/chat-messages/:task_id/stop' \\\n-H 'Authorization: Bearer {api_key}' \\\n-H 'Content-Type: application/json' \\\n--data-raw '{ "user": "abc-123"}'`}>
```bash {{ title: 'cURL' }}
curl -X POST 'https://cloud.dify.ai/v1/chat-messages/:task_id/stop' \
-H 'Authorization: Bearer {api_key}' \
-H 'Content-Type: application/json' \
--data-raw '{
  "user": "abc-123"
}'
Conversation ID
</Property>
<Property name='user' type='string' key='user'>
User identifier, used to define the identity of the end-user for retrieval and statistics.
Should be uniquely defined by the developer within the application.
</Property>
<Property name='first_id' type='string' key='first_id'>
- `tool` (string) A list of tools represents which tools are called, split by ;
- `tool_input` (string) Input of tools in JSON format. Like: `{"dalle3": {"prompt": "a cute cat"}}`.
- `created_at` (int) Creation timestamp, e.g., 1705395332
- `message_files` (array[string]) Refer to message_file event
- `file_id` (string) File ID
- `answer` (string) Response message content
- `created_at` (timestamp) Creation timestamp, e.g., 1705395332

<Properties>
<Property name='user' type='string' key='user'>
User identifier, used to define the identity of the end-user for retrieval and statistics.
Should be uniquely defined by the developer within the application.
</Property>
<Property name='last_id' type='string' key='last_id'>

<Properties>
<Property name='file' type='file' key='file'>
Audio file.
Supported formats: `['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']`
File size limit: 15MB
</Property>
<Property name='user' type='string' key='user'>

---


<Heading
url='/text-to-audio'
method='POST'
title='Text to Audio'
name='#audio'
/>
<Row>
<Col>
Text to speech. Currently only OpenAI models are supported.

### Request Body

<Properties>
<Property name='text' type='string' key='text'>
The text content used to generate speech.
</Property>
<Property name='user' type='string' key='user'>
The user identifier, defined by the developer, must ensure uniqueness within the app.
</Property>
<Property name='streaming' type='bool' key='streaming'>
Whether to enable streaming output: `true` or `false`.
</Property>
</Properties>
</Col>
<Col sticky>

<CodeGroup title="Request" tag="POST" label="/text-to-audio" targetCode={`curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \\\n--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \\\n--form 'text=Hello Dify;user=abc-123;streaming=false`}>

```bash {{ title: 'cURL' }}
curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \
--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \
--form 'text=Hello Dify' \
--form 'user=abc-123' \
--form 'streaming=false'
```

</CodeGroup>

<CodeGroup title="headers">
```json {{ title: 'headers' }}
{
"Content-Type": "audio/wav"
}
```
</CodeGroup>
</Col>
</Row>
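
The same endpoint can also be driven from the browser; below is a sketch that plays the returned audio directly, assuming the API key is attached by your own backend proxy rather than shipped to the client:

```typescript
// Browser-side sketch: synthesize text and play it via an Audio element.
// In production the Authorization header should be added by a backend proxy.
async function speak(apiBaseUrl: string, text: string, user: string): Promise<void> {
  const form = new FormData()
  form.append('text', text)
  form.append('user', user)
  form.append('streaming', 'false')

  const res = await fetch(`${apiBaseUrl}/text-to-audio`, {
    method: 'POST',
    headers: { Authorization: 'Bearer ENTER-YOUR-SECRET-KEY' },
    body: form,
  })
  const wav = await res.blob() // audio/wav, per the response headers above
  await new Audio(URL.createObjectURL(wav)).play()
}
```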

---

<Heading
url='/parameters'
method='GET'
</Properties>
### Response
- `tool_icons`(object[string]) tool icons
- `tool_name` (string)
- `icon` (object|string)
- (object) icon object
- `background` (string) background color in hex format

+ 65  - 17  web/app/components/develop/template/template_chat.zh.mdx



### 鉴权

Service API 使用 `API-Key` 进行鉴权。
<i>**强烈建议开发者把 `API-Key` 放在后端存储,而非分享或者放在客户端存储,以免 `API-Key` 泄露,导致财产损失。**</i>
所有 API 请求都应在 **`Authorization`** HTTP Header 中包含您的 `API-Key`,如下所示:

<CodeGroup title="Code">
用户输入/提问内容。
</Property>
<Property name='inputs' type='object' key='inputs'>
(选填)允许传入 App 定义的各变量值。
inputs 参数包含了多组键值对(Key/Value pairs),每组的键对应一个特定变量,每组的值则是该变量的具体值。

</Property>
<Property name='response_mode' type='string' key='response_mode'>
- `streaming` 流式模式(推荐)。基于 SSE(**[Server-Sent Events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events)**)实现类似打字机输出方式的流式返回。
- `blocking` 阻塞模式,等待执行完毕后返回结果。(请求若流程较长可能会被中断)。
<i>由于 Cloudflare 限制,请求会在 100 秒超时无返回后中断。</i>
注:Agent 模式下不允许 blocking。
</Property>
<Property name='user' type='string' key='user'>

### Response
<Properties>
当 `response_mode` 为 `blocking` 时,返回 ChatCompletionResponse object。
当 `response_mode` 为 `streaming` 时,返回 ChunkChatCompletionResponse object 流式序列。
### ChatCompletionResponse

返回完整的 App 结果,`Content-Type` 为 `application/json`。
- `conversation_id` (string) 会话 ID
- `answer` (string) 替换内容(直接替换 LLM 所有回复文本)
- `created_at` (int) 创建时间戳,如:1705395332
- `event: error`
流式输出过程中出现的异常会以 stream event 形式输出,收到异常事件后即结束。
- `task_id` (string) 任务 ID,用于请求跟踪和下方的停止响应接口
- `message_id` (string) 消息唯一 ID
<Row>
<Col>
上传文件(目前仅支持图片)并在发送消息时使用,可实现图文多模态理解。
支持 png, jpg, jpeg, webp, gif 格式。
<i>上传的文件仅供当前终端用户使用。</i>

### Request Body
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
  "id": "72fa9618-8f89-4a37-9b33-7e1178a24a67",
  "name": "example.png",
  "size": 1024,
  "extension": "png",
  "mime_type": "image/png",
  "created_by": 123,
  "created_at": 1577836800,
}
```
仅支持流式模式。
### Path
- `task_id` (string) 任务 ID,可在流式返回 Chunk 中获取
### Request Body
- `user` (string) Required
用户标识,用于定义终端用户的身份,必须和发送消息接口传入 user 保持一致。
</Properties>

### Response
- `result` (string) 固定返回 success
</Col>
<Col sticky>

<Properties>
<Property name='file' type='file' key='file'>
语音文件。
支持格式:`['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']`
文件大小限制:15MB
</Property>
<Property name='user' type='string' key='user'>

---


<Heading
url='/text-to-audio'
method='POST'
title='文字转语音'
name='#audio'
/>
<Row>
<Col>
文字转语音,仅支持 openai 模型。

### Request Body

<Properties>
<Property name='text' type='string' key='text'>
语音生成内容。
</Property>
<Property name='user' type='string' key='user'>
用户标识,由开发者定义规则,需保证用户标识在应用内唯一。
</Property>
<Property name='streaming' type='bool' key='streaming'>
是否启用流式输出,`true`、`false`。
</Property>
</Properties>
</Col>
<Col sticky>

<CodeGroup title="Request" tag="POST" label="/text-to-audio" targetCode={`curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \\\n--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \\\n--form 'text=你好Dify;user=abc-123;streaming=false`}>

```bash {{ title: 'cURL' }}
curl --location --request POST '${props.appDetail.api_base_url}/text-to-audio' \
--header 'Authorization: Bearer ENTER-YOUR-SECRET-KEY' \
--form 'text=你好Dify' \
--form 'user=abc-123' \
--form 'streaming=false'
```

</CodeGroup>

<CodeGroup title="headers">
```json {{ title: 'headers' }}
{
"Content-Type": "audio/wav"
}
```
</CodeGroup>
</Col>
</Row>

---

<Heading
url='/parameters'
method='GET'
- `annotation_reply` (object) 标记回复
- `enabled` (bool) 是否开启
- `user_input_form` (array[object]) 用户输入表单配置
- `text-input` (object) 文本输入控件
- `label` (string) 控件展示标签名
- `variable` (string) 控件 ID
- `required` (bool) 是否必填
- `variable` (string) 控件 ID
- `required` (bool) 是否必填
- `default` (string) 默认值
- `select` (object) 下拉控件
- `label` (string) 控件展示标签名
- `variable` (string) 控件 ID
- `required` (bool) 是否必填
</Properties>
### Response
- `tool_icons`(object[string]) 工具图标
- `工具名称` (string)
- `icon` (object|string)
- (object) 图标
- `background` (string) hex格式的背景色

+ 2  - 0  web/app/components/header/account-setting/model-provider-page/declarations.ts

rerank = 'rerank',
speech2text = 'speech2text',
moderation = 'moderation',
tts = 'tts',
}

export const MODEL_TYPE_TEXT = {
[ModelTypeEnum.rerank]: 'Rerank',
[ModelTypeEnum.speech2text]: 'Speech2text',
[ModelTypeEnum.moderation]: 'Moderation',
[ModelTypeEnum.tts]: 'TTS',
}

export enum ConfigurateMethodEnum {

+ 2  - 1  web/app/components/header/account-setting/model-provider-page/hooks.ts

return value
}

export type ModelTypeIndex = 1 | 2 | 3 | 4
export type ModelTypeIndex = 1 | 2 | 3 | 4 | 5
export const MODEL_TYPE_MAPS = {
1: ModelTypeEnum.textGeneration,
2: ModelTypeEnum.textEmbedding,
3: ModelTypeEnum.rerank,
4: ModelTypeEnum.speech2text,
5: ModelTypeEnum.tts,
}

export const useModelList = (type: ModelTypeIndex) => {

+ 3  - 1  web/app/components/header/account-setting/model-provider-page/index.tsx

const { data: embeddingsDefaultModel } = useDefaultModel(2)
const { data: rerankDefaultModel } = useDefaultModel(3)
const { data: speech2textDefaultModel } = useDefaultModel(4)
const { data: ttsDefaultModel } = useDefaultModel(5)
const { modelProviders: providers } = useProviderContext()
const { setShowModelModal } = useModalContext()
const defaultModelNotConfigured = !textGenerationDefaultModel && !embeddingsDefaultModel && !speech2textDefaultModel && !rerankDefaultModel
const defaultModelNotConfigured = !textGenerationDefaultModel && !embeddingsDefaultModel && !speech2textDefaultModel && !rerankDefaultModel && !ttsDefaultModel
const [configedProviders, notConfigedProviders] = useMemo(() => {
const configedProviders: ModelProvider[] = []
const notConfigedProviders: ModelProvider[] = []
embeddingsDefaultModel={embeddingsDefaultModel}
rerankDefaultModel={rerankDefaultModel}
speech2textDefaultModel={speech2textDefaultModel}
ttsDefaultModel={ttsDefaultModel}
/>
</div>
{

+ 35  - 5  web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx

embeddingsDefaultModel: DefaultModelResponse | undefined
rerankDefaultModel: DefaultModelResponse | undefined
speech2textDefaultModel: DefaultModelResponse | undefined
ttsDefaultModel: DefaultModelResponse | undefined
}
const SystemModel: FC<SystemModelSelectorProps> = ({
textGenerationDefaultModel,
embeddingsDefaultModel,
rerankDefaultModel,
speech2textDefaultModel,
ttsDefaultModel,
}) => {
const { t } = useTranslation()
const { notify } = useToastContext()
const { data: embeddingModelList } = useModelList(2)
const { data: rerankModelList } = useModelList(3)
const { data: speech2textModelList } = useModelList(4)
const { data: ttsModelList } = useModelList(5)
const [changedModelTypes, setChangedModelTypes] = useState<ModelTypeEnum[]>([])
const [currentTextGenerationDefaultModel, changeCurrentTextGenerationDefaultModel] = useSystemDefaultModelAndModelList(textGenerationDefaultModel, textGenerationModelList)
const [currentEmbeddingsDefaultModel, changeCurrentEmbeddingsDefaultModel] = useSystemDefaultModelAndModelList(embeddingsDefaultModel, embeddingModelList)
const [currentRerankDefaultModel, changeCurrentRerankDefaultModel] = useSystemDefaultModelAndModelList(rerankDefaultModel, rerankModelList)
const [currentSpeech2textDefaultModel, changeCurrentSpeech2textDefaultModel] = useSystemDefaultModelAndModelList(speech2textDefaultModel, speech2textModelList)
const [currentTTSDefaultModel, changeCurrentTTSDefaultModel] = useSystemDefaultModelAndModelList(ttsDefaultModel, ttsModelList)
const [open, setOpen] = useState(false)

const getCurrentDefaultModelByModelType = (modelType: ModelTypeEnum) => {
return currentRerankDefaultModel
else if (modelType === ModelTypeEnum.speech2text)
return currentSpeech2textDefaultModel
else if (modelType === ModelTypeEnum.tts)
return currentTTSDefaultModel

return undefined
}
changeCurrentRerankDefaultModel(model)
else if (modelType === ModelTypeEnum.speech2text)
changeCurrentSpeech2textDefaultModel(model)
else if (modelType === ModelTypeEnum.tts)
changeCurrentTTSDefaultModel(model)

if (!changedModelTypes.includes(modelType))
setChangedModelTypes([...changedModelTypes, modelType])
const res = await updateDefaultModel({
url: '/workspaces/current/default-model',
body: {
model_settings: [ModelTypeEnum.textGeneration, ModelTypeEnum.textEmbedding, ModelTypeEnum.rerank, ModelTypeEnum.speech2text].map((modelType) => {
model_settings: [ModelTypeEnum.textGeneration, ModelTypeEnum.textEmbedding, ModelTypeEnum.rerank, ModelTypeEnum.speech2text, ModelTypeEnum.tts].map((modelType) => {
return {
model_type: modelType,
provider: getCurrentDefaultModelByModelType(modelType)?.provider,
updateModelList(modelType)
else if (modelType === ModelTypeEnum.speech2text)
updateModelList(modelType)
else if (modelType === ModelTypeEnum.tts)
updateModelList(modelType)
})
}
}
<div className='w-[261px] text-gray-500'>{t('common.modelProvider.systemReasoningModel.tip')}</div>
}
>
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400' />
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400'/>
</Tooltip>
</div>
<div>
<div className='w-[261px] text-gray-500'>{t('common.modelProvider.embeddingModel.tip')}</div>
}
>
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400' />
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400'/>
</Tooltip>
</div>
<div>
<div className='w-[261px] text-gray-500'>{t('common.modelProvider.rerankModel.tip')}</div>
}
>
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400' />
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400'/>
</Tooltip>
</div>
<div>
<div className='w-[261px] text-gray-500'>{t('common.modelProvider.speechToTextModel.tip')}</div>
}
>
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400' />
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400'/>
</Tooltip>
</div>
<div>
/>
</div>
</div>
<div className='px-6 py-1'>
<div className='flex items-center h-8 text-[13px] font-medium text-gray-900'>
{t('common.modelProvider.ttsModel.key')}
<Tooltip
selector='model-page-system-tts-model-tip'
htmlContent={
<div className='w-[261px] text-gray-500'>{t('common.modelProvider.ttsModel.tip')}</div>
}
>
<HelpCircle className='ml-0.5 w-[14px] h-[14px] text-gray-400'/>
</Tooltip>
</div>
<div>
<ModelSelector
defaultModel={currentTTSDefaultModel}
modelList={ttsModelList}
onSelect={model => handleChangeDefaultModel(ModelTypeEnum.tts, model)}
/>
</div>
</div>
<div className='flex items-center justify-end px-6 py-4'>
<Button
className='mr-2 !h-8 !text-[13px]'

+ 15  - 4  web/app/components/share/chat/index.tsx

updateFeedback,
} from '@/service/share'
import type { AppMeta, ConversationItem, SiteInfo } from '@/models/share'
import type { PromptConfig, SuggestedQuestionsAfterAnswerConfig } from '@/models/debug'
import type {
CitationConfig,
PromptConfig,
SpeechToTextConfig,
SuggestedQuestionsAfterAnswerConfig,
TextToSpeechConfig,
} from '@/models/debug'
import type { Feedbacktype, IChatItem } from '@/app/components/app/chat/type'
import Chat from '@/app/components/app/chat'
import { changeLanguage } from '@/i18n/i18next-config'
}

const [suggestedQuestionsAfterAnswerConfig, setSuggestedQuestionsAfterAnswerConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)
const [speechToTextConfig, setSpeechToTextConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)
const [citationConfig, setCitationConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)
const [speechToTextConfig, setSpeechToTextConfig] = useState<SpeechToTextConfig | null>(null)
const [textToSpeechConfig, setTextToSpeechConfig] = useState<TextToSpeechConfig | null>(null)
const [citationConfig, setCitationConfig] = useState<CitationConfig | null>(null)

const [conversationIdChangeBecauseOfNew, setConversationIdChangeBecauseOfNew, getConversationIdChangeBecauseOfNew] = useGetState(false)
const [isChatStarted, { setTrue: setChatStarted, setFalse: setChatNotStarted }] = useBoolean(false)
const caculatedPromptVariables = inputs || currInputs || null
if (caculatedIntroduction && caculatedPromptVariables)
caculatedIntroduction = replaceStringWithValues(caculatedIntroduction, promptConfig?.prompt_variables || [], caculatedPromptVariables)

const openstatement = {
id: `${Date.now()}`,
content: caculatedIntroduction,
const isNotNewConversation = allConversations.some(item => item.id === _conversationId)
setAllConversationList(allConversations)
// fetch new conversation info
const { user_input_form, opening_statement: introduction, suggested_questions, suggested_questions_after_answer, speech_to_text, retriever_resource, file_upload, sensitive_word_avoidance }: any = appParams
const { user_input_form, opening_statement: introduction, suggested_questions, suggested_questions_after_answer, speech_to_text, text_to_speech, retriever_resource, file_upload, sensitive_word_avoidance }: any = appParams
setVisionConfig({
...file_upload.image,
image_file_size_limit: appParams?.system_parameters?.image_file_size_limit,
} as PromptConfig)
setSuggestedQuestionsAfterAnswerConfig(suggested_questions_after_answer)
setSpeechToTextConfig(speech_to_text)
setTextToSpeechConfig(text_to_speech)
setCitationConfig(retriever_resource)

// setConversationList(conversations as ConversationItem[])
isShowSuggestion={doShowSuggestion}
suggestionList={suggestedQuestions}
isShowSpeechToText={speechToTextConfig?.enabled}
isShowTextToSpeech={textToSpeechConfig?.enabled}
isShowCitation={citationConfig?.enabled && isInstalledApp}
visionConfig={{
...visionConfig,

+ 7  - 1  web/app/components/share/chatbot/index.tsx

}
const [suggestedQuestionsAfterAnswerConfig, setSuggestedQuestionsAfterAnswerConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)
const [speechToTextConfig, setSpeechToTextConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)
const [textToSpeechConfig, setTextToSpeechConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)
const [citationConfig, setCitationConfig] = useState<SuggestedQuestionsAfterAnswerConfig | null>(null)

const [conversationIdChangeBecauseOfNew, setConversationIdChangeBecauseOfNew, getConversationIdChangeBecauseOfNew] = useGetState(false)
const [isChatStarted, { setTrue: setChatStarted, setFalse: setChatNotStarted }] = useBoolean(false)
const isNotNewConversation = allConversations.some(item => item.id === _conversationId)
setAllConversationList(allConversations)
// fetch new conversation info
const { user_input_form, opening_statement: introduction, suggested_questions_after_answer, speech_to_text, file_upload, sensitive_word_avoidance }: any = appParams
const { user_input_form, opening_statement: introduction, suggested_questions_after_answer, speech_to_text, text_to_speech, retriever_resource, file_upload, sensitive_word_avoidance }: any = appParams
setVisionConfig({
...file_upload.image,
image_file_size_limit: appParams?.system_parameters?.image_file_size_limit,
} as PromptConfig)
setSuggestedQuestionsAfterAnswerConfig(suggested_questions_after_answer)
setSpeechToTextConfig(speech_to_text)
setTextToSpeechConfig(text_to_speech)
setCitationConfig(retriever_resource)

// setConversationList(conversations as ConversationItem[])

suggestionList={suggestQuestions}
displayScene='web'
isShowSpeechToText={speechToTextConfig?.enabled}
isShowTextToSpeech={textToSpeechConfig?.enabled}
isShowCitation={citationConfig?.enabled && isInstalledApp}
answerIcon={<LogoAvatar className='relative shrink-0' />}
visionConfig={visionConfig}
/>

+ 13  - 4  web/app/components/share/text-generation/index.tsx

import RunOnce from '@/app/components/share/text-generation/run-once'
import { fetchSavedMessage as doFetchSavedMessage, fetchAppInfo, fetchAppParams, removeMessage, saveMessage } from '@/service/share'
import type { SiteInfo } from '@/models/share'
import type { MoreLikeThisConfig, PromptConfig, SavedMessage } from '@/models/debug'
import type {
MoreLikeThisConfig,
PromptConfig,
SavedMessage,
TextToSpeechConfig,
} from '@/models/debug'
import AppIcon from '@/app/components/base/app-icon'
import { changeLanguage } from '@/i18n/i18next-config'
import Loading from '@/app/components/base/loading'
const [canReplaceLogo, setCanReplaceLogo] = useState<boolean>(false)
const [promptConfig, setPromptConfig] = useState<PromptConfig | null>(null)
const [moreLikeThisConfig, setMoreLikeThisConfig] = useState<MoreLikeThisConfig | null>(null)
const [textToSpeechConfig, setTextToSpeechConfig] = useState<TextToSpeechConfig | null>(null)

// save message
const [savedMessages, setSavedMessages] = useState<SavedMessage[]>([])
return
}
}
if (varItem.required === false)
if (!varItem.required)
return

if (item[varIndex].trim() === '') {
setCanReplaceLogo(can_replace_logo)
changeLanguage(siteInfo.default_language)

const { user_input_form, more_like_this, file_upload, sensitive_word_avoidance }: any = appParams
const { user_input_form, more_like_this, file_upload, text_to_speech, sensitive_word_avoidance }: any = appParams
setVisionConfig({
...file_upload.image,
image_file_size_limit: appParams?.system_parameters?.image_file_size_limit,
prompt_variables,
} as PromptConfig)
setMoreLikeThisConfig(more_like_this)
setTextToSpeechConfig(text_to_speech)
})()
}, [])

isCallBatchAPI={isCallBatchAPI}
isPC={isPC}
isMobile={isMobile}
isInstalledApp={!!isInstalledApp}
isInstalledApp={isInstalledApp}
installedAppInfo={installedAppInfo}
isError={task?.status === TaskStatus.failed}
promptConfig={promptConfig}
onCompleted={handleCompleted}
visionConfig={visionConfig}
completionFiles={completionFiles}
isShowTextToSpeech={!!textToSpeechConfig?.enabled}
/>)

const renderBatchRes = () => {
{currTab === 'saved' && (
<SavedItems
className='mt-4'
isShowTextToSpeech={textToSpeechConfig?.enabled}
list={savedMessages}
onRemove={handleRemoveSavedMessage}
onStartCreateContent={() => setCurrTab('create')}

+ 3  - 0  web/app/components/share/text-generation/result/index.tsx

isInstalledApp: boolean
installedAppInfo?: InstalledApp
isError: boolean
isShowTextToSpeech: boolean
promptConfig: PromptConfig | null
moreLikeThisEnabled: boolean
inputs: Record<string, any>
isInstalledApp,
installedAppInfo,
isError,
isShowTextToSpeech,
promptConfig,
moreLikeThisEnabled,
inputs,
isLoading={isCallBatchAPI ? (!completionRes && isResponsing) : false}
taskId={isCallBatchAPI ? ((taskId as number) < 10 ? `0${taskId}` : `${taskId}`) : undefined}
controlClearMoreLikeThis={controlClearMoreLikeThis}
isShowTextToSpeech={isShowTextToSpeech}
/>
)

+ 8  - 0  web/context/debug-configuration.ts

PromptItem,
SpeechToTextConfig,
SuggestedQuestionsAfterAnswerConfig,
TextToSpeechConfig,
} from '@/models/debug'
import type { ExternalDataTool } from '@/models/common'
import type { DataSet } from '@/models/datasets'
setSuggestedQuestionsAfterAnswerConfig: (suggestedQuestionsAfterAnswerConfig: SuggestedQuestionsAfterAnswerConfig) => void
speechToTextConfig: SpeechToTextConfig
setSpeechToTextConfig: (speechToTextConfig: SpeechToTextConfig) => void
textToSpeechConfig: TextToSpeechConfig
setTextToSpeechConfig: (textToSpeechConfig: TextToSpeechConfig) => void
citationConfig: CitationConfig
setCitationConfig: (citationConfig: CitationConfig) => void
annotationConfig: AnnotationReplyConfig
enabled: false,
},
setSpeechToTextConfig: () => { },
textToSpeechConfig: {
enabled: false,
},
setTextToSpeechConfig: () => { },
citationConfig: {
enabled: false,
},
more_like_this: null,
suggested_questions_after_answer: null,
speech_to_text: null,
text_to_speech: null,
retriever_resource: null,
sensitive_word_avoidance: null,
dataSets: [],

+ 3  - 0  web/i18n/lang/app-api.en.ts

ok: 'In Service',
copy: 'Copy',
copied: 'Copied',
play: 'Play',
pause: 'Pause',
playing: 'Playing',
merMaind: {
rerender: 'Redo Rerender',
},

+ 4  - 1  web/i18n/lang/app-api.zh.ts

disabled: '已停用',
ok: '运行中',
copy: '复制',
copied: '已复制',
play: '播放',
pause: '暂停',
playing: '播放中',
merMaind: {
rerender: '重新渲染',
},
copied: '已复制',
never: '从未',
apiKeyModal: {
apiSecretKey: 'API 密钥',

+ 5  - 0  web/i18n/lang/app-debug.en.ts

description: 'Once enabled, you can use voice input.',
resDes: 'Voice input is enabled',
},
textToSpeech: {
title: 'Text to Speech',
description: 'Once enabled, text can be converted to speech.',
resDes: 'Text to Audio is enabled',
},
citation: {
title: 'Citations and Attributions',
description: 'Once enabled, show source document and attributed section of the generated content.',

+ 5  - 0  web/i18n/lang/app-debug.zh.ts

description: '启用后,您可以使用语音输入。',
resDes: '语音输入已启用',
},
textToSpeech: {
title: '文字转语音',
description: '启用后,文本可以转换成语音。',
resDes: '文本转音频已启用',
},
citation: {
title: '引用和归属',
description: '启用后,显示源文档和生成内容的归属部分。',

+ 4  - 0  web/i18n/lang/common.en.ts

key: 'Speech-to-Text Model',
tip: 'Set the default model for speech-to-text input in conversation.',
},
ttsModel: {
key: 'Text-to-Speech Model',
tip: 'Set the default model for text-to-speech output in conversation.',
},
rerankModel: {
key: 'Rerank Model',
tip: 'Rerank model will reorder the candidate document list based on the semantic match with user query, improving the results of semantic ranking',

+ 4  - 0  web/i18n/lang/common.zh.ts

key: '语音转文本模型',
tip: '设置对话中语音转文字输入的默认使用模型。',
},
ttsModel: {
key: '文本转语音模型',
tip: '设置对话中文字转语音输出的默认使用模型。',
},
rerankModel: {
key: 'Rerank 模型',
tip: '重排序模型将根据候选文档列表与用户问题语义匹配度进行重新排序,从而改进语义排序的结果',

+ 3  - 0  web/models/debug.ts



export type SpeechToTextConfig = MoreLikeThisConfig

export type TextToSpeechConfig = MoreLikeThisConfig

export type CitationConfig = MoreLikeThisConfig

export type AnnotationReplyConfig = {
more_like_this: MoreLikeThisConfig | null
suggested_questions_after_answer: SuggestedQuestionsAfterAnswerConfig | null
speech_to_text: SpeechToTextConfig | null
text_to_speech: TextToSpeechConfig | null
retriever_resource: RetrieverResourceConfig | null
sensitive_word_avoidance: ModerationConfig | null
dataSets: any[]

+ 4  - 0  web/service/share.ts

return (getAction('post', !isPublicAPI))(url, { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ text: string }>
}


export const textToAudio = (url: string, isPublicAPI: boolean, body: FormData) => {
return (getAction('post', !isPublicAPI))(url, { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ data: string }>
}
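
For context, a hypothetical caller of the `textToAudio` helper added above; the URL shape and the base64 treatment of `data` are assumptions for illustration, not taken from this commit:

```typescript
// Hypothetical usage of the textToAudio service helper (URL and payload assumed).
import { textToAudio } from '@/service/share'

async function playAnswer(answer: string, user: string, isPublicAPI: boolean) {
  const form = new FormData()
  form.append('text', answer)
  form.append('user', user)
  form.append('streaming', 'false')
  const { data } = await textToAudio('/text-to-audio', isPublicAPI, form)
  // Assuming `data` carries base64-encoded wav audio, per Promise<{ data: string }>.
  await new Audio(`data:audio/wav;base64,${data}`).play()
}
```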

export const fetchAccessToken = async (appCode: string) => {
const headers = new Headers()
headers.append('X-App-Code', appCode)

+ 3  - 0  web/types/app.ts

speech_to_text: {
enabled: boolean
}
text_to_speech: {
enabled: boolean
}
retriever_resource: {
enabled: boolean
}
