Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>tags/0.5.6
| @@ -53,7 +53,7 @@ class TTSModel(AIModel): | |||
| """ | |||
| raise NotImplementedError | |||
| def get_tts_model_voices(self, model: str, credentials: dict, language: str) -> list: | |||
| def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list: | |||
| """ | |||
| Get voice for given tts model voices | |||
| @@ -66,7 +66,10 @@ class TTSModel(AIModel): | |||
| if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties: | |||
| voices = model_schema.model_properties[ModelPropertyKey.VOICES] | |||
| return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')] | |||
| if language: | |||
| return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')] | |||
| else: | |||
| return [{'name': d['name'], 'value': d['mode']} for d in voices] | |||
| def _get_model_default_voice(self, model: str, credentials: dict) -> any: | |||
| """ | |||
| @@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): | |||
| """ | |||
| # transform credentials to kwargs for model instance | |||
| credentials_kwargs = self._to_credential_kwargs(credentials) | |||
# get_tts_model_voices returns a list of {'name': ..., 'value': ...} dicts, so the requested
# voice must be compared against the 'value' fields. Testing the string against the dicts
# themselves never matches and would replace every caller-selected voice with the default.
# `or []` guards the case where no voice list comes back — NOTE(review): confirm whether
# get_tts_model_voices can return None when the model schema declares no voices.
if not voice or voice not in [
    d['value'] for d in (self.get_tts_model_voices(model=model, credentials=credentials) or [])
]:
    voice = self._get_model_default_voice(model, credentials)
| word_limit = self._get_model_word_limit(model, credentials) | |||
| audio_type = self._get_model_audio_type(model, credentials) | |||
| @@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel): | |||
| :return: text translated to audio file | |||
| """ | |||
| audio_type = self._get_model_audio_type(model, credentials) | |||
# get_tts_model_voices returns a list of {'name': ..., 'value': ...} dicts, so the requested
# voice must be compared against the 'value' fields. Testing the string against the dicts
# themselves never matches and would replace every caller-selected voice with the default.
# `or []` guards the case where no voice list comes back — NOTE(review): confirm whether
# get_tts_model_voices can return None when the model schema declares no voices.
if not voice or voice not in [
    d['value'] for d in (self.get_tts_model_voices(model=model, credentials=credentials) or [])
]:
    voice = self._get_model_default_voice(model, credentials)
| if streaming: | |||
| return Response(stream_with_context(self._tts_invoke_streaming(model=model, | |||
| @@ -3,7 +3,7 @@ import type { FC } from 'react' | |||
| import { memo, useState } from 'react' | |||
| import { useTranslation } from 'react-i18next' | |||
| import cn from 'classnames' | |||
| import ParamConfigContent from './param-config-content' | |||
| import VoiceParamConfig from './param-config-content' | |||
| import { Settings01 } from '@/app/components/base/icons/src/vender/line/general' | |||
| import { | |||
| PortalToFollowElem, | |||
| @@ -27,12 +27,12 @@ const ParamsConfig: FC = () => { | |||
| <PortalToFollowElemTrigger onClick={() => setOpen(v => !v)}> | |||
| <div className={cn('flex items-center rounded-md h-7 px-3 space-x-1 text-gray-700 cursor-pointer hover:bg-gray-200', open && 'bg-gray-200')}> | |||
| <Settings01 className='w-3.5 h-3.5 ' /> | |||
| <div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.vision.settings')}</div> | |||
| <div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.voice.settings')}</div> | |||
| </div> | |||
| </PortalToFollowElemTrigger> | |||
| <PortalToFollowElemContent style={{ zIndex: 50 }}> | |||
| <div className='w-80 sm:w-[412px] p-4 bg-white rounded-lg border-[0.5px] border-gray-200 shadow-lg space-y-3'> | |||
| <ParamConfigContent /> | |||
| <VoiceParamConfig /> | |||
| </div> | |||
| </PortalToFollowElemContent> | |||
| </PortalToFollowElem> | |||
| @@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => { | |||
| const languageItem = languages.find(item => item.value === textToSpeechConfig.language) | |||
| const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select') | |||
| const voiceItems = useSWR({ url: `/apps/${appId}/text-to-audio/voices?language=${languageItem ? languageItem.value : 'en-US'}` }, fetchAppVoices).data | |||
| const language = languageItem?.value | |||
| const voiceItems = useSWR({ appId, language }, fetchAppVoices).data | |||
| const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice) | |||
| const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select') | |||
| @@ -1,17 +1,27 @@ | |||
| 'use client' | |||
| import useSWR from 'swr' | |||
| import React, { type FC } from 'react' | |||
| import { useTranslation } from 'react-i18next' | |||
| import { useContext } from 'use-context-selector' | |||
| import { usePathname } from 'next/navigation' | |||
| import Panel from '@/app/components/app/configuration/base/feature-panel' | |||
| import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices' | |||
| import ConfigContext from '@/context/debug-configuration' | |||
| import { languages } from '@/utils/language' | |||
| import { fetchAppVoices } from '@/service/apps' | |||
| const TextToSpeech: FC = () => { | |||
| const { t } = useTranslation() | |||
| const { | |||
| textToSpeechConfig, | |||
| } = useContext(ConfigContext) | |||
| const pathname = usePathname() | |||
| const matched = pathname.match(/\/app\/([^/]+)/) | |||
| const appId = (matched?.length && matched[1]) ? matched[1] : '' | |||
| const language = textToSpeechConfig.language | |||
| const voiceItems = useSWR({ appId, language }, fetchAppVoices).data | |||
| const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice) | |||
| return ( | |||
| <Panel | |||
| title={ | |||
| @@ -22,7 +32,7 @@ const TextToSpeech: FC = () => { | |||
| headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />} | |||
| headerRight={ | |||
| <div className='text-xs text-gray-500'> | |||
| {languages.find(i => i.value === textToSpeechConfig.language)?.name} {textToSpeechConfig.voice} | |||
| {languages.find(i => i.value === textToSpeechConfig.language)?.name} - {voiceItem?.name ?? t('appDebug.voice.defaultDisplay')} | |||
| </div> | |||
| } | |||
| noBodySpacing | |||
| @@ -300,6 +300,7 @@ const translation = { | |||
| }, | |||
| voice: { | |||
| name: 'Voice', | |||
| defaultDisplay: 'Default Voice', | |||
| description: 'Text to speech voice Settings', | |||
| settings: 'Settings', | |||
| voiceSettings: { | |||
| @@ -300,6 +300,7 @@ const translation = { | |||
| }, | |||
| voice: { | |||
| name: 'voz', | |||
| defaultDisplay: 'Voz padrão', | |||
| description: 'Texto para configurações de timbre de voz', | |||
| settings: 'As configurações', | |||
| voiceSettings: { | |||
| @@ -296,6 +296,7 @@ const translation = { | |||
| }, | |||
| voice: { | |||
| name: '音色', | |||
| defaultDisplay: '缺省音色', | |||
| description: '文本转语音音色设置', | |||
| settings: '设置', | |||
| voiceSettings: { | |||
| @@ -94,6 +94,6 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u | |||
| return post<GenerationIntroductionResponse>(url, { body }) | |||
| } | |||
/**
 * Fetch the text-to-speech voices of an app for one language.
 *
 * Takes the object used as the SWR key (`{ appId, language }`).
 *
 * `language` is optional and callers may pass `undefined` before a language
 * has been chosen. Interpolating it directly would request
 * `?language=undefined`, so an absent or empty language falls back to
 * `'en-US'`, the default the callers applied when they built the URL themselves.
 *
 * @param params.appId - id of the app whose voices are listed
 * @param params.language - language code used to filter voices; defaults to `'en-US'`
 * @returns the result of `get` for the voices endpoint
 */
export const fetchAppVoices: Fetcher<AppVoicesListResponse, { appId: string; language?: string }> = ({ appId, language }) => {
  // `||` rather than `??` on purpose: an empty string is not a usable language either.
  const languageParam = encodeURIComponent(language || 'en-US')
  return get<AppVoicesListResponse>(`apps/${appId}/text-to-audio/voices?language=${languageParam}`)
}