| @@ -174,6 +174,12 @@ CELERY_MIN_WORKERS= | |||
| API_TOOL_DEFAULT_CONNECT_TIMEOUT=10 | |||
| API_TOOL_DEFAULT_READ_TIMEOUT=60 | |||
| # ------------------------------ | |||
| # Datasource Configuration | |||
| # ------------------------------ | |||
| ENABLE_WEBSITE_JINAREADER=true | |||
| ENABLE_WEBSITE_FIRECRAWL=true | |||
| ENABLE_WEBSITE_WATERCRAWL=true | |||
| # ------------------------------ | |||
| # Database Configuration | |||
| @@ -75,7 +75,9 @@ services: | |||
| MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} | |||
| MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} | |||
| MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} | |||
| ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} | |||
| ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} | |||
| ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} | |||
| # The postgres database. | |||
| db: | |||
| image: postgres:15-alpine | |||
| @@ -43,6 +43,9 @@ x-shared-env: &shared-api-worker-env | |||
| CELERY_MIN_WORKERS: ${CELERY_MIN_WORKERS:-} | |||
| API_TOOL_DEFAULT_CONNECT_TIMEOUT: ${API_TOOL_DEFAULT_CONNECT_TIMEOUT:-10} | |||
| API_TOOL_DEFAULT_READ_TIMEOUT: ${API_TOOL_DEFAULT_READ_TIMEOUT:-60} | |||
| ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} | |||
| ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} | |||
| ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} | |||
| DB_USERNAME: ${DB_USERNAME:-postgres} | |||
| DB_PASSWORD: ${DB_PASSWORD:-difyai123456} | |||
| DB_HOST: ${DB_HOST:-db} | |||
| @@ -543,7 +546,9 @@ services: | |||
| MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} | |||
| MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} | |||
| MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} | |||
| ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} | |||
| ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} | |||
| ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} | |||
| # The postgres database. | |||
| db: | |||
| image: postgres:15-alpine | |||
| @@ -49,3 +49,8 @@ NEXT_PUBLIC_MAX_PARALLEL_LIMIT=10 | |||
| # The maximum number of iterations for agent setting | |||
| NEXT_PUBLIC_MAX_ITERATIONS_NUM=5 | |||
| NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=true | |||
| NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=true | |||
| NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=true | |||
| @@ -20,7 +20,7 @@ import { useProviderContext } from '@/context/provider-context' | |||
| import VectorSpaceFull from '@/app/components/billing/vector-space-full' | |||
| import classNames from '@/utils/classnames' | |||
| import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' | |||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||
| type IStepOneProps = { | |||
| datasetId?: string | |||
| dataSourceType?: DataSourceType | |||
| @@ -126,9 +126,7 @@ const StepOne = ({ | |||
| return true | |||
| if (files.some(file => !file.file.id)) | |||
| return true | |||
| if (isShowVectorSpaceFull) | |||
| return true | |||
| return false | |||
| return isShowVectorSpaceFull | |||
| }, [files, isShowVectorSpaceFull]) | |||
| return ( | |||
| @@ -193,7 +191,8 @@ const StepOne = ({ | |||
| {t('datasetCreation.stepOne.dataSourceType.notion')} | |||
| </span> | |||
| </div> | |||
| <div | |||
| {(ENABLE_WEBSITE_FIRECRAWL || ENABLE_WEBSITE_JINAREADER || ENABLE_WEBSITE_WATERCRAWL) && ( | |||
| <div | |||
| className={cn( | |||
| s.dataSourceItem, | |||
| 'system-sm-medium', | |||
| @@ -201,7 +200,7 @@ const StepOne = ({ | |||
| dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled, | |||
| )} | |||
| onClick={() => changeType(DataSourceType.WEB)} | |||
| > | |||
| > | |||
| <span className={cn(s.datasetIcon, s.web)} /> | |||
| <span | |||
| title={t('datasetCreation.stepOne.dataSourceType.web')} | |||
| @@ -209,7 +208,8 @@ const StepOne = ({ | |||
| > | |||
| {t('datasetCreation.stepOne.dataSourceType.web')} | |||
| </span> | |||
| </div> | |||
| </div> | |||
| )} | |||
| </div> | |||
| ) | |||
| } | |||
| @@ -12,6 +12,7 @@ import { useModalContext } from '@/context/modal-context' | |||
| import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' | |||
| import { fetchDataSources } from '@/service/datasets' | |||
| import { type DataSourceItem, DataSourceProvider } from '@/models/common' | |||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||
| type Props = { | |||
| onPreview: (payload: CrawlResultItem) => void | |||
| @@ -84,7 +85,7 @@ const Website: FC<Props> = ({ | |||
| {t('datasetCreation.stepOne.website.chooseProvider')} | |||
| </div> | |||
| <div className="flex space-x-2"> | |||
| <button | |||
| {ENABLE_WEBSITE_JINAREADER && <button | |||
| className={cn('flex items-center justify-center rounded-lg px-4 py-2', | |||
| selectedProvider === DataSourceProvider.jinaReader | |||
| ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | |||
| @@ -95,8 +96,8 @@ const Website: FC<Props> = ({ | |||
| > | |||
| <span className={cn(s.jinaLogo, 'mr-2')}/> | |||
| <span>Jina Reader</span> | |||
| </button> | |||
| <button | |||
| </button>} | |||
| {ENABLE_WEBSITE_FIRECRAWL && <button | |||
| className={cn('rounded-lg px-4 py-2', | |||
| selectedProvider === DataSourceProvider.fireCrawl | |||
| ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | |||
| @@ -106,8 +107,8 @@ const Website: FC<Props> = ({ | |||
| onClick={() => setSelectedProvider(DataSourceProvider.fireCrawl)} | |||
| > | |||
| 🔥 Firecrawl | |||
| </button> | |||
| <button | |||
| </button>} | |||
| {ENABLE_WEBSITE_WATERCRAWL && <button | |||
| className={cn('flex items-center justify-center rounded-lg px-4 py-2', | |||
| selectedProvider === DataSourceProvider.waterCrawl | |||
| ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | |||
| @@ -118,7 +119,7 @@ const Website: FC<Props> = ({ | |||
| > | |||
| <span className={cn(s.watercrawlLogo, 'mr-2')}/> | |||
| <span>WaterCrawl</span> | |||
| </button> | |||
| </button>} | |||
| </div> | |||
| </div> | |||
| {source && selectedProvider === DataSourceProvider.fireCrawl && ( | |||
| @@ -6,6 +6,7 @@ import s from './index.module.css' | |||
| import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' | |||
| import Button from '@/app/components/base/button' | |||
| import { DataSourceProvider } from '@/models/common' | |||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||
| const I18N_PREFIX = 'datasetCreation.stepOne.website' | |||
| @@ -16,29 +17,30 @@ type Props = { | |||
| const NoData: FC<Props> = ({ | |||
| onConfig, | |||
| provider, | |||
| }) => { | |||
| const { t } = useTranslation() | |||
| const providerConfig = { | |||
| [DataSourceProvider.jinaReader]: { | |||
| [DataSourceProvider.jinaReader]: ENABLE_WEBSITE_JINAREADER ? { | |||
| emoji: <span className={s.jinaLogo} />, | |||
| title: t(`${I18N_PREFIX}.jinaReaderNotConfigured`), | |||
| description: t(`${I18N_PREFIX}.jinaReaderNotConfiguredDescription`), | |||
| }, | |||
| [DataSourceProvider.fireCrawl]: { | |||
| } : null, | |||
| [DataSourceProvider.fireCrawl]: ENABLE_WEBSITE_FIRECRAWL ? { | |||
| emoji: '🔥', | |||
| title: t(`${I18N_PREFIX}.fireCrawlNotConfigured`), | |||
| description: t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`), | |||
| }, | |||
| [DataSourceProvider.waterCrawl]: { | |||
| emoji: <span className={s.watercrawlLogo} />, | |||
| } : null, | |||
| [DataSourceProvider.waterCrawl]: ENABLE_WEBSITE_WATERCRAWL ? { | |||
| emoji: '💧', | |||
| title: t(`${I18N_PREFIX}.waterCrawlNotConfigured`), | |||
| description: t(`${I18N_PREFIX}.waterCrawlNotConfiguredDescription`), | |||
| }, | |||
| } : null, | |||
| } | |||
| const currentProvider = providerConfig[provider] | |||
| const currentProvider = Object.values(providerConfig).find(provider => provider !== null) || providerConfig[DataSourceProvider.jinaReader] | |||
| if (!currentProvider) return null | |||
| return ( | |||
| <> | |||
| @@ -3,6 +3,7 @@ import DataSourceNotion from './data-source-notion' | |||
| import DataSourceWebsite from './data-source-website' | |||
| import { fetchDataSource } from '@/service/common' | |||
| import { DataSourceProvider } from '@/models/common' | |||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||
| export default function DataSourcePage() { | |||
| const { data } = useSWR({ url: 'data-source/integrates' }, fetchDataSource) | |||
| @@ -11,9 +12,9 @@ export default function DataSourcePage() { | |||
| return ( | |||
| <div className='mb-8'> | |||
| <DataSourceNotion workspaces={notionWorkspaces} /> | |||
| <DataSourceWebsite provider={DataSourceProvider.jinaReader} /> | |||
| <DataSourceWebsite provider={DataSourceProvider.fireCrawl} /> | |||
| <DataSourceWebsite provider={DataSourceProvider.waterCrawl} /> | |||
| {ENABLE_WEBSITE_JINAREADER && <DataSourceWebsite provider={DataSourceProvider.jinaReader} />} | |||
| {ENABLE_WEBSITE_FIRECRAWL && <DataSourceWebsite provider={DataSourceProvider.fireCrawl} />} | |||
| {ENABLE_WEBSITE_WATERCRAWL && <DataSourceWebsite provider={DataSourceProvider.waterCrawl} />} | |||
| </div> | |||
| ) | |||
| } | |||
| @@ -302,3 +302,15 @@ else if (globalThis.document?.body?.getAttribute('data-public-max-iterations-num | |||
| maxIterationsNum = Number.parseInt(globalThis.document.body.getAttribute('data-public-max-iterations-num') as string) | |||
| export const MAX_ITERATIONS_NUM = maxIterationsNum | |||
/**
 * Resolves a website-crawler feature flag to a boolean.
 *
 * Resolution order:
 * 1. `envValue`, when defined: enabled only for the exact string `'true'`.
 * 2. The `attrName` attribute on `document.body`, when it holds a non-empty
 *    value: same `'true'` rule. This mirrors the `data-public-*` fallback
 *    already used for the other settings in this file, because
 *    `NEXT_PUBLIC_*` variables are inlined into the client bundle at build
 *    time and runtime container settings are not visible through
 *    `process.env` in the browser.
 * 3. Enabled (`true`) when neither source provides a value.
 *
 * NOTE(review): the fallback only takes effect if the root layout emits the
 * corresponding `data-public-enable-website-*` attribute on `<body>` —
 * confirm against the layout component.
 *
 * @param envValue - Raw value of the matching `NEXT_PUBLIC_*` variable.
 * @param attrName - Name of the `<body>` attribute to consult as a fallback.
 * @returns Whether the provider should be offered in the UI.
 */
const resolveWebsiteFlag = (envValue: string | undefined, attrName: string): boolean => {
  if (envValue !== undefined)
    return envValue === 'true'

  // `document` does not exist during server-side rendering, hence the optional chain.
  const attrValue = globalThis.document?.body?.getAttribute(attrName)
  if (attrValue)
    return attrValue === 'true'

  return true
}

// Each `process.env.NEXT_PUBLIC_*` access is spelled out literally so the
// bundler can still substitute the build-time value.

/** Whether the Jina Reader website data source is offered. Defaults to enabled. */
export const ENABLE_WEBSITE_JINAREADER = resolveWebsiteFlag(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER,
  'data-public-enable-website-jinareader',
)

/** Whether the Firecrawl website data source is offered. Defaults to enabled. */
export const ENABLE_WEBSITE_FIRECRAWL = resolveWebsiteFlag(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL,
  'data-public-enable-website-firecrawl',
)

/** Whether the WaterCrawl website data source is offered. Defaults to enabled. */
export const ENABLE_WEBSITE_WATERCRAWL = resolveWebsiteFlag(
  process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL,
  'data-public-enable-website-watercrawl',
)
| @@ -28,5 +28,7 @@ export NEXT_PUBLIC_CSP_WHITELIST=${CSP_WHITELIST} | |||
| export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE} | |||
| export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} | |||
| export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM} | |||
| export NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=${ENABLE_WEBSITE_JINAREADER:-true} | |||
| export NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=${ENABLE_WEBSITE_FIRECRAWL:-true} | |||
| export NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=${ENABLE_WEBSITE_WATERCRAWL:-true} | |||
| pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon | |||