| API_TOOL_DEFAULT_CONNECT_TIMEOUT=10 | API_TOOL_DEFAULT_CONNECT_TIMEOUT=10 | ||||
| API_TOOL_DEFAULT_READ_TIMEOUT=60 | API_TOOL_DEFAULT_READ_TIMEOUT=60 | ||||
| # ------------------------------- | |||||
| # Datasource Configuration | |||||
| # -------------------------------- | |||||
| ENABLE_WEBSITE_JINAREADER=true | |||||
| ENABLE_WEBSITE_FIRECRAWL=true | |||||
| ENABLE_WEBSITE_WATERCRAWL=true | |||||
| # ------------------------------ | # ------------------------------ | ||||
| # Database Configuration | # Database Configuration |
| MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} | MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} | ||||
| MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} | MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} | ||||
| MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} | MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} | ||||
| ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} | |||||
| ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} | |||||
| ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} | |||||
| # The postgres database. | # The postgres database. | ||||
| db: | db: | ||||
| image: postgres:15-alpine | image: postgres:15-alpine |
| CELERY_MIN_WORKERS: ${CELERY_MIN_WORKERS:-} | CELERY_MIN_WORKERS: ${CELERY_MIN_WORKERS:-} | ||||
| API_TOOL_DEFAULT_CONNECT_TIMEOUT: ${API_TOOL_DEFAULT_CONNECT_TIMEOUT:-10} | API_TOOL_DEFAULT_CONNECT_TIMEOUT: ${API_TOOL_DEFAULT_CONNECT_TIMEOUT:-10} | ||||
| API_TOOL_DEFAULT_READ_TIMEOUT: ${API_TOOL_DEFAULT_READ_TIMEOUT:-60} | API_TOOL_DEFAULT_READ_TIMEOUT: ${API_TOOL_DEFAULT_READ_TIMEOUT:-60} | ||||
| ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} | |||||
| ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} | |||||
| ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} | |||||
| DB_USERNAME: ${DB_USERNAME:-postgres} | DB_USERNAME: ${DB_USERNAME:-postgres} | ||||
| DB_PASSWORD: ${DB_PASSWORD:-difyai123456} | DB_PASSWORD: ${DB_PASSWORD:-difyai123456} | ||||
| DB_HOST: ${DB_HOST:-db} | DB_HOST: ${DB_HOST:-db} | ||||
| MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} | MAX_TOOLS_NUM: ${MAX_TOOLS_NUM:-10} | ||||
| MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} | MAX_PARALLEL_LIMIT: ${MAX_PARALLEL_LIMIT:-10} | ||||
| MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} | MAX_ITERATIONS_NUM: ${MAX_ITERATIONS_NUM:-5} | ||||
| ENABLE_WEBSITE_JINAREADER: ${ENABLE_WEBSITE_JINAREADER:-true} | |||||
| ENABLE_WEBSITE_FIRECRAWL: ${ENABLE_WEBSITE_FIRECRAWL:-true} | |||||
| ENABLE_WEBSITE_WATERCRAWL: ${ENABLE_WEBSITE_WATERCRAWL:-true} | |||||
| # The postgres database. | # The postgres database. | ||||
| db: | db: | ||||
| image: postgres:15-alpine | image: postgres:15-alpine |
| # The maximum number of iterations for agent setting | # The maximum number of iterations for agent setting | ||||
| NEXT_PUBLIC_MAX_ITERATIONS_NUM=5 | NEXT_PUBLIC_MAX_ITERATIONS_NUM=5 | ||||
| NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=true | |||||
| NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=true | |||||
| NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=true | |||||
| import VectorSpaceFull from '@/app/components/billing/vector-space-full' | import VectorSpaceFull from '@/app/components/billing/vector-space-full' | ||||
| import classNames from '@/utils/classnames' | import classNames from '@/utils/classnames' | ||||
| import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' | import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' | ||||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||||
| type IStepOneProps = { | type IStepOneProps = { | ||||
| datasetId?: string | datasetId?: string | ||||
| dataSourceType?: DataSourceType | dataSourceType?: DataSourceType | ||||
| return true | return true | ||||
| if (files.some(file => !file.file.id)) | if (files.some(file => !file.file.id)) | ||||
| return true | return true | ||||
| if (isShowVectorSpaceFull) | |||||
| return true | |||||
| return false | |||||
| return isShowVectorSpaceFull | |||||
| }, [files, isShowVectorSpaceFull]) | }, [files, isShowVectorSpaceFull]) | ||||
| return ( | return ( | ||||
| {t('datasetCreation.stepOne.dataSourceType.notion')} | {t('datasetCreation.stepOne.dataSourceType.notion')} | ||||
| </span> | </span> | ||||
| </div> | </div> | ||||
| <div | |||||
| {(ENABLE_WEBSITE_FIRECRAWL || ENABLE_WEBSITE_JINAREADER || ENABLE_WEBSITE_WATERCRAWL) && ( | |||||
| <div | |||||
| className={cn( | className={cn( | ||||
| s.dataSourceItem, | s.dataSourceItem, | ||||
| 'system-sm-medium', | 'system-sm-medium', | ||||
| dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled, | dataSourceTypeDisable && dataSourceType !== DataSourceType.WEB && s.disabled, | ||||
| )} | )} | ||||
| onClick={() => changeType(DataSourceType.WEB)} | onClick={() => changeType(DataSourceType.WEB)} | ||||
| > | |||||
| > | |||||
| <span className={cn(s.datasetIcon, s.web)} /> | <span className={cn(s.datasetIcon, s.web)} /> | ||||
| <span | <span | ||||
| title={t('datasetCreation.stepOne.dataSourceType.web')} | title={t('datasetCreation.stepOne.dataSourceType.web')} | ||||
| > | > | ||||
| {t('datasetCreation.stepOne.dataSourceType.web')} | {t('datasetCreation.stepOne.dataSourceType.web')} | ||||
| </span> | </span> | ||||
| </div> | |||||
| </div> | |||||
| )} | |||||
| </div> | </div> | ||||
| ) | ) | ||||
| } | } |
| import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' | import type { CrawlOptions, CrawlResultItem } from '@/models/datasets' | ||||
| import { fetchDataSources } from '@/service/datasets' | import { fetchDataSources } from '@/service/datasets' | ||||
| import { type DataSourceItem, DataSourceProvider } from '@/models/common' | import { type DataSourceItem, DataSourceProvider } from '@/models/common' | ||||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||||
| type Props = { | type Props = { | ||||
| onPreview: (payload: CrawlResultItem) => void | onPreview: (payload: CrawlResultItem) => void | ||||
| {t('datasetCreation.stepOne.website.chooseProvider')} | {t('datasetCreation.stepOne.website.chooseProvider')} | ||||
| </div> | </div> | ||||
| <div className="flex space-x-2"> | <div className="flex space-x-2"> | ||||
| <button | |||||
| {ENABLE_WEBSITE_JINAREADER && <button | |||||
| className={cn('flex items-center justify-center rounded-lg px-4 py-2', | className={cn('flex items-center justify-center rounded-lg px-4 py-2', | ||||
| selectedProvider === DataSourceProvider.jinaReader | selectedProvider === DataSourceProvider.jinaReader | ||||
| ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | ||||
| > | > | ||||
| <span className={cn(s.jinaLogo, 'mr-2')}/> | <span className={cn(s.jinaLogo, 'mr-2')}/> | ||||
| <span>Jina Reader</span> | <span>Jina Reader</span> | ||||
| </button> | |||||
| <button | |||||
| </button>} | |||||
| {ENABLE_WEBSITE_FIRECRAWL && <button | |||||
| className={cn('rounded-lg px-4 py-2', | className={cn('rounded-lg px-4 py-2', | ||||
| selectedProvider === DataSourceProvider.fireCrawl | selectedProvider === DataSourceProvider.fireCrawl | ||||
| ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | ||||
| onClick={() => setSelectedProvider(DataSourceProvider.fireCrawl)} | onClick={() => setSelectedProvider(DataSourceProvider.fireCrawl)} | ||||
| > | > | ||||
| 🔥 Firecrawl | 🔥 Firecrawl | ||||
| </button> | |||||
| <button | |||||
| </button>} | |||||
| {ENABLE_WEBSITE_WATERCRAWL && <button | |||||
| className={cn('flex items-center justify-center rounded-lg px-4 py-2', | className={cn('flex items-center justify-center rounded-lg px-4 py-2', | ||||
| selectedProvider === DataSourceProvider.waterCrawl | selectedProvider === DataSourceProvider.waterCrawl | ||||
| ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | ? 'system-sm-medium border-[1.5px] border-components-option-card-option-selected-border bg-components-option-card-option-selected-bg text-text-primary' | ||||
| > | > | ||||
| <span className={cn(s.watercrawlLogo, 'mr-2')}/> | <span className={cn(s.watercrawlLogo, 'mr-2')}/> | ||||
| <span>WaterCrawl</span> | <span>WaterCrawl</span> | ||||
| </button> | |||||
| </button>} | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| {source && selectedProvider === DataSourceProvider.fireCrawl && ( | {source && selectedProvider === DataSourceProvider.fireCrawl && ( |
| import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' | import { Icon3Dots } from '@/app/components/base/icons/src/vender/line/others' | ||||
| import Button from '@/app/components/base/button' | import Button from '@/app/components/base/button' | ||||
| import { DataSourceProvider } from '@/models/common' | import { DataSourceProvider } from '@/models/common' | ||||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||||
| const I18N_PREFIX = 'datasetCreation.stepOne.website' | const I18N_PREFIX = 'datasetCreation.stepOne.website' | ||||
| const NoData: FC<Props> = ({ | const NoData: FC<Props> = ({ | ||||
| onConfig, | onConfig, | ||||
| provider, | |||||
| }) => { | }) => { | ||||
| const { t } = useTranslation() | const { t } = useTranslation() | ||||
| const providerConfig = { | const providerConfig = { | ||||
| [DataSourceProvider.jinaReader]: { | |||||
| [DataSourceProvider.jinaReader]: ENABLE_WEBSITE_JINAREADER ? { | |||||
| emoji: <span className={s.jinaLogo} />, | emoji: <span className={s.jinaLogo} />, | ||||
| title: t(`${I18N_PREFIX}.jinaReaderNotConfigured`), | title: t(`${I18N_PREFIX}.jinaReaderNotConfigured`), | ||||
| description: t(`${I18N_PREFIX}.jinaReaderNotConfiguredDescription`), | description: t(`${I18N_PREFIX}.jinaReaderNotConfiguredDescription`), | ||||
| }, | |||||
| [DataSourceProvider.fireCrawl]: { | |||||
| } : null, | |||||
| [DataSourceProvider.fireCrawl]: ENABLE_WEBSITE_FIRECRAWL ? { | |||||
| emoji: '🔥', | emoji: '🔥', | ||||
| title: t(`${I18N_PREFIX}.fireCrawlNotConfigured`), | title: t(`${I18N_PREFIX}.fireCrawlNotConfigured`), | ||||
| description: t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`), | description: t(`${I18N_PREFIX}.fireCrawlNotConfiguredDescription`), | ||||
| }, | |||||
| [DataSourceProvider.waterCrawl]: { | |||||
| emoji: <span className={s.watercrawlLogo} />, | |||||
| } : null, | |||||
| [DataSourceProvider.waterCrawl]: ENABLE_WEBSITE_WATERCRAWL ? { | |||||
| emoji: '💧', | |||||
| title: t(`${I18N_PREFIX}.waterCrawlNotConfigured`), | title: t(`${I18N_PREFIX}.waterCrawlNotConfigured`), | ||||
| description: t(`${I18N_PREFIX}.waterCrawlNotConfiguredDescription`), | description: t(`${I18N_PREFIX}.waterCrawlNotConfiguredDescription`), | ||||
| }, | |||||
| } : null, | |||||
| } | } | ||||
| const currentProvider = providerConfig[provider] | |||||
| const currentProvider = Object.values(providerConfig).find(provider => provider !== null) || providerConfig[DataSourceProvider.jinaReader] | |||||
| if (!currentProvider) return null | |||||
| return ( | return ( | ||||
| <> | <> |
| import DataSourceWebsite from './data-source-website' | import DataSourceWebsite from './data-source-website' | ||||
| import { fetchDataSource } from '@/service/common' | import { fetchDataSource } from '@/service/common' | ||||
| import { DataSourceProvider } from '@/models/common' | import { DataSourceProvider } from '@/models/common' | ||||
| import { ENABLE_WEBSITE_FIRECRAWL, ENABLE_WEBSITE_JINAREADER, ENABLE_WEBSITE_WATERCRAWL } from '@/config' | |||||
| export default function DataSourcePage() { | export default function DataSourcePage() { | ||||
| const { data } = useSWR({ url: 'data-source/integrates' }, fetchDataSource) | const { data } = useSWR({ url: 'data-source/integrates' }, fetchDataSource) | ||||
| return ( | return ( | ||||
| <div className='mb-8'> | <div className='mb-8'> | ||||
| <DataSourceNotion workspaces={notionWorkspaces} /> | <DataSourceNotion workspaces={notionWorkspaces} /> | ||||
| <DataSourceWebsite provider={DataSourceProvider.jinaReader} /> | |||||
| <DataSourceWebsite provider={DataSourceProvider.fireCrawl} /> | |||||
| <DataSourceWebsite provider={DataSourceProvider.waterCrawl} /> | |||||
| {ENABLE_WEBSITE_JINAREADER && <DataSourceWebsite provider={DataSourceProvider.jinaReader} />} | |||||
| {ENABLE_WEBSITE_FIRECRAWL && <DataSourceWebsite provider={DataSourceProvider.fireCrawl} />} | |||||
| {ENABLE_WEBSITE_WATERCRAWL && <DataSourceWebsite provider={DataSourceProvider.waterCrawl} />} | |||||
| </div> | </div> | ||||
| ) | ) | ||||
| } | } |
| maxIterationsNum = Number.parseInt(globalThis.document.body.getAttribute('data-public-max-iterations-num') as string) | maxIterationsNum = Number.parseInt(globalThis.document.body.getAttribute('data-public-max-iterations-num') as string) | ||||
| export const MAX_ITERATIONS_NUM = maxIterationsNum | export const MAX_ITERATIONS_NUM = maxIterationsNum | ||||
/**
 * Interprets the raw text of a boolean feature-flag environment variable.
 *
 * @param raw - Value read from the environment, or `undefined` when the variable is not set.
 * @param fallback - Result to use when `raw` is unset or blank.
 * @returns `fallback` when there is no usable value; otherwise `true` only if the
 * trimmed, lower-cased value equals `'true'`.
 */
const parseEnvFlag = (raw: string | undefined, fallback: boolean): boolean => {
  const normalized = raw === undefined ? '' : raw.trim().toLowerCase()
  // A blank value (e.g. `VAR=`) means "not configured" rather than "disabled",
  // matching the `${VAR:-true}` defaulting used by the shell entrypoint.
  if (normalized === '')
    return fallback
  return normalized === 'true'
}

// Website crawl providers are enabled by default; set the matching variable to
// anything other than "true" (case-insensitive) to hide that provider in the UI.
// NOTE(review): each variable is read with a literal `process.env.NEXT_PUBLIC_*`
// access on purpose - Next.js presumably only inlines statically written
// references into the client bundle; confirm before refactoring to dynamic lookup.
export const ENABLE_WEBSITE_JINAREADER = parseEnvFlag(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER, true)
export const ENABLE_WEBSITE_FIRECRAWL = parseEnvFlag(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL, true)
export const ENABLE_WEBSITE_WATERCRAWL = parseEnvFlag(process.env.NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL, true)
| export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE} | export NEXT_PUBLIC_TOP_K_MAX_VALUE=${TOP_K_MAX_VALUE} | ||||
| export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} | export NEXT_PUBLIC_INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH} | ||||
| export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM} | export NEXT_PUBLIC_MAX_TOOLS_NUM=${MAX_TOOLS_NUM} | ||||
| export NEXT_PUBLIC_ENABLE_WEBSITE_JINAREADER=${ENABLE_WEBSITE_JINAREADER:-true} | |||||
| export NEXT_PUBLIC_ENABLE_WEBSITE_FIRECRAWL=${ENABLE_WEBSITE_FIRECRAWL:-true} | |||||
| export NEXT_PUBLIC_ENABLE_WEBSITE_WATERCRAWL=${ENABLE_WEBSITE_WATERCRAWL:-true} | |||||
| pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon | pm2 start /app/web/server.js --name dify-web --cwd /app/web -i ${PM2_INSTANCES} --no-daemon |