| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192 |
- # base stage
- # Shared foundation: the builder and production stages further down both
- # start with `FROM base`.
- FROM ubuntu:22.04 AS base
- USER root
- # Run all following RUN instructions through bash rather than the default
- # /bin/sh; later steps use `==` inside `[ ]`, which plain sh may reject.
- SHELL ["/bin/bash", "-c"]
-
- # Build arguments:
- #   NEED_MIRROR=1  use the Tsinghua (TUNA) mirrors for apt, pip and poetry.
- #   LIGHTEN=1      build the slim image: the embedding/reranker models and the
- #                  poetry "full" dependency group are left out.
- ARG NEED_MIRROR=0
- ARG LIGHTEN=0
- # Re-export LIGHTEN as an environment variable so the chosen value is stored in the image.
- ENV LIGHTEN=${LIGHTEN}
-
- WORKDIR /ragflow
-
- # Copy the models that were downloaded via download_deps.py.
- # They live in the infiniflow/ragflow_deps image, which is bind-mounted only
- # for the duration of each RUN, so the files are streamed out through a
- # tar-to-tar pipe.
- RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
-
- # deepdoc resources: hidden files are skipped, and --strip-components=3 drops
- # the huggingface.co/InfiniFlow/<repo>/ prefix, so the contents of both
- # repositories end up directly inside /ragflow/rag/res/deepdoc.
- RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
-     tar --exclude='.*' -cf - \
-         /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
-         /huggingface.co/InfiniFlow/deepdoc \
-     | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
-
- # Embedding and reranker models, skipped when LIGHTEN=1.
- # --strip-components=2 drops huggingface.co/<owner>/, leaving one directory
- # per model under /root/.ragflow.
- RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
-     if [ "$LIGHTEN" != "1" ]; then \
-         tar -cf - \
-             /huggingface.co/BAAI/bge-large-zh-v1.5 \
-             /huggingface.co/BAAI/bge-reranker-v2-m3 \
-             /huggingface.co/maidalun1020/bce-embedding-base_v1 \
-             /huggingface.co/maidalun1020/bce-reranker-base_v1 \
-         | tar -xf - --strip-components=2 -C /root/.ragflow; \
-     fi
-
- # https://github.com/chrismattmann/tika-python
- # Pointing TIKA_SERVER_JAR (set below) at a local file is the only way to run python-tika without internet access. Without it, the default is to check the tika version and pull the latest jar from Apache every time.
- #
- # Copy the offline assets out of the bind-mounted deps image:
- #   nltk_data                     -> /root/
- #   tika server jar + .md5 file   -> /ragflow/
- #   cl100k_base.tiktoken          -> /ragflow/ under a hash-like file name
- #     (presumably the cache key tiktoken looks up -- TODO confirm)
- RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
- cp -r /deps/nltk_data /root/ && \
- cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
- cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
-
- ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
- # Keep apt/dpkg from asking interactive questions during the build.
- ENV DEBIAN_FRONTEND=noninteractive
-
- # Setup apt
- # Python packages and the system packages they implicitly depend on:
- # opencv-python: libglib2.0-0 libglx-mesa0 libgl1
- # aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
- # python-pptx: default-jdk tika-server-standard-3.0.0.jar
- # selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
- # Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
- #
- # Notes on the RUN below:
- # - /var/cache/apt is a BuildKit cache mount shared (locked) between builds.
- #   Removing docker-clean and writing the keep-cache setting stops apt from
- #   deleting the downloaded packages, so the cache is actually reused.
- # - With NEED_MIRROR=1 the Ubuntu archive URL is rewritten to the TUNA mirror.
- # - ca-certificates is installed first and `apt update` is run a second time,
- #   presumably so that the https mirror can be verified -- TODO confirm.
- # - chmod 1777 makes /tmp world-writable with the sticky bit set.
- RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
- if [ "$NEED_MIRROR" == "1" ]; then \
- sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
- fi; \
- rm -f /etc/apt/apt.conf.d/docker-clean && \
- echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
- chmod 1777 /tmp && \
- apt update && \
- apt --no-install-recommends install -y ca-certificates && \
- apt update && \
- apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
- apt install -y pkg-config libicu-dev libgdiplus && \
- apt install -y default-jdk && \
- apt install -y libatk-bridge2.0-0 && \
- apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
- apt install -y python3-pip pipx nginx unzip curl wget git vim less
-
- # Install Poetry through pipx. With NEED_MIRROR=1, pip is first pointed at the
- # TUNA PyPI mirror and the poetry-plugin-pypi-mirror plugin is injected into
- # Poetry's pipx environment afterwards.
- # The steps are chained with `&&` (not `;`) so that any failing step fails the
- # build; with `;` a broken `pipx install poetry` was hidden by the exit status
- # of the trailing `if`, which is 0 whenever NEED_MIRROR is not 1.
- RUN if [ "$NEED_MIRROR" == "1" ]; then \
-         pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
-         pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
-     fi && \
-     pipx install poetry && \
-     if [ "$NEED_MIRROR" == "1" ]; then \
-         pipx inject poetry poetry-plugin-pypi-mirror; \
-     fi
-
- # PYTHONDONTWRITEBYTECODE=1 stops Python from writing .pyc files.
- # DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 switches .NET to invariant
- # globalization mode (presumably needed by aspose-slides -- TODO confirm).
- ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
- # Put /root/.local/bin first on PATH (presumably where pipx exposes the
- # poetry executable -- TODO confirm).
- ENV PATH=/root/.local/bin:$PATH
- # Configure Poetry: never prompt, always create a virtualenv and place it
- # inside the project directory, and use a 15 second request timeout.
- ENV POETRY_NO_INTERACTION=1
- ENV POETRY_VIRTUALENVS_IN_PROJECT=true
- ENV POETRY_VIRTUALENVS_CREATE=true
- ENV POETRY_REQUESTS_TIMEOUT=15
-
- # nodejs 12.22 on Ubuntu 22.04 is too old, so register the NodeSource 20.x
- # repository, remove any distro nodejs/npm, and install nodejs from there
- # (cargo is installed in the same step).
- # - `set -o pipefail`: without it a failed download of the setup script is
- #   hidden, because `bash -` exits 0 on empty input.
- # - `apt autoremove -y`: without -y apt asks for confirmation and aborts in a
- #   non-interactive build whenever there is something to remove.
- RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
-     set -o pipefail && \
-     curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
-     apt purge -y nodejs npm && \
-     apt autoremove -y && \
-     apt update && \
-     apt install -y nodejs cargo
-
-
- # Add the mssql ODBC driver.
- #   arm64 / aarch64 (e.g. Docker on Apple Silicon): install msodbcsql18.
- #   x86_64 and everything else:                     install msodbcsql17.
- # The architecture is taken from `uname -m`. The previous check read $ARCH,
- # which is never declared as ARG or ENV in this file, so the arm64 branch could
- # not be reached. A non-empty ARCH is still honoured as an explicit override.
- # `set -o pipefail` and `curl -f` make a failed download of the signing key or
- # of the repository list fail the step instead of passing silently.
- RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
-     set -o pipefail && \
-     curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
-     curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
-     apt update && \
-     arch="${ARCH:-$(uname -m)}" && \
-     if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
-         ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
-     else \
-         ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
-     fi || \
-     { echo "Failed to install ODBC driver"; exit 1; }
-
-
-
- # Add dependencies of selenium
- # Chrome: the archive from the deps image is bind-mounted as /chrome-linux64.zip,
- # unpacked into the current directory, moved to /opt/chrome, and the chrome
- # binary is symlinked into /usr/local/bin.
- RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
- unzip /chrome-linux64.zip && \
- mv chrome-linux64 /opt/chrome && \
- ln -s /opt/chrome/chrome /usr/local/bin/
- # chromedriver: `unzip -j` extracts just the chromedriver binary without its
- # directory, which is then moved into /usr/local/bin. Any existing
- # /usr/bin/google-chrome is removed afterwards (reason not visible here --
- # presumably so that only the Chrome build installed above is found).
- RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
- unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
- mv chromedriver /usr/local/bin/ && \
- rm -f /usr/bin/google-chrome
-
- # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
- # aspose-slides on linux/arm64 is unavailable
- # Install the libssl1.1 package from the deps image that matches the machine
- # architecture; on any other architecture nothing is installed.
- RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
-     case "$(uname -m)" in \
-         x86_64) \
-             dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
-         aarch64) \
-             dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
-     esac
-
-
- # builder stage
- # Builds the Python virtualenv, the web front end and the VERSION file; the
- # production stage copies those artifacts out of this stage.
- FROM base AS builder
- USER root
-
- WORKDIR /ragflow
-
- # install dependencies from poetry.lock file
- # Only the Poetry project files are copied at this point, before the rest of
- # the sources.
- COPY pyproject.toml poetry.toml poetry.lock ./
-
- # Install the locked dependencies without installing the project itself
- # (--no-root). The "full" dependency group is added unless LIGHTEN=1, and the
- # TUNA PyPI mirror is used when NEED_MIRROR=1. Poetry's download cache is kept
- # in a BuildKit cache mount between builds.
- RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \
-     if [ "$NEED_MIRROR" == "1" ]; then \
-         export POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/; \
-     fi; \
-     install_args="--no-root"; \
-     if [ "$LIGHTEN" != "1" ]; then \
-         install_args="$install_args --with=full"; \
-     fi; \
-     poetry install $install_args
-
- # Build the web front end. docs is copied next to web before the build
- # (presumably because the web build reads from it -- TODO confirm).
- COPY web web
- COPY docs docs
- # The npm cache lives in a BuildKit cache mount so packages are reused across builds.
- RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
- cd web && npm install --force && npm run build
-
- # Derive the version string from git metadata and record it in /ragflow/VERSION.
- # `git describe` uses the nearest v* tag on the first-parent history and falls
- # back to the abbreviated commit hash (--always). The image flavour, "slim"
- # for LIGHTEN=1 and "full" otherwise, is appended after a space.
- COPY .git /ragflow/.git
-
- RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
-     case "$LIGHTEN" in \
-         1) version_info="$version_info slim" ;; \
-         *) version_info="$version_info full" ;; \
-     esac; \
-     echo "RAGFlow version: $version_info"; \
-     echo $version_info > /ragflow/VERSION
-
- # production stage
- # Starts again from base, so nothing from the builder stage is carried over
- # except what is copied explicitly with COPY --from=builder.
- FROM base AS production
- USER root
-
- WORKDIR /ragflow
-
- # Copy Python environment and packages
- # The virtualenv built in the builder stage is copied to the same path and its
- # bin directory is put first on PATH.
- ENV VIRTUAL_ENV=/ragflow/.venv
- COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
- ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
-
- # Add /ragflow/ to Python's module search path.
- ENV PYTHONPATH=/ragflow/
-
- # Application sources, copied from the build context into /ragflow.
- COPY web web
- COPY api api
- COPY conf conf
- COPY deepdoc deepdoc
- COPY rag rag
- COPY agent agent
- COPY graphrag graphrag
- COPY pyproject.toml poetry.toml poetry.lock ./
-
- # Service configuration template and container entrypoint script.
- COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
- COPY docker/entrypoint.sh ./entrypoint.sh
- RUN chmod +x ./entrypoint.sh
-
- # Copy compiled web pages
- # (the dist directory produced by `npm run build` in the builder stage)
- COPY --from=builder /ragflow/web/dist /ragflow/web/dist
-
- # VERSION file written in the builder stage.
- COPY --from=builder /ragflow/VERSION /ragflow/VERSION
- # Relative path: resolved against WORKDIR /ragflow.
- ENTRYPOINT ["./entrypoint.sh"]
|