You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Dockerfile.slim 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  # Stage "base": common foundation that both the builder and production stages start FROM.
  FROM ubuntu:24.04 AS base
  USER root
  WORKDIR /ragflow
  # Target CPU architecture; checked later in this stage to decide whether the
  # amd64-only libssl1.1 package is installed.
  ARG ARCH=amd64
  # Read by the builder stage: 0 adds the "full" Poetry dependency group,
  # any other value installs only the default dependencies.
  ENV LIGHTEN=1
  # Keep downloaded .deb files so the BuildKit cache mounts on /var/cache/apt stay
  # useful across builds (docker-clean would otherwise purge them after each install).
  RUN rm -f /etc/apt/apt.conf.d/docker-clean \
      && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
  # ca-certificates is installed first, presumably so the HTTPS mirror configured
  # below can be verified. apt-get is used because the `apt` front end does not offer
  # a stable CLI for scripts, and the package lists are removed in the same layer so
  # they are not baked into the image.
  RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
      apt-get update \
      && apt-get --no-install-recommends install -y ca-certificates \
      && rm -rf /var/lib/apt/lists/*
  # Switch apt to the TUNA mirror to speed up package downloads.
  # Drop this line if archive.ubuntu.com is fast enough from your network.
  RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
  # System libraries and tools, followed by a Chrome / chromedriver pair fetched
  # from the URLs below (both name version 121.0.6167.85).
  # apt-get replaces `apt`, whose command-line interface is not stable for scripts.
  # NOTE(review): the two downloads go through a URL shortener and are not
  # checksum-verified; consider pinning a direct URL plus a sha256 check.
  RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
      apt-get update \
      && apt-get install -y \
          curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
          libasound2t64 libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget \
      && rm -rf /var/lib/apt/lists/* \
      && wget -q -O chrome-linux64.zip https://bit.ly/chrome-linux64-121-0-6167-85 \
      && unzip chrome-linux64.zip \
      && rm chrome-linux64.zip \
      && mv chrome-linux64 /opt/chrome/ \
      && ln -s /opt/chrome/chrome /usr/local/bin/ \
      && wget -q -O chromedriver-linux64.zip https://bit.ly/chromedriver-linux64-121-0-6167-85 \
      && unzip -j chromedriver-linux64.zip chromedriver-linux64/chromedriver \
      && rm chromedriver-linux64.zip \
      && mv chromedriver /usr/local/bin/ \
      && rm -f /usr/bin/google-chrome
  # Route pip through the TUNA index (with PKU as an extra index), then install
  # Poetry via pipx and add its PyPI-mirror plugin.
  # `set -e` aborts on the first failing command, like an && chain would.
  RUN set -e; \
      pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple; \
      pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn"; \
      pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple"; \
      pipx install poetry; \
      /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
  # libssl 1.1 is installed for aspose-slides; background:
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
  # aspose-slides is unavailable on linux/arm64, so the .deb is only installed when ARCH is amd64.
  # The package is bind-mounted from the build context rather than copied into a layer.
  RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
      case "${ARCH}" in \
          amd64) dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
      esac
  # Interpreter / runtime switches inherited by every later stage.
  ENV PYTHONDONTWRITEBYTECODE=1 \
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
  # /root/.local/bin is where the poetry executable installed above lives.
  ENV PATH=/root/.local/bin:$PATH
  # Poetry configuration: non-interactive, virtualenv created inside the project
  # directory, 15 s request timeout, packages resolved through the TUNA mirror.
  ENV POETRY_NO_INTERACTION=1 \
      POETRY_VIRTUALENVS_IN_PROJECT=true \
      POETRY_VIRTUALENVS_CREATE=true \
      POETRY_REQUESTS_TIMEOUT=15 \
      POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
  # Stage "builder": compiles the web front end and resolves the Python environment.
  # Nothing from this stage reaches the final image except what production copies out.
  FROM base AS builder
  USER root
  WORKDIR /ragflow
  # Build-only toolchain (Node.js, npm, cargo). apt-get replaces `apt`, whose
  # command-line interface is not stable for scripts.
  RUN --mount=type=cache,id=ragflow_builder_apt,target=/var/cache/apt,sharing=locked \
      apt-get update \
      && apt-get install -y nodejs npm cargo \
      && rm -rf /var/lib/apt/lists/*
  # Front-end sources. docs is copied next to them, presumably because the web
  # build references it (confirm against the web project).
  COPY web web
  COPY docs docs
  # Build inside the web directory, with the npm download cache kept in a BuildKit cache mount.
  WORKDIR /ragflow/web
  RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
      npm i && npm run build
  # Return to the project root for the remaining builder steps.
  WORKDIR /ragflow
  # Resolve Python dependencies from the lock file. The project itself is not
  # installed (--no-root); only its dependencies go into the virtualenv.
  COPY pyproject.toml poetry.toml poetry.lock ./
  # LIGHTEN equal to 0 adds the "full" dependency group; any other value installs the default set.
  RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
      extra_groups=""; \
      if [ "$LIGHTEN" -eq 0 ]; then extra_groups="--with=full"; fi; \
      poetry install --no-root $extra_groups
  # Stage "production": the image that is actually shipped.
  FROM base AS production
  USER root
  WORKDIR /ragflow
  # Runtime dependencies of the Python packages; cv2 requires libGL.so.1 (libgl1).
  # apt-get replaces `apt`, whose command-line interface is not stable for scripts.
  RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
      apt-get update \
      && apt-get install -y --no-install-recommends nginx libgl1 vim less \
      && rm -rf /var/lib/apt/lists/*
  # Application source trees, listed alphabetically. Each directory gets its own
  # COPY because copying several directories in one instruction would merge their contents.
  COPY agent agent
  COPY api api
  COPY conf conf
  COPY deepdoc deepdoc
  COPY graphrag graphrag
  COPY rag rag
  COPY web web
  # Project metadata and Poetry configuration.
  COPY pyproject.toml poetry.toml poetry.lock ./
  # Copy models downloaded via download_deps.py
  RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
  # The copy below is a tar-to-tar pipeline. Under the default /bin/sh a failure of
  # the archiving tar (for example a missing model directory) would be masked by the
  # extracting tar's exit status, so the step runs under bash with pipefail.
  SHELL ["/bin/bash", "-o", "pipefail", "-c"]
  # Hidden files are excluded. --strip-components=3 drops the leading
  # huggingface.co/InfiniFlow/<model>/ path so every file lands directly in rag/res/deepdoc.
  RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
      tar --exclude='.*' -cf - \
          /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
          /huggingface.co/InfiniFlow/deepdoc \
      | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
  # Restore the default shell so later RUN instructions are unaffected.
  SHELL ["/bin/sh", "-c"]
  # Copy nltk data downloaded via download_deps.py
  COPY nltk_data /root/nltk_data
  # https://github.com/chrismattmann/tika-python
  # Setting TIKA_SERVER_JAR to a local file is the only way to run python-tika without
  # internet access; otherwise it checks the Tika version and pulls the latest jar
  # from Apache every time.
  COPY tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 ./
  # The URL must name the jar exactly as copied above (WORKDIR is /ragflow);
  # it previously pointed at a non-existent tika-server-standard.jar.
  ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
  # Compiled web pages produced by the builder stage.
  COPY --from=builder /ragflow/web/dist /ragflow/web/dist
  # Python virtualenv produced by the builder stage, copied to the same path.
  ENV VIRTUAL_ENV=/ragflow/.venv
  COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
  # Put the virtualenv's executables first on PATH and make the project root importable.
  ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" \
      PYTHONPATH=/ragflow/
  # Service configuration template; presumably rendered by the entrypoint at start-up (confirm in entrypoint.sh).
  COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
  # Copy the entrypoint already executable. A separate `RUN chmod` would add an
  # extra layer containing a second copy of the file.
  COPY --chmod=755 docker/entrypoint.sh ./entrypoint.sh
  # Exec form, so the script runs directly as the container's main process and receives stop signals.
  ENTRYPOINT ["./entrypoint.sh"]