You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Dockerfile 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  # base stage
  # Common foundation of the image; the builder and production stages both
  # start from it (`FROM base`).
  FROM ubuntu:22.04 AS base
  USER root

  # Run shell-form RUN instructions under bash with pipefail enabled, so that a
  # failure on the left-hand side of a pipe (this file pipes `tar` into `tar`
  # and `curl` into `bash`/`tee`) fails the build instead of being hidden by
  # the exit status of the last command in the pipeline.
  SHELL ["/bin/bash", "-o", "pipefail", "-c"]

  # Build arguments (override with --build-arg):
  #   NEED_MIRROR=1  use the Tsinghua (TUNA) mirrors for apt, pip and poetry
  #   LIGHTEN=1      skip the embedding/reranker models and the "full" poetry group
  ARG NEED_MIRROR=0
  ARG LIGHTEN=0
  # Also export LIGHTEN as an environment variable of the image.
  ENV LIGHTEN=${LIGHTEN}

  WORKDIR /ragflow
  # Copy models downloaded via download_deps.py
  # The model files are read from /huggingface.co of the
  # infiniflow/ragflow_deps:latest image, bind-mounted for the duration of each
  # RUN, and streamed into place with a tar-to-tar pipe.
  RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow

  # deepdoc resources (always installed).
  # --strip-components=3 removes the huggingface.co/InfiniFlow/<repo>/ prefix, so
  # the files of both repositories land directly in /ragflow/rag/res/deepdoc.
  # --exclude='.*' leaves out dot-prefixed entries.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
      tar --exclude='.*' -cf - \
          /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
          /huggingface.co/InfiniFlow/deepdoc \
      | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc

  # Embedding and reranker models (skipped when LIGHTEN=1).
  # --strip-components=2 keeps the repository directory, e.g.
  # /root/.ragflow/bge-large-zh-v1.5.
  # The pipeline is terminated with `;` before `fi`; the previous form relied on
  # bash accepting a reserved word directly after a closing parenthesis.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
      if [ "$LIGHTEN" != "1" ]; then \
          tar -cf - \
              /huggingface.co/BAAI/bge-large-zh-v1.5 \
              /huggingface.co/BAAI/bge-reranker-v2-m3 \
              /huggingface.co/maidalun1020/bce-embedding-base_v1 \
              /huggingface.co/maidalun1020/bce-reranker-base_v1 \
          | tar -xf - --strip-components=2 -C /root/.ragflow; \
      fi
  # Offline resources taken from the infiniflow/ragflow_deps:latest image, whose
  # root is bind-mounted at /deps for this RUN:
  #   nltk_data                              -> /root/
  #   tika-server-standard-3.0.0.jar (+.md5) -> /ragflow/
  #   cl100k_base.tiktoken                   -> /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
  # NOTE(review): the hash-like target name is presumably the cache file name
  # tiktoken looks up for cl100k_base -- confirm.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
      cp -r /deps/nltk_data /root/ \
      && cp /deps/tika-server-standard-3.0.0.jar /ragflow/ \
      && cp /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ \
      && cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

  # https://github.com/chrismattmann/tika-python
  # Point python-tika at the jar copied above. This is the only way to run
  # python-tika without internet access; without it, the default is to check the
  # Tika version and pull the latest from Apache every time.
  ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
  ENV DEBIAN_FRONTEND=noninteractive

  # Setup apt
  # Python package and implicit dependencies:
  # opencv-python: libglib2.0-0 libglx-mesa0 libgl1
  # aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
  # python-pptx: default-jdk tika-server-standard-3.0.0.jar
  # selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
  # Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
  #
  # Notes:
  # - apt-get is used instead of apt, whose command-line interface is not
  #   intended for scripted use.
  # - /var/cache/apt is a BuildKit cache mount. docker-clean is removed and
  #   Keep-Downloaded-Packages is set so downloaded packages stay in that cache.
  # - ca-certificates is installed first and the package lists are refreshed
  #   again before the main install; when NEED_MIRROR=1 the sources point at an
  #   https:// mirror.
  # - All remaining packages are installed in a single transaction, one per
  #   line and sorted, instead of six separate install commands.
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      if [ "$NEED_MIRROR" == "1" ]; then \
          sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
      fi; \
      rm -f /etc/apt/apt.conf.d/docker-clean && \
      echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
      chmod 1777 /tmp && \
      apt-get update && \
      apt-get --no-install-recommends install -y ca-certificates && \
      apt-get update && \
      apt-get install -y \
          curl \
          default-jdk \
          git \
          less \
          libatk-bridge2.0-0 \
          libgbm-dev \
          libgdiplus \
          libgl1 \
          libglib2.0-0 \
          libglx-mesa0 \
          libgtk-4-1 \
          libicu-dev \
          libnss3 \
          libpython3-dev \
          nginx \
          pipx \
          pkg-config \
          python3-pip \
          unzip \
          vim \
          wget \
          xdg-utils
  # Install Poetry with pipx; when NEED_MIRROR=1, first point pip at the TUNA
  # index and afterwards add the poetry-plugin-pypi-mirror plugin to Poetry.
  #
  # `set -e` aborts the RUN at the first failing command. The steps used to be
  # joined with `;`, so a failed `pipx install poetry` went unnoticed whenever
  # NEED_MIRROR was not 1 (the trailing `if` then returned success). Commands
  # inside the `if` bodies are separated by `;` rather than `&&` because
  # `set -e` does not act on non-final members of an `&&` list.
  RUN set -e; \
      if [ "$NEED_MIRROR" == "1" ]; then \
          pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple; \
          pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
      fi; \
      pipx install poetry; \
      if [ "$NEED_MIRROR" == "1" ]; then \
          pipx inject poetry poetry-plugin-pypi-mirror; \
      fi

  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
  # NOTE(review): /root/.local/bin is presumably where pipx exposes the poetry
  # executable -- confirm.
  ENV PATH=/root/.local/bin:$PATH

  # Configure Poetry
  # VIRTUALENVS_IN_PROJECT places the virtualenv inside the project directory;
  # the production stage copies it from /ragflow/.venv.
  ENV POETRY_NO_INTERACTION=1 \
      POETRY_VIRTUALENVS_IN_PROJECT=true \
      POETRY_VIRTUALENVS_CREATE=true \
      POETRY_REQUESTS_TIMEOUT=15
  # nodejs 12.22 on Ubuntu 22.04 is too old
  # Run the NodeSource 20.x setup script, remove any previously installed
  # nodejs/npm packages, then install nodejs and cargo from the refreshed
  # package lists.
  # - `set -o pipefail` makes a failed download abort the build instead of
  #   piping nothing into bash and carrying on.
  # - `autoremove -y`: without -y the command stops at the confirmation prompt
  #   when there is something to remove, and a build has no terminal to answer.
  # - apt-get is used instead of apt, whose command-line interface is not
  #   intended for scripted use.
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      set -o pipefail && \
      curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
      apt-get purge -y nodejs npm && \
      apt-get autoremove -y && \
      apt-get update && \
      apt-get install -y nodejs cargo
  # Add mssql17
  # Register Microsoft's signing key and the Ubuntu 22.04 package source, then
  # install unixodbc-dev and msodbcsql17 (ACCEPT_EULA=Y accepts the licence
  # non-interactively).
  # curl writes each file directly with -f, so an HTTP error fails the build
  # instead of an error page being saved as the key or the sources list (the
  # previous `curl | tee` form could not detect that).
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
          -o /etc/apt/trusted.gpg.d/microsoft.asc && \
      curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list \
          -o /etc/apt/sources.list.d/mssql-release.list && \
      apt-get update && \
      ACCEPT_EULA=Y apt-get install -y unixodbc-dev msodbcsql17
  # Add dependencies of selenium
  # Both archives are bind-mounted from the infiniflow/ragflow_deps:latest image
  # under a .zip name for the duration of their RUN.

  # Chrome: unpack into the current working directory, move the resulting
  # chrome-linux64 tree to /opt/chrome and symlink the binary into
  # /usr/local/bin.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
      unzip /chrome-linux64.zip \
      && mv chrome-linux64 /opt/chrome \
      && ln -s /opt/chrome/chrome /usr/local/bin/

  # chromedriver: extract only the driver binary (-j drops the archive's
  # directory prefix), install it into /usr/local/bin, and remove
  # /usr/bin/google-chrome if it exists.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
      unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver \
      && mv chromedriver /usr/local/bin/ \
      && rm -f /usr/bin/google-chrome
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
  # aspose-slides on linux/arm64 is unavailable
  # Install the libssl1.1 package that matches the machine architecture from the
  # ragflow_deps image (mounted at /deps). Any architecture other than x86_64 or
  # aarch64 installs nothing.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
      machine="$(uname -m)"; \
      case "$machine" in \
          x86_64)  dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
          aarch64) dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
      esac
  # builder stage
  # Resolves the Python dependencies, builds the web front end and records the
  # version; the production stage copies the results out of this stage.
  FROM base AS builder
  USER root
  WORKDIR /ragflow

  # install dependencies from poetry.lock file
  # Only the dependency manifests are copied before the install. The Poetry
  # cache directory is a BuildKit cache mount shared between builds.
  # NEED_MIRROR=1 -> resolve packages through the TUNA PyPI mirror.
  # LIGHTEN=1     -> install without the optional "full" dependency group.
  COPY pyproject.toml poetry.toml poetry.lock ./
  RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \
      [ "$NEED_MIRROR" != "1" ] || export POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/; \
      if [ "$LIGHTEN" != "1" ]; then \
          poetry install --no-root --with=full; \
      else \
          poetry install --no-root; \
      fi
  # Build the web front end. /root/.npm is a BuildKit cache mount shared
  # between builds.
  # NOTE(review): docs is copied next to web before the build, presumably
  # because the front-end build reads from it -- confirm.
  COPY web web
  COPY docs docs
  WORKDIR /ragflow/web
  RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
      npm install --force && npm run build
  # Return to the project root for the instructions that follow.
  WORKDIR /ragflow
  # Record the version in /ragflow/VERSION: the output of `git describe` for the
  # copied repository metadata, followed by "slim" (LIGHTEN=1) or "full".
  COPY .git /ragflow/.git
  RUN if [ "$LIGHTEN" == "1" ]; then flavor=slim; else flavor=full; fi; \
      version_info="$(git describe --tags --match=v* --first-parent --always) $flavor"; \
      echo "RAGFlow version: $version_info"; \
      echo $version_info > /ragflow/VERSION
  # production stage
  # Final image: the base stage plus the virtualenv, application sources,
  # compiled web pages and VERSION file.
  FROM base AS production
  USER root
  WORKDIR /ragflow

  # Copy Python environment and packages
  # VIRTUAL_ENV is set in its own ENV instruction so the COPY and the PATH
  # definition below can expand it.
  ENV VIRTUAL_ENV=/ragflow/.venv
  COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
  ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" \
      PYTHONPATH=/ragflow/

  # Application sources and dependency manifests from the build context.
  COPY web /ragflow/web
  COPY api /ragflow/api
  COPY conf /ragflow/conf
  COPY deepdoc /ragflow/deepdoc
  COPY rag /ragflow/rag
  COPY agent /ragflow/agent
  COPY graphrag /ragflow/graphrag
  COPY pyproject.toml poetry.toml poetry.lock /ragflow/

  # Service configuration template and container entrypoint.
  COPY docker/service_conf.yaml.template /ragflow/conf/service_conf.yaml.template
  COPY docker/entrypoint.sh /ragflow/entrypoint.sh
  RUN chmod +x /ragflow/entrypoint.sh

  # Copy compiled web pages
  # These come after `COPY web` so the built dist directory is added on top of
  # the web sources.
  COPY --from=builder /ragflow/web/dist /ragflow/web/dist
  COPY --from=builder /ragflow/VERSION /ragflow/VERSION

  # Resolved relative to WORKDIR (/ragflow).
  ENTRYPOINT ["./entrypoint.sh"]