Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

Dockerfile 6.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  # base stage: shared foundation for the builder and production stages
  FROM ubuntu:22.04 AS base
  USER root
  # Run every RUN step under bash with pipefail enabled, so a failure on the
  # left-hand side of a pipeline (tar | tar, curl | bash) fails the build
  # instead of being masked by the exit status of the last command.
  SHELL ["/bin/bash", "-o", "pipefail", "-c"]
  # NEED_MIRROR=1 switches package downloads to the TUNA mirrors.
  ARG NEED_MIRROR=0
  # LIGHTEN=1 produces the "slim" image variant; any other value produces "full".
  ARG LIGHTEN=0
  # Persist LIGHTEN as an environment variable of the image.
  ENV LIGHTEN=${LIGHTEN}
  WORKDIR /ragflow
  # Copy models downloaded via download_deps.py
  # Creates both model directories, then streams the two InfiniFlow folders out
  # of the ragflow_deps image into /ragflow/rag/res/deepdoc.
  # --strip-components=3 drops the huggingface.co/InfiniFlow/<folder> prefix, so
  # the contents of both folders land directly in the target directory.
  # Entries whose name starts with a dot are excluded.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
      mkdir --parents /ragflow/rag/res/deepdoc /root/.ragflow && \
      tar --exclude='.*' --create --file=- \
          /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
          /huggingface.co/InfiniFlow/deepdoc \
      | tar --extract --file=- --strip-components=3 --directory=/ragflow/rag/res/deepdoc
  # Unless LIGHTEN is "1", also bundle the BAAI and maidalun1020 models.
  # --strip-components=2 drops the huggingface.co/<org> prefix, so each model
  # keeps its own directory under /root/.ragflow.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
      if [ "$LIGHTEN" != "1" ]; then \
          tar --create --file=- \
              /huggingface.co/BAAI/bge-large-zh-v1.5 \
              /huggingface.co/BAAI/bge-reranker-v2-m3 \
              /huggingface.co/maidalun1020/bce-embedding-base_v1 \
              /huggingface.co/maidalun1020/bce-reranker-base_v1 \
          | tar --extract --file=- --strip-components=2 --directory=/root/.ragflow; \
      fi
  # https://github.com/chrismattmann/tika-python
  # Pointing TIKA_SERVER_JAR at a local file is the only way to run python-tika
  # without internet access; otherwise it checks the Tika version and pulls the
  # latest jar from Apache every time.
  ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
  # Copy the offline resources out of the ragflow_deps image: NLTK data into
  # /root, the Tika server jar plus its .md5 into /ragflow, and the cl100k_base
  # tiktoken file under a hash-like name.
  # NOTE(review): the hash-like file name is presumably the cache key tiktoken
  # looks up for cl100k_base; confirm before renaming.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
      cp --recursive /deps/nltk_data /root/ && \
      cp --target-directory=/ragflow/ \
          /deps/tika-server-standard-3.0.0.jar \
          /deps/tika-server-standard-3.0.0.jar.md5 && \
      cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
  # Setup apt
  # cv2 requires libGL.so.1
  # - With NEED_MIRROR=1 the Ubuntu archive URLs are rewritten to the TUNA mirror.
  # - docker-clean is removed and Keep-Downloaded-Packages is set so downloaded
  #   .deb files stay in the /var/cache/apt cache mount between builds.
  # - ca-certificates is installed first and the package index refreshed again
  #   (presumably so HTTPS sources can be verified; confirm before merging the
  #   two update calls).
  # - apt-get is used instead of apt because apt's command-line interface is
  #   not stable for scripted use.
  # Packages are listed one per line, sorted, without duplicates.
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      if [ "$NEED_MIRROR" == "1" ]; then \
          sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
      fi; \
      rm -f /etc/apt/apt.conf.d/docker-clean && \
      echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
      chmod 1777 /tmp && \
      apt-get update && \
      apt-get install -y --no-install-recommends ca-certificates && \
      apt-get update && \
      DEBIAN_FRONTEND=noninteractive apt-get install -y \
          curl \
          default-jdk \
          git \
          less \
          libatk-bridge2.0-0 \
          libgbm-dev \
          libgdiplus \
          libgl1 \
          libglib2.0-0 \
          libglx-mesa0 \
          libgtk-4-1 \
          libicu-dev \
          libnss3 \
          libpython3-dev \
          nginx \
          pipx \
          pkg-config \
          python3-pip \
          unzip \
          vim \
          wget \
          xdg-utils
  # Install Poetry through pipx. With NEED_MIRROR=1, pip is first pointed at the
  # TUNA PyPI mirror and the poetry-plugin-pypi-mirror plugin is injected into
  # Poetry's pipx environment.
  # `set -e` makes any failing command abort the step; previously the commands
  # were joined with `;`, so a failed `pipx install poetry` went unnoticed
  # whenever the final `if` succeeded.
  RUN set -e; \
      if [ "$NEED_MIRROR" == "1" ]; then \
          pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple; \
          pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
      fi; \
      pipx install poetry; \
      if [ "$NEED_MIRROR" == "1" ]; then \
          pipx inject poetry poetry-plugin-pypi-mirror; \
      fi
  # Interpreter and .NET runtime switches.
  ENV PYTHONDONTWRITEBYTECODE=1 \
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
  # Put /root/.local/bin first on PATH (presumably where pipx exposes poetry).
  ENV PATH=/root/.local/bin:$PATH
  # Configure Poetry
  ENV POETRY_NO_INTERACTION=1 \
      POETRY_VIRTUALENVS_IN_PROJECT=true \
      POETRY_VIRTUALENVS_CREATE=true \
      POETRY_REQUESTS_TIMEOUT=15
  # nodejs 12.22 on Ubuntu 22.04 is too old
  # Register the NodeSource 20.x repository, remove any distro nodejs/npm, then
  # install nodejs and cargo.
  # - `set -o pipefail` makes a failed download abort the step instead of
  #   piping an empty script into bash.
  # - `autoremove -y` avoids the confirmation prompt, which aborts a
  #   non-interactive build whenever there is something to remove.
  # - apt-get is used instead of apt because apt's command-line interface is
  #   not stable for scripted use.
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      set -o pipefail && \
      curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
      apt-get purge -y nodejs npm && \
      apt-get autoremove -y && \
      apt-get update && \
      apt-get install -y nodejs cargo
  # Add dependencies of selenium
  # Chrome: unpack the bundled archive under /opt, rename the folder to
  # /opt/chrome and expose the binary as /usr/local/bin/chrome.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
      unzip /chrome-linux64.zip -d /opt && \
      mv /opt/chrome-linux64 /opt/chrome && \
      ln -s /opt/chrome/chrome /usr/local/bin/chrome
  # Chromedriver: extract only the driver binary (-j drops the archive folder)
  # straight into /usr/local/bin, then remove any /usr/bin/google-chrome entry.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
      unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver -d /usr/local/bin && \
      rm -f /usr/bin/google-chrome
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
  # aspose-slides on linux/arm64 is unavailable
  # Install the bundled libssl1.1 package that matches the machine architecture;
  # any other architecture installs nothing.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
      case "$(uname -m)" in \
          x86_64)  dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
          aarch64) dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
      esac
  # builder stage
  FROM base AS builder
  USER root
  WORKDIR /ragflow
  # install dependencies from poetry.lock file
  # Only the dependency manifests are copied here, so this layer is reused
  # until one of them changes.
  COPY pyproject.toml poetry.toml poetry.lock ./
  # With NEED_MIRROR=1 Poetry is pointed at the TUNA PyPI mirror.
  # LIGHTEN=1 installs the default dependency set; anything else adds the
  # "full" dependency group.
  RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \
      [ "$NEED_MIRROR" != "1" ] || export POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/; \
      case "$LIGHTEN" in \
          1) poetry install --no-root ;; \
          *) poetry install --no-root --with=full ;; \
      esac
  # Build the web front end; the npm download cache is kept in a cache mount.
  COPY web web
  COPY docs docs
  WORKDIR /ragflow/web
  RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
      npm install --force && npm run build
  # Return to the project root for the steps that follow.
  WORKDIR /ragflow
  # Derive the version string from git and record it in /ragflow/VERSION as
  # "<git describe output> slim" (LIGHTEN=1) or "<git describe output> full".
  # The tag pattern and the variable expansions are quoted so the shell can
  # neither glob-expand `v*` nor word-split the version string.
  COPY .git /ragflow/.git
  RUN version_info="$(git describe --tags '--match=v*' --first-parent --always)"; \
      if [ "$LIGHTEN" == "1" ]; then \
          version_info="$version_info slim"; \
      else \
          version_info="$version_info full"; \
      fi; \
      echo "RAGFlow version: $version_info"; \
      echo "$version_info" > /ragflow/VERSION
  # production stage
  FROM base AS production
  USER root
  WORKDIR /ragflow
  # Copy Python environment and packages
  # The virtualenv built in the builder stage is copied to the same path and
  # its bin directory is put first on PATH.
  ENV VIRTUAL_ENV=/ragflow/.venv
  COPY --from=builder /ragflow/.venv ${VIRTUAL_ENV}
  ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" \
      PYTHONPATH=/ragflow/
  # Application sources and dependency manifests.
  COPY web web
  COPY api api
  COPY conf conf
  COPY deepdoc deepdoc
  COPY rag rag
  COPY agent agent
  COPY graphrag graphrag
  COPY pyproject.toml poetry.toml poetry.lock ./
  COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
  # Set the executable bit while copying, rather than in a separate
  # `RUN chmod +x` step that adds another layer holding a copy of the file.
  COPY --chmod=0755 docker/entrypoint.sh ./entrypoint.sh
  # Copy compiled web pages
  COPY --from=builder /ragflow/web/dist /ragflow/web/dist
  # Version string generated in the builder stage.
  COPY --from=builder /ragflow/VERSION /ragflow/VERSION
  ENTRYPOINT ["./entrypoint.sh"]