Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

Dockerfile 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  # base stage
  # Common parent of the builder and production stages (both use `FROM base`).
  FROM ubuntu:22.04 AS base
  USER root
  # Run every RUN step under bash with pipefail, so that a failure on the
  # left-hand side of a pipeline (e.g. `tar ... | tar ...`, `curl ... | bash`)
  # fails the build instead of being masked by the last command's exit status.
  SHELL ["/bin/bash", "-o", "pipefail", "-c"]
  # NEED_MIRROR=1 switches apt / pip / poetry to the Tsinghua mirrors.
  ARG NEED_MIRROR=0
  # LIGHTEN=1 skips the bundled embedding models and the "full" Poetry group;
  # it is exported as ENV so that later stages and RUN steps can read it.
  ARG LIGHTEN=0
  ENV LIGHTEN=${LIGHTEN}
  WORKDIR /ragflow
  # Copy models downloaded via download_deps.py
  # Destination directories for the bundled model files.
  RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
  # Stream the two InfiniFlow directories out of the deps image (dot-files
  # excluded). tar drops the leading "/", and --strip-components=3 removes
  # "huggingface.co/InfiniFlow/<repo>", so the files land directly in deepdoc.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
      tar --exclude='.*' -cf - \
          /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
          /huggingface.co/InfiniFlow/deepdoc \
      | tar -x -f - -C /ragflow/rag/res/deepdoc --strip-components=3
  # Unless LIGHTEN=1, also bundle the BAAI and maidalun1020 models.
  # --strip-components=2 removes "huggingface.co/<org>", leaving one directory
  # per model under /root/.ragflow.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
      if [ "$LIGHTEN" != "1" ]; then \
          tar -cf - \
              /huggingface.co/BAAI/bge-large-zh-v1.5 \
              /huggingface.co/BAAI/bge-reranker-v2-m3 \
              /huggingface.co/maidalun1020/bce-embedding-base_v1 \
              /huggingface.co/maidalun1020/bce-reranker-base_v1 \
          | tar -x -f - -C /root/.ragflow --strip-components=2; \
      fi
  # https://github.com/chrismattmann/tika-python
  # python-tika needs a local server jar to work without internet access;
  # otherwise it checks the tika version and pulls the latest from Apache on
  # every run. The jar, its .md5 and the NLTK data come from the deps image.
  # NOTE(review): the hash-named copy of cl100k_base.tiktoken is presumably the
  # cache file name tiktoken looks up - confirm against the application code.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
      cp -r /deps/nltk_data /root/ \
      && cp /deps/tika-server-standard-3.0.0.jar /ragflow/ \
      && cp /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ \
      && cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
  # Point python-tika at the jar copied above.
  ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
  # Setup apt
  # cv2 requires libGL.so.1
  #
  # - With NEED_MIRROR=1 the Ubuntu archive is replaced by the Tsinghua mirror.
  # - The docker-clean hook is removed and downloaded packages are kept, so the
  #   /var/cache/apt cache mount is actually populated between builds (the
  #   Keep-Downloaded-Packages setting only matters to the `apt` front-end).
  # - ca-certificates is installed first and the index refreshed again,
  #   presumably so the HTTPS mirror can be verified - TODO confirm.
  # - apt-get is used instead of apt: apt's command-line interface is not
  #   guaranteed to be stable for scripts.
  # - Packages are listed one per line, sorted, without duplicates.
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      if [ "$NEED_MIRROR" == "1" ]; then \
          sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
      fi; \
      rm -f /etc/apt/apt.conf.d/docker-clean && \
      echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
      apt-get update && \
      apt-get --no-install-recommends install -y ca-certificates && \
      apt-get update && \
      DEBIAN_FRONTEND=noninteractive apt-get install -y \
          curl \
          default-jdk \
          git \
          less \
          libatk-bridge2.0-0 \
          libgbm-dev \
          libgdiplus \
          libgl1 \
          libglib2.0-0 \
          libglx-mesa0 \
          libgtk-4-1 \
          libicu-dev \
          libnss3 \
          libpython3-dev \
          nginx \
          pipx \
          pkg-config \
          python3-pip \
          unzip \
          vim \
          wget \
          xdg-utils
  # Install Poetry with pipx. With NEED_MIRROR=1, pip is first pointed at the
  # Tsinghua PyPI mirror and the poetry-plugin-pypi-mirror plugin is injected
  # afterwards.
  # The steps are separated by `;`, so `set -e` is required: without it a failed
  # `pipx install poetry` would be hidden by the exit status of the trailing
  # `if`, and the layer would be committed without a working Poetry.
  RUN set -e; \
      if [ "$NEED_MIRROR" == "1" ]; then \
          pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple; \
          pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
      fi; \
      pipx install poetry; \
      if [ "$NEED_MIRROR" == "1" ]; then \
          pipx inject poetry poetry-plugin-pypi-mirror; \
      fi
  # Interpreter / runtime switches: no .pyc files, .NET invariant globalization.
  ENV PYTHONDONTWRITEBYTECODE=1 \
      DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
  # Put /root/.local/bin first on PATH (presumably where pipx exposes poetry).
  ENV PATH=/root/.local/bin:$PATH
  # Configure Poetry: non-interactive, virtualenv created inside the project
  # directory, 15 s request timeout.
  ENV POETRY_NO_INTERACTION=1 \
      POETRY_VIRTUALENVS_IN_PROJECT=true \
      POETRY_VIRTUALENVS_CREATE=true \
      POETRY_REQUESTS_TIMEOUT=15
  # nodejs 12.22 on Ubuntu 22.04 is too old
  # Register the NodeSource 20.x repository, remove any distro nodejs/npm, then
  # install nodejs (and cargo) from the refreshed index.
  # - `set -o pipefail` makes a failed download abort the step instead of
  #   piping an empty script into bash.
  # - `autoremove -y`: without -y the command can stop at its confirmation
  #   prompt and abort in a non-interactive build.
  # - apt-get is used instead of apt: apt's command-line interface is not
  #   guaranteed to be stable for scripts.
  RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
      set -o pipefail && \
      curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
      apt-get purge -y nodejs npm && \
      apt-get autoremove -y && \
      apt-get update && \
      apt-get install -y nodejs cargo
  # Add dependencies of selenium
  # Chrome: unpack the archive from the deps image into the working directory,
  # move the extracted tree to /opt/chrome and link the binary into
  # /usr/local/bin.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
      unzip /chrome-linux64.zip \
      && mv chrome-linux64 /opt/chrome \
      && ln -s /opt/chrome/chrome /usr/local/bin/
  # chromedriver: extract only the driver binary (-j drops the directory part),
  # install it into /usr/local/bin and remove any /usr/bin/google-chrome.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
      unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver \
      && mv chromedriver /usr/local/bin/ \
      && rm -f /usr/bin/google-chrome
  # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
  # aspose-slides on linux/arm64 is unavailable
  # Install the libssl1.1 package matching the machine architecture from the
  # deps image; any other architecture is left untouched.
  RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
      case "$(uname -m)" in \
          x86_64)  dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
          aarch64) dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
      esac
  # builder stage
  # Resolves the Python dependencies into /ragflow/.venv (Poetry is configured
  # to create the virtualenv inside the project).
  FROM base AS builder
  USER root
  WORKDIR /ragflow
  # NEED_MIRROR is read by the RUN step below. The Dockerfile reference scopes
  # an ARG to the stage that declares it, so it is declared again here; this is
  # harmless if the builder already inherits it from the parent stage.
  ARG NEED_MIRROR=0
  # install dependencies from poetry.lock file
  # Only the manifests are copied first, so this layer is rebuilt only when the
  # dependency set changes.
  COPY pyproject.toml poetry.toml poetry.lock ./
  # LIGHTEN=1 installs the default dependency set; otherwise the "full" group
  # is added. With NEED_MIRROR=1 the mirror URL is exported before installing.
  RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \
      if [ "$NEED_MIRROR" == "1" ]; then \
          export POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/; \
      fi; \
      if [ "$LIGHTEN" == "1" ]; then \
          poetry install --no-root; \
      else \
          poetry install --no-root --with=full; \
      fi
  # Build the web front-end with npm; /root/.npm is a cache mount so downloaded
  # packages are reused across builds.
  # NOTE(review): docs is copied next to web - presumably the front-end build
  # references it; confirm.
  COPY web web
  COPY docs docs
  RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
      cd web \
      && npm install --force \
      && npm run build
  # Derive the version string from git and write it to /ragflow/VERSION.
  # The string is the `git describe` output followed by the edition:
  # "slim" when LIGHTEN=1, "full" otherwise.
  COPY .git /ragflow/.git
  RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
      if [ "$LIGHTEN" == "1" ]; then edition=slim; else edition=full; fi; \
      version_info="$version_info $edition"; \
      echo "RAGFlow version: $version_info"; \
      echo $version_info > /ragflow/VERSION
  # production stage
  FROM base AS production
  USER root
  WORKDIR /ragflow
  # Copy Python environment and packages
  # The virtualenv built in the builder stage is copied to the same path.
  ENV VIRTUAL_ENV=/ragflow/.venv
  COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
  # Put the virtualenv's executables first on PATH and make /ragflow/ the
  # Python import root.
  ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" \
      PYTHONPATH=/ragflow/
  # Application sources, copied one directory per instruction. Destinations are
  # spelled out as absolute paths under the /ragflow working directory.
  COPY web /ragflow/web
  COPY api /ragflow/api
  COPY conf /ragflow/conf
  COPY deepdoc /ragflow/deepdoc
  COPY rag /ragflow/rag
  COPY agent /ragflow/agent
  COPY graphrag /ragflow/graphrag
  COPY pyproject.toml poetry.toml poetry.lock /ragflow/
  # Service configuration template and the container entrypoint script.
  COPY docker/service_conf.yaml.template /ragflow/conf/service_conf.yaml.template
  COPY docker/entrypoint.sh /ragflow/entrypoint.sh
  RUN chmod +x /ragflow/entrypoint.sh
  # Copy compiled web pages
  COPY --from=builder /ragflow/web/dist /ragflow/web/dist
  # Version file generated in the builder stage.
  COPY --from=builder /ragflow/VERSION /ragflow/VERSION
  # Exec-form entrypoint, resolved relative to the /ragflow working directory.
  ENTRYPOINT ["./entrypoint.sh"]