Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

Dockerfile 6.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. # base stage
  2. FROM ubuntu:22.04 AS base
  3. USER root
  4. SHELL ["/bin/bash", "-c"]
  5. ENV LIGHTEN=0
  6. WORKDIR /ragflow
  7. RUN rm -f /etc/apt/apt.conf.d/docker-clean \
  8. && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
  9. RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
  10. apt update && apt-get --no-install-recommends install -y ca-certificates
  11. # Setup apt mirror site
  12. RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
  13. RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
  14. apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
  15. libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \
  16. && rm -rf /var/lib/apt/lists/*
  17. RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
  18. && pipx install poetry \
  19. && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
  20. # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
  21. # aspose-slides on linux/arm64 is unavailable
  22. RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
  23. --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
  24. if [ "$(uname -m)" = "x86_64" ]; then \
  25. dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
  26. elif [ "$(uname -m)" = "aarch64" ]; then \
  27. dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
  28. fi
  29. ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
  30. ENV PATH=/root/.local/bin:$PATH
  31. # Configure Poetry
  32. ENV POETRY_NO_INTERACTION=1
  33. ENV POETRY_VIRTUALENVS_IN_PROJECT=true
  34. ENV POETRY_VIRTUALENVS_CREATE=true
  35. ENV POETRY_REQUESTS_TIMEOUT=15
  36. ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
  37. # nodejs 12.22 on Ubuntu 22.04 is too old
  38. RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
  39. curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
  40. apt purge -y nodejs npm && \
  41. apt autoremove && \
  42. apt update && \
  43. apt install -y nodejs cargo && \
  44. rm -rf /var/lib/apt/lists/*
  45. # builder stage
  46. FROM base AS builder
  47. USER root
  48. WORKDIR /ragflow
  49. # install dependencies from poetry.lock file
  50. COPY pyproject.toml poetry.toml poetry.lock ./
  51. RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
  52. if [ "$LIGHTEN" == "1" ]; then \
  53. poetry install --no-root; \
  54. else \
  55. poetry install --no-root --with=full; \
  56. fi
  57. COPY web web
  58. COPY docs docs
  59. RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
  60. cd web && npm install --force && npm run build
  61. COPY .git /ragflow/.git
  62. RUN current_commit=$(git rev-parse --short HEAD); \
  63. last_tag=$(git describe --tags --abbrev=0); \
  64. commit_count=$(git rev-list --count "$last_tag..HEAD"); \
  65. version_info=""; \
  66. if [ "$commit_count" -eq 0 ]; then \
  67. version_info=$last_tag; \
  68. else \
  69. version_info="$current_commit($last_tag~$commit_count)"; \
  70. fi; \
  71. if [ "$LIGHTEN" == "1" ]; then \
  72. version_info="$version_info slim"; \
  73. else \
  74. version_info="$version_info full"; \
  75. fi; \
  76. echo "RAGFlow version: $version_info"; \
  77. echo $version_info > /ragflow/VERSION
  78. # production stage
  79. FROM base AS production
  80. USER root
  81. WORKDIR /ragflow
  82. # Copy Python environment and packages
  83. ENV VIRTUAL_ENV=/ragflow/.venv
  84. COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
  85. ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
  86. # Install python packages' dependencies
  87. # cv2 requires libGL.so.1
  88. RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
  89. apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
  90. rm -rf /var/lib/apt/lists/*
  91. # Copy models downloaded via download_deps.py
  92. RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
  93. RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
  94. tar --exclude='.*' -cf - \
  95. /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
  96. /huggingface.co/InfiniFlow/deepdoc \
  97. | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
  98. RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
  99. tar -cf - \
  100. /huggingface.co/BAAI/bge-large-zh-v1.5 \
  101. /huggingface.co/BAAI/bge-reranker-v2-m3 \
  102. /huggingface.co/maidalun1020/bce-embedding-base_v1 \
  103. /huggingface.co/maidalun1020/bce-reranker-base_v1 \
  104. | tar -xf - --strip-components=2 -C /root/.ragflow
  105. # Copy nltk data downloaded via download_deps.py
  106. COPY nltk_data /root/nltk_data
  107. # https://github.com/chrismattmann/tika-python
  108. # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
  109. COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar
  110. COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5
  111. ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"
  112. # Copy cl100k_base
  113. COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
  114. # Add dependencies of selenium
  115. RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
  116. unzip /chrome-linux64.zip && \
  117. mv chrome-linux64 /opt/chrome && \
  118. ln -s /opt/chrome/chrome /usr/local/bin/
  119. RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
  120. unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
  121. mv chromedriver /usr/local/bin/ && \
  122. rm -f /usr/bin/google-chrome
  123. ENV PYTHONPATH=/ragflow/
  124. COPY web web
  125. COPY api api
  126. COPY conf conf
  127. COPY deepdoc deepdoc
  128. COPY rag rag
  129. COPY agent agent
  130. COPY graphrag graphrag
  131. COPY pyproject.toml poetry.toml poetry.lock ./
  132. COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
  133. COPY docker/entrypoint.sh ./entrypoint.sh
  134. RUN chmod +x ./entrypoint.sh
  135. # Copy compiled web pages
  136. COPY --from=builder /ragflow/web/dist /ragflow/web/dist
  137. COPY --from=builder /ragflow/VERSION /ragflow/VERSION
  138. ENTRYPOINT ["./entrypoint.sh"]