浏览代码

Make it lighter (#2577)

### What problem does this PR solve?

#2295

### Type of change

- [x] Refactoring
tags/v0.12.0
Kevin Hu 1年前
父节点
当前提交
dda1367ab2
没有帐户链接到提交者的电子邮件
共有 4 个文件被更改,包括 13 次插入和 12 次删除
  1. Dockerfile.scratch (+6, -5)
  2. pyproject.toml (+1, -1)
  3. rag/llm/embedding_model.py (+4, -4)
  4. rag/llm/rerank_model.py (+2, -2)

+ 6
- 5
Dockerfile.scratch 查看文件

RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources


RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y curl libpython3-dev nginx openmpi-bin openmpi-common libopenmpi-dev libglib2.0-0 libglx-mesa0 \
apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& curl -sSL https://install.python-poetry.org | python3 - && curl -sSL https://install.python-poetry.org | python3 -


WORKDIR /ragflow WORKDIR /ragflow


RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y nodejs npm cargo && \
apt update && apt install -y nodejs npm && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*


COPY web web COPY web web
# install dependencies from poetry.lock file # install dependencies from poetry.lock file
COPY pyproject.toml poetry.toml poetry.lock ./ COPY pyproject.toml poetry.toml poetry.lock ./
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \ RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
/root/.local/bin/poetry install --sync --no-root
/root/.local/bin/poetry lock

RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
/root/.local/bin/poetry install --sync --no-cache --no-root


# production stage # production stage
FROM base AS production FROM base AS production
# Download nltk data # Download nltk data
RUN python3 -m nltk.downloader wordnet punkt punkt_tab RUN python3 -m nltk.downloader wordnet punkt punkt_tab


# Copy models downloaded via download_deps.sh
COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/


ENV PYTHONPATH=/ragflow/ ENV PYTHONPATH=/ragflow/



+ 1
- 1
pyproject.toml 查看文件

tiktoken = "0.6.0" tiktoken = "0.6.0"
torch = "2.3.0" torch = "2.3.0"
transformers = "4.38.1" transformers = "4.38.1"
umap = "0.1.1"
umap_learn = "0.5.6"
vertexai = "1.64.0" vertexai = "1.64.0"
volcengine = "1.0.146" volcengine = "1.0.146"
voyageai = "0.2.3" voyageai = "0.2.3"

+ 4
- 4
rag/llm/embedding_model.py 查看文件

threads: Optional[int] = None, threads: Optional[int] = None,
**kwargs, **kwargs,
): ):
from fastembed import TextEmbedding
if not FastEmbed._model:
if not LIGHTEN and not FastEmbed._model:
from fastembed import TextEmbedding
self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs) self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs)


def encode(self, texts: list, batch_size=32): def encode(self, texts: list, batch_size=32):
_client = None _client = None


def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs): def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs):
from BCEmbedding import EmbeddingModel as qanthing
if not YoudaoEmbed._client:
if not LIGHTEN and not YoudaoEmbed._client:
from BCEmbedding import EmbeddingModel as qanthing
try: try:
print("LOADING BCE...") print("LOADING BCE...")
YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join( YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join(

+ 2
- 2
rag/llm/rerank_model.py 查看文件

_model_lock = threading.Lock() _model_lock = threading.Lock()


def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs): def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
from BCEmbedding import RerankerModel
if not YoudaoRerank._model:
if not LIGHTEN and not YoudaoRerank._model:
from BCEmbedding import RerankerModel
with YoudaoRerank._model_lock: with YoudaoRerank._model_lock:
if not YoudaoRerank._model: if not YoudaoRerank._model:
try: try:

正在加载...
取消
保存