### What problem does this PR solve?

### Type of change

- [x] Refactoring
- [x] Performance Improvement

tags/v0.18.0
| @@ -21,9 +21,7 @@ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co | |||
| if [ "$LIGHTEN" != "1" ]; then \ | |||
| (tar -cf - \ | |||
| /huggingface.co/BAAI/bge-large-zh-v1.5 \ | |||
| /huggingface.co/BAAI/bge-reranker-v2-m3 \ | |||
| /huggingface.co/maidalun1020/bce-embedding-base_v1 \ | |||
| /huggingface.co/maidalun1020/bce-reranker-base_v1 \ | |||
| | tar -xf - --strip-components=2 -C /root/.ragflow) \ | |||
| fi | |||
| @@ -147,16 +147,6 @@ def create(tenant_id): | |||
| else: | |||
| valid_embedding_models = [ | |||
| "BAAI/bge-large-zh-v1.5", | |||
| "BAAI/bge-base-en-v1.5", | |||
| "BAAI/bge-large-en-v1.5", | |||
| "BAAI/bge-small-en-v1.5", | |||
| "BAAI/bge-small-zh-v1.5", | |||
| "jinaai/jina-embeddings-v2-base-en", | |||
| "jinaai/jina-embeddings-v2-small-en", | |||
| "nomic-ai/nomic-embed-text-v1.5", | |||
| "sentence-transformers/all-MiniLM-L6-v2", | |||
| "text-embedding-v2", | |||
| "text-embedding-v3", | |||
| "maidalun1020/bce-embedding-base_v1", | |||
| ] | |||
| embd_model = LLMService.query( | |||
| @@ -415,56 +415,7 @@ | |||
| "logo": "", | |||
| "tags": "TEXT EMBEDDING", | |||
| "status": "1", | |||
| "llm": [ | |||
| { | |||
| "llm_name": "BAAI/bge-small-en-v1.5", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 512, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "BAAI/bge-small-zh-v1.5", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 512, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "BAAI/bge-base-en-v1.5", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 512, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "BAAI/bge-large-en-v1.5", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 512, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "sentence-transformers/all-MiniLM-L6-v2", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 512, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "nomic-ai/nomic-embed-text-v1.5", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 8192, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "jinaai/jina-embeddings-v2-small-en", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 2147483648, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "jinaai/jina-embeddings-v2-base-en", | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 2147483648, | |||
| "model_type": "embedding" | |||
| } | |||
| ] | |||
| "llm": [] | |||
| }, | |||
| { | |||
| "name": "Xinference", | |||
| @@ -484,12 +435,6 @@ | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 512, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "maidalun1020/bce-reranker-base_v1", | |||
| "tags": "RE-RANK, 512", | |||
| "max_tokens": 512, | |||
| "model_type": "rerank" | |||
| } | |||
| ] | |||
| }, | |||
| @@ -679,12 +624,6 @@ | |||
| "tags": "TEXT EMBEDDING,", | |||
| "max_tokens": 1024, | |||
| "model_type": "embedding" | |||
| }, | |||
| { | |||
| "llm_name": "BAAI/bge-reranker-v2-m3", | |||
| "tags": "RE-RANK,2k", | |||
| "max_tokens": 2048, | |||
| "model_type": "rerank" | |||
| } | |||
| ] | |||
| }, | |||
| @@ -82,18 +82,8 @@ The [.env](./.env) file contains important environment variables for Docker. | |||
| - `infiniflow/ragflow:v0.17.2`: The RAGFlow Docker image with embedding models including: | |||
| - Built-in embedding models: | |||
| - `BAAI/bge-large-zh-v1.5` | |||
| - `BAAI/bge-reranker-v2-m3` | |||
| - `maidalun1020/bce-embedding-base_v1` | |||
| - `maidalun1020/bce-reranker-base_v1` | |||
| - Embedding models that will be downloaded once you select them in the RAGFlow UI: | |||
| - `BAAI/bge-base-en-v1.5` | |||
| - `BAAI/bge-large-en-v1.5` | |||
| - `BAAI/bge-small-en-v1.5` | |||
| - `BAAI/bge-small-zh-v1.5` | |||
| - `jinaai/jina-embeddings-v2-base-en` | |||
| - `jinaai/jina-embeddings-v2-small-en` | |||
| - `nomic-ai/nomic-embed-text-v1.5` | |||
| - `sentence-transformers/all-MiniLM-L6-v2` | |||
| > [!TIP] | |||
| > If you cannot download the RAGFlow Docker image, try the following mirrors. | |||
| @@ -101,19 +101,9 @@ The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file con | |||
| - `infiniflow/ragflow:v0.17.2`: The RAGFlow Docker image with embedding models including: | |||
| - Built-in embedding models: | |||
| - `BAAI/bge-large-zh-v1.5` | |||
| - `BAAI/bge-reranker-v2-m3` | |||
| - `maidalun1020/bce-embedding-base_v1` | |||
| - `maidalun1020/bce-reranker-base_v1` | |||
| - Embedding models that will be downloaded once you select them in the RAGFlow UI: | |||
| - `BAAI/bge-base-en-v1.5` | |||
| - `BAAI/bge-large-en-v1.5` | |||
| - `BAAI/bge-small-en-v1.5` | |||
| - `BAAI/bge-small-zh-v1.5` | |||
| - `jinaai/jina-embeddings-v2-base-en` | |||
| - `jinaai/jina-embeddings-v2-small-en` | |||
| - `nomic-ai/nomic-embed-text-v1.5` | |||
| - `sentence-transformers/all-MiniLM-L6-v2` | |||
| :::tip NOTE | |||
| If you cannot download the RAGFlow Docker image, try the following mirrors. | |||
| @@ -29,9 +29,7 @@ repos = [ | |||
| "InfiniFlow/deepdoc", | |||
| "InfiniFlow/huqie", | |||
| "BAAI/bge-large-zh-v1.5", | |||
| "BAAI/bge-reranker-v2-m3", | |||
| "maidalun1020/bce-embedding-base_v1", | |||
| "maidalun1020/bce-reranker-base_v1", | |||
| ] | |||
| def download_model(repo_id): | |||
| @@ -228,6 +228,7 @@ class TestChunksRetrieval: | |||
| else: | |||
| assert expected_message in res["message"] | |||
| @pytest.mark.skip | |||
| @pytest.mark.parametrize( | |||
| "payload, expected_code, expected_message", | |||
| [ | |||
| @@ -145,28 +145,6 @@ class TestAdvancedConfigurations: | |||
| "name, embedding_model, expected_code", | |||
| [ | |||
| ("BAAI/bge-large-zh-v1.5", "BAAI/bge-large-zh-v1.5", 0), | |||
| ("BAAI/bge-base-en-v1.5", "BAAI/bge-base-en-v1.5", 0), | |||
| ("BAAI/bge-large-en-v1.5", "BAAI/bge-large-en-v1.5", 0), | |||
| ("BAAI/bge-small-en-v1.5", "BAAI/bge-small-en-v1.5", 0), | |||
| ("BAAI/bge-small-zh-v1.5", "BAAI/bge-small-zh-v1.5", 0), | |||
| ( | |||
| "jinaai/jina-embeddings-v2-base-en", | |||
| "jinaai/jina-embeddings-v2-base-en", | |||
| 0, | |||
| ), | |||
| ( | |||
| "jinaai/jina-embeddings-v2-small-en", | |||
| "jinaai/jina-embeddings-v2-small-en", | |||
| 0, | |||
| ), | |||
| ("nomic-ai/nomic-embed-text-v1.5", "nomic-ai/nomic-embed-text-v1.5", 0), | |||
| ( | |||
| "sentence-transformers/all-MiniLM-L6-v2", | |||
| "sentence-transformers/all-MiniLM-L6-v2", | |||
| 0, | |||
| ), | |||
| ("text-embedding-v2", "text-embedding-v2", 0), | |||
| ("text-embedding-v3", "text-embedding-v3", 0), | |||
| ( | |||
| "maidalun1020/bce-embedding-base_v1", | |||
| "maidalun1020/bce-embedding-base_v1", | |||
| @@ -86,16 +86,6 @@ class TestDatasetUpdate: | |||
| "embedding_model, expected_code, expected_message", | |||
| [ | |||
| ("BAAI/bge-large-zh-v1.5", 0, ""), | |||
| ("BAAI/bge-base-en-v1.5", 0, ""), | |||
| ("BAAI/bge-large-en-v1.5", 0, ""), | |||
| ("BAAI/bge-small-en-v1.5", 0, ""), | |||
| ("BAAI/bge-small-zh-v1.5", 0, ""), | |||
| ("jinaai/jina-embeddings-v2-base-en", 0, ""), | |||
| ("jinaai/jina-embeddings-v2-small-en", 0, ""), | |||
| ("nomic-ai/nomic-embed-text-v1.5", 0, ""), | |||
| ("sentence-transformers/all-MiniLM-L6-v2", 0, ""), | |||
| ("text-embedding-v2", 0, ""), | |||
| ("text-embedding-v3", 0, ""), | |||
| ("maidalun1020/bce-embedding-base_v1", 0, ""), | |||
| ( | |||
| "other_embedding_model", | |||