1 yıl önce · 95733796f0
--- a/api/core/model_runtime/model_providers/xinference/xinference_helper.py
+++ b/api/core/model_runtime/model_providers/xinference/xinference_helper.py
@@ -1,10 +1,10 @@
 from os import path
 from threading import Lock
 from time import time

 from requests.adapters import HTTPAdapter
 from requests.exceptions import ConnectionError, MissingSchema, Timeout
 from requests.sessions import Session
 from yarl import URL


 class XinferenceModelExtraParameter:
@@ -55,7 +55,10 @@ class XinferenceHelper:
            get xinference model extra parameter like model_format and model_handle_type
        """

        url = path.join(server_url, 'v1/models', model_uid)
        if not model_uid or not model_uid.strip() or not server_url or not server_url.strip():
            raise RuntimeError('model_uid is empty')

        url = str(URL(server_url) / 'v1' / 'models' / model_uid)

        # this method is surrounded by a lock, and default requests may hang forever, so we just set an Adapter with max_retries=3
        session = Session()
@@ -66,7 +69,6 @@ class XinferenceHelper:
            response = session.get(url, timeout=10)
        except (MissingSchema, ConnectionError, Timeout) as e:
            raise RuntimeError(f'get xinference model extra parameter failed, url: {url}, error: {e}')

        if response.status_code != 200:
            raise RuntimeError(f'get xinference model extra parameter failed, status code: {response.status_code}, response: {response.text}')
        
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -68,4 +68,5 @@ pydub~=0.25.1
 gmpy2~=2.1.5
 numexpr~=2.9.0
 duckduckgo-search==4.4.3
 arxiv==2.1.0
 arxiv==2.1.0
 yarl~=1.9.4
--- a/api/tests/integration_tests/model_runtime/__mock/xinference.py
+++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py
@@ -32,68 +32,70 @@ class MockXinferenceClass(object):
        response = Response()
        if 'v1/models/' in url:
            # get model uid
            model_uid = url.split('/')[-1]
            model_uid = url.split('/')[-1] or ''
            if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \
                model_uid not in ['generate', 'chat', 'embedding', 'rerank']:
                response.status_code = 404
                response._content = b'{}'
                return response

            # check if url is valid
            if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url):
                response.status_code = 404
                response._content = b'{}'
                return response
            
            if model_uid in ['generate', 'chat']:
                response.status_code = 200
                response._content = b'''{
        "model_type": "LLM",
        "address": "127.0.0.1:43877",
        "accelerators": [
            "0",
            "1"
        ],
        "model_name": "chatglm3-6b",
        "model_lang": [
            "en"
        ],
        "model_ability": [
            "generate",
            "chat"
        ],
        "model_description": "latest chatglm3",
        "model_format": "pytorch",
        "model_size_in_billions": 7,
        "quantization": "none",
        "model_hub": "huggingface",
        "revision": null,
        "context_length": 2048,
        "replica": 1
    }'''
                    "model_type": "LLM",
                    "address": "127.0.0.1:43877",
                    "accelerators": [
                        "0",
                        "1"
                    ],
                    "model_name": "chatglm3-6b",
                    "model_lang": [
                        "en"
                    ],
                    "model_ability": [
                        "generate",
                        "chat"
                    ],
                    "model_description": "latest chatglm3",
                    "model_format": "pytorch",
                    "model_size_in_billions": 7,
                    "quantization": "none",
                    "model_hub": "huggingface",
                    "revision": null,
                    "context_length": 2048,
                    "replica": 1
                }'''
                return response
            
            elif model_uid == 'embedding':
                response.status_code = 200
                response._content = b'''{
        "model_type": "embedding",
        "address": "127.0.0.1:43877",
        "accelerators": [
            "0",
            "1"
        ],
        "model_name": "bge",
        "model_lang": [
            "en"
        ],
        "revision": null,
        "max_tokens": 512
 }'''
                    "model_type": "embedding",
                    "address": "127.0.0.1:43877",
                    "accelerators": [
                        "0",
                        "1"
                    ],
                    "model_name": "bge",
                    "model_lang": [
                        "en"
                    ],
                    "revision": null,
                    "max_tokens": 512
                }'''
                return response
            
        elif 'v1/cluster/auth' in url:
            response.status_code = 200
            response._content = b'''{
    "auth": true
 }'''
                "auth": true
            }'''
            return response
        
    def _check_cluster_authenticated(self):