|
|
|
@@ -32,68 +32,70 @@ class MockXinferenceClass(object): |
|
|
|
response = Response() |
|
|
|
if 'v1/models/' in url: |
|
|
|
# get model uid |
|
|
|
model_uid = url.split('/')[-1] |
|
|
|
model_uid = url.split('/')[-1] or '' |
|
|
|
if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', model_uid) and \ |
|
|
|
model_uid not in ['generate', 'chat', 'embedding', 'rerank']: |
|
|
|
response.status_code = 404 |
|
|
|
response._content = b'{}' |
|
|
|
return response |
|
|
|
|
|
|
|
# check if url is valid |
|
|
|
if not re.match(r'^(https?):\/\/[^\s\/$.?#].[^\s]*$', url): |
|
|
|
response.status_code = 404 |
|
|
|
response._content = b'{}' |
|
|
|
return response |
|
|
|
|
|
|
|
if model_uid in ['generate', 'chat']: |
|
|
|
response.status_code = 200 |
|
|
|
response._content = b'''{ |
|
|
|
"model_type": "LLM", |
|
|
|
"address": "127.0.0.1:43877", |
|
|
|
"accelerators": [ |
|
|
|
"0", |
|
|
|
"1" |
|
|
|
], |
|
|
|
"model_name": "chatglm3-6b", |
|
|
|
"model_lang": [ |
|
|
|
"en" |
|
|
|
], |
|
|
|
"model_ability": [ |
|
|
|
"generate", |
|
|
|
"chat" |
|
|
|
], |
|
|
|
"model_description": "latest chatglm3", |
|
|
|
"model_format": "pytorch", |
|
|
|
"model_size_in_billions": 7, |
|
|
|
"quantization": "none", |
|
|
|
"model_hub": "huggingface", |
|
|
|
"revision": null, |
|
|
|
"context_length": 2048, |
|
|
|
"replica": 1 |
|
|
|
}''' |
|
|
|
"model_type": "LLM", |
|
|
|
"address": "127.0.0.1:43877", |
|
|
|
"accelerators": [ |
|
|
|
"0", |
|
|
|
"1" |
|
|
|
], |
|
|
|
"model_name": "chatglm3-6b", |
|
|
|
"model_lang": [ |
|
|
|
"en" |
|
|
|
], |
|
|
|
"model_ability": [ |
|
|
|
"generate", |
|
|
|
"chat" |
|
|
|
], |
|
|
|
"model_description": "latest chatglm3", |
|
|
|
"model_format": "pytorch", |
|
|
|
"model_size_in_billions": 7, |
|
|
|
"quantization": "none", |
|
|
|
"model_hub": "huggingface", |
|
|
|
"revision": null, |
|
|
|
"context_length": 2048, |
|
|
|
"replica": 1 |
|
|
|
}''' |
|
|
|
return response |
|
|
|
|
|
|
|
elif model_uid == 'embedding': |
|
|
|
response.status_code = 200 |
|
|
|
response._content = b'''{ |
|
|
|
"model_type": "embedding", |
|
|
|
"address": "127.0.0.1:43877", |
|
|
|
"accelerators": [ |
|
|
|
"0", |
|
|
|
"1" |
|
|
|
], |
|
|
|
"model_name": "bge", |
|
|
|
"model_lang": [ |
|
|
|
"en" |
|
|
|
], |
|
|
|
"revision": null, |
|
|
|
"max_tokens": 512 |
|
|
|
}''' |
|
|
|
"model_type": "embedding", |
|
|
|
"address": "127.0.0.1:43877", |
|
|
|
"accelerators": [ |
|
|
|
"0", |
|
|
|
"1" |
|
|
|
], |
|
|
|
"model_name": "bge", |
|
|
|
"model_lang": [ |
|
|
|
"en" |
|
|
|
], |
|
|
|
"revision": null, |
|
|
|
"max_tokens": 512 |
|
|
|
}''' |
|
|
|
return response |
|
|
|
|
|
|
|
elif 'v1/cluster/auth' in url: |
|
|
|
response.status_code = 200 |
|
|
|
response._content = b'''{ |
|
|
|
"auth": true |
|
|
|
}''' |
|
|
|
"auth": true |
|
|
|
}''' |
|
|
|
return response |
|
|
|
|
|
|
|
def _check_cluster_authenticated(self): |