소스 검색

refactor: replace gevent threadpool with ProcessPoolExecutor in GPT2Tokenizer (#12316)

Signed-off-by: -LAN- <laipz8200@outlook.com>
tags/0.15.0
-LAN- 10 달 전
부모
커밋
6f5a8a33d9
No account linked to committer's email address
1개의 파일 변경: 4줄 추가, 4줄 삭제
  1. 4
    4
      api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py

+ 4
- 4
api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py 파일 보기

from concurrent.futures import ProcessPoolExecutor
from os.path import abspath, dirname, join from os.path import abspath, dirname, join
from threading import Lock from threading import Lock
from typing import Any, cast from typing import Any, cast


import gevent.threadpool # type: ignore
from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore


_tokenizer: Any = None _tokenizer: Any = None
_lock = Lock() _lock = Lock()
_pool = gevent.threadpool.ThreadPool(1)
_executor = ProcessPoolExecutor(max_workers=1)




class GPT2Tokenizer: class GPT2Tokenizer:


@staticmethod @staticmethod
def get_num_tokens(text: str) -> int: def get_num_tokens(text: str) -> int:
future = _pool.spawn(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
result = future.get(block=True)
future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
result = future.result()
return cast(int, result) return cast(int, result)


@staticmethod @staticmethod

Loading…
취소
저장