### What problem does this PR solve?

#1514

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Tag: v0.9.0
| @@ -335,6 +335,8 @@ def upload(): | |||
| doc["parser_id"] = request.form.get("parser_id").strip() | |||
| if doc["type"] == FileType.VISUAL: | |||
| doc["parser_id"] = ParserType.PICTURE.value | |||
| if doc["type"] == FileType.AURAL: | |||
| doc["parser_id"] = ParserType.AUDIO.value | |||
| if re.search(r"\.(ppt|pptx|pages)$", filename): | |||
| doc["parser_id"] = ParserType.PRESENTATION.value | |||
| @@ -581,4 +583,4 @@ def completion_faq(): | |||
| return response | |||
| except Exception as e: | |||
| return server_error_response(e) | |||
| return server_error_response(e) | |||
| @@ -39,7 +39,7 @@ from api.utils import get_uuid | |||
| from api.utils.api_utils import construct_json_result, construct_error_response | |||
| from api.utils.api_utils import construct_result, validate_request | |||
| from api.utils.file_utils import filename_type, thumbnail | |||
| from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture | |||
| from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio | |||
| from rag.nlp import search | |||
| from rag.utils.es_conn import ELASTICSEARCH | |||
| from rag.utils.minio_conn import MINIO | |||
| @@ -377,6 +377,8 @@ def upload_documents(dataset_id): | |||
| } | |||
| if doc["type"] == FileType.VISUAL: | |||
| doc["parser_id"] = ParserType.PICTURE.value | |||
| if doc["type"] == FileType.AURAL: | |||
| doc["parser_id"] = ParserType.AUDIO.value | |||
| if re.search(r"\.(ppt|pptx|pages)$", filename): | |||
| doc["parser_id"] = ParserType.PRESENTATION.value | |||
| DocumentService.insert(doc) | |||
| @@ -648,6 +650,8 @@ def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id): | |||
| resume.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id)) | |||
| case "table": | |||
| table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id)) | |||
| case "audio": | |||
| audio.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id)) | |||
| case _: | |||
| return False | |||
| @@ -105,6 +105,8 @@ def upload(): | |||
| } | |||
| if doc["type"] == FileType.VISUAL: | |||
| doc["parser_id"] = ParserType.PICTURE.value | |||
| if doc["type"] == FileType.AURAL: | |||
| doc["parser_id"] = ParserType.AUDIO.value | |||
| if re.search(r"\.(ppt|pptx|pages)$", filename): | |||
| doc["parser_id"] = ParserType.PRESENTATION.value | |||
| DocumentService.insert(doc) | |||
| @@ -171,6 +173,8 @@ def web_crawl(): | |||
| } | |||
| if doc["type"] == FileType.VISUAL: | |||
| doc["parser_id"] = ParserType.PICTURE.value | |||
| if doc["type"] == FileType.AURAL: | |||
| doc["parser_id"] = ParserType.AUDIO.value | |||
| if re.search(r"\.(ppt|pptx|pages)$", filename): | |||
| doc["parser_id"] = ParserType.PRESENTATION.value | |||
| DocumentService.insert(doc) | |||
| @@ -84,6 +84,7 @@ class ParserType(StrEnum): | |||
| NAIVE = "naive" | |||
| PICTURE = "picture" | |||
| ONE = "one" | |||
| AUDIO = "audio" | |||
| class FileSource(StrEnum): | |||
| @@ -96,4 +97,4 @@ class CanvasType(StrEnum): | |||
| ChatBot = "chatbot" | |||
| DocBot = "docbot" | |||
| KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase" | |||
| KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase" | |||
| @@ -121,6 +121,8 @@ def init_llm_factory(): | |||
| LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"]) | |||
| LLMService.filter_delete([LLMService.model.fid == "QAnything"]) | |||
| TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"}) | |||
| TenantService.filter_update([1 == 1], { | |||
| "parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio"}) | |||
| ## insert openai two embedding models to the current openai user. | |||
| print("Start to insert 2 OpenAI embedding models...") | |||
| tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()]) | |||
| @@ -143,7 +145,7 @@ def init_llm_factory(): | |||
| """ | |||
| drop table llm; | |||
| drop table llm_factories; | |||
| update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One'; | |||
| update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio'; | |||
| alter table knowledgebase modify avatar longtext; | |||
| alter table user modify avatar longtext; | |||
| alter table dialog modify icon longtext; | |||
| @@ -15,7 +15,7 @@ | |||
| # | |||
| from api.db.services.user_service import TenantService | |||
| from api.settings import database_logger | |||
| from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel | |||
| from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel | |||
| from api.db import LLMType | |||
| from api.db.db_models import DB, UserTenant | |||
| from api.db.db_models import LLMFactories, LLM, TenantLLM | |||
| @@ -120,6 +120,14 @@ class TenantLLMService(CommonService): | |||
| return ChatModel[model_config["llm_factory"]]( | |||
| model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"]) | |||
| if llm_type == LLMType.SPEECH2TEXT: | |||
| if model_config["llm_factory"] not in Seq2txtModel: | |||
| return | |||
| return Seq2txtModel[model_config["llm_factory"]]( | |||
| model_config["api_key"], model_config["llm_name"], lang, | |||
| base_url=model_config["api_base"] | |||
| ) | |||
| @classmethod | |||
| @DB.connection_context() | |||
| def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None): | |||
| @@ -207,6 +215,14 @@ class LLMBundle(object): | |||
| "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id)) | |||
| return txt | |||
| def transcription(self, audio): | |||
| txt, used_tokens = self.mdl.transcription(audio) | |||
| if not TenantLLMService.increase_usage( | |||
| self.tenant_id, self.llm_type, used_tokens): | |||
| database_logger.error( | |||
| "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id)) | |||
| return txt | |||
| def chat(self, system, history, gen_conf): | |||
| txt, used_tokens = self.mdl.chat(system, history, gen_conf) | |||
| if not TenantLLMService.increase_usage( | |||
| @@ -131,7 +131,7 @@ IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"] | |||
| API_KEY = LLM.get("api_key", "") | |||
| PARSERS = LLM.get( | |||
| "parsers", | |||
| "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One") | |||
| "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio") | |||
| # distribution | |||
| DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False) | |||
| @@ -0,0 +1,42 @@ | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # | |||
| import io | |||
| import re | |||
| import numpy as np | |||
| from api.db import LLMType | |||
| from rag.nlp import rag_tokenizer | |||
| from api.db.services.llm_service import LLMBundle | |||
| from rag.nlp import tokenize | |||
def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
    """Transcribe an audio file with a Sequence2Txt LLM and tokenize the result.

    Args:
        filename: Source file name; its extension is stripped when building
            the title tokens.
        binary: Raw audio bytes passed straight to the speech-to-text model.
        tenant_id: Tenant whose SPEECH2TEXT model is resolved via LLMBundle.
        lang: Language hint; "english" (case-insensitive) selects English
            tokenization.
        callback: Optional progress callback ``callback(prog, msg)``.
            May be omitted; progress reports are then discarded.
        **kwargs: Unused; accepted for compatibility with the common parser
            interface (other ``rag.app`` chunkers take the same signature).

    Returns:
        A single-element list containing the tokenized document dict,
        or an empty list if transcription fails.
    """
    # Tolerate a missing callback: the original called it unconditionally
    # (including inside the except handler), so omitting it raised TypeError
    # instead of the intended best-effort behavior.
    if callback is None:
        def callback(*args, **kw):  # no-op progress sink
            pass

    doc = {
        "docnm_kwd": filename,
        "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename))
    }
    doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])

    # is it English
    eng = lang.lower() == "english"  # is_english(sections)
    try:
        callback(0.1, "USE Sequence2Txt LLM to transcription the audio")
        seq2txt_mdl = LLMBundle(tenant_id, LLMType.SPEECH2TEXT, lang=lang)
        ans = seq2txt_mdl.transcription(binary)
        callback(0.8, "Sequence2Txt LLM respond: %s ..." % ans[:32])
        tokenize(doc, ans, eng)
        return [doc]
    except Exception as e:
        # Report the failure through the callback (prog=-1 signals an error
        # to the progress UI) and degrade to an empty chunk list.
        callback(prog=-1, msg=str(e))
        return []
| @@ -42,7 +42,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): | |||
| callback(0.4, "Use CV LLM to describe the picture.") | |||
| cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang) | |||
| ans = cv_mdl.describe(binary) | |||
| callback(0.8, "CV LLM respoond: %s ..." % ans[:32]) | |||
| callback(0.8, "CV LLM respond: %s ..." % ans[:32]) | |||
| txt += "\n" + ans | |||
| tokenize(doc, txt, eng) | |||
| return [doc] | |||
| @@ -45,7 +45,7 @@ from rag.nlp import search, rag_tokenizer | |||
| from io import BytesIO | |||
| import pandas as pd | |||
| from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one | |||
| from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one, audio | |||
| from api.db import LLMType, ParserType | |||
| from api.db.services.document_service import DocumentService | |||
| @@ -68,6 +68,7 @@ FACTORY = { | |||
| ParserType.RESUME.value: resume, | |||
| ParserType.PICTURE.value: picture, | |||
| ParserType.ONE.value: one, | |||
| ParserType.AUDIO.value: audio | |||
| } | |||