Browse Source

Refactor: use DatasourceType.XX.value instead of hardcoded (#25015)

Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
tags/1.8.1
Yongtao Huang 2 months ago
parent
commit
bc9efa7ea8
No account linked to committer's email address

+ 0
- 1
api/controllers/console/app/workflow.py View File

) )


app_model.workflow_id = workflow.id app_model.workflow_id = workflow.id
db.session.commit()


workflow_created_at = TimestampField().format(workflow.created_at) workflow_created_at = TimestampField().format(workflow.created_at)



+ 2
- 1
api/controllers/console/datasets/data_source.py View File

from controllers.console import api from controllers.console import api
from controllers.console.wraps import account_initialization_required, setup_required from controllers.console.wraps import account_initialization_required, setup_required
from core.indexing_runner import IndexingRunner from core.indexing_runner import IndexingRunner
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.extractor.notion_extractor import NotionExtractor from core.rag.extractor.notion_extractor import NotionExtractor
from extensions.ext_database import db from extensions.ext_database import db
workspace_id = notion_info["workspace_id"] workspace_id = notion_info["workspace_id"]
for page in notion_info["pages"]: for page in notion_info["pages"]:
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="notion_import",
datasource_type=DatasourceType.NOTION.value,
notion_info={ notion_info={
"notion_workspace_id": workspace_id, "notion_workspace_id": workspace_id,
"notion_obj_id": page["page_id"], "notion_obj_id": page["page_id"],

+ 6
- 3
api/controllers/console/datasets/datasets.py View File

from core.plugin.entities.plugin import ModelProviderID from core.plugin.entities.plugin import ModelProviderID
from core.provider_manager import ProviderManager from core.provider_manager import ProviderManager
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db from extensions.ext_database import db
if file_details: if file_details:
for file_detail in file_details: for file_detail in file_details:
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="upload_file", upload_file=file_detail, document_model=args["doc_form"]
datasource_type=DatasourceType.FILE.value,
upload_file=file_detail,
document_model=args["doc_form"],
) )
extract_settings.append(extract_setting) extract_settings.append(extract_setting)
elif args["info_list"]["data_source_type"] == "notion_import": elif args["info_list"]["data_source_type"] == "notion_import":
workspace_id = notion_info["workspace_id"] workspace_id = notion_info["workspace_id"]
for page in notion_info["pages"]: for page in notion_info["pages"]:
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="notion_import",
datasource_type=DatasourceType.NOTION.value,
notion_info={ notion_info={
"notion_workspace_id": workspace_id, "notion_workspace_id": workspace_id,
"notion_obj_id": page["page_id"], "notion_obj_id": page["page_id"],
website_info_list = args["info_list"]["website_info_list"] website_info_list = args["info_list"]["website_info_list"]
for url in website_info_list["urls"]: for url in website_info_list["urls"]:
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="website_crawl",
datasource_type=DatasourceType.WEBSITE.value,
website_info={ website_info={
"provider": website_info_list["provider"], "provider": website_info_list["provider"],
"job_id": website_info_list["job_id"], "job_id": website_info_list["job_id"],

+ 5
- 4
api/controllers/console/datasets/datasets_document.py View File

from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.model_runtime.errors.invoke import InvokeAuthorizationError
from core.plugin.impl.exc import PluginDaemonClientSideError from core.plugin.impl.exc import PluginDaemonClientSideError
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from extensions.ext_database import db from extensions.ext_database import db
from fields.document_fields import ( from fields.document_fields import (
raise NotFound("File not found.") raise NotFound("File not found.")


extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="upload_file", upload_file=file, document_model=document.doc_form
datasource_type=DatasourceType.FILE.value, upload_file=file, document_model=document.doc_form
) )


indexing_runner = IndexingRunner() indexing_runner = IndexingRunner()
raise NotFound("File not found.") raise NotFound("File not found.")


extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="upload_file", upload_file=file_detail, document_model=document.doc_form
datasource_type=DatasourceType.FILE.value, upload_file=file_detail, document_model=document.doc_form
) )
extract_settings.append(extract_setting) extract_settings.append(extract_setting)


elif document.data_source_type == "notion_import": elif document.data_source_type == "notion_import":
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="notion_import",
datasource_type=DatasourceType.NOTION.value,
notion_info={ notion_info={
"notion_workspace_id": data_source_info["notion_workspace_id"], "notion_workspace_id": data_source_info["notion_workspace_id"],
"notion_obj_id": data_source_info["notion_page_id"], "notion_obj_id": data_source_info["notion_page_id"],
extract_settings.append(extract_setting) extract_settings.append(extract_setting)
elif document.data_source_type == "website_crawl": elif document.data_source_type == "website_crawl":
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="website_crawl",
datasource_type=DatasourceType.WEBSITE.value,
website_info={ website_info={
"provider": data_source_info["provider"], "provider": data_source_info["provider"],
"job_id": data_source_info["job_id"], "job_id": data_source_info["job_id"],

+ 6
- 3
api/core/indexing_runner.py View File

from core.rag.cleaner.clean_processor import CleanProcessor from core.rag.cleaner.clean_processor import CleanProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.docstore.dataset_docstore import DatasetDocumentStore from core.rag.docstore.dataset_docstore import DatasetDocumentStore
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.extractor.entity.extract_setting import ExtractSetting
from core.rag.index_processor.constant.index_type import IndexType from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.index_processor.index_processor_base import BaseIndexProcessor


if file_detail: if file_detail:
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="upload_file", upload_file=file_detail, document_model=dataset_document.doc_form
datasource_type=DatasourceType.FILE.value,
upload_file=file_detail,
document_model=dataset_document.doc_form,
) )
text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"]) text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"])
elif dataset_document.data_source_type == "notion_import": elif dataset_document.data_source_type == "notion_import":
): ):
raise ValueError("no notion import info found") raise ValueError("no notion import info found")
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="notion_import",
datasource_type=DatasourceType.NOTION.value,
notion_info={ notion_info={
"notion_workspace_id": data_source_info["notion_workspace_id"], "notion_workspace_id": data_source_info["notion_workspace_id"],
"notion_obj_id": data_source_info["notion_page_id"], "notion_obj_id": data_source_info["notion_page_id"],
): ):
raise ValueError("no website import info found") raise ValueError("no website import info found")
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="website_crawl",
datasource_type=DatasourceType.WEBSITE.value,
website_info={ website_info={
"provider": data_source_info["provider"], "provider": data_source_info["provider"],
"job_id": data_source_info["job_id"], "job_id": data_source_info["job_id"],

+ 2
- 2
api/core/rag/extractor/extract_processor.py View File

cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False
) -> Union[list[Document], str]: ) -> Union[list[Document], str]:
extract_setting = ExtractSetting( extract_setting = ExtractSetting(
datasource_type="upload_file", upload_file=upload_file, document_model="text_model"
datasource_type=DatasourceType.FILE.value, upload_file=upload_file, document_model="text_model"
) )
if return_text: if return_text:
delimiter = "\n" delimiter = "\n"
# https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521 # https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}" file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
Path(file_path).write_bytes(response.content) Path(file_path).write_bytes(response.content)
extract_setting = ExtractSetting(datasource_type="upload_file", document_model="text_model")
extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE.value, document_model="text_model")
if return_text: if return_text:
delimiter = "\n" delimiter = "\n"
return delimiter.join( return delimiter.join(

Loading…
Cancel
Save