|
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
|
|
# See the License for the specific language governing permissions and |
|
|
# limitations under the License. |
|
|
# limitations under the License. |
|
|
# |
|
|
# |
|
|
|
|
|
from datetime import datetime |
|
|
|
|
|
|
|
|
|
|
|
from peewee import fn |
|
|
|
|
|
|
|
|
from api.db import StatusEnum, TenantPermission |
|
|
from api.db import StatusEnum, TenantPermission |
|
|
from api.db.db_models import Knowledgebase, DB, Tenant, User, UserTenant,Document |
|
|
|
|
|
|
|
|
from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant |
|
|
from api.db.services.common_service import CommonService |
|
|
from api.db.services.common_service import CommonService |
|
|
from peewee import fn |
|
|
|
|
|
|
|
|
from api.utils import current_timestamp, datetime_format |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class KnowledgebaseService(CommonService): |
|
|
class KnowledgebaseService(CommonService): |
|
|
"""Service class for managing knowledge base operations. |
|
|
"""Service class for managing knowledge base operations. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
This class extends CommonService to provide specialized functionality for knowledge base |
|
|
This class extends CommonService to provide specialized functionality for knowledge base |
|
|
management, including document parsing status tracking, access control, and configuration |
|
|
management, including document parsing status tracking, access control, and configuration |
|
|
management. It handles operations such as listing, creating, updating, and deleting |
|
|
management. It handles operations such as listing, creating, updating, and deleting |
|
|
knowledge bases, as well as managing their associated documents and permissions. |
|
|
knowledge bases, as well as managing their associated documents and permissions. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
The class implements a comprehensive set of methods for: |
|
|
The class implements a comprehensive set of methods for: |
|
|
- Document parsing status verification |
|
|
- Document parsing status verification |
|
|
- Knowledge base access control |
|
|
- Knowledge base access control |
|
|
- Parser configuration management |
|
|
- Parser configuration management |
|
|
- Tenant-based knowledge base organization |
|
|
- Tenant-based knowledge base organization |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Attributes: |
|
|
Attributes: |
|
|
model: The Knowledgebase model class for database operations. |
|
|
model: The Knowledgebase model class for database operations. |
|
|
""" |
|
|
""" |
|
|
|
|
|
|
|
|
@DB.connection_context() |
|
|
@DB.connection_context() |
|
|
def accessible4deletion(cls, kb_id, user_id): |
|
|
def accessible4deletion(cls, kb_id, user_id): |
|
|
"""Check if a knowledge base can be deleted by a specific user. |
|
|
"""Check if a knowledge base can be deleted by a specific user. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
This method verifies whether a user has permission to delete a knowledge base |
|
|
This method verifies whether a user has permission to delete a knowledge base |
|
|
by checking if they are the creator of that knowledge base. |
|
|
by checking if they are the creator of that knowledge base. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
Args: |
|
|
kb_id (str): The unique identifier of the knowledge base to check. |
|
|
kb_id (str): The unique identifier of the knowledge base to check. |
|
|
user_id (str): The unique identifier of the user attempting the deletion. |
|
|
user_id (str): The unique identifier of the user attempting the deletion. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
Returns: |
|
|
bool: True if the user has permission to delete the knowledge base, |
|
|
bool: True if the user has permission to delete the knowledge base, |
|
|
False if the user doesn't have permission or the knowledge base doesn't exist. |
|
|
False if the user doesn't have permission or the knowledge base doesn't exist. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Example: |
|
|
Example: |
|
|
>>> KnowledgebaseService.accessible4deletion("kb123", "user456") |
|
|
>>> KnowledgebaseService.accessible4deletion("kb123", "user456") |
|
|
True |
|
|
True |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Note: |
|
|
Note: |
|
|
- This method only checks creator permissions |
|
|
- This method only checks creator permissions |
|
|
- A return value of False can mean either: |
|
|
- A return value of False can mean either: |
|
|
|
|
|
|
|
|
@DB.connection_context()
def is_parsed_done(cls, kb_id):
    """Check whether the documents of a knowledge base are ready for chat.

    Args:
        kb_id: Knowledge base ID.

    Returns:
        tuple: ``(True, None)`` when no blocking document is found, otherwise
        ``(False, error_message)``. ``(False, "Knowledge base not found")``
        is returned when no knowledge base matches ``kb_id``.
    """
    # Imported inside the function, as in the original implementation.
    from api.db import TaskStatus
    from api.db.services.document_service import DocumentService

    # Resolve the knowledge base first; its name is used in the error message.
    matches = cls.query(id=kb_id)
    if not matches:
        return False, "Knowledge base not found"
    kb = matches[0]

    # Only the first page of up to 1000 documents is inspected.
    documents, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "")

    # NOTE(review): the original carried a comment about rejecting documents
    # that are still being parsed, but no corresponding check is visible
    # here -- confirm whether one is expected.

    # A document that was never started and has no chunks blocks chat creation.
    doc = next(
        (
            candidate
            for candidate in documents
            if candidate['run'] == TaskStatus.UNSTART.value and candidate['chunk_num'] == 0
        ),
        None,
    )
    if doc is None:
        return True, None
    return False, f"Document '{doc['name']}' in dataset '{kb.name}' has not been parsed yet. Please parse all documents before starting a chat."
|
|
|
|
|
|
|
|
@classmethod
@DB.connection_context()
def list_documents_by_ids(cls, kb_ids):
    """Return the IDs of the documents attached to the given knowledge bases.

    Args:
        kb_ids: List of knowledge base IDs.

    Returns:
        list: The ID of every document whose ``kb_id`` matches one of the
        requested knowledge bases.
    """
    kb_model = cls.model
    # Join knowledge bases to their documents and select only the document ID.
    query = (
        kb_model.select(Document.id.alias("document_id"))
        .join(Document, on=(kb_model.id == Document.kb_id))
        .where(kb_model.id.in_(kb_ids))
    )
    return [row["document_id"] for row in query.dicts()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cls.model.parser_config, |
|
|
cls.model.parser_config, |
|
|
cls.model.pagerank] |
|
|
cls.model.pagerank] |
|
|
kbs = cls.model.select(*fields).join(Tenant, on=( |
|
|
kbs = cls.model.select(*fields).join(Tenant, on=( |
|
|
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( |
|
|
|
|
|
|
|
|
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( |
|
|
(cls.model.id == kb_id), |
|
|
(cls.model.id == kb_id), |
|
|
(cls.model.status == StatusEnum.VALID.value) |
|
|
(cls.model.status == StatusEnum.VALID.value) |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
kbs = kbs.where( |
|
|
kbs = kbs.where( |
|
|
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == |
|
|
((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == |
|
|
TenantPermission.TEAM.value)) | ( |
|
|
TenantPermission.TEAM.value)) | ( |
|
|
cls.model.tenant_id == user_id)) |
|
|
|
|
|
|
|
|
cls.model.tenant_id == user_id)) |
|
|
& (cls.model.status == StatusEnum.VALID.value) |
|
|
& (cls.model.status == StatusEnum.VALID.value) |
|
|
) |
|
|
) |
|
|
if desc: |
|
|
if desc: |
|
|
|
|
|
|
|
|
# Boolean indicating accessibility |
|
|
# Boolean indicating accessibility |
|
|
docs = cls.model.select( |
|
|
docs = cls.model.select( |
|
|
cls.model.id).join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id) |
|
|
cls.model.id).join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id) |
|
|
).where(cls.model.id == kb_id, UserTenant.user_id == user_id).paginate(0, 1) |
|
|
|
|
|
|
|
|
).where(cls.model.id == kb_id, UserTenant.user_id == user_id).paginate(0, 1) |
|
|
docs = docs.dicts() |
|
|
docs = docs.dicts() |
|
|
if not docs: |
|
|
if not docs: |
|
|
return False |
|
|
return False |
|
|
|
|
|
|
|
|
# Returns: |
|
|
# Returns: |
|
|
# List containing knowledge base information |
|
|
# List containing knowledge base information |
|
|
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id) |
|
|
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id) |
|
|
).where(cls.model.id == kb_id, UserTenant.user_id == user_id).paginate(0, 1) |
|
|
|
|
|
|
|
|
).where(cls.model.id == kb_id, UserTenant.user_id == user_id).paginate(0, 1) |
|
|
kbs = kbs.dicts() |
|
|
kbs = kbs.dicts() |
|
|
return list(kbs) |
|
|
return list(kbs) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Returns: |
|
|
# Returns: |
|
|
# List containing knowledge base information |
|
|
# List containing knowledge base information |
|
|
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id) |
|
|
kbs = cls.model.select().join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id) |
|
|
).where(cls.model.name == kb_name, UserTenant.user_id == user_id).paginate(0, 1) |
|
|
|
|
|
|
|
|
).where(cls.model.name == kb_name, UserTenant.user_id == user_id).paginate(0, 1) |
|
|
kbs = kbs.dicts() |
|
|
kbs = kbs.dicts() |
|
|
return list(kbs) |
|
|
return list(kbs) |
|
|
|
|
|
|
|
|
|
|
|
@classmethod
@DB.connection_context()
def atomic_increase_doc_num_by_id(cls, kb_id):
    """Add one to the ``doc_num`` of a knowledge base and refresh its timestamps.

    The new count is passed to the update as the expression
    ``cls.model.doc_num + 1`` rather than as a value read in Python first.

    Args:
        kb_id: ID of the knowledge base row to update.

    Returns:
        The result of executing the update query (presumably the number of
        rows modified -- confirm against the peewee version in use).
    """
    kb_model = cls.model
    changes = {
        "update_time": current_timestamp(),
        "update_date": datetime_format(datetime.now()),
        "doc_num": kb_model.doc_num + 1,
    }
    return kb_model.update(changes).where(kb_model.id == kb_id).execute()