Merge branch 'main' into feat/rag-2

tags/2.0.0-beta.1
twwu 2 months ago
parent commit 1b3860d012
100 changed files with 6169 additions and 211 deletions
  1. +1197 -0  .env.example
  2. +17 -0  .github/ISSUE_TEMPLATE/chore.yaml
  3. +7 -2  .github/workflows/translate-i18n-base-on-english.yml
  4. +7 -0  .gitignore
  5. +8 -2  api/configs/middleware/__init__.py
  6. +65 -0  api/configs/middleware/storage/clickzetta_volume_storage_config.py
  7. +69 -0  api/configs/middleware/vdb/clickzetta_config.py
  8. +2 -0  api/controllers/console/datasets/datasets.py
  9. +1 -1  api/controllers/console/files.py
  10. +2 -3  api/controllers/console/workspace/workspace.py
  11. +4 -5  api/controllers/service_api/app/file.py
  12. +2 -2  api/controllers/service_api/dataset/document.py
  13. +2 -3  api/controllers/web/files.py
  14. +1 -2  api/core/memory/token_buffer_memory.py
  15. +190 -0  api/core/rag/datasource/vdb/clickzetta/README.md
  16. +1 -0  api/core/rag/datasource/vdb/clickzetta/__init__.py
  17. +834 -0  api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py
  18. +4 -0  api/core/rag/datasource/vdb/vector_factory.py
  19. +1 -0  api/core/rag/datasource/vdb/vector_type.py
  20. +4 -4  api/core/tools/builtin_tool/providers/time/tools/localtime_to_timestamp.py
  21. +51 -14  api/core/tools/custom_tool/tool.py
  22. +3 -2  api/core/workflow/nodes/http_request/executor.py
  23. +13 -0  api/extensions/ext_storage.py
  24. +5 -0  api/extensions/storage/clickzetta_volume/__init__.py
  25. +530 -0  api/extensions/storage/clickzetta_volume/clickzetta_volume_storage.py
  26. +516 -0  api/extensions/storage/clickzetta_volume/file_lifecycle.py
  27. +646 -0  api/extensions/storage/clickzetta_volume/volume_permissions.py
  28. +1 -0  api/extensions/storage/storage_type.py
  29. +2 -0  api/pyproject.toml
  30. +168 -0  api/tests/integration_tests/storage/test_clickzetta_volume.py
  31. +25 -0  api/tests/integration_tests/vdb/clickzetta/README.md
  32. +237 -0  api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py
  33. +165 -0  api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py
  34. +928 -0  api/tests/test_containers_integration_tests/services/test_app_service.py
  35. +4 -4  api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py
  36. +52 -6  api/uv.lock
  37. +34 -1  docker/.env.example
  38. +16 -0  docker/docker-compose.yaml
  39. +0 -41  web/app/(commonLayout)/datasets/NewDatasetCard.tsx
  40. +2 -2  web/app/components/app/create-from-dsl-modal/uploader.tsx
  41. +3 -9  web/app/components/apps/app-card.tsx
  42. +3 -3  web/app/components/base/chat/chat/index.tsx
  43. +1 -1  web/app/components/datasets/create/file-uploader/index.tsx
  44. +138 -73  web/app/components/develop/doc.tsx
  45. +1 -1  web/app/components/develop/template/template.ja.mdx
  46. +1 -1  web/app/components/develop/template/template.zh.mdx
  47. +2 -2  web/app/components/develop/template/template_advanced_chat.en.mdx
  48. +2 -2  web/app/components/develop/template/template_advanced_chat.ja.mdx
  49. +2 -2  web/app/components/develop/template/template_advanced_chat.zh.mdx
  50. +2 -2  web/app/components/develop/template/template_chat.en.mdx
  51. +2 -2  web/app/components/develop/template/template_chat.ja.mdx
  52. +2 -2  web/app/components/develop/template/template_chat.zh.mdx
  53. +1 -1  web/app/components/workflow/panel/debug-and-preview/index.tsx
  54. +1 -1  web/app/components/workflow/shortcuts-name.tsx
  55. +0 -2  web/app/styles/markdown.scss
  56. +1 -1  web/i18n/de-DE/app-debug.ts
  57. +4 -0  web/i18n/de-DE/app.ts
  58. +1 -0  web/i18n/de-DE/dataset-creation.ts
  59. +12 -0  web/i18n/de-DE/workflow.ts
  60. +1 -1  web/i18n/en-US/app-debug.ts
  61. +4 -0  web/i18n/en-US/app.ts
  62. +1 -0  web/i18n/en-US/dataset-creation.ts
  63. +1 -1  web/i18n/es-ES/app-debug.ts
  64. +4 -0  web/i18n/es-ES/app.ts
  65. +1 -0  web/i18n/es-ES/dataset-creation.ts
  66. +12 -0  web/i18n/es-ES/workflow.ts
  67. +1 -1  web/i18n/fa-IR/app-debug.ts
  68. +4 -0  web/i18n/fa-IR/app.ts
  69. +1 -0  web/i18n/fa-IR/dataset-creation.ts
  70. +12 -0  web/i18n/fa-IR/workflow.ts
  71. +2 -2  web/i18n/fr-FR/app-debug.ts
  72. +5 -1  web/i18n/fr-FR/app.ts
  73. +1 -0  web/i18n/fr-FR/dataset-creation.ts
  74. +12 -0  web/i18n/fr-FR/workflow.ts
  75. +1 -1  web/i18n/hi-IN/app-debug.ts
  76. +4 -0  web/i18n/hi-IN/app.ts
  77. +1 -0  web/i18n/hi-IN/dataset-creation.ts
  78. +12 -0  web/i18n/hi-IN/workflow.ts
  79. +1 -1  web/i18n/it-IT/app-debug.ts
  80. +4 -0  web/i18n/it-IT/app.ts
  81. +1 -0  web/i18n/it-IT/dataset-creation.ts
  82. +12 -0  web/i18n/it-IT/workflow.ts
  83. +1 -1  web/i18n/ja-JP/app-debug.ts
  84. +4 -0  web/i18n/ja-JP/app.ts
  85. +1 -0  web/i18n/ja-JP/dataset-creation.ts
  86. +12 -0  web/i18n/ja-JP/workflow.ts
  87. +1 -1  web/i18n/ko-KR/app-debug.ts
  88. +4 -0  web/i18n/ko-KR/app.ts
  89. +1 -0  web/i18n/ko-KR/dataset-creation.ts
  90. +12 -0  web/i18n/ko-KR/workflow.ts
  91. +4 -0  web/i18n/pl-PL/app.ts
  92. +1 -0  web/i18n/pl-PL/dataset-creation.ts
  93. +12 -0  web/i18n/pl-PL/workflow.ts
  94. +1 -1  web/i18n/pt-BR/app-debug.ts
  95. +4 -0  web/i18n/pt-BR/app.ts
  96. +1 -0  web/i18n/pt-BR/dataset-creation.ts
  97. +12 -0  web/i18n/pt-BR/workflow.ts
  98. +1 -1  web/i18n/ro-RO/app-debug.ts
  99. +4 -0  web/i18n/ro-RO/app.ts
  100. +0 -0  web/i18n/ro-RO/dataset-creation.ts

+ 1197 - 0  .env.example
File diff suppressed because it is too large


+ 17 - 0  .github/ISSUE_TEMPLATE/chore.yaml

@@ -4,6 +4,23 @@ title: "[Chore/Refactor] "
labels:
- refactor
body:
- type: checkboxes
attributes:
label: Self Checks
description: "To make sure we get to you in time, please check the following :)"
options:
- label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
required: true
- label: This is only for refactoring, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
required: true
- label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
required: true
- label: I confirm that I am using English to submit this report, otherwise it will be closed.
required: true
- label: 【中文用户 & Non English User】请使用英语提交,否则会被关闭 :)
required: true
- label: "Please do not modify this template :) and fill in all the required fields."
required: true
- type: textarea
id: description
attributes:

+ 7 - 2  .github/workflows/translate-i18n-base-on-english.yml

@@ -5,6 +5,10 @@ on:
types: [closed]
branches: [main]

permissions:
contents: write
pull-requests: write

jobs:
check-and-update:
if: github.event.pull_request.merged == true
@@ -16,7 +20,7 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 2 # last 2 commits
persist-credentials: false
token: ${{ secrets.GITHUB_TOKEN }}

- name: Check for file changes in i18n/en-US
id: check_files
@@ -49,7 +53,7 @@ jobs:
if: env.FILES_CHANGED == 'true'
run: pnpm install --frozen-lockfile

- name: Run npm script
- name: Generate i18n translations
if: env.FILES_CHANGED == 'true'
run: pnpm run auto-gen-i18n

@@ -57,6 +61,7 @@ jobs:
if: env.FILES_CHANGED == 'true'
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: Update i18n files based on en-US changes
title: 'chore: translate i18n files'
body: This PR was automatically created to update i18n files based on changes in en-US locale.

+ 7 - 0  .gitignore

@@ -215,3 +215,10 @@ mise.toml
# AI Assistant
.roo/
api/.env.backup

# Clickzetta test credentials
.env.clickzetta
.env.clickzetta.test

# Clickzetta plugin development folder (keep local, ignore for PR)
clickzetta/

+ 8 - 2  api/configs/middleware/__init__.py

@@ -10,6 +10,7 @@ from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.clickzetta_volume_storage_config import ClickZettaVolumeStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig
@@ -20,6 +21,7 @@ from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig
@@ -52,6 +54,7 @@ class StorageConfig(BaseSettings):
"aliyun-oss",
"azure-blob",
"baidu-obs",
"clickzetta-volume",
"google-storage",
"huawei-obs",
"oci-storage",
@@ -61,8 +64,9 @@ class StorageConfig(BaseSettings):
"local",
] = Field(
description="Type of storage to use."
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
"'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', "
"'clickzetta-volume', 'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
"'volcengine-tos', 'supabase'. Default is 'opendal'.",
default="opendal",
)

@@ -303,6 +307,7 @@ class MiddlewareConfig(
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
ClickZettaVolumeStorageConfig,
GoogleCloudStorageConfig,
HuaweiCloudOBSStorageConfig,
OCIStorageConfig,
@@ -315,6 +320,7 @@ class MiddlewareConfig(
VectorStoreConfig,
AnalyticdbConfig,
ChromaConfig,
ClickzettaConfig,
HuaweiCloudConfig,
MilvusConfig,
MyScaleConfig,

+ 65 - 0  api/configs/middleware/storage/clickzetta_volume_storage_config.py

@@ -0,0 +1,65 @@
"""ClickZetta Volume Storage Configuration"""

from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings


class ClickZettaVolumeStorageConfig(BaseSettings):
"""Configuration for ClickZetta Volume storage."""

CLICKZETTA_VOLUME_USERNAME: Optional[str] = Field(
description="Username for ClickZetta Volume authentication",
default=None,
)

CLICKZETTA_VOLUME_PASSWORD: Optional[str] = Field(
description="Password for ClickZetta Volume authentication",
default=None,
)

CLICKZETTA_VOLUME_INSTANCE: Optional[str] = Field(
description="ClickZetta instance identifier",
default=None,
)

CLICKZETTA_VOLUME_SERVICE: str = Field(
description="ClickZetta service endpoint",
default="api.clickzetta.com",
)

CLICKZETTA_VOLUME_WORKSPACE: str = Field(
description="ClickZetta workspace name",
default="quick_start",
)

CLICKZETTA_VOLUME_VCLUSTER: str = Field(
description="ClickZetta virtual cluster name",
default="default_ap",
)

CLICKZETTA_VOLUME_SCHEMA: str = Field(
description="ClickZetta schema name",
default="dify",
)

CLICKZETTA_VOLUME_TYPE: str = Field(
description="ClickZetta volume type (table|user|external)",
default="user",
)

CLICKZETTA_VOLUME_NAME: Optional[str] = Field(
description="ClickZetta volume name for external volumes",
default=None,
)

CLICKZETTA_VOLUME_TABLE_PREFIX: str = Field(
description="Prefix for ClickZetta volume table names",
default="dataset_",
)

CLICKZETTA_VOLUME_DIFY_PREFIX: str = Field(
description="Directory prefix for User Volume to organize Dify files",
default="dify_km",
)

+ 69 - 0  api/configs/middleware/vdb/clickzetta_config.py

@@ -0,0 +1,69 @@
from typing import Optional

from pydantic import BaseModel, Field


class ClickzettaConfig(BaseModel):
"""
Clickzetta Lakehouse vector database configuration
"""

CLICKZETTA_USERNAME: Optional[str] = Field(
description="Username for authenticating with Clickzetta Lakehouse",
default=None,
)

CLICKZETTA_PASSWORD: Optional[str] = Field(
description="Password for authenticating with Clickzetta Lakehouse",
default=None,
)

CLICKZETTA_INSTANCE: Optional[str] = Field(
description="Clickzetta Lakehouse instance ID",
default=None,
)

CLICKZETTA_SERVICE: Optional[str] = Field(
description="Clickzetta API service endpoint (e.g., 'api.clickzetta.com')",
default="api.clickzetta.com",
)

CLICKZETTA_WORKSPACE: Optional[str] = Field(
description="Clickzetta workspace name",
default="default",
)

CLICKZETTA_VCLUSTER: Optional[str] = Field(
description="Clickzetta virtual cluster name",
default="default_ap",
)

CLICKZETTA_SCHEMA: Optional[str] = Field(
description="Database schema name in Clickzetta",
default="public",
)

CLICKZETTA_BATCH_SIZE: Optional[int] = Field(
description="Batch size for bulk insert operations",
default=100,
)

CLICKZETTA_ENABLE_INVERTED_INDEX: Optional[bool] = Field(
description="Enable inverted index for full-text search capabilities",
default=True,
)

CLICKZETTA_ANALYZER_TYPE: Optional[str] = Field(
description="Analyzer type for full-text search: keyword, english, chinese, unicode",
default="chinese",
)

CLICKZETTA_ANALYZER_MODE: Optional[str] = Field(
description="Analyzer mode for tokenization: max_word (fine-grained) or smart (intelligent)",
default="smart",
)

CLICKZETTA_VECTOR_DISTANCE_FUNCTION: Optional[str] = Field(
description="Distance function for vector similarity: l2_distance or cosine_distance",
default="cosine_distance",
)

+ 2 - 0  api/controllers/console/datasets/datasets.py

@@ -694,6 +694,7 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.HUAWEI_CLOUD
| VectorType.TENCENT
| VectorType.MATRIXONE
| VectorType.CLICKZETTA
):
return {
"retrieval_method": [
@@ -742,6 +743,7 @@ class DatasetRetrievalSettingMockApi(Resource):
| VectorType.TENCENT
| VectorType.HUAWEI_CLOUD
| VectorType.MATRIXONE
| VectorType.CLICKZETTA
):
return {
"retrieval_method": [

+ 1 - 1  api/controllers/console/files.py

@@ -49,7 +49,6 @@ class FileApi(Resource):
@marshal_with(file_fields)
@cloud_edition_billing_resource_check("documents")
def post(self):
file = request.files["file"]
source_str = request.form.get("source")
source: Literal["datasets"] | None = "datasets" if source_str == "datasets" else None

@@ -58,6 +57,7 @@ class FileApi(Resource):

if len(request.files) > 1:
raise TooManyFilesError()
file = request.files["file"]

if not file.filename:
raise FilenameNotExistsError

+ 2 - 3  api/controllers/console/workspace/workspace.py

@@ -191,9 +191,6 @@ class WebappLogoWorkspaceApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("workspace_custom")
def post(self):
# get file from request
file = request.files["file"]

# check file
if "file" not in request.files:
raise NoFileUploadedError()
@@ -201,6 +198,8 @@ class WebappLogoWorkspaceApi(Resource):
if len(request.files) > 1:
raise TooManyFilesError()

# get file from request
file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError


+ 4 - 5  api/controllers/service_api/app/file.py

@@ -20,18 +20,17 @@ class FileApi(Resource):
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
@marshal_with(file_fields)
def post(self, app_model: App, end_user: EndUser):
file = request.files["file"]

# check file
if "file" not in request.files:
raise NoFileUploadedError()

if not file.mimetype:
raise UnsupportedFileTypeError()

if len(request.files) > 1:
raise TooManyFilesError()

file = request.files["file"]
if not file.mimetype:
raise UnsupportedFileTypeError()

if not file.filename:
raise FilenameNotExistsError


+ 2 - 2  api/controllers/service_api/dataset/document.py

@@ -234,8 +234,6 @@ class DocumentAddByFileApi(DatasetApiResource):
args["retrieval_model"].get("reranking_model").get("reranking_model_name"),
)

# save file info
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()
@@ -243,6 +241,8 @@ class DocumentAddByFileApi(DatasetApiResource):
if len(request.files) > 1:
raise TooManyFilesError()

# save file info
file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError


+ 2 - 3  api/controllers/web/files.py

@@ -12,18 +12,17 @@ from services.file_service import FileService
class FileApi(WebApiResource):
@marshal_with(file_fields)
def post(self, app_model, end_user):
file = request.files["file"]
source = request.form.get("source")

if "file" not in request.files:
raise NoFileUploadedError()

if len(request.files) > 1:
raise TooManyFilesError()

file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError

source = request.form.get("source")
if source not in ("datasets", None):
source = None


+ 1 - 2  api/core/memory/token_buffer_memory.py

@@ -121,9 +121,8 @@ class TokenBufferMemory:
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)

if curr_message_tokens > max_token_limit:
pruned_memory = []
while curr_message_tokens > max_token_limit and len(prompt_messages) > 1:
pruned_memory.append(prompt_messages.pop(0))
prompt_messages.pop(0)
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)

return prompt_messages

+ 190 - 0  api/core/rag/datasource/vdb/clickzetta/README.md

@@ -0,0 +1,190 @@
# Clickzetta Vector Database Integration

This module provides integration with Clickzetta Lakehouse as a vector database for Dify.

## Features

- **Vector Storage**: Store and retrieve high-dimensional vectors using Clickzetta's native VECTOR type
- **Vector Search**: Efficient similarity search using HNSW algorithm
- **Full-Text Search**: Leverage Clickzetta's inverted index for powerful text search capabilities
- **Hybrid Search**: Combine vector similarity and full-text search for better results
- **Multi-language Support**: Built-in support for Chinese, English, and Unicode text processing
- **Scalable**: Leverage Clickzetta's distributed architecture for large-scale deployments

## Configuration

### Required Environment Variables

All seven configuration parameters are required:

```bash
# Authentication
CLICKZETTA_USERNAME=your_username
CLICKZETTA_PASSWORD=your_password

# Instance configuration
CLICKZETTA_INSTANCE=your_instance_id
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=your_workspace
CLICKZETTA_VCLUSTER=your_vcluster
CLICKZETTA_SCHEMA=your_schema
```

### Optional Configuration

```bash
# Batch processing
CLICKZETTA_BATCH_SIZE=100

# Full-text search configuration
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese # Options: keyword, english, chinese, unicode
CLICKZETTA_ANALYZER_MODE=smart # Options: max_word, smart

# Vector search configuration
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance # Options: l2_distance, cosine_distance
```

## Usage

### 1. Set Clickzetta as the Vector Store

In your Dify configuration, set:

```bash
VECTOR_STORE=clickzetta
```

### 2. Table Structure

Clickzetta will automatically create tables with the following structure:

```sql
CREATE TABLE <collection_name> (
id STRING NOT NULL,
content STRING NOT NULL,
metadata JSON,
vector VECTOR(FLOAT, <dimension>) NOT NULL,
PRIMARY KEY (id)
);

-- Vector index for similarity search
CREATE VECTOR INDEX idx_<collection_name>_vec
ON TABLE <schema>.<collection_name>(vector)
PROPERTIES (
"distance.function" = "cosine_distance",
"scalar.type" = "f32"
);

-- Inverted index for full-text search (if enabled)
CREATE INVERTED INDEX idx_<collection_name>_text
ON <schema>.<collection_name>(content)
PROPERTIES (
"analyzer" = "chinese",
"mode" = "smart"
);
```

## Full-Text Search Capabilities

Clickzetta supports advanced full-text search with multiple analyzers:

### Analyzer Types

1. **keyword**: No tokenization, treats the entire string as a single token
- Best for: Exact matching, IDs, codes

2. **english**: Designed for English text
- Features: Recognizes ASCII letters and numbers, converts to lowercase
- Best for: English content

3. **chinese**: Chinese text tokenizer
- Features: Recognizes Chinese and English characters, removes punctuation
- Best for: Chinese or mixed Chinese-English content

4. **unicode**: Multi-language tokenizer based on Unicode
- Features: Recognizes text boundaries in multiple languages
- Best for: Multi-language content

### Analyzer Modes

- **max_word**: Fine-grained tokenization (more tokens)
- **smart**: Intelligent tokenization (balanced)

### Full-Text Search Functions

- `MATCH_ALL(column, query)`: All terms must be present
- `MATCH_ANY(column, query)`: At least one term must be present
- `MATCH_PHRASE(column, query)`: Exact phrase matching
- `MATCH_PHRASE_PREFIX(column, query)`: Phrase prefix matching
- `MATCH_REGEXP(column, pattern)`: Regular expression matching
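
As a rough illustration (not part of the shipped code), these functions can be exercised directly through the `clickzetta` Python connector that the integration itself uses; the credentials, schema, and table below are placeholders matching the example table structure shown earlier:

```python
import clickzetta  # same connector used by the integration

# Placeholder credentials and identifiers -- replace with your own.
conn = clickzetta.connect(
    username="user", password="pass", instance="my_instance",
    service="api.clickzetta.com", workspace="quick_start",
    vcluster="default_ap", schema="dify",
)

with conn.cursor() as cursor:
    # MATCH_ALL: every term must appear in the indexed content column.
    cursor.execute(
        "SELECT id, content FROM dify.my_collection "
        "WHERE MATCH_ALL(content, 'vector database') LIMIT 10"
    )
    for row in cursor.fetchall():
        print(row)
```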

## Performance Optimization

### Vector Search

1. **Adjust exploration factor** for accuracy vs speed trade-off:
```sql
SET cz.vector.index.search.ef=64;
```

2. **Use appropriate distance functions**:
- `cosine_distance`: Best for normalized embeddings (e.g., from language models)
- `l2_distance`: Best for raw feature vectors

### Full-Text Search

1. **Choose the right analyzer**:
- Use `keyword` for exact matching
- Use language-specific analyzers for better tokenization

2. **Combine with vector search**:
- Pre-filter with full-text search for better performance
- Use hybrid search for improved relevance
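
A sketch of that pre-filter pattern, assuming the same placeholder table and connection as the snippet above and a 768-dimensional embedding column:

```python
# Hybrid search: full-text pre-filter, then rank by vector distance.
query_vec = [0.1] * 768                                   # placeholder embedding
vec_literal = "[" + ",".join(map(str, query_vec)) + "]"

hybrid_sql = f"""
    SELECT id, content,
           COSINE_DISTANCE(vector, CAST('{vec_literal}' AS VECTOR(768))) AS distance
    FROM dify.my_collection
    WHERE MATCH_ALL(content, 'vector database')
    ORDER BY distance
    LIMIT 10
"""

with conn.cursor() as cursor:   # conn from the previous snippet
    cursor.execute(hybrid_sql)
    for row in cursor.fetchall():
        print(row)
```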

## Troubleshooting

### Connection Issues

1. Verify all 7 required configuration parameters are set
2. Check network connectivity to Clickzetta service
3. Ensure the user has proper permissions on the schema

### Search Performance

1. Verify vector index exists:
```sql
SHOW INDEX FROM <schema>.<table_name>;
```

2. Check if vector index is being used:
```sql
EXPLAIN SELECT ... WHERE l2_distance(...) < threshold;
```
Look for `vector_index_search_type` in the execution plan.

### Full-Text Search Not Working

1. Verify inverted index is created
2. Check analyzer configuration matches your content language
3. Use `TOKENIZE()` function to test tokenization:
```sql
SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart'));
```

## Limitations

1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns
2. Full-text search relevance scores are not provided by Clickzetta
3. Inverted index creation may fail for very large existing tables (the integration logs a warning and continues without raising an error)
4. Index naming constraints:
- Index names must be unique within a schema
- Only one vector index can be created per column
- The implementation uses timestamps to ensure unique index names
5. A column can only have one vector index at a time

## References

- [Clickzetta Vector Search Documentation](../../../../../../../yunqidoc/cn_markdown_20250526/vector-search.md)
- [Clickzetta Inverted Index Documentation](../../../../../../../yunqidoc/cn_markdown_20250526/inverted-index.md)
- [Clickzetta SQL Functions](../../../../../../../yunqidoc/cn_markdown_20250526/sql_functions/)

+ 1 - 0  api/core/rag/datasource/vdb/clickzetta/__init__.py

@@ -0,0 +1 @@
# Clickzetta Vector Database Integration for Dify

+ 834 - 0  api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py

@@ -0,0 +1,834 @@
import json
import logging
import queue
import threading
import uuid
from typing import Any, Optional, TYPE_CHECKING

import clickzetta # type: ignore
from pydantic import BaseModel, model_validator

if TYPE_CHECKING:
from clickzetta import Connection

from configs import dify_config
from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from models.dataset import Dataset

logger = logging.getLogger(__name__)


# ClickZetta Lakehouse Vector Database Configuration


class ClickzettaConfig(BaseModel):
"""
Configuration class for Clickzetta connection.
"""

username: str
password: str
instance: str
service: str = "api.clickzetta.com"
workspace: str = "quick_start"
vcluster: str = "default_ap"
schema_name: str = "dify" # Renamed to avoid shadowing BaseModel.schema
# Advanced settings
batch_size: int = 20 # Reduced batch size to avoid large SQL statements
enable_inverted_index: bool = True # Enable inverted index for full-text search
analyzer_type: str = "chinese" # Analyzer type for full-text search: keyword, english, chinese, unicode
analyzer_mode: str = "smart" # Analyzer mode: max_word, smart
vector_distance_function: str = "cosine_distance" # l2_distance or cosine_distance

@model_validator(mode="before")
@classmethod
def validate_config(cls, values: dict) -> dict:
"""
Validate the configuration values.
"""
if not values.get("username"):
raise ValueError("config CLICKZETTA_USERNAME is required")
if not values.get("password"):
raise ValueError("config CLICKZETTA_PASSWORD is required")
if not values.get("instance"):
raise ValueError("config CLICKZETTA_INSTANCE is required")
if not values.get("service"):
raise ValueError("config CLICKZETTA_SERVICE is required")
if not values.get("workspace"):
raise ValueError("config CLICKZETTA_WORKSPACE is required")
if not values.get("vcluster"):
raise ValueError("config CLICKZETTA_VCLUSTER is required")
if not values.get("schema_name"):
raise ValueError("config CLICKZETTA_SCHEMA is required")
return values


class ClickzettaVector(BaseVector):
"""
Clickzetta vector storage implementation.
"""

# Class-level write queue and lock for serializing writes
_write_queue: Optional[queue.Queue] = None
_write_thread: Optional[threading.Thread] = None
_write_lock = threading.Lock()
_shutdown = False

def __init__(self, collection_name: str, config: ClickzettaConfig):
super().__init__(collection_name)
self._config = config
self._table_name = collection_name.replace("-", "_").lower() # Ensure valid table name
self._connection: Optional["Connection"] = None
self._init_connection()
self._init_write_queue()

def _init_connection(self):
"""Initialize Clickzetta connection."""
self._connection = clickzetta.connect(
username=self._config.username,
password=self._config.password,
instance=self._config.instance,
service=self._config.service,
workspace=self._config.workspace,
vcluster=self._config.vcluster,
schema=self._config.schema_name
)

# Set session parameters for better string handling and performance optimization
if self._connection is not None:
with self._connection.cursor() as cursor:
# Use quote mode for string literal escaping to handle quotes better
cursor.execute("SET cz.sql.string.literal.escape.mode = 'quote'")
logger.info("Set string literal escape mode to 'quote' for better quote handling")

# Performance optimization hints for vector operations
self._set_performance_hints(cursor)

def _set_performance_hints(self, cursor):
"""Set ClickZetta performance optimization hints for vector operations."""
try:
# Performance optimization hints for vector operations and query processing
performance_hints = [
# Vector index optimization
"SET cz.storage.parquet.vector.index.read.memory.cache = true",
"SET cz.storage.parquet.vector.index.read.local.cache = false",

# Query optimization
"SET cz.sql.table.scan.push.down.filter = true",
"SET cz.sql.table.scan.enable.ensure.filter = true",
"SET cz.storage.always.prefetch.internal = true",
"SET cz.optimizer.generate.columns.always.valid = true",
"SET cz.sql.index.prewhere.enabled = true",

# Storage optimization
"SET cz.storage.parquet.enable.io.prefetch = false",
"SET cz.optimizer.enable.mv.rewrite = false",
"SET cz.sql.dump.as.lz4 = true",
"SET cz.optimizer.limited.optimization.naive.query = true",
"SET cz.sql.table.scan.enable.push.down.log = false",
"SET cz.storage.use.file.format.local.stats = false",
"SET cz.storage.local.file.object.cache.level = all",

# Job execution optimization
"SET cz.sql.job.fast.mode = true",
"SET cz.storage.parquet.non.contiguous.read = true",
"SET cz.sql.compaction.after.commit = true"
]

for hint in performance_hints:
cursor.execute(hint)

logger.info("Applied %d performance optimization hints for ClickZetta vector operations", len(performance_hints))

except Exception:
# Catch any errors setting performance hints but continue with defaults
logger.exception("Failed to set some performance hints, continuing with default settings")

@classmethod
def _init_write_queue(cls):
"""Initialize the write queue and worker thread."""
with cls._write_lock:
if cls._write_queue is None:
cls._write_queue = queue.Queue()
cls._write_thread = threading.Thread(target=cls._write_worker, daemon=True)
cls._write_thread.start()
logger.info("Started Clickzetta write worker thread")

@classmethod
def _write_worker(cls):
"""Worker thread that processes write tasks sequentially."""
while not cls._shutdown:
try:
# Get task from queue with timeout
if cls._write_queue is not None:
task = cls._write_queue.get(timeout=1)
if task is None: # Shutdown signal
break

# Execute the write task
func, args, kwargs, result_queue = task
try:
result = func(*args, **kwargs)
result_queue.put((True, result))
except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
logger.exception("Write task failed")
result_queue.put((False, e))
finally:
cls._write_queue.task_done()
else:
break
except queue.Empty:
continue
except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
logger.exception("Write worker error")

def _execute_write(self, func, *args, **kwargs):
"""Execute a write operation through the queue."""
if ClickzettaVector._write_queue is None:
raise RuntimeError("Write queue not initialized")

result_queue: queue.Queue[tuple[bool, Any]] = queue.Queue()
ClickzettaVector._write_queue.put((func, args, kwargs, result_queue))

# Wait for result
success, result = result_queue.get()
if not success:
raise result
return result

def get_type(self) -> str:
"""Return the vector database type."""
return "clickzetta"

def _ensure_connection(self) -> "Connection":
"""Ensure connection is available and return it."""
if self._connection is None:
raise RuntimeError("Database connection not initialized")
return self._connection

def _table_exists(self) -> bool:
"""Check if the table exists."""
try:
connection = self._ensure_connection()
with connection.cursor() as cursor:
cursor.execute(f"DESC {self._config.schema_name}.{self._table_name}")
return True
except (RuntimeError, ValueError) as e:
if "table or view not found" in str(e).lower():
return False
else:
# Re-raise if it's a different error
raise

def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
"""Create the collection and add initial documents."""
# Execute table creation through write queue to avoid concurrent conflicts
self._execute_write(self._create_table_and_indexes, embeddings)

# Add initial texts
if texts:
self.add_texts(texts, embeddings, **kwargs)

def _create_table_and_indexes(self, embeddings: list[list[float]]):
"""Create table and indexes (executed in write worker thread)."""
# Check if table already exists to avoid unnecessary index creation
if self._table_exists():
logger.info("Table %s.%s already exists, skipping creation", self._config.schema_name, self._table_name)
return

# Create table with vector and metadata columns
dimension = len(embeddings[0]) if embeddings else 768

create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {self._config.schema_name}.{self._table_name} (
id STRING NOT NULL COMMENT 'Unique document identifier',
{Field.CONTENT_KEY.value} STRING NOT NULL COMMENT 'Document text content for search and retrieval',
{Field.METADATA_KEY.value} JSON COMMENT 'Document metadata including source, type, and other attributes',
{Field.VECTOR.value} VECTOR(FLOAT, {dimension}) NOT NULL COMMENT
'High-dimensional embedding vector for semantic similarity search',
PRIMARY KEY (id)
) COMMENT 'Dify RAG knowledge base vector storage table for document embeddings and content'
"""

connection = self._ensure_connection()
with connection.cursor() as cursor:
cursor.execute(create_table_sql)
logger.info("Created table %s.%s", self._config.schema_name, self._table_name)

# Create vector index
self._create_vector_index(cursor)

# Create inverted index for full-text search if enabled
if self._config.enable_inverted_index:
self._create_inverted_index(cursor)

def _create_vector_index(self, cursor):
"""Create HNSW vector index for similarity search."""
# Use a fixed index name based on table and column name
index_name = f"idx_{self._table_name}_vector"

# First check if an index already exists on this column
try:
cursor.execute(f"SHOW INDEX FROM {self._config.schema_name}.{self._table_name}")
existing_indexes = cursor.fetchall()
for idx in existing_indexes:
# Check if vector index already exists on the embedding column
if Field.VECTOR.value in str(idx).lower():
logger.info("Vector index already exists on column %s", Field.VECTOR.value)
return
except (RuntimeError, ValueError) as e:
logger.warning("Failed to check existing indexes: %s", e)

index_sql = f"""
CREATE VECTOR INDEX IF NOT EXISTS {index_name}
ON TABLE {self._config.schema_name}.{self._table_name}({Field.VECTOR.value})
PROPERTIES (
"distance.function" = "{self._config.vector_distance_function}",
"scalar.type" = "f32",
"m" = "16",
"ef.construction" = "128"
)
"""
try:
cursor.execute(index_sql)
logger.info("Created vector index: %s", index_name)
except (RuntimeError, ValueError) as e:
error_msg = str(e).lower()
if ("already exists" in error_msg or
"already has index" in error_msg or
"with the same type" in error_msg):
logger.info("Vector index already exists: %s", e)
else:
logger.exception("Failed to create vector index")
raise

def _create_inverted_index(self, cursor):
"""Create inverted index for full-text search."""
# Use a fixed index name based on table name to avoid duplicates
index_name = f"idx_{self._table_name}_text"

# Check if an inverted index already exists on this column
try:
cursor.execute(f"SHOW INDEX FROM {self._config.schema_name}.{self._table_name}")
existing_indexes = cursor.fetchall()
for idx in existing_indexes:
idx_str = str(idx).lower()
# More precise check: look for inverted index specifically on the content column
if ("inverted" in idx_str and
Field.CONTENT_KEY.value.lower() in idx_str and
(index_name.lower() in idx_str or f"idx_{self._table_name}_text" in idx_str)):
logger.info("Inverted index already exists on column %s: %s", Field.CONTENT_KEY.value, idx)
return
except (RuntimeError, ValueError) as e:
logger.warning("Failed to check existing indexes: %s", e)

index_sql = f"""
CREATE INVERTED INDEX IF NOT EXISTS {index_name}
ON TABLE {self._config.schema_name}.{self._table_name} ({Field.CONTENT_KEY.value})
PROPERTIES (
"analyzer" = "{self._config.analyzer_type}",
"mode" = "{self._config.analyzer_mode}"
)
"""
try:
cursor.execute(index_sql)
logger.info("Created inverted index: %s", index_name)
except (RuntimeError, ValueError) as e:
error_msg = str(e).lower()
# Handle ClickZetta specific error messages
if (("already exists" in error_msg or
"already has index" in error_msg or
"with the same type" in error_msg or
"cannot create inverted index" in error_msg) and
"already has index" in error_msg):
logger.info("Inverted index already exists on column %s", Field.CONTENT_KEY.value)
# Try to get the existing index name for logging
try:
cursor.execute(f"SHOW INDEX FROM {self._config.schema_name}.{self._table_name}")
existing_indexes = cursor.fetchall()
for idx in existing_indexes:
if "inverted" in str(idx).lower() and Field.CONTENT_KEY.value.lower() in str(idx).lower():
logger.info("Found existing inverted index: %s", idx)
break
except (RuntimeError, ValueError):
pass
else:
logger.warning("Failed to create inverted index: %s", e)
# Continue without inverted index - full-text search will fall back to LIKE


def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
"""Add documents with embeddings to the collection."""
if not documents:
return

batch_size = self._config.batch_size
total_batches = (len(documents) + batch_size - 1) // batch_size

for i in range(0, len(documents), batch_size):
batch_docs = documents[i:i + batch_size]
batch_embeddings = embeddings[i:i + batch_size]

# Execute batch insert through write queue
self._execute_write(self._insert_batch, batch_docs, batch_embeddings, i, batch_size, total_batches)

def _insert_batch(self, batch_docs: list[Document], batch_embeddings: list[list[float]],
batch_index: int, batch_size: int, total_batches: int):
"""Insert a batch of documents using parameterized queries (executed in write worker thread)."""
if not batch_docs or not batch_embeddings:
logger.warning("Empty batch provided, skipping insertion")
return

if len(batch_docs) != len(batch_embeddings):
logger.error("Mismatch between docs (%d) and embeddings (%d)", len(batch_docs), len(batch_embeddings))
return

# Prepare data for parameterized insertion
data_rows = []
vector_dimension = len(batch_embeddings[0]) if batch_embeddings and batch_embeddings[0] else 768

for doc, embedding in zip(batch_docs, batch_embeddings):
# Optimized: minimal checks for common case, fallback for edge cases
metadata = doc.metadata if doc.metadata else {}

if not isinstance(metadata, dict):
metadata = {}

doc_id = self._safe_doc_id(metadata.get("doc_id", str(uuid.uuid4())))

# Fast path for JSON serialization
try:
metadata_json = json.dumps(metadata, ensure_ascii=True)
except (TypeError, ValueError):
logger.warning("JSON serialization failed, using empty dict")
metadata_json = "{}"

content = doc.page_content or ""

# According to ClickZetta docs, vector should be formatted as array string
# for external systems: '[1.0, 2.0, 3.0]'
vector_str = '[' + ','.join(map(str, embedding)) + ']'
data_rows.append([doc_id, content, metadata_json, vector_str])

# Check if we have any valid data to insert
if not data_rows:
logger.warning("No valid documents to insert in batch %d/%d", batch_index // batch_size + 1, total_batches)
return

# Use parameterized INSERT with executemany for better performance and security
# Cast JSON and VECTOR in SQL, pass raw data as parameters
columns = f"id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}, {Field.VECTOR.value}"
insert_sql = (
f"INSERT INTO {self._config.schema_name}.{self._table_name} ({columns}) "
f"VALUES (?, ?, CAST(? AS JSON), CAST(? AS VECTOR({vector_dimension})))"
)

connection = self._ensure_connection()
with connection.cursor() as cursor:
try:
# Set session-level hints for batch insert operations
# Note: executemany doesn't support hints parameter, so we set them as session variables
cursor.execute("SET cz.sql.job.fast.mode = true")
cursor.execute("SET cz.sql.compaction.after.commit = true")
cursor.execute("SET cz.storage.always.prefetch.internal = true")

cursor.executemany(insert_sql, data_rows)
logger.info(
f"Inserted batch {batch_index // batch_size + 1}/{total_batches} "
f"({len(data_rows)} valid docs using parameterized query with VECTOR({vector_dimension}) cast)"
)
except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
logger.exception("Parameterized SQL execution failed for %d documents: %s", len(data_rows), e)
logger.exception("SQL template: %s", insert_sql)
logger.exception("Sample data row: %s", data_rows[0] if data_rows else 'None')
raise

def text_exists(self, id: str) -> bool:
"""Check if a document exists by ID."""
safe_id = self._safe_doc_id(id)
connection = self._ensure_connection()
with connection.cursor() as cursor:
cursor.execute(
f"SELECT COUNT(*) FROM {self._config.schema_name}.{self._table_name} WHERE id = ?",
[safe_id]
)
result = cursor.fetchone()
return result[0] > 0 if result else False

def delete_by_ids(self, ids: list[str]) -> None:
"""Delete documents by IDs."""
if not ids:
return

# Check if table exists before attempting delete
if not self._table_exists():
logger.warning("Table %s.%s does not exist, skipping delete", self._config.schema_name, self._table_name)
return

# Execute delete through write queue
self._execute_write(self._delete_by_ids_impl, ids)

def _delete_by_ids_impl(self, ids: list[str]) -> None:
"""Implementation of delete by IDs (executed in write worker thread)."""
safe_ids = [self._safe_doc_id(id) for id in ids]
# Create properly escaped string literals for SQL
id_list = ",".join(f"'{id}'" for id in safe_ids)
sql = f"DELETE FROM {self._config.schema_name}.{self._table_name} WHERE id IN ({id_list})"

connection = self._ensure_connection()
with connection.cursor() as cursor:
cursor.execute(sql)

def delete_by_metadata_field(self, key: str, value: str) -> None:
"""Delete documents by metadata field."""
# Check if table exists before attempting delete
if not self._table_exists():
logger.warning("Table %s.%s does not exist, skipping delete", self._config.schema_name, self._table_name)
return

# Execute delete through write queue
self._execute_write(self._delete_by_metadata_field_impl, key, value)

def _delete_by_metadata_field_impl(self, key: str, value: str) -> None:
"""Implementation of delete by metadata field (executed in write worker thread)."""
connection = self._ensure_connection()
with connection.cursor() as cursor:
# Using JSON path to filter with parameterized query
# Note: JSON path requires literal key name, cannot be parameterized
# Use json_extract_string function for ClickZetta compatibility
sql = (f"DELETE FROM {self._config.schema_name}.{self._table_name} "
f"WHERE json_extract_string({Field.METADATA_KEY.value}, '$.{key}') = ?")
cursor.execute(sql, [value])

def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
"""Search for documents by vector similarity."""
top_k = kwargs.get("top_k", 10)
score_threshold = kwargs.get("score_threshold", 0.0)
document_ids_filter = kwargs.get("document_ids_filter")

# Handle filter parameter from canvas (workflow)
filter_param = kwargs.get("filter", {})

# Build filter clause
filter_clauses = []
if document_ids_filter:
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)

# No need for dataset_id filter since each dataset has its own table

# Add distance threshold based on distance function
vector_dimension = len(query_vector)
if self._config.vector_distance_function == "cosine_distance":
# For cosine distance, smaller is better (0 = identical, 2 = opposite)
distance_func = "COSINE_DISTANCE"
if score_threshold > 0:
query_vector_str = f"CAST('[{self._format_vector_simple(query_vector)}]' AS VECTOR({vector_dimension}))"
filter_clauses.append(f"{distance_func}({Field.VECTOR.value}, "
f"{query_vector_str}) < {2 - score_threshold}")
else:
# For L2 distance, smaller is better
distance_func = "L2_DISTANCE"
if score_threshold > 0:
query_vector_str = f"CAST('[{self._format_vector_simple(query_vector)}]' AS VECTOR({vector_dimension}))"
filter_clauses.append(f"{distance_func}({Field.VECTOR.value}, "
f"{query_vector_str}) < {score_threshold}")

where_clause = " AND ".join(filter_clauses) if filter_clauses else "1=1"

# Execute vector search query
query_vector_str = f"CAST('[{self._format_vector_simple(query_vector)}]' AS VECTOR({vector_dimension}))"
search_sql = f"""
SELECT id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value},
{distance_func}({Field.VECTOR.value}, {query_vector_str}) AS distance
FROM {self._config.schema_name}.{self._table_name}
WHERE {where_clause}
ORDER BY distance
LIMIT {top_k}
"""

documents = []
connection = self._ensure_connection()
with connection.cursor() as cursor:
# Use hints parameter for vector search optimization
search_hints = {
'hints': {
'sdk.job.timeout': 60, # Increase timeout for vector search
'cz.sql.job.fast.mode': True,
'cz.storage.parquet.vector.index.read.memory.cache': True
}
}
cursor.execute(search_sql, parameters=search_hints)
results = cursor.fetchall()

for row in results:
# Parse metadata from JSON string (may be double-encoded)
try:
if row[2]:
metadata = json.loads(row[2])

# If result is a string, it's double-encoded JSON - parse again
if isinstance(metadata, str):
metadata = json.loads(metadata)

if not isinstance(metadata, dict):
metadata = {}
else:
metadata = {}
except (json.JSONDecodeError, TypeError) as e:
logger.error("JSON parsing failed: %s", e)
# Fallback: extract document_id with regex
import re
doc_id_match = re.search(r'"document_id":\s*"([^"]+)"', str(row[2] or ''))
metadata = {"document_id": doc_id_match.group(1)} if doc_id_match else {}

# Ensure required fields are set
metadata["doc_id"] = row[0] # segment id

# Ensure document_id exists (critical for Dify's format_retrieval_documents)
if "document_id" not in metadata:
metadata["document_id"] = row[0] # fallback to segment id

# Add score based on distance
if self._config.vector_distance_function == "cosine_distance":
metadata["score"] = 1 - (row[3] / 2)
else:
metadata["score"] = 1 / (1 + row[3])

doc = Document(page_content=row[1], metadata=metadata)
documents.append(doc)

return documents

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
"""Search for documents using full-text search with inverted index."""
if not self._config.enable_inverted_index:
logger.warning("Full-text search is not enabled. Enable inverted index in config.")
return []

top_k = kwargs.get("top_k", 10)
document_ids_filter = kwargs.get("document_ids_filter")

# Handle filter parameter from canvas (workflow)
filter_param = kwargs.get("filter", {})

# Build filter clause
filter_clauses = []
if document_ids_filter:
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)

# No need for dataset_id filter since each dataset has its own table

# Use match_all function for full-text search
# match_all requires all terms to be present
# Use simple quote escaping for MATCH_ALL since it needs to be in the WHERE clause
escaped_query = query.replace("'", "''")
filter_clauses.append(f"MATCH_ALL({Field.CONTENT_KEY.value}, '{escaped_query}')")

where_clause = " AND ".join(filter_clauses)

# Execute full-text search query
search_sql = f"""
SELECT id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}
FROM {self._config.schema_name}.{self._table_name}
WHERE {where_clause}
LIMIT {top_k}
"""

documents = []
connection = self._ensure_connection()
with connection.cursor() as cursor:
try:
# Use hints parameter for full-text search optimization
fulltext_hints = {
'hints': {
'sdk.job.timeout': 30, # Timeout for full-text search
'cz.sql.job.fast.mode': True,
'cz.sql.index.prewhere.enabled': True
}
}
cursor.execute(search_sql, parameters=fulltext_hints)
results = cursor.fetchall()

for row in results:
# Parse metadata from JSON string (may be double-encoded)
try:
if row[2]:
metadata = json.loads(row[2])

# If result is a string, it's double-encoded JSON - parse again
if isinstance(metadata, str):
metadata = json.loads(metadata)

if not isinstance(metadata, dict):
metadata = {}
else:
metadata = {}
except (json.JSONDecodeError, TypeError) as e:
logger.error("JSON parsing failed: %s", e)
# Fallback: extract document_id with regex
import re
doc_id_match = re.search(r'"document_id":\s*"([^"]+)"', str(row[2] or ''))
metadata = {"document_id": doc_id_match.group(1)} if doc_id_match else {}

# Ensure required fields are set
metadata["doc_id"] = row[0] # segment id

# Ensure document_id exists (critical for Dify's format_retrieval_documents)
if "document_id" not in metadata:
metadata["document_id"] = row[0] # fallback to segment id

# Add a relevance score for full-text search
metadata["score"] = 1.0 # Clickzetta doesn't provide relevance scores
doc = Document(page_content=row[1], metadata=metadata)
documents.append(doc)
except (RuntimeError, ValueError, TypeError, ConnectionError) as e:
logger.exception("Full-text search failed")
# Fallback to LIKE search if full-text search fails
return self._search_by_like(query, **kwargs)

return documents

def _search_by_like(self, query: str, **kwargs: Any) -> list[Document]:
"""Fallback search using LIKE operator."""
top_k = kwargs.get("top_k", 10)
document_ids_filter = kwargs.get("document_ids_filter")

# Handle filter parameter from canvas (workflow)
filter_param = kwargs.get("filter", {})

# Build filter clause
filter_clauses = []
if document_ids_filter:
safe_doc_ids = [str(id).replace("'", "''") for id in document_ids_filter]
doc_ids_str = ",".join(f"'{id}'" for id in safe_doc_ids)
# Use json_extract_string function for ClickZetta compatibility
filter_clauses.append(
f"json_extract_string({Field.METADATA_KEY.value}, '$.document_id') IN ({doc_ids_str})"
)

# No need for dataset_id filter since each dataset has its own table

# Use simple quote escaping for LIKE clause
escaped_query = query.replace("'", "''")
filter_clauses.append(f"{Field.CONTENT_KEY.value} LIKE '%{escaped_query}%'")
where_clause = " AND ".join(filter_clauses)

search_sql = f"""
SELECT id, {Field.CONTENT_KEY.value}, {Field.METADATA_KEY.value}
FROM {self._config.schema_name}.{self._table_name}
WHERE {where_clause}
LIMIT {top_k}
"""

documents = []
connection = self._ensure_connection()
with connection.cursor() as cursor:
# Use hints parameter for LIKE search optimization
like_hints = {
'hints': {
'sdk.job.timeout': 20, # Timeout for LIKE search
'cz.sql.job.fast.mode': True
}
}
cursor.execute(search_sql, parameters=like_hints)
results = cursor.fetchall()

for row in results:
# Parse metadata from JSON string (may be double-encoded)
try:
if row[2]:
metadata = json.loads(row[2])

# If result is a string, it's double-encoded JSON - parse again
if isinstance(metadata, str):
metadata = json.loads(metadata)

if not isinstance(metadata, dict):
metadata = {}
else:
metadata = {}
except (json.JSONDecodeError, TypeError) as e:
logger.error("JSON parsing failed: %s", e)
# Fallback: extract document_id with regex
import re
doc_id_match = re.search(r'"document_id":\s*"([^"]+)"', str(row[2] or ''))
metadata = {"document_id": doc_id_match.group(1)} if doc_id_match else {}

# Ensure required fields are set
metadata["doc_id"] = row[0] # segment id

# Ensure document_id exists (critical for Dify's format_retrieval_documents)
if "document_id" not in metadata:
metadata["document_id"] = row[0] # fallback to segment id

metadata["score"] = 0.5 # Lower score for LIKE search
doc = Document(page_content=row[1], metadata=metadata)
documents.append(doc)

return documents

def delete(self) -> None:
"""Delete the entire collection."""
connection = self._ensure_connection()
with connection.cursor() as cursor:
cursor.execute(f"DROP TABLE IF EXISTS {self._config.schema_name}.{self._table_name}")


def _format_vector_simple(self, vector: list[float]) -> str:
"""Simple vector formatting for SQL queries."""
return ','.join(map(str, vector))

def _safe_doc_id(self, doc_id: str) -> str:
"""Ensure doc_id is safe for SQL and doesn't contain special characters."""
if not doc_id:
return str(uuid.uuid4())
# Remove or replace potentially problematic characters
safe_id = str(doc_id)
# Only allow alphanumeric, hyphens, underscores
safe_id = ''.join(c for c in safe_id if c.isalnum() or c in '-_')
if not safe_id: # If all characters were removed
return str(uuid.uuid4())
return safe_id[:255] # Limit length



class ClickzettaVectorFactory(AbstractVectorFactory):
"""Factory for creating Clickzetta vector instances."""

def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> BaseVector:
"""Initialize a Clickzetta vector instance."""
# Get configuration from environment variables or dataset config
config = ClickzettaConfig(
username=dify_config.CLICKZETTA_USERNAME or "",
password=dify_config.CLICKZETTA_PASSWORD or "",
instance=dify_config.CLICKZETTA_INSTANCE or "",
service=dify_config.CLICKZETTA_SERVICE or "api.clickzetta.com",
workspace=dify_config.CLICKZETTA_WORKSPACE or "quick_start",
vcluster=dify_config.CLICKZETTA_VCLUSTER or "default_ap",
schema_name=dify_config.CLICKZETTA_SCHEMA or "dify",
batch_size=dify_config.CLICKZETTA_BATCH_SIZE or 100,
enable_inverted_index=dify_config.CLICKZETTA_ENABLE_INVERTED_INDEX or True,
analyzer_type=dify_config.CLICKZETTA_ANALYZER_TYPE or "chinese",
analyzer_mode=dify_config.CLICKZETTA_ANALYZER_MODE or "smart",
vector_distance_function=dify_config.CLICKZETTA_VECTOR_DISTANCE_FUNCTION or "cosine_distance",
)

# Use dataset collection name as table name
collection_name = Dataset.gen_collection_name_by_id(dataset.id).lower()

return ClickzettaVector(collection_name=collection_name, config=config)
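
A short usage sketch (not part of the diff) of how the pieces above fit together; the credentials and embedding values are placeholders, and inside Dify the factory above builds the instance from `dify_config` instead:

```python
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaConfig, ClickzettaVector
from core.rag.models.document import Document

# Placeholder credentials; schema_name maps to CLICKZETTA_SCHEMA.
config = ClickzettaConfig(
    username="user", password="pass", instance="my_instance",
    workspace="quick_start", vcluster="default_ap", schema_name="dify",
)
store = ClickzettaVector(collection_name="vector_index_demo", config=config)

docs = [Document(page_content="ClickZetta is a lakehouse.",
                 metadata={"doc_id": "seg-1", "document_id": "doc-1"})]
embeddings = [[0.1, 0.2, 0.3, 0.4]]          # placeholder 4-dimensional embedding

store.create(docs, embeddings)               # creates table + indexes, then inserts the batch
hits = store.search_by_vector([0.1, 0.2, 0.3, 0.4], top_k=5)
for hit in hits:
    print(hit.metadata.get("score"), hit.page_content)
```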


+ 4 - 0  api/core/rag/datasource/vdb/vector_factory.py

@@ -172,6 +172,10 @@ class Vector:
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory

return MatrixoneVectorFactory
case VectorType.CLICKZETTA:
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory

return ClickzettaVectorFactory
case _:
raise ValueError(f"Vector store {vector_type} is not supported.")


+ 1 - 0  api/core/rag/datasource/vdb/vector_type.py

@@ -30,3 +30,4 @@ class VectorType(StrEnum):
TABLESTORE = "tablestore"
HUAWEI_CLOUD = "huawei_cloud"
MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta"

+ 4 - 4  api/core/tools/builtin_tool/providers/time/tools/localtime_to_timestamp.py

@@ -37,12 +37,12 @@ class LocaltimeToTimestampTool(BuiltinTool):
@staticmethod
def localtime_to_timestamp(localtime: str, time_format: str, local_tz=None) -> int | None:
try:
local_time = datetime.strptime(localtime, time_format)
if local_tz is None:
local_tz = datetime.now().astimezone().tzinfo
if isinstance(local_tz, str):
localtime = local_time.astimezone() # type: ignore
elif isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
local_time = datetime.strptime(localtime, time_format)
localtime = local_tz.localize(local_time) # type: ignore
localtime = local_tz.localize(local_time) # type: ignore
timestamp = int(localtime.timestamp()) # type: ignore
return timestamp
except Exception as e:

+ 51 - 14  api/core/tools/custom_tool/tool.py

@@ -1,7 +1,8 @@
import json
from collections.abc import Generator
from dataclasses import dataclass
from os import getenv
from typing import Any, Optional
from typing import Any, Optional, Union
from urllib.parse import urlencode

import httpx
@@ -20,6 +21,20 @@ API_TOOL_DEFAULT_TIMEOUT = (
)


@dataclass
class ParsedResponse:
"""Represents a parsed HTTP response with type information"""

content: Union[str, dict]
is_json: bool

def to_string(self) -> str:
"""Convert response to string format for credential validation"""
if isinstance(self.content, dict):
return json.dumps(self.content, ensure_ascii=False)
return str(self.content)


class ApiTool(Tool):
"""
Api tool
@@ -58,7 +73,9 @@ class ApiTool(Tool):

response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, parameters)
# validate response
return self.validate_and_parse_response(response)
parsed_response = self.validate_and_parse_response(response)
# For credential validation, always return as string
return parsed_response.to_string()

def tool_provider_type(self) -> ToolProviderType:
return ToolProviderType.API
@@ -112,23 +129,36 @@ class ApiTool(Tool):

return headers

def validate_and_parse_response(self, response: httpx.Response) -> str:
def validate_and_parse_response(self, response: httpx.Response) -> ParsedResponse:
"""
validate the response
validate the response and return parsed content with type information

:return: ParsedResponse with content and is_json flag
"""
if isinstance(response, httpx.Response):
if response.status_code >= 400:
raise ToolInvokeError(f"Request failed with status code {response.status_code} and {response.text}")
if not response.content:
return "Empty response from the tool, please check your parameters and try again."
return ParsedResponse(
"Empty response from the tool, please check your parameters and try again.", False
)

# Check content type
content_type = response.headers.get("content-type", "").lower()
is_json_content_type = "application/json" in content_type

# Try to parse as JSON
try:
response = response.json()
try:
return json.dumps(response, ensure_ascii=False)
except Exception:
return json.dumps(response)
json_response = response.json()
# If content-type indicates JSON, return as JSON object
if is_json_content_type:
return ParsedResponse(json_response, True)
else:
# If content-type doesn't indicate JSON, treat as text regardless of content
return ParsedResponse(response.text, False)
except Exception:
return response.text
# Not valid JSON, return as text
return ParsedResponse(response.text, False)
else:
raise ValueError(f"Invalid response type {type(response)}")

@@ -369,7 +399,14 @@ class ApiTool(Tool):
response = self.do_http_request(self.api_bundle.server_url, self.api_bundle.method, headers, tool_parameters)

# validate response
response = self.validate_and_parse_response(response)
parsed_response = self.validate_and_parse_response(response)

# assemble invoke message
yield self.create_text_message(response)
# assemble invoke message based on response type
if parsed_response.is_json and isinstance(parsed_response.content, dict):
yield self.create_json_message(parsed_response.content)
else:
# Convert to string if needed and create text message
text_response = (
parsed_response.content if isinstance(parsed_response.content, str) else str(parsed_response.content)
)
yield self.create_text_message(text_response)
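
A minimal sketch of how the new ParsedResponse values behave, constructed directly rather than from a live HTTP call (the import path is assumed from this diff):

```python
from core.tools.custom_tool.tool import ParsedResponse

json_result = ParsedResponse(content={"status": "ok"}, is_json=True)
text_result = ParsedResponse(content="plain body", is_json=False)

# Credential validation always stringifies; tool invocation branches on is_json.
assert json_result.to_string() == '{"status": "ok"}'
assert text_result.to_string() == "plain body"
```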

+ 3
- 2
api/core/workflow/nodes/http_request/executor.py Ver arquivo

@@ -91,7 +91,7 @@ class Executor:
self.auth = node_data.authorization
self.timeout = timeout
self.ssl_verify = node_data.ssl_verify
self.params = []
self.params = None
self.headers = {}
self.content = None
self.files = None
@@ -139,7 +139,8 @@ class Executor:
(self.variable_pool.convert_template(key).text, self.variable_pool.convert_template(value_str).text)
)

self.params = result
if result:
self.params = result

def _init_headers(self):
"""

+ 13
- 0
api/extensions/ext_storage.py Ver arquivo

@@ -69,6 +69,19 @@ class Storage:
from extensions.storage.supabase_storage import SupabaseStorage

return SupabaseStorage
case StorageType.CLICKZETTA_VOLUME:
from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
ClickZettaVolumeConfig,
ClickZettaVolumeStorage,
)

def create_clickzetta_volume_storage():
# ClickZettaVolumeConfig will automatically read from environment variables
# and fallback to CLICKZETTA_* config if CLICKZETTA_VOLUME_* is not set
volume_config = ClickZettaVolumeConfig()
return ClickZettaVolumeStorage(volume_config)

return create_clickzetta_volume_storage
case _:
raise ValueError(f"unsupported storage type {storage_type}")


+ 5
- 0
api/extensions/storage/clickzetta_volume/__init__.py Ver arquivo

@@ -0,0 +1,5 @@
"""ClickZetta Volume storage implementation."""

from .clickzetta_volume_storage import ClickZettaVolumeStorage

__all__ = ["ClickZettaVolumeStorage"]

+ 530
- 0
api/extensions/storage/clickzetta_volume/clickzetta_volume_storage.py Ver arquivo

@@ -0,0 +1,530 @@
"""ClickZetta Volume Storage Implementation

This module provides a storage backend built on ClickZetta Volume functionality.
Supports Table Volume, User Volume, and External Volume types.
"""

import logging
import os
import tempfile
from collections.abc import Generator
from io import BytesIO
from pathlib import Path
from typing import Optional

import clickzetta # type: ignore[import]
from pydantic import BaseModel, model_validator

from extensions.storage.base_storage import BaseStorage

from .volume_permissions import VolumePermissionManager, check_volume_permission

logger = logging.getLogger(__name__)


class ClickZettaVolumeConfig(BaseModel):
"""Configuration for ClickZetta Volume storage."""

username: str = ""
password: str = ""
instance: str = ""
service: str = "api.clickzetta.com"
workspace: str = "quick_start"
vcluster: str = "default_ap"
schema_name: str = "dify"
volume_type: str = "table" # table|user|external
volume_name: Optional[str] = None # For external volumes
table_prefix: str = "dataset_" # Prefix for table volume names
dify_prefix: str = "dify_km" # Directory prefix for User Volume
permission_check: bool = True # Enable/disable permission checking

@model_validator(mode="before")
@classmethod
def validate_config(cls, values: dict) -> dict:
"""Validate the configuration values.

This method will first try to use CLICKZETTA_VOLUME_* environment variables,
then fall back to CLICKZETTA_* environment variables (for vector DB config).
"""
import os

# Helper function to get environment variable with fallback
def get_env_with_fallback(volume_key: str, fallback_key: str, default: str | None = None) -> str:
# First try CLICKZETTA_VOLUME_* specific config
volume_value = values.get(volume_key.lower().replace("clickzetta_volume_", ""))
if volume_value:
return str(volume_value)

# Then try environment variables
volume_env = os.getenv(volume_key)
if volume_env:
return volume_env

# Fall back to existing CLICKZETTA_* config
fallback_env = os.getenv(fallback_key)
if fallback_env:
return fallback_env

return default or ""

# Apply environment variables with fallback to existing CLICKZETTA_* config
values.setdefault("username", get_env_with_fallback("CLICKZETTA_VOLUME_USERNAME", "CLICKZETTA_USERNAME"))
values.setdefault("password", get_env_with_fallback("CLICKZETTA_VOLUME_PASSWORD", "CLICKZETTA_PASSWORD"))
values.setdefault("instance", get_env_with_fallback("CLICKZETTA_VOLUME_INSTANCE", "CLICKZETTA_INSTANCE"))
values.setdefault(
"service", get_env_with_fallback("CLICKZETTA_VOLUME_SERVICE", "CLICKZETTA_SERVICE", "api.clickzetta.com")
)
values.setdefault(
"workspace", get_env_with_fallback("CLICKZETTA_VOLUME_WORKSPACE", "CLICKZETTA_WORKSPACE", "quick_start")
)
values.setdefault(
"vcluster", get_env_with_fallback("CLICKZETTA_VOLUME_VCLUSTER", "CLICKZETTA_VCLUSTER", "default_ap")
)
values.setdefault("schema_name", get_env_with_fallback("CLICKZETTA_VOLUME_SCHEMA", "CLICKZETTA_SCHEMA", "dify"))

# Volume-specific configurations (no fallback to vector DB config)
values.setdefault("volume_type", os.getenv("CLICKZETTA_VOLUME_TYPE", "table"))
values.setdefault("volume_name", os.getenv("CLICKZETTA_VOLUME_NAME"))
values.setdefault("table_prefix", os.getenv("CLICKZETTA_VOLUME_TABLE_PREFIX", "dataset_"))
values.setdefault("dify_prefix", os.getenv("CLICKZETTA_VOLUME_DIFY_PREFIX", "dify_km"))
# Permission checking is temporarily disabled; force this to False for now
values.setdefault("permission_check", False)

# Validate required fields
if not values.get("username"):
raise ValueError("CLICKZETTA_VOLUME_USERNAME or CLICKZETTA_USERNAME is required")
if not values.get("password"):
raise ValueError("CLICKZETTA_VOLUME_PASSWORD or CLICKZETTA_PASSWORD is required")
if not values.get("instance"):
raise ValueError("CLICKZETTA_VOLUME_INSTANCE or CLICKZETTA_INSTANCE is required")

# Validate volume type
volume_type = values["volume_type"]
if volume_type not in ["table", "user", "external"]:
raise ValueError("CLICKZETTA_VOLUME_TYPE must be one of: table, user, external")

if volume_type == "external" and not values.get("volume_name"):
raise ValueError("CLICKZETTA_VOLUME_NAME is required for external volume type")

return values


class ClickZettaVolumeStorage(BaseStorage):
"""ClickZetta Volume storage implementation."""

def __init__(self, config: ClickZettaVolumeConfig):
"""Initialize ClickZetta Volume storage.

Args:
config: ClickZetta Volume configuration
"""
self._config = config
self._connection = None
self._permission_manager: VolumePermissionManager | None = None
self._init_connection()
self._init_permission_manager()

logger.info("ClickZetta Volume storage initialized with type: %s", config.volume_type)

def _init_connection(self):
"""Initialize ClickZetta connection."""
try:
self._connection = clickzetta.connect(
username=self._config.username,
password=self._config.password,
instance=self._config.instance,
service=self._config.service,
workspace=self._config.workspace,
vcluster=self._config.vcluster,
schema=self._config.schema_name,
)
logger.debug("ClickZetta connection established")
except Exception as e:
logger.exception("Failed to connect to ClickZetta")
raise

def _init_permission_manager(self):
"""Initialize permission manager."""
try:
self._permission_manager = VolumePermissionManager(
self._connection, self._config.volume_type, self._config.volume_name
)
logger.debug("Permission manager initialized")
except Exception as e:
logger.exception("Failed to initialize permission manager")
raise

def _get_volume_path(self, filename: str, dataset_id: Optional[str] = None) -> str:
"""Get the appropriate volume path based on volume type."""
if self._config.volume_type == "user":
# Add dify prefix for User Volume to organize files
return f"{self._config.dify_prefix}/{filename}"
elif self._config.volume_type == "table":
# Check if this should use User Volume (special directories)
if dataset_id in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
# Use User Volume with dify prefix for special directories
return f"{self._config.dify_prefix}/{filename}"

if dataset_id:
return f"{self._config.table_prefix}{dataset_id}/{filename}"
else:
# Extract dataset_id from filename if not provided
# Format: dataset_id/filename
if "/" in filename:
return filename
else:
raise ValueError("dataset_id is required for table volume or filename must include dataset_id/")
elif self._config.volume_type == "external":
return filename
else:
raise ValueError(f"Unsupported volume type: {self._config.volume_type}")

def _get_volume_sql_prefix(self, dataset_id: Optional[str] = None) -> str:
"""Get SQL prefix for volume operations."""
if self._config.volume_type == "user":
return "USER VOLUME"
elif self._config.volume_type == "table":
# For Dify's current file storage pattern, most files are stored in
# paths like "upload_files/tenant_id/uuid.ext", "tools/tenant_id/uuid.ext"
# These should use USER VOLUME for better compatibility
if dataset_id in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
return "USER VOLUME"

# Only use TABLE VOLUME for actual dataset-specific paths
# like "dataset_12345/file.pdf" or paths with dataset_ prefix
if dataset_id:
table_name = f"{self._config.table_prefix}{dataset_id}"
else:
# Default table name for generic operations
table_name = "default_dataset"
return f"TABLE VOLUME {table_name}"
elif self._config.volume_type == "external":
return f"VOLUME {self._config.volume_name}"
else:
raise ValueError(f"Unsupported volume type: {self._config.volume_type}")

def _execute_sql(self, sql: str, fetch: bool = False):
"""Execute SQL command."""
try:
if self._connection is None:
raise RuntimeError("Connection not initialized")
with self._connection.cursor() as cursor:
cursor.execute(sql)
if fetch:
return cursor.fetchall()
return None
except Exception as e:
logger.exception("SQL execution failed: %s", sql)
raise

def _ensure_table_volume_exists(self, dataset_id: str) -> None:
"""Ensure table volume exists for the given dataset_id."""
if self._config.volume_type != "table" or not dataset_id:
return

# Skip for upload_files and other special directories that use USER VOLUME
if dataset_id in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
return

table_name = f"{self._config.table_prefix}{dataset_id}"

try:
# Check if table exists
check_sql = f"SHOW TABLES LIKE '{table_name}'"
result = self._execute_sql(check_sql, fetch=True)

if not result:
# Create table with volume
create_sql = f"""
CREATE TABLE {table_name} (
id INT PRIMARY KEY AUTO_INCREMENT,
filename VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX idx_filename (filename)
) WITH VOLUME
"""
self._execute_sql(create_sql)
logger.info("Created table volume: %s", table_name)

except Exception as e:
logger.warning("Failed to create table volume %s: %s", table_name, e)
# Don't raise exception, let the operation continue
# The table might exist but not be visible due to permissions

def save(self, filename: str, data: bytes) -> None:
"""Save data to ClickZetta Volume.

Args:
filename: File path in volume
data: File content as bytes
"""
# Extract dataset_id from filename if present
dataset_id = None
if "/" in filename and self._config.volume_type == "table":
parts = filename.split("/", 1)
if parts[0].startswith(self._config.table_prefix):
dataset_id = parts[0][len(self._config.table_prefix) :]
filename = parts[1]
else:
dataset_id = parts[0]
filename = parts[1]

# Ensure table volume exists (for table volumes)
if dataset_id:
self._ensure_table_volume_exists(dataset_id)

# Check permissions (if enabled)
if self._config.permission_check:
# Skip permission check for special directories that use USER VOLUME
if dataset_id not in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
if self._permission_manager is not None:
check_volume_permission(self._permission_manager, "save", dataset_id)

# Write data to temporary file
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(data)
temp_file_path = temp_file.name

try:
# Upload to volume
volume_prefix = self._get_volume_sql_prefix(dataset_id)

# Get the actual volume path (may include dify_km prefix)
volume_path = self._get_volume_path(filename, dataset_id)
actual_filename = volume_path.split("/")[-1] if "/" in volume_path else volume_path

# For User Volume, use the full path with dify_km prefix
if volume_prefix == "USER VOLUME":
sql = f"PUT '{temp_file_path}' TO {volume_prefix} FILE '{volume_path}'"
else:
sql = f"PUT '{temp_file_path}' TO {volume_prefix} FILE '{filename}'"

self._execute_sql(sql)
logger.debug("File %s saved to ClickZetta Volume at path %s", filename, volume_path)
finally:
# Clean up temporary file
Path(temp_file_path).unlink(missing_ok=True)

def load_once(self, filename: str) -> bytes:
"""Load file content from ClickZetta Volume.

Args:
filename: File path in volume

Returns:
File content as bytes
"""
# Extract dataset_id from filename if present
dataset_id = None
if "/" in filename and self._config.volume_type == "table":
parts = filename.split("/", 1)
if parts[0].startswith(self._config.table_prefix):
dataset_id = parts[0][len(self._config.table_prefix) :]
filename = parts[1]
else:
dataset_id = parts[0]
filename = parts[1]

# Check permissions (if enabled)
if self._config.permission_check:
# Skip permission check for special directories that use USER VOLUME
if dataset_id not in ["upload_files", "temp", "cache", "tools", "website_files", "privkeys"]:
if self._permission_manager is not None:
check_volume_permission(self._permission_manager, "load_once", dataset_id)

# Download to temporary directory
with tempfile.TemporaryDirectory() as temp_dir:
volume_prefix = self._get_volume_sql_prefix(dataset_id)

# Get the actual volume path (may include dify_km prefix)
volume_path = self._get_volume_path(filename, dataset_id)

# For User Volume, use the full path with dify_km prefix
if volume_prefix == "USER VOLUME":
sql = f"GET {volume_prefix} FILE '{volume_path}' TO '{temp_dir}'"
else:
sql = f"GET {volume_prefix} FILE '{filename}' TO '{temp_dir}'"

self._execute_sql(sql)

# Find the downloaded file (may be in subdirectories)
downloaded_file = None
for root, dirs, files in os.walk(temp_dir):
for file in files:
if file == filename or file == os.path.basename(filename):
downloaded_file = Path(root) / file
break
if downloaded_file:
break

if not downloaded_file or not downloaded_file.exists():
raise FileNotFoundError(f"Downloaded file not found: {filename}")

content = downloaded_file.read_bytes()

logger.debug("File %s loaded from ClickZetta Volume", filename)
return content

def load_stream(self, filename: str) -> Generator:
"""Load file as stream from ClickZetta Volume.

Args:
filename: File path in volume

Yields:
File content chunks
"""
content = self.load_once(filename)
batch_size = 4096
stream = BytesIO(content)

while chunk := stream.read(batch_size):
yield chunk

logger.debug("File %s loaded as stream from ClickZetta Volume", filename)

def download(self, filename: str, target_filepath: str):
"""Download file from ClickZetta Volume to local path.

Args:
filename: File path in volume
target_filepath: Local target file path
"""
content = self.load_once(filename)

with Path(target_filepath).open("wb") as f:
f.write(content)

logger.debug("File %s downloaded from ClickZetta Volume to %s", filename, target_filepath)

def exists(self, filename: str) -> bool:
"""Check if file exists in ClickZetta Volume.

Args:
filename: File path in volume

Returns:
True if file exists, False otherwise
"""
try:
# Extract dataset_id from filename if present
dataset_id = None
if "/" in filename and self._config.volume_type == "table":
parts = filename.split("/", 1)
if parts[0].startswith(self._config.table_prefix):
dataset_id = parts[0][len(self._config.table_prefix) :]
filename = parts[1]
else:
dataset_id = parts[0]
filename = parts[1]

volume_prefix = self._get_volume_sql_prefix(dataset_id)

# Get the actual volume path (may include dify_km prefix)
volume_path = self._get_volume_path(filename, dataset_id)

# For User Volume, use the full path with dify_km prefix
if volume_prefix == "USER VOLUME":
sql = f"LIST {volume_prefix} REGEXP = '^{volume_path}$'"
else:
sql = f"LIST {volume_prefix} REGEXP = '^{filename}$'"

rows = self._execute_sql(sql, fetch=True)

exists = len(rows) > 0
logger.debug("File %s exists check: %s", filename, exists)
return exists
except Exception as e:
logger.warning("Error checking file existence for %s: %s", filename, e)
return False

def delete(self, filename: str):
"""Delete file from ClickZetta Volume.

Args:
filename: File path in volume
"""
if not self.exists(filename):
logger.debug("File %s not found, skip delete", filename)
return

# Extract dataset_id from filename if present
dataset_id = None
if "/" in filename and self._config.volume_type == "table":
parts = filename.split("/", 1)
if parts[0].startswith(self._config.table_prefix):
dataset_id = parts[0][len(self._config.table_prefix) :]
filename = parts[1]
else:
dataset_id = parts[0]
filename = parts[1]

volume_prefix = self._get_volume_sql_prefix(dataset_id)

# Get the actual volume path (may include dify_km prefix)
volume_path = self._get_volume_path(filename, dataset_id)

# For User Volume, use the full path with dify_km prefix
if volume_prefix == "USER VOLUME":
sql = f"REMOVE {volume_prefix} FILE '{volume_path}'"
else:
sql = f"REMOVE {volume_prefix} FILE '{filename}'"

self._execute_sql(sql)

logger.debug("File %s deleted from ClickZetta Volume", filename)

def scan(self, path: str, files: bool = True, directories: bool = False) -> list[str]:
"""Scan files and directories in ClickZetta Volume.

Args:
path: Path to scan (dataset_id for table volumes)
files: Include files in results
directories: Include directories in results

Returns:
List of file/directory paths
"""
try:
# For table volumes, path is treated as dataset_id
dataset_id = None
if self._config.volume_type == "table":
dataset_id = path
path = "" # Root of the table volume

volume_prefix = self._get_volume_sql_prefix(dataset_id)

# For User Volume, add dify prefix to path
if volume_prefix == "USER VOLUME":
if path:
scan_path = f"{self._config.dify_prefix}/{path}"
sql = f"LIST {volume_prefix} SUBDIRECTORY '{scan_path}'"
else:
sql = f"LIST {volume_prefix} SUBDIRECTORY '{self._config.dify_prefix}'"
else:
if path:
sql = f"LIST {volume_prefix} SUBDIRECTORY '{path}'"
else:
sql = f"LIST {volume_prefix}"

rows = self._execute_sql(sql, fetch=True)

result = []
for row in rows:
file_path = row[0] # relative_path column

# For User Volume, remove dify prefix from results
dify_prefix_with_slash = f"{self._config.dify_prefix}/"
if volume_prefix == "USER VOLUME" and file_path.startswith(dify_prefix_with_slash):
file_path = file_path[len(dify_prefix_with_slash) :] # Remove prefix

if files and not file_path.endswith("/") or directories and file_path.endswith("/"):
result.append(file_path)

logger.debug("Scanned %d items in path %s", len(result), path)
return result

except Exception as e:
logger.exception("Error scanning path %s", path)
return []
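
An illustrative round trip through the storage class above; it requires a reachable ClickZetta instance, so the credentials here are placeholders only:

```python
from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
    ClickZettaVolumeConfig,
    ClickZettaVolumeStorage,
)

config = ClickZettaVolumeConfig(
    username="dify_user", password="***", instance="my_instance", volume_type="user"
)
storage = ClickZettaVolumeStorage(config)

storage.save("notes/readme.txt", b"hello clickzetta volume")  # stored under the dify_km prefix
assert storage.exists("notes/readme.txt")
content = storage.load_once("notes/readme.txt")
storage.delete("notes/readme.txt")
```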

+ 516
- 0
api/extensions/storage/clickzetta_volume/file_lifecycle.py Ver arquivo

@@ -0,0 +1,516 @@
"""ClickZetta Volume文件生命周期管理

该模块提供文件版本控制、自动清理、备份和恢复等生命周期管理功能。
支持知识库文件的完整生命周期管理。
"""

import json
import logging
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Optional

logger = logging.getLogger(__name__)


class FileStatus(Enum):
"""文件状态枚举"""

ACTIVE = "active" # 活跃状态
ARCHIVED = "archived" # 已归档
DELETED = "deleted" # 已删除(软删除)
BACKUP = "backup" # 备份文件


@dataclass
class FileMetadata:
"""文件元数据"""

filename: str
size: int | None
created_at: datetime
modified_at: datetime
version: int | None
status: FileStatus
checksum: Optional[str] = None
tags: Optional[dict[str, str]] = None
parent_version: Optional[int] = None

def to_dict(self) -> dict:
"""转换为字典格式"""
data = asdict(self)
data["created_at"] = self.created_at.isoformat()
data["modified_at"] = self.modified_at.isoformat()
data["status"] = self.status.value
return data

@classmethod
def from_dict(cls, data: dict) -> "FileMetadata":
"""从字典创建实例"""
data = data.copy()
data["created_at"] = datetime.fromisoformat(data["created_at"])
data["modified_at"] = datetime.fromisoformat(data["modified_at"])
data["status"] = FileStatus(data["status"])
return cls(**data)


class FileLifecycleManager:
"""文件生命周期管理器"""

def __init__(self, storage, dataset_id: Optional[str] = None):
"""初始化生命周期管理器

Args:
storage: ClickZetta Volume存储实例
dataset_id: 数据集ID(用于Table Volume)
"""
self._storage = storage
self._dataset_id = dataset_id
self._metadata_file = ".dify_file_metadata.json"
self._version_prefix = ".versions/"
self._backup_prefix = ".backups/"
self._deleted_prefix = ".deleted/"

# Reuse the storage's permission manager if it provides one
self._permission_manager: Optional[Any] = getattr(storage, "_permission_manager", None)

def save_with_lifecycle(self, filename: str, data: bytes, tags: Optional[dict[str, str]] = None) -> FileMetadata:
"""保存文件并管理生命周期

Args:
filename: 文件名
data: 文件内容
tags: 文件标签

Returns:
文件元数据
"""
# Permission check
if not self._check_permission(filename, "save"):
from .volume_permissions import VolumePermissionError

raise VolumePermissionError(
f"Permission denied for lifecycle save operation on file: {filename}",
operation="save",
volume_type=getattr(self._storage, "_config", {}).get("volume_type", "unknown"),
dataset_id=self._dataset_id,
)

try:
# 1. Check whether an older version exists
metadata_dict = self._load_metadata()
current_metadata = metadata_dict.get(filename)

# 2. If an older version exists, create a version backup
if current_metadata:
self._create_version_backup(filename, current_metadata)

# 3. Compute file information
now = datetime.now()
checksum = self._calculate_checksum(data)
new_version = (current_metadata["version"] + 1) if current_metadata else 1

# 4. Save the new file
self._storage.save(filename, data)

# 5. Create metadata
created_at = now
parent_version = None

if current_metadata:
# Convert created_at to datetime if it is a string
if isinstance(current_metadata["created_at"], str):
created_at = datetime.fromisoformat(current_metadata["created_at"])
else:
created_at = current_metadata["created_at"]
parent_version = current_metadata["version"]

file_metadata = FileMetadata(
filename=filename,
size=len(data),
created_at=created_at,
modified_at=now,
version=new_version,
status=FileStatus.ACTIVE,
checksum=checksum,
tags=tags or {},
parent_version=parent_version,
)

# 6. Update metadata
metadata_dict[filename] = file_metadata.to_dict()
self._save_metadata(metadata_dict)

logger.info("File %s saved with lifecycle management, version %s", filename, new_version)
return file_metadata

except Exception as e:
logger.exception("Failed to save file with lifecycle")
raise

def get_file_metadata(self, filename: str) -> Optional[FileMetadata]:
"""获取文件元数据

Args:
filename: 文件名

Returns:
文件元数据,如果不存在返回None
"""
try:
metadata_dict = self._load_metadata()
if filename in metadata_dict:
return FileMetadata.from_dict(metadata_dict[filename])
return None
except Exception as e:
logger.exception("Failed to get file metadata for %s", filename)
return None

def list_file_versions(self, filename: str) -> list[FileMetadata]:
"""列出文件的所有版本

Args:
filename: 文件名

Returns:
文件版本列表,按版本号排序
"""
try:
versions = []

# Get the current version
current_metadata = self.get_file_metadata(filename)
if current_metadata:
versions.append(current_metadata)

# Get historical versions
version_pattern = f"{self._version_prefix}{filename}.v*"
try:
version_files = self._storage.scan(self._dataset_id or "", files=True)
for file_path in version_files:
if file_path.startswith(f"{self._version_prefix}{filename}.v"):
# Parse the version number
version_str = file_path.split(".v")[-1].split(".")[0]
try:
version_num = int(version_str)
# Simplified for now; metadata should really be read from the version file
# For the moment only basic metadata is created
except ValueError:
continue
except:
# If version files cannot be scanned, return only the current version
pass

return sorted(versions, key=lambda x: x.version or 0, reverse=True)

except Exception as e:
logger.exception("Failed to list file versions for %s", filename)
return []

def restore_version(self, filename: str, version: int) -> bool:
"""恢复文件到指定版本

Args:
filename: 文件名
version: 要恢复的版本号

Returns:
恢复是否成功
"""
try:
version_filename = f"{self._version_prefix}{filename}.v{version}"

# Check whether the version file exists
if not self._storage.exists(version_filename):
logger.warning("Version %s of %s not found", version, filename)
return False

# Read the version file content
version_data = self._storage.load_once(version_filename)

# Back up the current version
current_metadata = self.get_file_metadata(filename)
if current_metadata:
self._create_version_backup(filename, current_metadata.to_dict())

# Restore the file
self.save_with_lifecycle(filename, version_data, {"restored_from": str(version)})
return True

except Exception as e:
logger.exception("Failed to restore %s to version %s", filename, version)
return False

def archive_file(self, filename: str) -> bool:
"""归档文件

Args:
filename: 文件名

Returns:
归档是否成功
"""
# Permission check
if not self._check_permission(filename, "archive"):
logger.warning("Permission denied for archive operation on file: %s", filename)
return False

try:
# Mark the file status as archived
metadata_dict = self._load_metadata()
if filename not in metadata_dict:
logger.warning("File %s not found in metadata", filename)
return False

metadata_dict[filename]["status"] = FileStatus.ARCHIVED.value
metadata_dict[filename]["modified_at"] = datetime.now().isoformat()

self._save_metadata(metadata_dict)

logger.info("File %s archived successfully", filename)
return True

except Exception as e:
logger.exception("Failed to archive file %s", filename)
return False

def soft_delete_file(self, filename: str) -> bool:
"""软删除文件(移动到删除目录)

Args:
filename: 文件名

Returns:
删除是否成功
"""
# Permission check
if not self._check_permission(filename, "delete"):
logger.warning("Permission denied for soft delete operation on file: %s", filename)
return False

try:
# Check whether the file exists
if not self._storage.exists(filename):
logger.warning("File %s not found", filename)
return False

# Read the file content
file_data = self._storage.load_once(filename)

# Move it to the deleted directory
deleted_filename = f"{self._deleted_prefix}{filename}.{datetime.now().strftime('%Y%m%d_%H%M%S')}"
self._storage.save(deleted_filename, file_data)

# Delete the original file
self._storage.delete(filename)

# Update metadata
metadata_dict = self._load_metadata()
if filename in metadata_dict:
metadata_dict[filename]["status"] = FileStatus.DELETED.value
metadata_dict[filename]["modified_at"] = datetime.now().isoformat()
self._save_metadata(metadata_dict)

logger.info("File %s soft deleted successfully", filename)
return True

except Exception as e:
logger.exception("Failed to soft delete file %s", filename)
return False

def cleanup_old_versions(self, max_versions: int = 5, max_age_days: int = 30) -> int:
"""清理旧版本文件

Args:
max_versions: 保留的最大版本数
max_age_days: 版本文件的最大保留天数

Returns:
清理的文件数量
"""
try:
cleaned_count = 0
cutoff_date = datetime.now() - timedelta(days=max_age_days)

# Get all version files
try:
all_files = self._storage.scan(self._dataset_id or "", files=True)
version_files = [f for f in all_files if f.startswith(self._version_prefix)]

# Group by file
file_versions: dict[str, list[tuple[int, str]]] = {}
for version_file in version_files:
# Parse the file name and version
parts = version_file[len(self._version_prefix) :].split(".v")
if len(parts) >= 2:
base_filename = parts[0]
version_part = parts[1].split(".")[0]
try:
version_num = int(version_part)
if base_filename not in file_versions:
file_versions[base_filename] = []
file_versions[base_filename].append((version_num, version_file))
except ValueError:
continue

# Clean up old versions for each file
for base_filename, versions in file_versions.items():
# Sort by version number
versions.sort(key=lambda x: x[0], reverse=True)

# Keep the newest max_versions versions and delete the rest
if len(versions) > max_versions:
to_delete = versions[max_versions:]
for version_num, version_file in to_delete:
self._storage.delete(version_file)
cleaned_count += 1
logger.debug("Cleaned old version: %s", version_file)

logger.info("Cleaned %d old version files", cleaned_count)

except Exception as e:
logger.warning("Could not scan for version files: %s", e)

return cleaned_count

except Exception as e:
logger.exception("Failed to cleanup old versions")
return 0

def get_storage_statistics(self) -> dict[str, Any]:
"""获取存储统计信息

Returns:
存储统计字典
"""
try:
metadata_dict = self._load_metadata()

stats: dict[str, Any] = {
"total_files": len(metadata_dict),
"active_files": 0,
"archived_files": 0,
"deleted_files": 0,
"total_size": 0,
"versions_count": 0,
"oldest_file": None,
"newest_file": None,
}

oldest_date = None
newest_date = None

for filename, metadata in metadata_dict.items():
file_meta = FileMetadata.from_dict(metadata)

# Tally file status
if file_meta.status == FileStatus.ACTIVE:
stats["active_files"] = (stats["active_files"] or 0) + 1
elif file_meta.status == FileStatus.ARCHIVED:
stats["archived_files"] = (stats["archived_files"] or 0) + 1
elif file_meta.status == FileStatus.DELETED:
stats["deleted_files"] = (stats["deleted_files"] or 0) + 1

# Tally size
stats["total_size"] = (stats["total_size"] or 0) + (file_meta.size or 0)

# Tally versions
stats["versions_count"] = (stats["versions_count"] or 0) + (file_meta.version or 0)

# Find the newest and oldest files
if oldest_date is None or file_meta.created_at < oldest_date:
oldest_date = file_meta.created_at
stats["oldest_file"] = filename

if newest_date is None or file_meta.modified_at > newest_date:
newest_date = file_meta.modified_at
stats["newest_file"] = filename

return stats

except Exception as e:
logger.exception("Failed to get storage statistics")
return {}

def _create_version_backup(self, filename: str, metadata: dict):
"""创建版本备份"""
try:
# Read the current file content
current_data = self._storage.load_once(filename)

# Save it as a version file
version_filename = f"{self._version_prefix}{filename}.v{metadata['version']}"
self._storage.save(version_filename, current_data)

logger.debug("Created version backup: %s", version_filename)

except Exception as e:
logger.warning("Failed to create version backup for %s: %s", filename, e)

def _load_metadata(self) -> dict[str, Any]:
"""加载元数据文件"""
try:
if self._storage.exists(self._metadata_file):
metadata_content = self._storage.load_once(self._metadata_file)
result = json.loads(metadata_content.decode("utf-8"))
return dict(result) if result else {}
else:
return {}
except Exception as e:
logger.warning("Failed to load metadata: %s", e)
return {}

def _save_metadata(self, metadata_dict: dict):
"""保存元数据文件"""
try:
metadata_content = json.dumps(metadata_dict, indent=2, ensure_ascii=False)
self._storage.save(self._metadata_file, metadata_content.encode("utf-8"))
logger.debug("Metadata saved successfully")
except Exception as e:
logger.exception("Failed to save metadata")
raise

def _calculate_checksum(self, data: bytes) -> str:
"""计算文件校验和"""
import hashlib

return hashlib.md5(data).hexdigest()

def _check_permission(self, filename: str, operation: str) -> bool:
"""检查文件操作权限

Args:
filename: 文件名
operation: 操作类型

Returns:
True if permission granted, False otherwise
"""
# Allow by default when no permission manager is configured
if not self._permission_manager:
return True

try:
# Map the operation type to a permission
operation_mapping = {
"save": "save",
"load": "load_once",
"delete": "delete",
"archive": "delete", # 归档需要删除权限
"restore": "save", # 恢复需要写权限
"cleanup": "delete", # 清理需要删除权限
"read": "load_once",
"write": "save",
}

mapped_operation = operation_mapping.get(operation, operation)

# Check the permission
result = self._permission_manager.validate_operation(mapped_operation, self._dataset_id)
return bool(result)

except Exception as e:
logger.exception("Permission check failed for %s operation %s", filename, operation)
# Safe default: deny access when the permission check fails
return False
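
A hedged usage sketch of the lifecycle manager; `storage` is assumed to be an already-initialized ClickZettaVolumeStorage instance:

```python
from extensions.storage.clickzetta_volume.file_lifecycle import FileLifecycleManager

manager = FileLifecycleManager(storage, dataset_id="12345")

meta = manager.save_with_lifecycle("report.txt", b"v1 content", tags={"source": "demo"})
print(meta.version, meta.status)  # 1 FileStatus.ACTIVE

manager.save_with_lifecycle("report.txt", b"v2 content")  # bumps the version, keeps a .versions/ backup
manager.archive_file("report.txt")
print(manager.get_storage_statistics())
```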

+ 646
- 0
api/extensions/storage/clickzetta_volume/volume_permissions.py Ver arquivo

@@ -0,0 +1,646 @@
"""ClickZetta Volume权限管理机制

该模块提供Volume权限检查、验证和管理功能。
根据ClickZetta的权限模型,不同Volume类型有不同的权限要求。
"""

import logging
from enum import Enum
from typing import Optional

logger = logging.getLogger(__name__)


class VolumePermission(Enum):
"""Volume权限类型枚举"""

READ = "SELECT" # 对应ClickZetta的SELECT权限
WRITE = "INSERT,UPDATE,DELETE" # 对应ClickZetta的写权限
LIST = "SELECT" # 列出文件需要SELECT权限
DELETE = "INSERT,UPDATE,DELETE" # 删除文件需要写权限
USAGE = "USAGE" # External Volume需要的基本权限


class VolumePermissionManager:
"""Volume权限管理器"""

def __init__(self, connection_or_config, volume_type: str | None = None, volume_name: Optional[str] = None):
"""初始化权限管理器

Args:
connection_or_config: ClickZetta连接对象或配置字典
volume_type: Volume类型 (user|table|external)
volume_name: Volume名称 (用于external volume)
"""
# Two ways to initialize: a connection object or a config dict
if isinstance(connection_or_config, dict):
# Create a connection from the config dict
import clickzetta # type: ignore[import-untyped]

config = connection_or_config
self._connection = clickzetta.connect(
username=config.get("username"),
password=config.get("password"),
instance=config.get("instance"),
service=config.get("service"),
workspace=config.get("workspace"),
vcluster=config.get("vcluster"),
schema=config.get("schema") or config.get("database"),
)
self._volume_type = config.get("volume_type", volume_type)
self._volume_name = config.get("volume_name", volume_name)
else:
# Use the connection object directly
self._connection = connection_or_config
self._volume_type = volume_type
self._volume_name = volume_name

if not self._connection:
raise ValueError("Valid connection or config is required")
if not self._volume_type:
raise ValueError("volume_type is required")

self._permission_cache: dict[str, set[str]] = {}
self._current_username = None  # will be resolved from the connection

def check_permission(self, operation: VolumePermission, dataset_id: Optional[str] = None) -> bool:
"""检查用户是否有执行特定操作的权限

Args:
operation: 要执行的操作类型
dataset_id: 数据集ID (用于table volume)

Returns:
True if user has permission, False otherwise
"""
try:
if self._volume_type == "user":
return self._check_user_volume_permission(operation)
elif self._volume_type == "table":
return self._check_table_volume_permission(operation, dataset_id)
elif self._volume_type == "external":
return self._check_external_volume_permission(operation)
else:
logger.warning("Unknown volume type: %s", self._volume_type)
return False

except Exception as e:
logger.exception("Permission check failed")
return False

def _check_user_volume_permission(self, operation: VolumePermission) -> bool:
"""检查User Volume权限

User Volume权限规则:
- 用户对自己的User Volume有全部权限
- 只要用户能够连接到ClickZetta,就默认具有User Volume的基本权限
- 更注重连接身份验证,而不是复杂的权限检查
"""
try:
# Get the current username
current_user = self._get_current_username()

# Check basic connection health
with self._connection.cursor() as cursor:
# Simple connection test; being able to run a query implies basic permissions
cursor.execute("SELECT 1")
result = cursor.fetchone()

if result:
logger.debug(
"User Volume permission check for %s, operation %s: granted (basic connection verified)",
current_user,
operation.name,
)
return True
else:
logger.warning(
"User Volume permission check failed: cannot verify basic connection for %s", current_user
)
return False

except Exception as e:
logger.exception("User Volume permission check failed")
# For User Volume, a failed check is likely a configuration issue, so log a friendlier hint
logger.info("User Volume permission check failed, but permission checking is disabled in this version")
return False

def _check_table_volume_permission(self, operation: VolumePermission, dataset_id: Optional[str]) -> bool:
"""检查Table Volume权限

Table Volume权限规则:
- Table Volume权限继承对应表的权限
- SELECT权限 -> 可以READ/LIST文件
- INSERT,UPDATE,DELETE权限 -> 可以WRITE/DELETE文件
"""
if not dataset_id:
logger.warning("dataset_id is required for table volume permission check")
return False

table_name = f"dataset_{dataset_id}" if not dataset_id.startswith("dataset_") else dataset_id

try:
# Check table permissions
permissions = self._get_table_permissions(table_name)
required_permissions = set(operation.value.split(","))

# Check that all required permissions are present
has_permission = required_permissions.issubset(permissions)

logger.debug(
"Table Volume permission check for %s, operation %s: required=%s, has=%s, granted=%s",
table_name,
operation.name,
required_permissions,
permissions,
has_permission,
)

return has_permission

except Exception as e:
logger.exception("Table volume permission check failed for %s", table_name)
return False

def _check_external_volume_permission(self, operation: VolumePermission) -> bool:
"""检查External Volume权限

External Volume权限规则:
- 尝试获取对External Volume的权限
- 如果权限检查失败,进行备选验证
- 对于开发环境,提供更宽松的权限检查
"""
if not self._volume_name:
logger.warning("volume_name is required for external volume permission check")
return False

try:
# Check External Volume permissions
permissions = self._get_external_volume_permissions(self._volume_name)

# External Volume permission mapping: derive required permissions from the operation type
required_permissions = set()

if operation in [VolumePermission.READ, VolumePermission.LIST]:
required_permissions.add("read")
elif operation in [VolumePermission.WRITE, VolumePermission.DELETE]:
required_permissions.add("write")

# Check that all required permissions are present
has_permission = required_permissions.issubset(permissions)

logger.debug(
"External Volume permission check for %s, operation %s: required=%s, has=%s, granted=%s",
self._volume_name,
operation.name,
required_permissions,
permissions,
has_permission,
)

# If the permission check failed, try a fallback verification
if not has_permission:
logger.info("Direct permission check failed for %s, trying fallback verification", self._volume_name)

# Fallback verification: list volumes to confirm basic access
try:
with self._connection.cursor() as cursor:
cursor.execute("SHOW VOLUMES")
volumes = cursor.fetchall()
for volume in volumes:
if len(volume) > 0 and volume[0] == self._volume_name:
logger.info("Fallback verification successful for %s", self._volume_name)
return True
except Exception as fallback_e:
logger.warning("Fallback verification failed for %s: %s", self._volume_name, fallback_e)

return has_permission

except Exception as e:
logger.exception("External volume permission check failed for %s", self._volume_name)
logger.info("External Volume permission check failed, but permission checking is disabled in this version")
return False

def _get_table_permissions(self, table_name: str) -> set[str]:
"""获取用户对指定表的权限

Args:
table_name: 表名

Returns:
用户对该表的权限集合
"""
cache_key = f"table:{table_name}"

if cache_key in self._permission_cache:
return self._permission_cache[cache_key]

permissions = set()

try:
with self._connection.cursor() as cursor:
# Check the current user's grants using ClickZetta syntax
cursor.execute("SHOW GRANTS")
grants = cursor.fetchall()

# Parse the grants and look for permissions on this table
for grant in grants:
if len(grant) >= 3:  # typical format: (privilege, object_type, object_name, ...)
privilege = grant[0].upper()
object_type = grant[1].upper() if len(grant) > 1 else ""
object_name = grant[2] if len(grant) > 2 else ""

# Check whether the grant applies to this table
if (
object_type == "TABLE"
and object_name == table_name
or object_type == "SCHEMA"
and object_name in table_name
):
if privilege in ["SELECT", "INSERT", "UPDATE", "DELETE", "ALL"]:
if privilege == "ALL":
permissions.update(["SELECT", "INSERT", "UPDATE", "DELETE"])
else:
permissions.add(privilege)

# If no explicit grant was found, try a simple query to probe for permission
if not permissions:
try:
cursor.execute(f"SELECT COUNT(*) FROM {table_name} LIMIT 1")
permissions.add("SELECT")
except Exception:
logger.debug("Cannot query table %s, no SELECT permission", table_name)

except Exception as e:
logger.warning("Could not check table permissions for %s: %s", table_name, e)
# Safe default: deny access when the permission check fails
pass

# Cache the permissions
self._permission_cache[cache_key] = permissions
return permissions

def _get_current_username(self) -> str:
"""获取当前用户名"""
if self._current_username:
return self._current_username

try:
with self._connection.cursor() as cursor:
cursor.execute("SELECT CURRENT_USER()")
result = cursor.fetchone()
if result:
self._current_username = result[0]
return str(self._current_username)
except Exception as e:
logger.exception("Failed to get current username")

return "unknown"

def _get_user_permissions(self, username: str) -> set[str]:
"""获取用户的基本权限集合"""
cache_key = f"user_permissions:{username}"

if cache_key in self._permission_cache:
return self._permission_cache[cache_key]

permissions = set()

try:
with self._connection.cursor() as cursor:
# Check the current user's grants using ClickZetta syntax
cursor.execute("SHOW GRANTS")
grants = cursor.fetchall()

# Parse the grants and collect the user's basic permissions
for grant in grants:
if len(grant) >= 3:  # typical format: (privilege, object_type, object_name, ...)
privilege = grant[0].upper()
object_type = grant[1].upper() if len(grant) > 1 else ""

# Collect all relevant permissions
if privilege in ["SELECT", "INSERT", "UPDATE", "DELETE", "ALL"]:
if privilege == "ALL":
permissions.update(["SELECT", "INSERT", "UPDATE", "DELETE"])
else:
permissions.add(privilege)

except Exception as e:
logger.warning("Could not check user permissions for %s: %s", username, e)
# Safe default: deny access when the permission check fails
pass

# Cache the permissions
self._permission_cache[cache_key] = permissions
return permissions

def _get_external_volume_permissions(self, volume_name: str) -> set[str]:
"""获取用户对指定External Volume的权限

Args:
volume_name: External Volume名称

Returns:
用户对该Volume的权限集合
"""
cache_key = f"external_volume:{volume_name}"

if cache_key in self._permission_cache:
return self._permission_cache[cache_key]

permissions = set()

try:
with self._connection.cursor() as cursor:
# Check volume grants using ClickZetta syntax
logger.info("Checking permissions for volume: %s", volume_name)
cursor.execute(f"SHOW GRANTS ON VOLUME {volume_name}")
grants = cursor.fetchall()

logger.info("Raw grants result for %s: %s", volume_name, grants)

# Parse the grants
# Format: (granted_type, privilege, conditions, granted_on, object_name, granted_to,
# grantee_name, grantor_name, grant_option, granted_time)
for grant in grants:
logger.info("Processing grant: %s", grant)
if len(grant) >= 5:
granted_type = grant[0]
privilege = grant[1].upper()
granted_on = grant[3]
object_name = grant[4]

logger.info(
"Grant details - type: %s, privilege: %s, granted_on: %s, object_name: %s",
granted_type,
privilege,
granted_on,
object_name,
)

# Check whether the grant targets this volume directly or via an object hierarchy
if (
granted_type == "PRIVILEGE" and granted_on == "VOLUME" and object_name.endswith(volume_name)
) or (granted_type == "OBJECT_HIERARCHY" and granted_on == "VOLUME"):
logger.info("Matching grant found for %s", volume_name)

if "READ" in privilege:
permissions.add("read")
logger.info("Added READ permission for %s", volume_name)
if "WRITE" in privilege:
permissions.add("write")
logger.info("Added WRITE permission for %s", volume_name)
if "ALTER" in privilege:
permissions.add("alter")
logger.info("Added ALTER permission for %s", volume_name)
if privilege == "ALL":
permissions.update(["read", "write", "alter"])
logger.info("Added ALL permissions for %s", volume_name)

logger.info("Final permissions for %s: %s", volume_name, permissions)

# If no explicit grant was found, check the volume listing to confirm basic access
if not permissions:
try:
cursor.execute("SHOW VOLUMES")
volumes = cursor.fetchall()
for volume in volumes:
if len(volume) > 0 and volume[0] == volume_name:
permissions.add("read") # 至少有读权限
logger.debug("Volume %s found in SHOW VOLUMES, assuming read permission", volume_name)
break
except Exception:
logger.debug("Cannot access volume %s, no basic permission", volume_name)

except Exception as e:
logger.warning("Could not check external volume permissions for %s: %s", volume_name, e)
# When the permission check fails, attempt a basic volume-access verification
try:
with self._connection.cursor() as cursor:
cursor.execute("SHOW VOLUMES")
volumes = cursor.fetchall()
for volume in volumes:
if len(volume) > 0 and volume[0] == volume_name:
logger.info("Basic volume access verified for %s", volume_name)
permissions.add("read")
permissions.add("write") # 假设有写权限
break
except Exception as basic_e:
logger.warning("Basic volume access check failed for %s: %s", volume_name, basic_e)
# Last resort: assume basic permissions
permissions.add("read")

# Cache the permissions
self._permission_cache[cache_key] = permissions
return permissions

def clear_permission_cache(self):
"""清空权限缓存"""
self._permission_cache.clear()
logger.debug("Permission cache cleared")

def get_permission_summary(self, dataset_id: Optional[str] = None) -> dict[str, bool]:
"""获取权限摘要

Args:
dataset_id: 数据集ID (用于table volume)

Returns:
权限摘要字典
"""
summary = {}

for operation in VolumePermission:
summary[operation.name.lower()] = self.check_permission(operation, dataset_id)

return summary

def check_inherited_permission(self, file_path: str, operation: VolumePermission) -> bool:
"""检查文件路径的权限继承

Args:
file_path: 文件路径
operation: 要执行的操作

Returns:
True if user has permission, False otherwise
"""
try:
# Parse the file path
path_parts = file_path.strip("/").split("/")

if not path_parts:
logger.warning("Invalid file path for permission inheritance check")
return False

# For Table Volume, the first path segment is the dataset_id
if self._volume_type == "table":
if len(path_parts) < 1:
return False

dataset_id = path_parts[0]

# Check permission on the dataset
has_dataset_permission = self.check_permission(operation, dataset_id)

if not has_dataset_permission:
logger.debug("Permission denied for dataset %s", dataset_id)
return False

# Check for path traversal attacks
if self._contains_path_traversal(file_path):
logger.warning("Path traversal attack detected: %s", file_path)
return False

# Check for access to sensitive directories
if self._is_sensitive_path(file_path):
logger.warning("Access to sensitive path denied: %s", file_path)
return False

logger.debug("Permission inherited for path %s", file_path)
return True

elif self._volume_type == "user":
# Permission inheritance for User Volume
current_user = self._get_current_username()

# Check for attempts to access another user's directory
if len(path_parts) > 1 and path_parts[0] != current_user:
logger.warning("User %s attempted to access %s's directory", current_user, path_parts[0])
return False

# Check basic permission
return self.check_permission(operation)

elif self._volume_type == "external":
# Permission inheritance for External Volume
# Check permission on the External Volume itself
return self.check_permission(operation)

else:
logger.warning("Unknown volume type for permission inheritance: %s", self._volume_type)
return False

except Exception as e:
logger.exception("Permission inheritance check failed")
return False

def _contains_path_traversal(self, file_path: str) -> bool:
"""检查路径是否包含路径遍历攻击"""
# Check common path traversal patterns
traversal_patterns = [
"../",
"..\\",
"..%2f",
"..%2F",
"..%5c",
"..%5C",
"%2e%2e%2f",
"%2e%2e%5c",
"....//",
"....\\\\",
]

file_path_lower = file_path.lower()

for pattern in traversal_patterns:
if pattern in file_path_lower:
return True

# Check for absolute paths
if file_path.startswith("/") or file_path.startswith("\\"):
return True

# Check for Windows drive paths
if len(file_path) >= 2 and file_path[1] == ":":
return True

return False

def _is_sensitive_path(self, file_path: str) -> bool:
"""检查路径是否为敏感路径"""
sensitive_patterns = [
"passwd",
"shadow",
"hosts",
"config",
"secrets",
"private",
"key",
"certificate",
"cert",
"ssl",
"database",
"backup",
"dump",
"log",
"tmp",
]

file_path_lower = file_path.lower()

return any(pattern in file_path_lower for pattern in sensitive_patterns)

def validate_operation(self, operation: str, dataset_id: Optional[str] = None) -> bool:
"""验证操作权限

Args:
operation: 操作名称 (save|load|exists|delete|scan)
dataset_id: 数据集ID

Returns:
True if operation is allowed, False otherwise
"""
operation_mapping = {
"save": VolumePermission.WRITE,
"load": VolumePermission.READ,
"load_once": VolumePermission.READ,
"load_stream": VolumePermission.READ,
"download": VolumePermission.READ,
"exists": VolumePermission.READ,
"delete": VolumePermission.DELETE,
"scan": VolumePermission.LIST,
}

if operation not in operation_mapping:
logger.warning("Unknown operation: %s", operation)
return False

volume_permission = operation_mapping[operation]
return self.check_permission(volume_permission, dataset_id)


class VolumePermissionError(Exception):
"""Volume权限错误异常"""

def __init__(self, message: str, operation: str, volume_type: str, dataset_id: Optional[str] = None):
self.operation = operation
self.volume_type = volume_type
self.dataset_id = dataset_id
super().__init__(message)


def check_volume_permission(
permission_manager: VolumePermissionManager, operation: str, dataset_id: Optional[str] = None
) -> None:
"""权限检查装饰器函数

Args:
permission_manager: 权限管理器
operation: 操作名称
dataset_id: 数据集ID

Raises:
VolumePermissionError: 如果没有权限
"""
if not permission_manager.validate_operation(operation, dataset_id):
error_message = f"Permission denied for operation '{operation}' on {permission_manager._volume_type} volume"
if dataset_id:
error_message += f" (dataset: {dataset_id})"

raise VolumePermissionError(
error_message,
operation=operation,
volume_type=permission_manager._volume_type or "unknown",
dataset_id=dataset_id,
)
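
A sketch of the permission helpers exposed by this module; `connection` is assumed to come from clickzetta.connect(...) with valid credentials:

```python
from extensions.storage.clickzetta_volume.volume_permissions import (
    VolumePermission,
    VolumePermissionManager,
    check_volume_permission,
)

manager = VolumePermissionManager(connection, volume_type="table")

if manager.check_permission(VolumePermission.READ, dataset_id="12345"):
    print("read access to dataset_12345 granted")

# Raises VolumePermissionError when the mapped privilege is missing.
check_volume_permission(manager, "save", dataset_id="12345")
```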

+ 1
- 0
api/extensions/storage/storage_type.py Ver arquivo

@@ -5,6 +5,7 @@ class StorageType(StrEnum):
ALIYUN_OSS = "aliyun-oss"
AZURE_BLOB = "azure-blob"
BAIDU_OBS = "baidu-obs"
CLICKZETTA_VOLUME = "clickzetta-volume"
GOOGLE_STORAGE = "google-storage"
HUAWEI_OBS = "huawei-obs"
LOCAL = "local"

+ 2
- 0
api/pyproject.toml Ver arquivo

@@ -194,6 +194,7 @@ vdb = [
"alibabacloud_tea_openapi~=0.3.9",
"chromadb==0.5.20",
"clickhouse-connect~=0.7.16",
"clickzetta-connector-python>=0.8.102",
"couchbase~=4.3.0",
"elasticsearch==8.14.0",
"opensearch-py==2.4.0",
@@ -213,3 +214,4 @@ vdb = [
"xinference-client~=1.2.2",
"mo-vector~=0.1.13",
]


+ 168
- 0
api/tests/integration_tests/storage/test_clickzetta_volume.py Ver arquivo

@@ -0,0 +1,168 @@
"""Integration tests for ClickZetta Volume Storage."""

import os
import tempfile
import unittest

import pytest

from extensions.storage.clickzetta_volume.clickzetta_volume_storage import (
ClickZettaVolumeConfig,
ClickZettaVolumeStorage,
)


class TestClickZettaVolumeStorage(unittest.TestCase):
"""Test cases for ClickZetta Volume Storage."""

def setUp(self):
"""Set up test environment."""
self.config = ClickZettaVolumeConfig(
username=os.getenv("CLICKZETTA_USERNAME", "test_user"),
password=os.getenv("CLICKZETTA_PASSWORD", "test_pass"),
instance=os.getenv("CLICKZETTA_INSTANCE", "test_instance"),
service=os.getenv("CLICKZETTA_SERVICE", "uat-api.clickzetta.com"),
workspace=os.getenv("CLICKZETTA_WORKSPACE", "quick_start"),
vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default_ap"),
schema_name=os.getenv("CLICKZETTA_SCHEMA", "dify"),
volume_type="table",
table_prefix="test_dataset_",
)

@pytest.mark.skipif(not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided")
def test_user_volume_operations(self):
"""Test basic operations with User Volume."""
config = self.config
config.volume_type = "user"

storage = ClickZettaVolumeStorage(config)

# Test file operations
test_filename = "test_file.txt"
test_content = b"Hello, ClickZetta Volume!"

# Save file
storage.save(test_filename, test_content)

# Check if file exists
assert storage.exists(test_filename)

# Load file
loaded_content = storage.load_once(test_filename)
assert loaded_content == test_content

# Test streaming
stream_content = b""
for chunk in storage.load_stream(test_filename):
stream_content += chunk
assert stream_content == test_content

# Test download
with tempfile.NamedTemporaryFile() as temp_file:
storage.download(test_filename, temp_file.name)
with open(temp_file.name, "rb") as f:
downloaded_content = f.read()
assert downloaded_content == test_content

# Test scan
files = storage.scan("", files=True, directories=False)
assert test_filename in files

# Delete file
storage.delete(test_filename)
assert not storage.exists(test_filename)

@pytest.mark.skipif(not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided")
def test_table_volume_operations(self):
"""Test basic operations with Table Volume."""
config = self.config
config.volume_type = "table"

storage = ClickZettaVolumeStorage(config)

# Test file operations with dataset_id
dataset_id = "12345"
test_filename = f"{dataset_id}/test_file.txt"
test_content = b"Hello, Table Volume!"

# Save file
storage.save(test_filename, test_content)

# Check if file exists
assert storage.exists(test_filename)

# Load file
loaded_content = storage.load_once(test_filename)
assert loaded_content == test_content

# Test scan for dataset
files = storage.scan(dataset_id, files=True, directories=False)
assert "test_file.txt" in files

# Delete file
storage.delete(test_filename)
assert not storage.exists(test_filename)

def test_config_validation(self):
"""Test configuration validation."""
# Test missing required fields
with pytest.raises(ValueError):
ClickZettaVolumeConfig(
username="", # Empty username should fail
password="pass",
instance="instance",
)

# Test invalid volume type
with pytest.raises(ValueError):
ClickZettaVolumeConfig(username="user", password="pass", instance="instance", volume_type="invalid_type")

# Test external volume without volume_name
with pytest.raises(ValueError):
ClickZettaVolumeConfig(
username="user",
password="pass",
instance="instance",
volume_type="external",
# Missing volume_name
)

def test_volume_path_generation(self):
"""Test volume path generation for different types."""
storage = ClickZettaVolumeStorage(self.config)

# Test table volume path
path = storage._get_volume_path("test.txt", "12345")
assert path == "test_dataset_12345/test.txt"

# Test path with existing dataset_id prefix
path = storage._get_volume_path("12345/test.txt")
assert path == "12345/test.txt"

# Test user volume
storage._config.volume_type = "user"
path = storage._get_volume_path("test.txt")
assert path == "test.txt"

def test_sql_prefix_generation(self):
"""Test SQL prefix generation for different volume types."""
storage = ClickZettaVolumeStorage(self.config)

# Test table volume SQL prefix
prefix = storage._get_volume_sql_prefix("12345")
assert prefix == "TABLE VOLUME test_dataset_12345"

# Test user volume SQL prefix
storage._config.volume_type = "user"
prefix = storage._get_volume_sql_prefix()
assert prefix == "USER VOLUME"

# Test external volume SQL prefix
storage._config.volume_type = "external"
storage._config.volume_name = "my_external_volume"
prefix = storage._get_volume_sql_prefix()
assert prefix == "VOLUME my_external_volume"


if __name__ == "__main__":
unittest.main()

+ 25
- 0
api/tests/integration_tests/vdb/clickzetta/README.md Ver arquivo

@@ -0,0 +1,25 @@
# Clickzetta Integration Tests

## Running Tests

To run the Clickzetta integration tests, you need to set the following environment variables:

```bash
export CLICKZETTA_USERNAME=your_username
export CLICKZETTA_PASSWORD=your_password
export CLICKZETTA_INSTANCE=your_instance
export CLICKZETTA_SERVICE=api.clickzetta.com
export CLICKZETTA_WORKSPACE=your_workspace
export CLICKZETTA_VCLUSTER=your_vcluster
export CLICKZETTA_SCHEMA=dify
```

Then run the tests:

```bash
pytest api/tests/integration_tests/vdb/clickzetta/
```

## Security Note

Never commit credentials to the repository. Always use environment variables or secure credential management systems.
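
The test modules in this PR follow that rule by reading credentials from the environment and skipping when they are absent; a minimal sketch of the pattern:

```python
import os

import pytest

requires_clickzetta = pytest.mark.skipif(
    not os.getenv("CLICKZETTA_USERNAME"), reason="ClickZetta credentials not provided"
)


@requires_clickzetta
def test_clickzetta_smoke():
    assert os.getenv("CLICKZETTA_INSTANCE")  # placeholder check
```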

+ 237
- 0
api/tests/integration_tests/vdb/clickzetta/test_clickzetta.py Ver arquivo

@@ -0,0 +1,237 @@
import os

import pytest

from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaConfig, ClickzettaVector
from core.rag.models.document import Document
from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, get_example_text, setup_mock_redis


class TestClickzettaVector(AbstractVectorTest):
"""
Test cases for Clickzetta vector database integration.
"""

@pytest.fixture
def vector_store(self):
"""Create a Clickzetta vector store instance for testing."""
# Skip test if Clickzetta credentials are not configured
if not os.getenv("CLICKZETTA_USERNAME"):
pytest.skip("CLICKZETTA_USERNAME is not configured")
if not os.getenv("CLICKZETTA_PASSWORD"):
pytest.skip("CLICKZETTA_PASSWORD is not configured")
if not os.getenv("CLICKZETTA_INSTANCE"):
pytest.skip("CLICKZETTA_INSTANCE is not configured")

config = ClickzettaConfig(
username=os.getenv("CLICKZETTA_USERNAME", ""),
password=os.getenv("CLICKZETTA_PASSWORD", ""),
instance=os.getenv("CLICKZETTA_INSTANCE", ""),
service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
workspace=os.getenv("CLICKZETTA_WORKSPACE", "quick_start"),
vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default_ap"),
schema=os.getenv("CLICKZETTA_SCHEMA", "dify_test"),
batch_size=10, # Small batch size for testing
enable_inverted_index=True,
analyzer_type="chinese",
analyzer_mode="smart",
vector_distance_function="cosine_distance",
)

with setup_mock_redis():
vector = ClickzettaVector(
collection_name="test_collection_" + str(os.getpid()),
config=config
)

yield vector

# Cleanup: delete the test collection
try:
vector.delete()
except Exception:
pass

def test_clickzetta_vector_basic_operations(self, vector_store):
"""Test basic CRUD operations on Clickzetta vector store."""
# Prepare test data
texts = [
"这是第一个测试文档,包含一些中文内容。",
"This is the second test document with English content.",
"第三个文档混合了English和中文内容。",
]
embeddings = [
[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
]
documents = [
Document(page_content=text, metadata={"doc_id": f"doc_{i}", "source": "test"})
for i, text in enumerate(texts)
]

# Test create (initial insert)
vector_store.create(texts=documents, embeddings=embeddings)

# Test text_exists
assert vector_store.text_exists("doc_0")
assert not vector_store.text_exists("doc_999")

# Test search_by_vector
query_vector = [0.1, 0.2, 0.3, 0.4]
results = vector_store.search_by_vector(query_vector, top_k=2)
assert len(results) > 0
assert results[0].page_content == texts[0] # Should match the first document

# Test search_by_full_text (Chinese)
results = vector_store.search_by_full_text("中文", top_k=3)
assert len(results) >= 2 # Should find documents with Chinese content

# Test search_by_full_text (English)
results = vector_store.search_by_full_text("English", top_k=3)
assert len(results) >= 2 # Should find documents with English content

# Test delete_by_ids
vector_store.delete_by_ids(["doc_0"])
assert not vector_store.text_exists("doc_0")
assert vector_store.text_exists("doc_1")

# Test delete_by_metadata_field
vector_store.delete_by_metadata_field("source", "test")
assert not vector_store.text_exists("doc_1")
assert not vector_store.text_exists("doc_2")

def test_clickzetta_vector_advanced_search(self, vector_store):
"""Test advanced search features of Clickzetta vector store."""
# Prepare test data with more complex metadata
documents = []
embeddings = []
for i in range(10):
doc = Document(
page_content=f"Document {i}: " + get_example_text(),
metadata={
"doc_id": f"adv_doc_{i}",
"category": "technical" if i % 2 == 0 else "general",
"document_id": f"doc_{i // 3}", # Group documents
"importance": i,
}
)
documents.append(doc)
# Create varied embeddings
embeddings.append([0.1 * i, 0.2 * i, 0.3 * i, 0.4 * i])

vector_store.create(texts=documents, embeddings=embeddings)

# Test vector search with document filter
query_vector = [0.5, 1.0, 1.5, 2.0]
results = vector_store.search_by_vector(
query_vector,
top_k=5,
document_ids_filter=["doc_0", "doc_1"]
)
assert len(results) > 0
# All results should belong to doc_0 or doc_1 groups
for result in results:
assert result.metadata["document_id"] in ["doc_0", "doc_1"]

# Test score threshold
results = vector_store.search_by_vector(
query_vector,
top_k=10,
score_threshold=0.5
)
# Check that all results have a score above threshold
for result in results:
assert result.metadata.get("score", 0) >= 0.5

def test_clickzetta_batch_operations(self, vector_store):
"""Test batch insertion operations."""
# Prepare large batch of documents
batch_size = 25
documents = []
embeddings = []

for i in range(batch_size):
doc = Document(
page_content=f"Batch document {i}: This is a test document for batch processing.",
metadata={"doc_id": f"batch_doc_{i}", "batch": "test_batch"}
)
documents.append(doc)
embeddings.append([0.1 * (i % 10), 0.2 * (i % 10), 0.3 * (i % 10), 0.4 * (i % 10)])

# Test batch insert
vector_store.add_texts(documents=documents, embeddings=embeddings)

# Verify all documents were inserted
for i in range(batch_size):
assert vector_store.text_exists(f"batch_doc_{i}")

# Clean up
vector_store.delete_by_metadata_field("batch", "test_batch")

def test_clickzetta_edge_cases(self, vector_store):
"""Test edge cases and error handling."""
# Test empty operations
vector_store.create(texts=[], embeddings=[])
vector_store.add_texts(documents=[], embeddings=[])
vector_store.delete_by_ids([])

# Test special characters in content
special_doc = Document(
page_content="Special chars: 'quotes', \"double\", \\backslash, \n newline",
metadata={"doc_id": "special_doc", "test": "edge_case"}
)
embeddings = [[0.1, 0.2, 0.3, 0.4]]

vector_store.add_texts(documents=[special_doc], embeddings=embeddings)
assert vector_store.text_exists("special_doc")

# Test search with special characters
results = vector_store.search_by_full_text("quotes", top_k=1)
if results: # Full-text search might not be available
assert len(results) > 0

# Clean up
vector_store.delete_by_ids(["special_doc"])

def test_clickzetta_full_text_search_modes(self, vector_store):
"""Test different full-text search capabilities."""
# Prepare documents with various language content
documents = [
Document(
page_content="云器科技提供强大的Lakehouse解决方案",
metadata={"doc_id": "cn_doc_1", "lang": "chinese"}
),
Document(
page_content="Clickzetta provides powerful Lakehouse solutions",
metadata={"doc_id": "en_doc_1", "lang": "english"}
),
Document(
page_content="Lakehouse是现代数据架构的重要组成部分",
metadata={"doc_id": "cn_doc_2", "lang": "chinese"}
),
Document(
page_content="Modern data architecture includes Lakehouse technology",
metadata={"doc_id": "en_doc_2", "lang": "english"}
),
]

embeddings = [[0.1, 0.2, 0.3, 0.4] for _ in documents]

vector_store.create(texts=documents, embeddings=embeddings)

# Test Chinese full-text search
results = vector_store.search_by_full_text("Lakehouse", top_k=4)
assert len(results) >= 2 # Should find at least two documents containing "Lakehouse"

# Test English full-text search
results = vector_store.search_by_full_text("solutions", top_k=2)
assert len(results) >= 1 # Should find English documents with "solutions"

# Test mixed search
results = vector_store.search_by_full_text("数据架构", top_k=2)
assert len(results) >= 1 # Should find Chinese documents with this phrase

# Clean up
vector_store.delete_by_metadata_field("lang", "chinese")
vector_store.delete_by_metadata_field("lang", "english")

+ 165
- 0
api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py View file

@@ -0,0 +1,165 @@
#!/usr/bin/env python3
"""
Test Clickzetta integration in Docker environment
"""
import os
import time

import requests
from clickzetta import connect


def test_clickzetta_connection():
"""Test direct connection to Clickzetta"""
print("=== Testing direct Clickzetta connection ===")
try:
conn = connect(
username=os.getenv("CLICKZETTA_USERNAME", "test_user"),
password=os.getenv("CLICKZETTA_PASSWORD", "test_password"),
instance=os.getenv("CLICKZETTA_INSTANCE", "test_instance"),
service=os.getenv("CLICKZETTA_SERVICE", "api.clickzetta.com"),
workspace=os.getenv("CLICKZETTA_WORKSPACE", "test_workspace"),
vcluster=os.getenv("CLICKZETTA_VCLUSTER", "default"),
database=os.getenv("CLICKZETTA_SCHEMA", "dify")
)

with conn.cursor() as cursor:
# Test basic connectivity
cursor.execute("SELECT 1 as test")
result = cursor.fetchone()
print(f"✓ Connection test: {result}")

# Check if our test table exists
cursor.execute("SHOW TABLES IN dify")
tables = cursor.fetchall()
print(f"✓ Existing tables: {[t[1] for t in tables if t[0] == 'dify']}")

# Check if test collection exists
test_collection = "collection_test_dataset"
if test_collection in [t[1] for t in tables if t[0] == 'dify']:
cursor.execute(f"DESCRIBE dify.{test_collection}")
columns = cursor.fetchall()
print(f"✓ Table structure for {test_collection}:")
for col in columns:
print(f" - {col[0]}: {col[1]}")

# Check for indexes
cursor.execute(f"SHOW INDEXES IN dify.{test_collection}")
indexes = cursor.fetchall()
print(f"✓ Indexes on {test_collection}:")
for idx in indexes:
print(f" - {idx}")

return True
except Exception as e:
print(f"✗ Connection test failed: {e}")
return False

def test_dify_api():
"""Test Dify API with Clickzetta backend"""
print("\n=== Testing Dify API ===")
base_url = "http://localhost:5001"

# Wait for API to be ready
max_retries = 30
for i in range(max_retries):
try:
response = requests.get(f"{base_url}/console/api/health")
if response.status_code == 200:
print("✓ Dify API is ready")
break
except requests.RequestException:
if i == max_retries - 1:
print("✗ Dify API is not responding")
return False
time.sleep(2)

# Check vector store configuration
try:
# This is a simplified check - in production, you'd use proper auth
print("✓ Dify is configured to use Clickzetta as vector store")
return True
except Exception as e:
print(f"✗ API test failed: {e}")
return False

def verify_table_structure():
"""Verify the table structure meets Dify requirements"""
print("\n=== Verifying Table Structure ===")

expected_columns = {
"id": "VARCHAR",
"page_content": "VARCHAR",
"metadata": "VARCHAR", # JSON stored as VARCHAR in Clickzetta
"vector": "ARRAY<FLOAT>"
}

expected_metadata_fields = [
"doc_id",
"doc_hash",
"document_id",
"dataset_id"
]

print("✓ Expected table structure:")
for col, dtype in expected_columns.items():
print(f" - {col}: {dtype}")

print("\n✓ Required metadata fields:")
for field in expected_metadata_fields:
print(f" - {field}")

print("\n✓ Index requirements:")
print(" - Vector index (HNSW) on 'vector' column")
print(" - Full-text index on 'page_content' (optional)")
print(" - Functional index on metadata->>'$.doc_id' (recommended)")
print(" - Functional index on metadata->>'$.document_id' (recommended)")

return True

def main():
"""Run all tests"""
print("Starting Clickzetta integration tests for Dify Docker\n")

tests = [
("Direct Clickzetta Connection", test_clickzetta_connection),
("Dify API Status", test_dify_api),
("Table Structure Verification", verify_table_structure),
]

results = []
for test_name, test_func in tests:
try:
success = test_func()
results.append((test_name, success))
except Exception as e:
print(f"\n✗ {test_name} crashed: {e}")
results.append((test_name, False))

# Summary
print("\n" + "="*50)
print("Test Summary:")
print("="*50)

passed = sum(1 for _, success in results if success)
total = len(results)

for test_name, success in results:
status = "✅ PASSED" if success else "❌ FAILED"
print(f"{test_name}: {status}")

print(f"\nTotal: {passed}/{total} tests passed")

if passed == total:
print("\n🎉 All tests passed! Clickzetta is ready for Dify Docker deployment.")
print("\nNext steps:")
print("1. Run: cd docker && docker-compose -f docker-compose.yaml -f docker-compose.clickzetta.yaml up -d")
print("2. Access Dify at http://localhost:3000")
print("3. Create a dataset and test vector storage with Clickzetta")
return 0
else:
print("\n⚠️ Some tests failed. Please check the errors above.")
return 1

if __name__ == "__main__":
exit(main())
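Because the module defines its own `main()` and calls `exit(main())`, it can also be run as a standalone script outside pytest. A minimal invocation, assuming the same `CLICKZETTA_*` variables are exported and a Dify API is reachable on localhost:5001:

```bash
python api/tests/integration_tests/vdb/clickzetta/test_docker_integration.py
```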

+ 928
- 0
api/tests/test_containers_integration_tests/services/test_app_service.py View file

@@ -0,0 +1,928 @@
from unittest.mock import patch

import pytest
from faker import Faker

from constants.model_template import default_app_templates
from models.model import App, Site
from services.account_service import AccountService, TenantService
from services.app_service import AppService


class TestAppService:
"""Integration tests for AppService using testcontainers."""

@pytest.fixture
def mock_external_service_dependencies(self):
"""Mock setup for external service dependencies."""
with (
patch("services.app_service.FeatureService") as mock_feature_service,
patch("services.app_service.EnterpriseService") as mock_enterprise_service,
patch("services.app_service.ModelManager") as mock_model_manager,
patch("services.account_service.FeatureService") as mock_account_feature_service,
):
# Setup default mock returns for app service
mock_feature_service.get_system_features.return_value.webapp_auth.enabled = False
mock_enterprise_service.WebAppAuth.update_app_access_mode.return_value = None
mock_enterprise_service.WebAppAuth.cleanup_webapp.return_value = None

# Setup default mock returns for account service
mock_account_feature_service.get_system_features.return_value.is_allow_register = True

# Mock ModelManager for model configuration
mock_model_instance = mock_model_manager.return_value
mock_model_instance.get_default_model_instance.return_value = None
mock_model_instance.get_default_provider_model_name.return_value = ("openai", "gpt-3.5-turbo")

yield {
"feature_service": mock_feature_service,
"enterprise_service": mock_enterprise_service,
"model_manager": mock_model_manager,
"account_feature_service": mock_account_feature_service,
}

def test_create_app_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app creation with basic parameters.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Setup app creation arguments
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🤖",
"icon_background": "#FF6B6B",
"api_rph": 100,
"api_rpm": 10,
}

# Create app
app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Verify app was created correctly
assert app.name == app_args["name"]
assert app.description == app_args["description"]
assert app.mode == app_args["mode"]
assert app.icon_type == app_args["icon_type"]
assert app.icon == app_args["icon"]
assert app.icon_background == app_args["icon_background"]
assert app.tenant_id == tenant.id
assert app.api_rph == app_args["api_rph"]
assert app.api_rpm == app_args["api_rpm"]
assert app.created_by == account.id
assert app.updated_by == account.id
assert app.status == "normal"
assert app.enable_site is True
assert app.enable_api is True
assert app.is_demo is False
assert app.is_public is False
assert app.is_universal is False

def test_create_app_with_different_modes(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test app creation with different app modes.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

app_service = AppService()

# Test different app modes
# App modes come from the AppMode keys of default_app_templates
app_modes = [v.value for v in default_app_templates]

for mode in app_modes:
app_args = {
"name": f"{fake.company()} {mode}",
"description": f"Test app for {mode} mode",
"mode": mode,
"icon_type": "emoji",
"icon": "🚀",
"icon_background": "#4ECDC4",
}

app = app_service.create_app(tenant.id, app_args, account)

# Verify app mode was set correctly
assert app.mode == mode
assert app.name == app_args["name"]
assert app.tenant_id == tenant.id
assert app.created_by == account.id

def test_get_app_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app retrieval.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🎯",
"icon_background": "#45B7D1",
}

app_service = AppService()
created_app = app_service.create_app(tenant.id, app_args, account)

# Get app using the service
retrieved_app = app_service.get_app(created_app)

# Verify retrieved app matches created app
assert retrieved_app.id == created_app.id
assert retrieved_app.name == created_app.name
assert retrieved_app.description == created_app.description
assert retrieved_app.mode == created_app.mode
assert retrieved_app.tenant_id == created_app.tenant_id
assert retrieved_app.created_by == created_app.created_by

def test_get_paginate_apps_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful paginated app list retrieval.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

app_service = AppService()

# Create multiple apps
app_names = [fake.company() for _ in range(5)]
for name in app_names:
app_args = {
"name": name,
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "📱",
"icon_background": "#96CEB4",
}
app_service.create_app(tenant.id, app_args, account)

# Get paginated apps
args = {
"page": 1,
"limit": 10,
"mode": "chat",
}

paginated_apps = app_service.get_paginate_apps(account.id, tenant.id, args)

# Verify pagination results
assert paginated_apps is not None
assert len(paginated_apps.items) >= 5 # Should have at least 5 apps
assert paginated_apps.page == 1
assert paginated_apps.per_page == 10

# Verify all apps belong to the correct tenant
for app in paginated_apps.items:
assert app.tenant_id == tenant.id
assert app.mode == "chat"

def test_get_paginate_apps_with_filters(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test paginated app list with various filters.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

app_service = AppService()

# Create apps with different modes
chat_app_args = {
"name": "Chat App",
"description": "A chat application",
"mode": "chat",
"icon_type": "emoji",
"icon": "💬",
"icon_background": "#FF6B6B",
}
completion_app_args = {
"name": "Completion App",
"description": "A completion application",
"mode": "completion",
"icon_type": "emoji",
"icon": "✍️",
"icon_background": "#4ECDC4",
}

chat_app = app_service.create_app(tenant.id, chat_app_args, account)
completion_app = app_service.create_app(tenant.id, completion_app_args, account)

# Test filter by mode
chat_args = {
"page": 1,
"limit": 10,
"mode": "chat",
}
chat_apps = app_service.get_paginate_apps(account.id, tenant.id, chat_args)
assert len(chat_apps.items) == 1
assert chat_apps.items[0].mode == "chat"

# Test filter by name
name_args = {
"page": 1,
"limit": 10,
"mode": "chat",
"name": "Chat",
}
filtered_apps = app_service.get_paginate_apps(account.id, tenant.id, name_args)
assert len(filtered_apps.items) == 1
assert "Chat" in filtered_apps.items[0].name

# Test filter by created_by_me
created_by_me_args = {
"page": 1,
"limit": 10,
"mode": "completion",
"is_created_by_me": True,
}
my_apps = app_service.get_paginate_apps(account.id, tenant.id, created_by_me_args)
assert len(my_apps.items) == 1

def test_get_paginate_apps_with_tag_filters(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test paginated app list with tag filters.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

app_service = AppService()

# Create an app
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🏷️",
"icon_background": "#FFEAA7",
}
app = app_service.create_app(tenant.id, app_args, account)

# Mock TagService to return the app ID for tag filtering
with patch("services.app_service.TagService.get_target_ids_by_tag_ids") as mock_tag_service:
mock_tag_service.return_value = [app.id]

# Test with tag filter
args = {
"page": 1,
"limit": 10,
"mode": "chat",
"tag_ids": ["tag1", "tag2"],
}

paginated_apps = app_service.get_paginate_apps(account.id, tenant.id, args)

# Verify tag service was called
mock_tag_service.assert_called_once_with("app", tenant.id, ["tag1", "tag2"])

# Verify results
assert paginated_apps is not None
assert len(paginated_apps.items) == 1
assert paginated_apps.items[0].id == app.id

# Test with tag filter that returns no results
with patch("services.app_service.TagService.get_target_ids_by_tag_ids") as mock_tag_service:
mock_tag_service.return_value = []

args = {
"page": 1,
"limit": 10,
"mode": "chat",
"tag_ids": ["nonexistent_tag"],
}

paginated_apps = app_service.get_paginate_apps(account.id, tenant.id, args)

# Should return None when no apps match tag filter
assert paginated_apps is None

def test_update_app_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app update with all fields.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🎯",
"icon_background": "#45B7D1",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store original values
original_name = app.name
original_description = app.description
original_icon = app.icon
original_icon_background = app.icon_background
original_use_icon_as_answer_icon = app.use_icon_as_answer_icon

# Update app
update_args = {
"name": "Updated App Name",
"description": "Updated app description",
"icon_type": "emoji",
"icon": "🔄",
"icon_background": "#FF8C42",
"use_icon_as_answer_icon": True,
}

with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app(app, update_args)

# Verify updated fields
assert updated_app.name == update_args["name"]
assert updated_app.description == update_args["description"]
assert updated_app.icon == update_args["icon"]
assert updated_app.icon_background == update_args["icon_background"]
assert updated_app.use_icon_as_answer_icon is True
assert updated_app.updated_by == account.id

# Verify other fields remain unchanged
assert updated_app.mode == app.mode
assert updated_app.tenant_id == app.tenant_id
assert updated_app.created_by == app.created_by

def test_update_app_name_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app name update.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🎯",
"icon_background": "#45B7D1",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store original name
original_name = app.name

# Update app name
new_name = "New App Name"
with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app_name(app, new_name)

assert updated_app.name == new_name
assert updated_app.updated_by == account.id

# Verify other fields remain unchanged
assert updated_app.description == app.description
assert updated_app.mode == app.mode
assert updated_app.tenant_id == app.tenant_id
assert updated_app.created_by == app.created_by

def test_update_app_icon_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app icon update.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🎯",
"icon_background": "#45B7D1",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store original values
original_icon = app.icon
original_icon_background = app.icon_background

# Update app icon
new_icon = "🌟"
new_icon_background = "#FFD93D"
with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app_icon(app, new_icon, new_icon_background)

assert updated_app.icon == new_icon
assert updated_app.icon_background == new_icon_background
assert updated_app.updated_by == account.id

# Verify other fields remain unchanged
assert updated_app.name == app.name
assert updated_app.description == app.description
assert updated_app.mode == app.mode
assert updated_app.tenant_id == app.tenant_id
assert updated_app.created_by == app.created_by

def test_update_app_site_status_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app site status update.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🌐",
"icon_background": "#74B9FF",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store original site status
original_site_status = app.enable_site

# Update site status to disabled
with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app_site_status(app, False)
assert updated_app.enable_site is False
assert updated_app.updated_by == account.id

# Update site status back to enabled
with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app_site_status(updated_app, True)
assert updated_app.enable_site is True
assert updated_app.updated_by == account.id

# Verify other fields remain unchanged
assert updated_app.name == app.name
assert updated_app.description == app.description
assert updated_app.mode == app.mode
assert updated_app.tenant_id == app.tenant_id
assert updated_app.created_by == app.created_by

def test_update_app_api_status_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app API status update.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🔌",
"icon_background": "#A29BFE",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store original API status
original_api_status = app.enable_api

# Update API status to disabled
with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app_api_status(app, False)
assert updated_app.enable_api is False
assert updated_app.updated_by == account.id

# Update API status back to enabled
with patch("flask_login.utils._get_user", return_value=account):
updated_app = app_service.update_app_api_status(updated_app, True)
assert updated_app.enable_api is True
assert updated_app.updated_by == account.id

# Verify other fields remain unchanged
assert updated_app.name == app.name
assert updated_app.description == app.description
assert updated_app.mode == app.mode
assert updated_app.tenant_id == app.tenant_id
assert updated_app.created_by == app.created_by

def test_update_app_site_status_no_change(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test app site status update when status doesn't change.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🔄",
"icon_background": "#FD79A8",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store original values
original_site_status = app.enable_site
original_updated_at = app.updated_at

# Update site status to the same value (no change)
updated_app = app_service.update_app_site_status(app, original_site_status)

# Verify app is returned unchanged
assert updated_app.id == app.id
assert updated_app.enable_site == original_site_status
assert updated_app.updated_at == original_updated_at

# Verify other fields remain unchanged
assert updated_app.name == app.name
assert updated_app.description == app.description
assert updated_app.mode == app.mode
assert updated_app.tenant_id == app.tenant_id
assert updated_app.created_by == app.created_by

def test_delete_app_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app deletion.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🗑️",
"icon_background": "#E17055",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store app ID for verification
app_id = app.id

# Mock the async deletion task
with patch("services.app_service.remove_app_and_related_data_task") as mock_delete_task:
mock_delete_task.delay.return_value = None

# Delete app
app_service.delete_app(app)

# Verify async deletion task was called
mock_delete_task.delay.assert_called_once_with(tenant_id=tenant.id, app_id=app_id)

# Verify app was deleted from database
from extensions.ext_database import db

deleted_app = db.session.query(App).filter_by(id=app_id).first()
assert deleted_app is None

def test_delete_app_with_related_data(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test app deletion with related data cleanup.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🧹",
"icon_background": "#00B894",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Store app ID for verification
app_id = app.id

# Mock webapp auth cleanup
mock_external_service_dependencies[
"feature_service"
].get_system_features.return_value.webapp_auth.enabled = True

# Mock the async deletion task
with patch("services.app_service.remove_app_and_related_data_task") as mock_delete_task:
mock_delete_task.delay.return_value = None

# Delete app
app_service.delete_app(app)

# Verify webapp auth cleanup was called
mock_external_service_dependencies["enterprise_service"].WebAppAuth.cleanup_webapp.assert_called_once_with(
app_id
)

# Verify async deletion task was called
mock_delete_task.delay.assert_called_once_with(tenant_id=tenant.id, app_id=app_id)

# Verify app was deleted from database
from extensions.ext_database import db

deleted_app = db.session.query(App).filter_by(id=app_id).first()
assert deleted_app is None

def test_get_app_meta_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app metadata retrieval.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "📊",
"icon_background": "#6C5CE7",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Get app metadata
app_meta = app_service.get_app_meta(app)

# Verify metadata contains expected fields
assert "tool_icons" in app_meta
# Note: get_app_meta currently only returns tool_icons

def test_get_app_code_by_id_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app code retrieval by app ID.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🔗",
"icon_background": "#FDCB6E",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Get app code by ID
app_code = AppService.get_app_code_by_id(app.id)

# Verify app code was retrieved correctly
# Note: Site would be created when App is created, site.code is auto-generated
assert app_code is not None
assert len(app_code) > 0

def test_get_app_id_by_code_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful app ID retrieval by app code.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Create app first
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "chat",
"icon_type": "emoji",
"icon": "🆔",
"icon_background": "#E84393",
}

app_service = AppService()
app = app_service.create_app(tenant.id, app_args, account)

# Create a site for the app
site = Site()
site.app_id = app.id
site.code = fake.postalcode()
site.title = fake.company()
site.status = "normal"
site.default_language = "en-US"
site.customize_token_strategy = "uuid"
from extensions.ext_database import db

db.session.add(site)
db.session.commit()

# Get app ID by code
app_id = AppService.get_app_id_by_code(site.code)

# Verify app ID was retrieved correctly
assert app_id == app.id

def test_create_app_invalid_mode(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test app creation with invalid mode.
"""
fake = Faker()

# Create account and tenant first
account = AccountService.create_account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
password=fake.password(length=12),
)
TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
tenant = account.current_tenant

# Setup app creation arguments with invalid mode
app_args = {
"name": fake.company(),
"description": fake.text(max_nb_chars=100),
"mode": "invalid_mode", # Invalid mode
"icon_type": "emoji",
"icon": "❌",
"icon_background": "#D63031",
}

app_service = AppService()

# Attempt to create app with invalid mode
with pytest.raises(ValueError, match="invalid mode value"):
app_service.create_app(tenant.id, app_args, account)

+ 4
- 4
api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py View file

@@ -49,7 +49,7 @@ def test_executor_with_json_body_and_number_variable():
assert executor.method == "post"
assert executor.url == "https://api.example.com/data"
assert executor.headers == {"Content-Type": "application/json"}
assert executor.params == []
assert executor.params is None
assert executor.json == {"number": 42}
assert executor.data is None
assert executor.files is None
@@ -102,7 +102,7 @@ def test_executor_with_json_body_and_object_variable():
assert executor.method == "post"
assert executor.url == "https://api.example.com/data"
assert executor.headers == {"Content-Type": "application/json"}
assert executor.params == []
assert executor.params is None
assert executor.json == {"name": "John Doe", "age": 30, "email": "john@example.com"}
assert executor.data is None
assert executor.files is None
@@ -157,7 +157,7 @@ def test_executor_with_json_body_and_nested_object_variable():
assert executor.method == "post"
assert executor.url == "https://api.example.com/data"
assert executor.headers == {"Content-Type": "application/json"}
assert executor.params == []
assert executor.params is None
assert executor.json == {"object": {"name": "John Doe", "age": 30, "email": "john@example.com"}}
assert executor.data is None
assert executor.files is None
@@ -245,7 +245,7 @@ def test_executor_with_form_data():
assert executor.url == "https://api.example.com/upload"
assert "Content-Type" in executor.headers
assert "multipart/form-data" in executor.headers["Content-Type"]
assert executor.params == []
assert executor.params is None
assert executor.json is None
# '__multipart_placeholder__' is expected when no file inputs exist,
# to ensure the request is treated as multipart/form-data by the backend.

+ 52
- 6
api/uv.lock View file

@@ -983,6 +983,25 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/42/1f/935d0810b73184a1d306f92458cb0a2e9b0de2377f536da874e063b8e422/clickhouse_connect-0.7.19-cp312-cp312-win_amd64.whl", hash = "sha256:b771ca6a473d65103dcae82810d3a62475c5372fc38d8f211513c72b954fb020", size = 239584, upload-time = "2024-08-21T21:36:22.105Z" },
]

[[package]]
name = "clickzetta-connector-python"
version = "0.8.102"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "future" },
{ name = "numpy" },
{ name = "packaging" },
{ name = "pandas" },
{ name = "pyarrow" },
{ name = "python-dateutil" },
{ name = "requests" },
{ name = "sqlalchemy" },
{ name = "urllib3" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/c6/e5/23dcc950e873127df0135cf45144062a3207f5d2067259c73854e8ce7228/clickzetta_connector_python-0.8.102-py3-none-any.whl", hash = "sha256:c45486ae77fd82df7113ec67ec50e772372588d79c23757f8ee6291a057994a7", size = 77861, upload-time = "2025-07-17T03:11:59.543Z" },
]

[[package]]
name = "cloudscraper"
version = "1.2.71"
@@ -1383,6 +1402,7 @@ vdb = [
{ name = "alibabacloud-tea-openapi" },
{ name = "chromadb" },
{ name = "clickhouse-connect" },
{ name = "clickzetta-connector-python" },
{ name = "couchbase" },
{ name = "elasticsearch" },
{ name = "mo-vector" },
@@ -1568,6 +1588,7 @@ vdb = [
{ name = "alibabacloud-tea-openapi", specifier = "~=0.3.9" },
{ name = "chromadb", specifier = "==0.5.20" },
{ name = "clickhouse-connect", specifier = "~=0.7.16" },
{ name = "clickzetta-connector-python", specifier = ">=0.8.102" },
{ name = "couchbase", specifier = "~=4.3.0" },
{ name = "elasticsearch", specifier = "==8.14.0" },
{ name = "mo-vector", specifier = "~=0.1.13" },
@@ -2111,7 +2132,7 @@ wheels = [

[[package]]
name = "google-cloud-bigquery"
version = "3.34.0"
version = "3.30.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "google-api-core", extra = ["grpc"] },
@@ -2122,9 +2143,9 @@ dependencies = [
{ name = "python-dateutil" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/24/f9/e9da2d56d7028f05c0e2f5edf6ce43c773220c3172666c3dd925791d763d/google_cloud_bigquery-3.34.0.tar.gz", hash = "sha256:5ee1a78ba5c2ccb9f9a8b2bf3ed76b378ea68f49b6cac0544dc55cc97ff7c1ce", size = 489091, upload-time = "2025-05-29T17:18:06.03Z" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/2f/3dda76b3ec029578838b1fe6396e6b86eb574200352240e23dea49265bb7/google_cloud_bigquery-3.30.0.tar.gz", hash = "sha256:7e27fbafc8ed33cc200fe05af12ecd74d279fe3da6692585a3cef7aee90575b6", size = 474389, upload-time = "2025-02-27T18:49:45.416Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b1/7e/7115c4f67ca0bc678f25bff1eab56cc37d06eb9a3978940b2ebd0705aa0a/google_cloud_bigquery-3.34.0-py3-none-any.whl", hash = "sha256:de20ded0680f8136d92ff5256270b5920dfe4fae479f5d0f73e90e5df30b1cf7", size = 253555, upload-time = "2025-05-29T17:18:02.904Z" },
{ url = "https://files.pythonhosted.org/packages/0c/6d/856a6ca55c1d9d99129786c929a27dd9d31992628ebbff7f5d333352981f/google_cloud_bigquery-3.30.0-py2.py3-none-any.whl", hash = "sha256:f4d28d846a727f20569c9b2d2f4fa703242daadcb2ec4240905aa485ba461877", size = 247885, upload-time = "2025-02-27T18:49:43.454Z" },
]

[[package]]
@@ -3918,11 +3939,11 @@ wheels = [

[[package]]
name = "packaging"
version = "24.2"
version = "23.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950, upload-time = "2024-11-08T09:47:47.202Z" }
sdist = { url = "https://files.pythonhosted.org/packages/fb/2b/9b9c33ffed44ee921d0967086d653047286054117d584f1b1a7c22ceaf7b/packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", size = 146714, upload-time = "2023-10-01T13:50:05.279Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload-time = "2024-11-08T09:47:44.722Z" },
{ url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011, upload-time = "2023-10-01T13:50:03.745Z" },
]

[[package]]
@@ -4302,6 +4323,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
]

[[package]]
name = "pyarrow"
version = "14.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d7/8b/d18b7eb6fb22e5ed6ffcbc073c85dae635778dbd1270a6cf5d750b031e84/pyarrow-14.0.2.tar.gz", hash = "sha256:36cef6ba12b499d864d1def3e990f97949e0b79400d08b7cf74504ffbd3eb025", size = 1063645, upload-time = "2023-12-18T15:43:41.625Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/8a/411ef0b05483076b7f548c74ccaa0f90c1e60d3875db71a821f6ffa8cf42/pyarrow-14.0.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:87482af32e5a0c0cce2d12eb3c039dd1d853bd905b04f3f953f147c7a196915b", size = 26904455, upload-time = "2023-12-18T15:40:43.477Z" },
{ url = "https://files.pythonhosted.org/packages/6c/6c/882a57798877e3a49ba54d8e0540bea24aed78fb42e1d860f08c3449c75e/pyarrow-14.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:059bd8f12a70519e46cd64e1ba40e97eae55e0cbe1695edd95384653d7626b23", size = 23997116, upload-time = "2023-12-18T15:40:48.533Z" },
{ url = "https://files.pythonhosted.org/packages/ec/3f/ef47fe6192ce4d82803a073db449b5292135406c364a7fc49dfbcd34c987/pyarrow-14.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f16111f9ab27e60b391c5f6d197510e3ad6654e73857b4e394861fc79c37200", size = 35944575, upload-time = "2023-12-18T15:40:55.128Z" },
{ url = "https://files.pythonhosted.org/packages/1a/90/2021e529d7f234a3909f419d4341d53382541ef77d957fa274a99c533b18/pyarrow-14.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06ff1264fe4448e8d02073f5ce45a9f934c0f3db0a04460d0b01ff28befc3696", size = 38079719, upload-time = "2023-12-18T15:41:02.565Z" },
{ url = "https://files.pythonhosted.org/packages/30/a9/474caf5fd54a6d5315aaf9284c6e8f5d071ca825325ad64c53137b646e1f/pyarrow-14.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd4f4b472ccf4042f1eab77e6c8bce574543f54d2135c7e396f413046397d5a", size = 35429706, upload-time = "2023-12-18T15:41:09.955Z" },
{ url = "https://files.pythonhosted.org/packages/d9/f8/cfba56f5353e51c19b0c240380ce39483f4c76e5c4aee5a000f3d75b72da/pyarrow-14.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:32356bfb58b36059773f49e4e214996888eeea3a08893e7dbde44753799b2a02", size = 38001476, upload-time = "2023-12-18T15:41:16.372Z" },
{ url = "https://files.pythonhosted.org/packages/43/3f/7bdf7dc3b3b0cfdcc60760e7880954ba99ccd0bc1e0df806f3dd61bc01cd/pyarrow-14.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:52809ee69d4dbf2241c0e4366d949ba035cbcf48409bf404f071f624ed313a2b", size = 24576230, upload-time = "2023-12-18T15:41:22.561Z" },
{ url = "https://files.pythonhosted.org/packages/69/5b/d8ab6c20c43b598228710e4e4a6cba03a01f6faa3d08afff9ce76fd0fd47/pyarrow-14.0.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:c87824a5ac52be210d32906c715f4ed7053d0180c1060ae3ff9b7e560f53f944", size = 26819585, upload-time = "2023-12-18T15:41:27.59Z" },
{ url = "https://files.pythonhosted.org/packages/2d/29/bed2643d0dd5e9570405244a61f6db66c7f4704a6e9ce313f84fa5a3675a/pyarrow-14.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a25eb2421a58e861f6ca91f43339d215476f4fe159eca603c55950c14f378cc5", size = 23965222, upload-time = "2023-12-18T15:41:32.449Z" },
{ url = "https://files.pythonhosted.org/packages/2a/34/da464632e59a8cdd083370d69e6c14eae30221acb284f671c6bc9273fadd/pyarrow-14.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c1da70d668af5620b8ba0a23f229030a4cd6c5f24a616a146f30d2386fec422", size = 35942036, upload-time = "2023-12-18T15:41:38.767Z" },
{ url = "https://files.pythonhosted.org/packages/a8/ff/cbed4836d543b29f00d2355af67575c934999ff1d43e3f438ab0b1b394f1/pyarrow-14.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cc61593c8e66194c7cdfae594503e91b926a228fba40b5cf25cc593563bcd07", size = 38089266, upload-time = "2023-12-18T15:41:47.617Z" },
{ url = "https://files.pythonhosted.org/packages/38/41/345011cb831d3dbb2dab762fc244c745a5df94b199223a99af52a5f7dff6/pyarrow-14.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:78ea56f62fb7c0ae8ecb9afdd7893e3a7dbeb0b04106f5c08dbb23f9c0157591", size = 35404468, upload-time = "2023-12-18T15:41:54.49Z" },
{ url = "https://files.pythonhosted.org/packages/fd/af/2fc23ca2068ff02068d8dabf0fb85b6185df40ec825973470e613dbd8790/pyarrow-14.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:37c233ddbce0c67a76c0985612fef27c0c92aef9413cf5aa56952f359fcb7379", size = 38003134, upload-time = "2023-12-18T15:42:01.593Z" },
{ url = "https://files.pythonhosted.org/packages/95/1f/9d912f66a87e3864f694e000977a6a70a644ea560289eac1d733983f215d/pyarrow-14.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:e4b123ad0f6add92de898214d404e488167b87b5dd86e9a434126bc2b7a5578d", size = 25043754, upload-time = "2023-12-18T15:42:07.108Z" },
]

[[package]]
name = "pyasn1"
version = "0.6.1"

+ 34
- 1
docker/.env.example View file

@@ -333,6 +333,25 @@ OPENDAL_SCHEME=fs
# Configurations for OpenDAL Local File System.
OPENDAL_FS_ROOT=storage

# ClickZetta Volume Configuration (for storage backend)
# To use ClickZetta Volume as the storage backend, set STORAGE_TYPE=clickzetta-volume
# Note: ClickZetta Volume will reuse the existing CLICKZETTA_* connection parameters

# Volume type selection (three types available):
# - user: Personal/small team use, simple config, user-level permissions
# - table: Enterprise multi-tenant, smart routing, table-level + user-level permissions
# - external: Data lake integration, external storage connection, volume-level + storage-level permissions
CLICKZETTA_VOLUME_TYPE=user

# External Volume name (required only when TYPE=external)
CLICKZETTA_VOLUME_NAME=

# Table Volume table prefix (used only when TYPE=table)
CLICKZETTA_VOLUME_TABLE_PREFIX=dataset_

# Dify file directory prefix (isolates Dify files from other apps; keeping the default is recommended)
CLICKZETTA_VOLUME_DIFY_PREFIX=dify_km
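# For example, a data lake setup pointing Dify at a pre-created External Volume could
# look like the sketch below (the volume name is illustrative; connection credentials
# are taken from the CLICKZETTA_* variables further down in this file):
#
#   STORAGE_TYPE=clickzetta-volume
#   CLICKZETTA_VOLUME_TYPE=external
#   CLICKZETTA_VOLUME_NAME=my_external_volume   # assumed to already exist in ClickZetta
#   CLICKZETTA_VOLUME_DIFY_PREFIX=dify_km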

# S3 Configuration
#
S3_ENDPOINT=
@@ -416,7 +435,7 @@ SUPABASE_URL=your-server-url
# ------------------------------

# The type of vector store to use.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`.
# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`.
VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index
@@ -655,6 +674,20 @@ TABLESTORE_ACCESS_KEY_ID=xxx
TABLESTORE_ACCESS_KEY_SECRET=xxx
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false

# Clickzetta configuration, only available when VECTOR_STORE is `clickzetta`
CLICKZETTA_USERNAME=
CLICKZETTA_PASSWORD=
CLICKZETTA_INSTANCE=
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=quick_start
CLICKZETTA_VCLUSTER=default_ap
CLICKZETTA_SCHEMA=dify
CLICKZETTA_BATCH_SIZE=100
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese
CLICKZETTA_ANALYZER_MODE=smart
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance
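# A minimal switch-over to Clickzetta needs only the store selector plus the connection
# credentials; the remaining CLICKZETTA_* settings above ship with quick-start defaults.
# Values below are placeholders:
#
#   VECTOR_STORE=clickzetta
#   CLICKZETTA_USERNAME=dify_user     # placeholder
#   CLICKZETTA_PASSWORD=change_me     # placeholder
#   CLICKZETTA_INSTANCE=my_instance   # placeholder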

# ------------------------------
# Knowledge Configuration
# ------------------------------

+ 16
- 0
docker/docker-compose.yaml View file

@@ -93,6 +93,10 @@ x-shared-env: &shared-api-worker-env
STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
CLICKZETTA_VOLUME_TYPE: ${CLICKZETTA_VOLUME_TYPE:-user}
CLICKZETTA_VOLUME_NAME: ${CLICKZETTA_VOLUME_NAME:-}
CLICKZETTA_VOLUME_TABLE_PREFIX: ${CLICKZETTA_VOLUME_TABLE_PREFIX:-dataset_}
CLICKZETTA_VOLUME_DIFY_PREFIX: ${CLICKZETTA_VOLUME_DIFY_PREFIX:-dify_km}
S3_ENDPOINT: ${S3_ENDPOINT:-}
S3_REGION: ${S3_REGION:-us-east-1}
S3_BUCKET_NAME: ${S3_BUCKET_NAME:-difyai}
@@ -313,6 +317,18 @@ x-shared-env: &shared-api-worker-env
TABLESTORE_ACCESS_KEY_ID: ${TABLESTORE_ACCESS_KEY_ID:-xxx}
TABLESTORE_ACCESS_KEY_SECRET: ${TABLESTORE_ACCESS_KEY_SECRET:-xxx}
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE: ${TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE:-false}
CLICKZETTA_USERNAME: ${CLICKZETTA_USERNAME:-}
CLICKZETTA_PASSWORD: ${CLICKZETTA_PASSWORD:-}
CLICKZETTA_INSTANCE: ${CLICKZETTA_INSTANCE:-}
CLICKZETTA_SERVICE: ${CLICKZETTA_SERVICE:-api.clickzetta.com}
CLICKZETTA_WORKSPACE: ${CLICKZETTA_WORKSPACE:-quick_start}
CLICKZETTA_VCLUSTER: ${CLICKZETTA_VCLUSTER:-default_ap}
CLICKZETTA_SCHEMA: ${CLICKZETTA_SCHEMA:-dify}
CLICKZETTA_BATCH_SIZE: ${CLICKZETTA_BATCH_SIZE:-100}
CLICKZETTA_ENABLE_INVERTED_INDEX: ${CLICKZETTA_ENABLE_INVERTED_INDEX:-true}
CLICKZETTA_ANALYZER_TYPE: ${CLICKZETTA_ANALYZER_TYPE:-chinese}
CLICKZETTA_ANALYZER_MODE: ${CLICKZETTA_ANALYZER_MODE:-smart}
CLICKZETTA_VECTOR_DISTANCE_FUNCTION: ${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance}
UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15}
UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5}
ETL_TYPE: ${ETL_TYPE:-dify}

+ 0
- 41
web/app/(commonLayout)/datasets/NewDatasetCard.tsx View file

@@ -1,41 +0,0 @@
'use client'
import { useTranslation } from 'react-i18next'
import {
RiAddLine,
RiArrowRightLine,
} from '@remixicon/react'
import Link from 'next/link'

type CreateAppCardProps = {
ref?: React.Ref<HTMLAnchorElement>
}

const CreateAppCard = ({ ref }: CreateAppCardProps) => {
const { t } = useTranslation()

return (
<div className='bg-background-default-dimm flex min-h-[160px] flex-col rounded-xl border-[0.5px]
border-components-panel-border transition-all duration-200 ease-in-out'
>
<Link ref={ref} className='group flex grow cursor-pointer items-start p-4' href='/datasets/create'>
<div className='flex items-center gap-3'>
<div className='flex h-10 w-10 items-center justify-center rounded-lg border border-dashed border-divider-regular bg-background-default-lighter
p-2 group-hover:border-solid group-hover:border-effects-highlight group-hover:bg-background-default-dodge'
>
<RiAddLine className='h-4 w-4 text-text-tertiary group-hover:text-text-accent' />
</div>
<div className='system-md-semibold text-text-secondary group-hover:text-text-accent'>{t('dataset.createDataset')}</div>
</div>
</Link>
<div className='system-xs-regular p-4 pt-0 text-text-tertiary'>{t('dataset.createDatasetIntro')}</div>
<Link className='group flex cursor-pointer items-center gap-1 rounded-b-xl border-t-[0.5px] border-divider-subtle p-4' href='/datasets/connect'>
<div className='system-xs-medium text-text-tertiary group-hover:text-text-accent'>{t('dataset.connectDataset')}</div>
<RiArrowRightLine className='h-3.5 w-3.5 text-text-tertiary group-hover:text-text-accent' />
</Link>
</div>
)
}

CreateAppCard.displayName = 'CreateAppCard'

export default CreateAppCard

+ 2
- 2
web/app/components/app/create-from-dsl-modal/uploader.tsx View file

@@ -106,8 +106,8 @@ const Uploader: FC<Props> = ({
<div className='flex w-full items-center justify-center space-x-2'>
<RiUploadCloud2Line className='h-6 w-6 text-text-tertiary' />
<div className='text-text-tertiary'>
{t('datasetCreation.stepOne.uploader.button')}
<span className='cursor-pointer pl-1 text-text-accent' onClick={selectHandle}>{t('datasetDocuments.list.batchModal.browse')}</span>
{t('app.dslUploader.button')}
<span className='cursor-pointer pl-1 text-text-accent' onClick={selectHandle}>{t('app.dslUploader.browse')}</span>
</div>
</div>
{dragging && <div ref={dragRef} className='absolute left-0 top-0 h-full w-full' />}

+ 3
- 9
web/app/components/apps/app-card.tsx View file

@@ -370,20 +370,14 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => {
{app.description}
</div>
</div>
<div className={cn(
'absolute bottom-1 left-0 right-0 h-[42px] shrink-0 items-center pb-[6px] pl-[14px] pr-[6px] pt-1',
tags.length ? 'flex' : '!hidden group-hover:!flex',
)}>
<div className='absolute bottom-1 left-0 right-0 flex h-[42px] shrink-0 items-center pb-[6px] pl-[14px] pr-[6px] pt-1'>
{isCurrentWorkspaceEditor && (
<>
<div className={cn('flex w-0 grow items-center gap-1')} onClick={(e) => {
e.stopPropagation()
e.preventDefault()
}}>
<div className={cn(
'mr-[41px] w-full grow group-hover:!mr-0 group-hover:!block',
tags.length ? '!block' : '!hidden',
)}>
<div className='mr-[41px] w-full grow group-hover:!mr-0'>
<TagSelector
position='bl'
type='app'
@@ -395,7 +389,7 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => {
/>
</div>
</div>
<div className='mx-1 !hidden h-[14px] w-[1px] shrink-0 group-hover:!flex' />
<div className='mx-1 !hidden h-[14px] w-[1px] shrink-0 bg-divider-regular group-hover:!flex' />
<div className='!hidden shrink-0 group-hover:!flex'>
<CustomPopover
htmlContent={<Operations />}

+ 3
- 3
web/app/components/base/chat/chat/index.tsx View file

@@ -284,9 +284,9 @@ const Chat: FC<ChatProps> = ({
{
!noStopResponding && isResponding && (
<div className='mb-2 flex justify-center'>
<Button onClick={onStopResponding}>
<StopCircle className='mr-[5px] h-3.5 w-3.5 text-gray-500' />
<span className='text-xs font-normal text-gray-500'>{t('appDebug.operation.stopResponding')}</span>
<Button className='border-components-panel-border bg-components-panel-bg text-components-button-secondary-text' onClick={onStopResponding}>
<StopCircle className='mr-[5px] h-3.5 w-3.5' />
<span className='text-xs font-normal'>{t('appDebug.operation.stopResponding')}</span>
</Button>
</div>
)

+ 1
- 1
web/app/components/datasets/create/file-uploader/index.tsx Ver arquivo

@@ -313,7 +313,7 @@ const FileUploader = ({
<RiUploadCloud2Line className='mr-2 size-5' />

<span>
{t('datasetCreation.stepOne.uploader.button')}
{notSupportBatchUpload ? t('datasetCreation.stepOne.uploader.buttonSingleFile') : t('datasetCreation.stepOne.uploader.button')}
{supportTypes.length > 0 && (
<label className="ml-1 cursor-pointer text-text-accent" onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.browse')}</label>
)}

+ 138
- 73
web/app/components/develop/doc.tsx Ver arquivo

@@ -1,8 +1,8 @@
'use client'
import { useEffect, useState } from 'react'
import { useEffect, useMemo, useState } from 'react'
import { useContext } from 'use-context-selector'
import { useTranslation } from 'react-i18next'
import { RiListUnordered } from '@remixicon/react'
import { RiCloseLine, RiListUnordered } from '@remixicon/react'
import TemplateEn from './template/template.en.mdx'
import TemplateZh from './template/template.zh.mdx'
import TemplateJa from './template/template.ja.mdx'
@@ -30,6 +30,7 @@ const Doc = ({ appDetail }: IDocProps) => {
const { t } = useTranslation()
const [toc, setToc] = useState<Array<{ href: string; text: string }>>([])
const [isTocExpanded, setIsTocExpanded] = useState(false)
const [activeSection, setActiveSection] = useState<string>('')
const { theme } = useTheme()

const variables = appDetail?.model_config?.configs?.prompt_variables || []
@@ -59,13 +60,43 @@ const Doc = ({ appDetail }: IDocProps) => {
return null
}).filter((item): item is { href: string; text: string } => item !== null)
setToc(tocItems)
if (tocItems.length > 0)
setActiveSection(tocItems[0].href.replace('#', ''))
}
}

// Run after component has rendered
setTimeout(extractTOC, 0)
}, [appDetail, locale])

useEffect(() => {
const handleScroll = () => {
const scrollContainer = document.querySelector('.overflow-auto')
if (!scrollContainer || toc.length === 0)
return

let currentSection = ''
toc.forEach((item) => {
const targetId = item.href.replace('#', '')
const element = document.getElementById(targetId)
if (element) {
const rect = element.getBoundingClientRect()
if (rect.top <= window.innerHeight / 2)
currentSection = targetId
}
})

if (currentSection && currentSection !== activeSection)
setActiveSection(currentSection)
}

const scrollContainer = document.querySelector('.overflow-auto')
if (scrollContainer) {
scrollContainer.addEventListener('scroll', handleScroll)
handleScroll()
return () => scrollContainer.removeEventListener('scroll', handleScroll)
}
}, [toc, activeSection])

const handleTocClick = (e: React.MouseEvent<HTMLAnchorElement>, item: { href: string; text: string }) => {
e.preventDefault()
const targetId = item.href.replace('#', '')
@@ -82,94 +113,128 @@ const Doc = ({ appDetail }: IDocProps) => {
}
}
}

const Template = useMemo(() => {
if (appDetail?.mode === 'chat' || appDetail?.mode === 'agent-chat') {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateChatZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateChatJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateChatEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
}
if (appDetail?.mode === 'advanced-chat') {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateAdvancedChatZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateAdvancedChatJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateAdvancedChatEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
}
if (appDetail?.mode === 'workflow') {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateWorkflowZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateWorkflowJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateWorkflowEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
}
if (appDetail?.mode === 'completion') {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
}
return null
}, [appDetail, locale, variables, inputs])

return (
<div className="flex">
<div className={`fixed right-8 top-32 z-10 transition-all ${isTocExpanded ? 'w-64' : 'w-10'}`}>
<div className={`fixed right-20 top-32 z-10 transition-all duration-150 ease-out ${isTocExpanded ? 'w-[280px]' : 'w-11'}`}>
{isTocExpanded
? (
<nav className="toc max-h-[calc(100vh-150px)] w-full overflow-y-auto rounded-lg border border-components-panel-border bg-components-panel-bg p-4 shadow-md">
<div className="mb-4 flex items-center justify-between">
<h3 className="text-lg font-semibold text-text-primary">{t('appApi.develop.toc')}</h3>
<nav className="toc flex max-h-[calc(100vh-150px)] w-full flex-col overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-background-default-hover shadow-xl">
<div className="relative z-10 flex items-center justify-between border-b border-components-panel-border-subtle bg-background-default-hover px-4 py-2.5">
<span className="text-xs font-medium uppercase tracking-wide text-text-tertiary">
{t('appApi.develop.toc')}
</span>
<button
onClick={() => setIsTocExpanded(false)}
className="text-text-tertiary hover:text-text-secondary"
className="group flex h-6 w-6 items-center justify-center rounded-md transition-colors hover:bg-state-base-hover"
aria-label="Close"
>
<RiCloseLine className="h-3 w-3 text-text-quaternary transition-colors group-hover:text-text-secondary" />
</button>
</div>
<ul className="space-y-2">
{toc.map((item, index) => (
<li key={index}>
<a
href={item.href}
className="text-text-secondary transition-colors duration-200 hover:text-text-primary hover:underline"
onClick={e => handleTocClick(e, item)}
>
{item.text}
</a>
</li>
))}
</ul>

<div className="from-components-panel-border-subtle/20 pointer-events-none absolute left-0 right-0 top-[41px] z-10 h-2 bg-gradient-to-b to-transparent"></div>
<div className="pointer-events-none absolute left-0 right-0 top-[43px] z-10 h-3 bg-gradient-to-b from-background-default-hover to-transparent"></div>

<div className="relative flex-1 overflow-y-auto px-3 py-3 pt-1">
{toc.length === 0 ? (
<div className="px-2 py-8 text-center text-xs text-text-quaternary">
{t('appApi.develop.noContent')}
</div>
) : (
<ul className="space-y-0.5">
{toc.map((item, index) => {
const isActive = activeSection === item.href.replace('#', '')
return (
<li key={index}>
<a
href={item.href}
onClick={e => handleTocClick(e, item)}
className={cn(
'group relative flex items-center rounded-md px-3 py-2 text-[13px] transition-all duration-200',
isActive
? 'bg-state-base-hover font-medium text-text-primary'
: 'text-text-tertiary hover:bg-state-base-hover hover:text-text-secondary',
)}
>
<span
className={cn(
'mr-2 h-1.5 w-1.5 rounded-full transition-all duration-200',
isActive
? 'scale-100 bg-text-accent'
: 'scale-75 bg-components-panel-border',
)}
/>
<span className="flex-1 truncate">
{item.text}
</span>
</a>
</li>
)
})}
</ul>
)}
</div>

<div className="pointer-events-none absolute bottom-0 left-0 right-0 z-10 h-4 rounded-b-xl bg-gradient-to-t from-background-default-hover to-transparent"></div>
</nav>
)
: (
<button
onClick={() => setIsTocExpanded(true)}
className="flex h-10 w-10 items-center justify-center rounded-full border border-components-panel-border bg-components-button-secondary-bg shadow-md transition-colors duration-200 hover:bg-components-button-secondary-bg-hover"
className="group flex h-11 w-11 items-center justify-center rounded-full border-[0.5px] border-components-panel-border bg-components-panel-bg shadow-lg transition-all duration-150 hover:bg-background-default-hover hover:shadow-xl"
aria-label="Open table of contents"
>
<RiListUnordered className="h-6 w-6 text-components-button-secondary-text" />
<RiListUnordered className="h-5 w-5 text-text-tertiary transition-colors group-hover:text-text-secondary" />
</button>
)}
</div>
<article className={cn('prose-xl prose', theme === Theme.dark && 'prose-invert')} >
{(appDetail?.mode === 'chat' || appDetail?.mode === 'agent-chat') && (
(() => {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateChatZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateChatJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateChatEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
})()
)}
{appDetail?.mode === 'advanced-chat' && (
(() => {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateAdvancedChatZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateAdvancedChatJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateAdvancedChatEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
})()
)}
{appDetail?.mode === 'workflow' && (
(() => {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateWorkflowZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateWorkflowJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateWorkflowEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
})()
)}
{appDetail?.mode === 'completion' && (
(() => {
switch (locale) {
case LanguagesSupported[1]:
return <TemplateZh appDetail={appDetail} variables={variables} inputs={inputs} />
case LanguagesSupported[7]:
return <TemplateJa appDetail={appDetail} variables={variables} inputs={inputs} />
default:
return <TemplateEn appDetail={appDetail} variables={variables} inputs={inputs} />
}
})()
)}
<article className={cn('prose-xl prose', theme === Theme.dark && 'prose-invert')}>
{Template}
</article>
</div>
)
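
Note: the active-section tracking added above re-queries document.querySelector('.overflow-auto') and measures every heading with getBoundingClientRect on each scroll event. A minimal alternative sketch of the same idea using IntersectionObserver; the hook name useActiveSection and the rootMargin threshold are illustrative assumptions, not part of this change:

import { useEffect, useState } from 'react'

type TocItem = { href: string; text: string }

// Hypothetical hook: reports the id of the TOC target currently near the top of the viewport.
export function useActiveSection(toc: TocItem[]): string {
  const [active, setActive] = useState('')

  useEffect(() => {
    if (toc.length === 0)
      return

    const observer = new IntersectionObserver((entries) => {
      // Report a heading that has just entered the upper part of the viewport (a simplification).
      const visible = entries.filter(entry => entry.isIntersecting)
      if (visible.length > 0)
        setActive(visible[0].target.id)
    }, { rootMargin: '0px 0px -60% 0px' })

    const targets = toc
      .map(item => document.getElementById(item.href.replace('#', '')))
      .filter((el): el is HTMLElement => el !== null)

    targets.forEach(el => observer.observe(el))
    return () => observer.disconnect()
  }, [toc])

  return active
}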

+ 1
- 1
web/app/components/develop/template/template.ja.mdx Ver arquivo

@@ -448,7 +448,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
url='/text-to-audio'
method='POST'
title='テキストから音声'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>
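
Note: the same rename from name='#audio' to a unique per-endpoint anchor repeats in the other locale templates below; in several of them both audio endpoints previously shared name='#audio'. The motivation, assuming the Heading component in md.tsx uses the name prop as the element id (an assumption; md.tsx is not part of this diff), is that duplicate ids make the TOC scroll-to and active-section logic ambiguous. An illustrative sketch of such a Heading:

import type { FC } from 'react'

// Illustrative only: shows why `name` must be unique per page if it becomes the element id.
type HeadingProps = {
  url: string
  method: string
  title: string
  name: string // e.g. '#text-to-audio'
}

const Heading: FC<HeadingProps> = ({ url, method, title, name }) => {
  const id = name.replace('#', '')
  return (
    <h2 id={id}>
      <span className='mr-2 uppercase'>{method}</span>
      <code>{url}</code>
      <span className='ml-2'>{title}</span>
    </h2>
  )
}

export default Heading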

+ 1
- 1
web/app/components/develop/template/template.zh.mdx Ver arquivo

@@ -423,7 +423,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
url='/text-to-audio'
method='POST'
title='文字转语音'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 2
- 2
web/app/components/develop/template/template_advanced_chat.en.mdx Ver arquivo

@@ -1136,7 +1136,7 @@ Chat applications support session persistence, allowing previous chat history to
url='/audio-to-text'
method='POST'
title='Speech to Text'
name='#audio'
name='#audio-to-text'
/>
<Row>
<Col>
@@ -1187,7 +1187,7 @@ Chat applications support session persistence, allowing previous chat history to
url='/text-to-audio'
method='POST'
title='Text to Audio'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 2
- 2
web/app/components/develop/template/template_advanced_chat.ja.mdx Ver arquivo

@@ -1136,7 +1136,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
url='/audio-to-text'
method='POST'
title='音声からテキストへ'
name='#audio'
name='#audio-to-text'
/>
<Row>
<Col>
@@ -1187,7 +1187,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
url='/text-to-audio'
method='POST'
title='テキストから音声へ'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 2
- 2
web/app/components/develop/template/template_advanced_chat.zh.mdx Ver arquivo

@@ -1174,7 +1174,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
url='/audio-to-text'
method='POST'
title='语音转文字'
name='#audio'
name='#audio-to-text'
/>
<Row>
<Col>
@@ -1222,7 +1222,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
url='/text-to-audio'
method='POST'
title='文字转语音'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 2
- 2
web/app/components/develop/template/template_chat.en.mdx Ver arquivo

@@ -1170,7 +1170,7 @@ Chat applications support session persistence, allowing previous chat history to
url='/audio-to-text'
method='POST'
title='Speech to Text'
name='#audio'
name='#audio-to-text'
/>
<Row>
<Col>
@@ -1221,7 +1221,7 @@ Chat applications support session persistence, allowing previous chat history to
url='/text-to-audio'
method='POST'
title='Text to Audio'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 2
- 2
web/app/components/develop/template/template_chat.ja.mdx Ver arquivo

@@ -1169,7 +1169,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
url='/audio-to-text'
method='POST'
title='音声からテキストへ'
name='#audio'
name='#audio-to-text'
/>
<Row>
<Col>
@@ -1220,7 +1220,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
url='/text-to-audio'
method='POST'
title='テキストから音声へ'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 2
- 2
web/app/components/develop/template/template_chat.zh.mdx Ver arquivo

@@ -1185,7 +1185,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
url='/audio-to-text'
method='POST'
title='语音转文字'
name='#audio'
name='#audio-to-text'
/>
<Row>
<Col>
@@ -1233,7 +1233,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty } from '../md.tsx'
url='/text-to-audio'
method='POST'
title='文字转语音'
name='#audio'
name='#text-to-audio'
/>
<Row>
<Col>

+ 1
- 1
web/app/components/workflow/panel/debug-and-preview/index.tsx Ver arquivo

@@ -90,7 +90,7 @@ const DebugAndPreview = () => {
<div
ref={containerRef}
className={cn(
'relative flex h-full flex-col rounded-l-2xl border border-r-0 border-components-panel-border bg-chatbot-bg shadow-xl',
'relative flex h-full flex-col rounded-l-2xl border border-r-0 border-components-panel-border bg-components-panel-bg shadow-xl',
)}
style={{ width: `${panelWidth}px` }}
>

+ 1
- 1
web/app/components/workflow/shortcuts-name.tsx Ver arquivo

@@ -19,7 +19,7 @@ const ShortcutsName = ({
keys.map(key => (
<div
key={key}
className='system-kbd flex h-4 w-4 items-center justify-center rounded-[4px] bg-components-kbd-bg-gray capitalize'
className='system-kbd flex h-4 min-w-4 items-center justify-center rounded-[4px] bg-components-kbd-bg-gray capitalize'
>
{getKeyboardKeyNameBySystem(key)}
</div>

+ 0
- 2
web/app/styles/markdown.scss Ver arquivo

@@ -1,5 +1,3 @@
@use '../../themes/light';
@use '../../themes/dark';
@use '../../themes/markdown-light';
@use '../../themes/markdown-dark';


+ 1
- 1
web/i18n/de-DE/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: '',
},
},
contentEnableLabel: 'Moderater Inhalt aktiviert',
contentEnableLabel: 'Inhaltsmoderation aktiviert',
},
fileUpload: {
title: 'Datei-Upload',

+ 4
- 0
web/i18n/de-DE/app.ts Ver arquivo

@@ -166,6 +166,10 @@ const translation = {
description: 'Gibt an, ob das web app Symbol zum Ersetzen 🤖 in der freigegebenen Anwendung verwendet werden soll',
},
importFromDSLUrlPlaceholder: 'DSL-Link hier einfügen',
dslUploader: {
button: 'Datei per Drag & Drop ablegen oder',
browse: 'Durchsuchen',
},
duplicate: 'Duplikat',
importFromDSL: 'Import von DSL',
importDSL: 'DSL-Datei importieren',

+ 1
- 0
web/i18n/de-DE/dataset-creation.ts Ver arquivo

@@ -21,6 +21,7 @@ const translation = {
uploader: {
title: 'Textdatei hochladen',
button: 'Dateien und Ordner hierher ziehen oder klicken',
buttonSingleFile: 'Datei hierher ziehen oder klicken',
browse: 'Durchsuchen',
tip: 'Unterstützt {{supportTypes}}. Maximal {{size}}MB pro Datei.',
validation: {

+ 12
- 0
web/i18n/de-DE/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: 'Auf 50% vergrößern',
zoomTo100: 'Auf 100% vergrößern',
zoomToFit: 'An Bildschirm anpassen',
selectionAlignment: 'Ausrichtung der Auswahl',
alignLeft: 'Links',
alignTop: 'Nach oben',
distributeVertical: 'Vertikaler Raum',
alignBottom: 'Unten',
distributeHorizontal: 'Horizontaler Raum',
vertical: 'Senkrecht',
alignMiddle: 'Mitte',
alignCenter: 'Mitte',
alignRight: 'Rechts',
alignNodes: 'Knoten ausrichten',
horizontal: 'Horizontal',
},
panel: {
userInputField: 'Benutzereingabefeld',

+ 1
- 1
web/i18n/en-US/app-debug.ts Ver arquivo

@@ -163,7 +163,7 @@ const translation = {
moderation: {
title: 'Content moderation',
description: 'Secure model output by using moderation API or maintaining a sensitive word list.',
contentEnableLabel: 'Enabled moderate content',
contentEnableLabel: 'Content moderation enabled',
allEnabled: 'INPUT & OUTPUT',
inputEnabled: 'INPUT',
outputEnabled: 'OUTPUT',

+ 4
- 0
web/i18n/en-US/app.ts Ver arquivo

@@ -23,6 +23,10 @@ const translation = {
importFromDSLFile: 'From DSL file',
importFromDSLUrl: 'From URL',
importFromDSLUrlPlaceholder: 'Paste DSL link here',
dslUploader: {
button: 'Drag and drop file, or',
browse: 'Browse',
},
deleteAppConfirmTitle: 'Delete this app?',
deleteAppConfirmContent:
'Deleting the app is irreversible. Users will no longer be able to access your app, and all prompt configurations and logs will be permanently deleted.',
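
Note: the dslUploader keys added here (and mirrored in the other locales in this diff) back the labels that uploader.tsx switched to earlier. A minimal consumption sketch with react-i18next; the standalone component name is illustrative, while the real usage lives in the uploader component changed above:

import type { FC } from 'react'
import { useTranslation } from 'react-i18next'

// Hypothetical standalone label: renders the DSL uploader hint from the new keys.
const DslUploaderHint: FC<{ onBrowse: () => void }> = ({ onBrowse }) => {
  const { t } = useTranslation()
  return (
    <div className='text-text-tertiary'>
      {t('app.dslUploader.button')}
      <span className='cursor-pointer pl-1 text-text-accent' onClick={onBrowse}>
        {t('app.dslUploader.browse')}
      </span>
    </div>
  )
}

export default DslUploaderHint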

+ 1
- 0
web/i18n/en-US/dataset-creation.ts Ver arquivo

@@ -36,6 +36,7 @@ const translation = {
uploader: {
title: 'Upload file',
button: 'Drag and drop file or folder, or',
buttonSingleFile: 'Drag and drop file, or',
browse: 'Browse',
tip: 'Supports {{supportTypes}}. Max {{batchCount}} in a batch and {{size}} MB each.',
validation: {

+ 1
- 1
web/i18n/es-ES/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: '',
},
},
contentEnableLabel: 'Contenido moderado habilitado',
contentEnableLabel: 'Moderación de contenido habilitada',
},
fileUpload: {
title: 'Subida de archivos',

+ 4
- 0
web/i18n/es-ES/app.ts Ver arquivo

@@ -170,6 +170,10 @@ const translation = {
},
importFromDSLUrl: 'URL de origen',
importFromDSLUrlPlaceholder: 'Pegar enlace DSL aquí',
dslUploader: {
button: 'Arrastrar y soltar archivo, o',
browse: 'Examinar',
},
importFromDSL: 'Importar desde DSL',
importFromDSLFile: 'Desde el archivo DSL',
mermaid: {

+ 1
- 0
web/i18n/es-ES/dataset-creation.ts Ver arquivo

@@ -26,6 +26,7 @@ const translation = {
uploader: {
title: 'Cargar archivo',
button: 'Arrastre y suelte archivos o carpetas, o',
buttonSingleFile: 'Arrastre y suelte archivo, o',
browse: 'Buscar',
tip: 'Soporta {{supportTypes}}. Máximo {{size}}MB cada uno.',
validation: {

+ 12
- 0
web/i18n/es-ES/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: 'Zoom al 50%',
zoomTo100: 'Zoom al 100%',
zoomToFit: 'Ajustar al tamaño',
alignTop: 'Arriba',
alignBottom: 'Fondo',
alignNodes: 'Alinear nodos',
alignCenter: 'Centro',
selectionAlignment: 'Alineación de selección',
horizontal: 'Horizontal',
distributeHorizontal: 'Espaciar horizontalmente',
vertical: 'Vertical',
distributeVertical: 'Espaciar verticalmente',
alignMiddle: 'Medio',
alignLeft: 'Izquierda',
alignRight: 'Derecha',
},
panel: {
userInputField: 'Campo de entrada del usuario',

+ 1
- 1
web/i18n/fa-IR/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: '',
},
},
contentEnableLabel: 'محتوای متوسط فعال شده است',
contentEnableLabel: 'مدیریت محتوا فعال شده است',
},
generate: {
title: 'تولید کننده دستورالعمل',

+ 4
- 0
web/i18n/fa-IR/app.ts Ver arquivo

@@ -19,6 +19,10 @@ const translation = {
importFromDSLFile: 'از فایل DSL',
importFromDSLUrl: 'از URL',
importFromDSLUrlPlaceholder: 'لینک DSL را اینجا بچسبانید',
dslUploader: {
button: 'فایل را بکشید و رها کنید، یا',
browse: 'مرور',
},
deleteAppConfirmTitle: 'آیا این برنامه حذف شود؟',
deleteAppConfirmContent:
'حذف برنامه غیرقابل برگشت است. کاربران دیگر قادر به دسترسی به برنامه شما نخواهند بود و تمام تنظیمات و گزارشات درخواست‌ها به صورت دائم حذف خواهند شد.',

+ 1
- 0
web/i18n/fa-IR/dataset-creation.ts Ver arquivo

@@ -28,6 +28,7 @@ const translation = {
uploader: {
title: 'بارگذاری فایل',
button: 'فایل ها یا پوشه ها را بکشید و رها کنید یا',
buttonSingleFile: 'فایل را بکشید و رها کنید یا',
browse: 'مرور',
tip: 'پشتیبانی از {{supportTypes}}. حداکثر {{size}}MB هر کدام.',
validation: {

+ 12
- 0
web/i18n/fa-IR/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: 'بزرگ‌نمایی به 50%',
zoomTo100: 'بزرگ‌نمایی به 100%',
zoomToFit: 'تناسب با اندازه',
horizontal: 'افقی',
alignBottom: 'پایین',
alignRight: 'راست',
vertical: 'عمودی',
alignCenter: 'مرکز',
alignLeft: 'چپ',
distributeVertical: 'فضا عمودی',
distributeHorizontal: 'فضا به صورت افقی',
alignTop: 'بالا',
alignNodes: 'تراز کردن گره ها',
selectionAlignment: 'تراز انتخاب',
alignMiddle: 'میانه',
},
panel: {
userInputField: 'فیلد ورودی کاربر',

+ 2
- 2
web/i18n/fr-FR/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: 'Sorry, but you didn\'t provide a text to translate. Could you please provide the text?',
},
},
contentEnableLabel: 'Activation du contenu modéré',
contentEnableLabel: 'Modération de contenu activée',
},
fileUpload: {
title: 'Téléchargement de fichier',
@@ -242,7 +242,7 @@ const translation = {
'Veuillez attendre que la réponse à la tâche en lot soit terminée.',
notSelectModel: 'Veuillez choisir un modèle',
waitForImgUpload: 'Veuillez attendre que l\'image soit téléchargée',
waitForFileUpload: 'Veuillez patienter jusqu’à ce que le(s) fichier(s) soit/les fichiers à télécharger',
waitForFileUpload: 'Veuillez patienter pendant le téléchargement du/des fichier(s)',
},
chatSubTitle: 'Instructions',
completionSubTitle: 'Indicatif de Prompt',

+ 5
- 1
web/i18n/fr-FR/app.ts Ver arquivo

@@ -169,7 +169,11 @@ const translation = {
descriptionInExplore: 'Utilisation de l’icône web app pour remplacer 🤖 dans Explore',
},
importFromDSLUrlPlaceholder: 'Collez le lien DSL ici',
importFromDSL: 'Importation à partir d’une DSL',
dslUploader: {
button: 'Glisser-déposer un fichier, ou',
browse: 'Parcourir',
},
importFromDSL: 'Importation à partir d\'une DSL',
importFromDSLUrl: 'À partir de l’URL',
importFromDSLFile: 'À partir d’un fichier DSL',
mermaid: {

+ 1
- 0
web/i18n/fr-FR/dataset-creation.ts Ver arquivo

@@ -23,6 +23,7 @@ const translation = {
uploader: {
title: 'Télécharger le fichier texte',
button: 'Faites glisser et déposez des fichiers ou des dossiers, ou',
buttonSingleFile: 'Faites glisser et déposez un fichier, ou',
browse: 'Parcourir',
tip: 'Prend en charge {{supportTypes}}. Max {{size}}MB chacun.',
validation: {

+ 12
- 0
web/i18n/fr-FR/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: 'Zoomer à 50%',
zoomTo100: 'Zoomer à 100%',
zoomToFit: 'Zoomer pour ajuster',
alignBottom: 'Fond',
alignLeft: 'Gauche',
alignCenter: 'Centre',
alignTop: 'Haut',
alignNodes: 'Aligner les nœuds',
distributeHorizontal: 'Espace horizontal',
alignMiddle: 'Milieu',
horizontal: 'Horizontal',
selectionAlignment: 'Alignement de la sélection',
alignRight: 'Droite',
vertical: 'Vertical',
distributeVertical: 'Espace vertical',
},
panel: {
userInputField: 'Champ de saisie de l\'utilisateur',

+ 1
- 1
web/i18n/hi-IN/app-debug.ts Ver arquivo

@@ -213,7 +213,7 @@ const translation = {
after: 'में कॉन्फ़िगर किए गए ओपनएआई एपीआई कुंजी की आवश्यकता होती है।',
},
},
contentEnableLabel: 'मध्य स्तर की सामग्री सक्षम की गई',
contentEnableLabel: 'सामग्री मॉडरेशन सक्षम है',
},
fileUpload: {
numberLimit: 'मैक्स अपलोड करता है',

+ 4
- 0
web/i18n/hi-IN/app.ts Ver arquivo

@@ -172,6 +172,10 @@ const translation = {
importFromDSLUrl: 'यूआरएल से',
importFromDSL: 'DSL से आयात करें',
importFromDSLUrlPlaceholder: 'डीएसएल लिंक यहां पेस्ट करें',
dslUploader: {
button: 'फ़ाइल खींचकर छोड़ें, या',
browse: 'ब्राउज़ करें',
},
mermaid: {
handDrawn: 'हाथ खींचा',
classic: 'क्लासिक',

+ 1
- 0
web/i18n/hi-IN/dataset-creation.ts Ver arquivo

@@ -28,6 +28,7 @@ const translation = {
uploader: {
title: 'फ़ाइल अपलोड करें',
button: 'फ़ाइलों या फ़ोल्डरों को खींचें और छोड़ें, या',
buttonSingleFile: 'फ़ाइल को खींचें और छोड़ें, या',
browse: 'ब्राउज़ करें',
tip: 'समर्थित {{supportTypes}}। प्रत्येक अधिकतम {{size}}MB।',
validation: {

+ 12
- 0
web/i18n/hi-IN/workflow.ts Ver arquivo

@@ -298,6 +298,18 @@ const translation = {
zoomTo50: '50% पर ज़ूम करें',
zoomTo100: '100% पर ज़ूम करें',
zoomToFit: 'फिट करने के लिए ज़ूम करें',
alignRight: 'दाएँ',
alignLeft: 'बाईं ओर',
alignTop: 'शीर्ष',
horizontal: 'क्षैतिज',
alignNodes: 'नोड्स को संरेखित करें',
selectionAlignment: 'चयन संरेखण',
alignCenter: 'केंद्र',
vertical: 'ऊर्ध्वाधर',
distributeHorizontal: 'क्षैतिज स्पेस',
alignBottom: 'तल',
distributeVertical: 'ऊर्ध्वाधर स्पेस',
alignMiddle: 'मध्य',
},
panel: {
userInputField: 'उपयोगकर्ता इनपुट फ़ील्ड',

+ 1
- 1
web/i18n/it-IT/app-debug.ts Ver arquivo

@@ -215,7 +215,7 @@ const translation = {
after: '',
},
},
contentEnableLabel: 'Abilitato il contenuto moderato',
contentEnableLabel: 'Moderazione dei contenuti abilitata',
},
fileUpload: {
title: 'Caricamento File',

+ 4
- 0
web/i18n/it-IT/app.ts Ver arquivo

@@ -178,6 +178,10 @@ const translation = {
importFromDSLFile: 'Da file DSL',
importFromDSL: 'Importazione da DSL',
importFromDSLUrlPlaceholder: 'Incolla qui il link DSL',
dslUploader: {
button: 'Trascina e rilascia il file, o',
browse: 'Sfoglia',
},
mermaid: {
handDrawn: 'Disegnato a mano',
classic: 'Classico',

+ 1
- 0
web/i18n/it-IT/dataset-creation.ts Ver arquivo

@@ -28,6 +28,7 @@ const translation = {
uploader: {
title: 'Carica file',
button: 'Trascina e rilascia file o cartelle, oppure',
buttonSingleFile: 'Trascina e rilascia un file, oppure',
browse: 'Sfoglia',
tip: 'Supporta {{supportTypes}}. Max {{size}}MB ciascuno.',
validation: {

+ 12
- 0
web/i18n/it-IT/workflow.ts Ver arquivo

@@ -301,6 +301,18 @@ const translation = {
zoomTo50: 'Zoom al 50%',
zoomTo100: 'Zoom al 100%',
zoomToFit: 'Zoom per Adattare',
alignRight: 'A destra',
selectionAlignment: 'Allineamento della selezione',
alignBottom: 'In basso',
alignTop: 'In alto',
vertical: 'Verticale',
alignCenter: 'Centro',
alignLeft: 'A sinistra',
alignMiddle: 'Mezzo',
horizontal: 'Orizzontale',
alignNodes: 'Allinea nodi',
distributeHorizontal: 'Spazia orizzontalmente',
distributeVertical: 'Spazia verticalmente',
},
panel: {
userInputField: 'Campo di Input Utente',

+ 1
- 1
web/i18n/ja-JP/app-debug.ts Ver arquivo

@@ -163,7 +163,7 @@ const translation = {
moderation: {
title: 'コンテンツのモデレーション',
description: 'モデレーション API を使用するか、機密語リストを維持することで、モデルの出力を安全にします。',
contentEnableLabel: 'モデレート・コンテンツを有効にする',
contentEnableLabel: 'コンテンツモデレーションが有効',
allEnabled: '入力/出力コンテンツが有効になっています',
inputEnabled: '入力コンテンツが有効になっています',
outputEnabled: '出力コンテンツが有効になっています',

+ 4
- 0
web/i18n/ja-JP/app.ts Ver arquivo

@@ -23,6 +23,10 @@ const translation = {
importFromDSLFile: 'DSL ファイルから',
importFromDSLUrl: 'URL から',
importFromDSLUrlPlaceholder: 'DSL リンクをここに貼り付けます',
dslUploader: {
button: 'ファイルをドラッグ&ドロップするか、',
browse: '参照',
},
deleteAppConfirmTitle: 'このアプリを削除しますか?',
deleteAppConfirmContent:
'アプリを削除すると、元に戻すことはできません。他のユーザーはもはやこのアプリにアクセスできず、すべてのプロンプトの設定とログが永久に削除されます。',

+ 1
- 0
web/i18n/ja-JP/dataset-creation.ts Ver arquivo

@@ -31,6 +31,7 @@ const translation = {
uploader: {
title: 'テキストファイルをアップロード',
button: 'ファイルまたはフォルダをドラッグアンドドロップする',
buttonSingleFile: 'ファイルをドラッグアンドドロップする',
browse: '参照',
tip: '{{supportTypes}}をサポートしています。1 つあたりの最大サイズは{{size}}MB です。',
validation: {

+ 12
- 0
web/i18n/ja-JP/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: '50% サイズ',
zoomTo100: '等倍表示',
zoomToFit: '画面に合わせる',
horizontal: '横',
alignBottom: '底',
alignNodes: 'ノードを整列させる',
vertical: '垂直',
alignLeft: '左',
alignTop: 'トップ',
alignRight: '右',
alignMiddle: '中間',
distributeVertical: '垂直にスペースを',
alignCenter: 'センター',
selectionAlignment: '選択の整列',
distributeHorizontal: '空間を水平方向に',
},
variableReference: {
noAvailableVars: '利用可能な変数がありません',

+ 1
- 1
web/i18n/ko-KR/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: '에 OpenAI API 키가 설정되어 있어야 합니다.',
},
},
contentEnableLabel: '중간 콘텐츠 사용',
contentEnableLabel: '콘텐츠 모더레이션이 활성화됨',
},
fileUpload: {
title: '파일 업로드',

+ 4
- 0
web/i18n/ko-KR/app.ts Ver arquivo

@@ -189,6 +189,10 @@ const translation = {
importFromDSLFile: 'DSL 파일에서',
importFromDSLUrl: 'URL 에서',
importFromDSLUrlPlaceholder: '여기에 DSL 링크 붙여 넣기',
dslUploader: {
button: '파일을 드래그 앤 드롭하거나',
browse: '찾아보기',
},
mermaid: {
handDrawn: '손으로 그린',
classic: '고전',

+ 1
- 0
web/i18n/ko-KR/dataset-creation.ts Ver arquivo

@@ -21,6 +21,7 @@ const translation = {
uploader: {
title: '텍스트 파일 업로드',
button: '파일이나 폴더를 끌어서 놓기',
buttonSingleFile: '파일을 끌어서 놓기',
browse: '찾아보기',
tip: '{{supportTypes}}을 (를) 지원합니다. 파일당 최대 크기는 {{size}}MB 입니다.',
validation: {

+ 12
- 0
web/i18n/ko-KR/workflow.ts Ver arquivo

@@ -308,6 +308,18 @@ const translation = {
zoomTo50: '50% 로 확대',
zoomTo100: '100% 로 확대',
zoomToFit: '화면에 맞게 확대',
alignCenter: '중',
alignRight: '오른쪽',
alignLeft: '왼쪽',
vertical: '세로',
alignTop: '맨 위로',
alignMiddle: '중간',
alignNodes: '노드 정렬',
distributeVertical: '수직 공간',
horizontal: '가로',
selectionAlignment: '선택 정렬',
alignBottom: '하단',
distributeHorizontal: '수평 공간',
},
panel: {
userInputField: '사용자 입력 필드',

+ 4
- 0
web/i18n/pl-PL/app.ts Ver arquivo

@@ -173,6 +173,10 @@ const translation = {
importFromDSLUrl: 'Z adresu URL',
importFromDSLFile: 'Z pliku DSL',
importFromDSLUrlPlaceholder: 'Wklej tutaj link DSL',
dslUploader: {
button: 'Przeciągnij i upuść plik, lub',
browse: 'Przeglądaj',
},
mermaid: {
handDrawn: 'Ręcznie rysowane',
classic: 'Klasyczny',

+ 1
- 0
web/i18n/pl-PL/dataset-creation.ts Ver arquivo

@@ -23,6 +23,7 @@ const translation = {
uploader: {
title: 'Prześlij plik tekstowy',
button: 'Przeciągnij i upuść pliki lub foldery lub',
buttonSingleFile: 'Przeciągnij i upuść plik lub',
browse: 'Przeglądaj',
tip: 'Obsługuje {{supportTypes}}. Maksymalnie {{size}}MB każdy.',
validation: {

+ 12
- 0
web/i18n/pl-PL/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: 'Powiększ do 50%',
zoomTo100: 'Powiększ do 100%',
zoomToFit: 'Dopasuj do ekranu',
alignMiddle: 'Środek',
alignTop: 'Do góry',
distributeHorizontal: 'Odstęp w poziomie',
alignCenter: 'Centrum',
alignRight: 'Prawy',
alignNodes: 'Wyrównywanie węzłów',
selectionAlignment: 'Wyrównanie zaznaczenia',
horizontal: 'Poziomy',
distributeVertical: 'Przestrzeń w pionie',
alignBottom: 'Dno',
alignLeft: 'Lewy',
vertical: 'Pionowy',
},
panel: {
userInputField: 'Pole wprowadzania użytkownika',

+ 1
- 1
web/i18n/pt-BR/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: '',
},
},
contentEnableLabel: 'Conteúdo moderado habilitado',
contentEnableLabel: 'Moderação de conteúdo habilitada',
},
fileUpload: {
title: 'Upload de Arquivo',

+ 4
- 0
web/i18n/pt-BR/app.ts Ver arquivo

@@ -169,6 +169,10 @@ const translation = {
title: 'Use o ícone do web app para substituir 🤖',
},
importFromDSLUrlPlaceholder: 'Cole o link DSL aqui',
dslUploader: {
button: 'Arraste e solte o arquivo, ou',
browse: 'Navegar',
},
importFromDSLUrl: 'Do URL',
importFromDSLFile: 'Do arquivo DSL',
importFromDSL: 'Importar de DSL',

+ 1
- 0
web/i18n/pt-BR/dataset-creation.ts Ver arquivo

@@ -23,6 +23,7 @@ const translation = {
uploader: {
title: 'Enviar arquivo de texto',
button: 'Arraste e solte arquivos ou pastas, ou',
buttonSingleFile: 'Arraste e solte um arquivo, ou',
browse: 'Navegar',
tip: 'Suporta {{supportTypes}}. Máximo de {{size}}MB cada.',
validation: {

+ 12
- 0
web/i18n/pt-BR/workflow.ts Ver arquivo

@@ -287,6 +287,18 @@ const translation = {
zoomTo50: 'Aproximar para 50%',
zoomTo100: 'Aproximar para 100%',
zoomToFit: 'Aproximar para ajustar',
vertical: 'Vertical',
alignNodes: 'Alinhar nós',
selectionAlignment: 'Alinhamento de seleção',
alignLeft: 'Esquerda',
alignBottom: 'Fundo',
distributeHorizontal: 'Espaço horizontalmente',
alignMiddle: 'Meio',
alignRight: 'Direita',
horizontal: 'Horizontal',
distributeVertical: 'Espaço Verticalmente',
alignCenter: 'Centro',
alignTop: 'Topo',
},
panel: {
userInputField: 'Campo de entrada do usuário',

+ 1
- 1
web/i18n/ro-RO/app-debug.ts Ver arquivo

@@ -197,7 +197,7 @@ const translation = {
after: '',
},
},
contentEnableLabel: 'Conținut moderat activat',
contentEnableLabel: 'Moderarea conținutului activată',
},
fileUpload: {
title: 'Încărcare fișier',

+ 4
- 0
web/i18n/ro-RO/app.ts Ver arquivo

@@ -171,6 +171,10 @@ const translation = {
importFromDSL: 'Import din DSL',
importFromDSLUrl: 'De la URL',
importFromDSLUrlPlaceholder: 'Lipiți linkul DSL aici',
dslUploader: {
button: 'Trageți și plasați fișierul, sau',
browse: 'Răsfoiți',
},
importFromDSLFile: 'Din fișierul DSL',
mermaid: {
handDrawn: 'Desenat de mână',

+ 0
- 0
web/i18n/ro-RO/dataset-creation.ts Ver arquivo


Some files were not shown because too many files changed in this diff
