| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357 | 
							- #
 - #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
 - #
 - #  Licensed under the Apache License, Version 2.0 (the "License");
 - #  you may not use this file except in compliance with the License.
 - #  You may obtain a copy of the License at
 - #
 - #      http://www.apache.org/licenses/LICENSE-2.0
 - #
 - #  Unless required by applicable law or agreed to in writing, software
 - #  distributed under the License is distributed on an "AS IS" BASIS,
 - #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 - #  See the License for the specific language governing permissions and
 - #  limitations under the License.
 - #
 - import functools
 - import json
 - import random
 - import time
 - from base64 import b64encode
 - from functools import wraps
 - from hmac import HMAC
 - from io import BytesIO
 - from urllib.parse import quote, urlencode
 - from uuid import uuid1
 - 
 - import requests
 - from flask import (
 -     Response, jsonify, send_file, make_response,
 -     request as flask_request,
 - )
 - from itsdangerous import URLSafeTimedSerializer
 - from werkzeug.http import HTTP_STATUS_CODES
 - 
 - from api.db.db_models import APIToken
 - from api.settings import (
 -     REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
 -     CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
 - )
 - from api.settings import RetCode
 - from api.utils import CustomJSONEncoder, get_uuid
 - from api.utils import json_dumps
 - from api.utils.log_utils import logger
 - 
 - requests.models.complexjson.dumps = functools.partial(
 -     json.dumps, cls=CustomJSONEncoder)
 - 
 - 
 - def request(**kwargs):
 -     sess = requests.Session()
 -     stream = kwargs.pop('stream', sess.stream)
 -     timeout = kwargs.pop('timeout', None)
 -     kwargs['headers'] = {
 -         k.replace(
 -             '_',
 -             '-').upper(): v for k,
 -                                 v in kwargs.get(
 -             'headers',
 -             {}).items()}
 -     prepped = requests.Request(**kwargs).prepare()
 - 
 -     if CLIENT_AUTHENTICATION and HTTP_APP_KEY and SECRET_KEY:
 -         timestamp = str(round(time() * 1000))
 -         nonce = str(uuid1())
 -         signature = b64encode(HMAC(SECRET_KEY.encode('ascii'), b'\n'.join([
 -             timestamp.encode('ascii'),
 -             nonce.encode('ascii'),
 -             HTTP_APP_KEY.encode('ascii'),
 -             prepped.path_url.encode('ascii'),
 -             prepped.body if kwargs.get('json') else b'',
 -             urlencode(
 -                 sorted(
 -                     kwargs['data'].items()),
 -                 quote_via=quote,
 -                 safe='-._~').encode('ascii')
 -             if kwargs.get('data') and isinstance(kwargs['data'], dict) else b'',
 -         ]), 'sha1').digest()).decode('ascii')
 - 
 -         prepped.headers.update({
 -             'TIMESTAMP': timestamp,
 -             'NONCE': nonce,
 -             'APP-KEY': HTTP_APP_KEY,
 -             'SIGNATURE': signature,
 -         })
 - 
 -     return sess.send(prepped, stream=stream, timeout=timeout)
 - 
 - 
 - def get_exponential_backoff_interval(retries, full_jitter=False):
 -     """Calculate the exponential backoff wait time."""
 -     # Will be zero if factor equals 0
 -     countdown = min(REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC * (2 ** retries))
 -     # Full jitter according to
 -     # https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
 -     if full_jitter:
 -         countdown = random.randrange(countdown + 1)
 -     # Adjust according to maximum wait time and account for negative values.
 -     return max(0, countdown)
 - 
 - 
 - def get_data_error_result(code=RetCode.DATA_ERROR,
 -                           message='Sorry! Data missing!'):
 -     import re
 -     result_dict = {
 -         "code": code,
 -         "message": re.sub(
 -             r"rag",
 -             "seceum",
 -             message,
 -             flags=re.IGNORECASE)}
 -     response = {}
 -     for key, value in result_dict.items():
 -         if value is None and key != "code":
 -             continue
 -         else:
 -             response[key] = value
 -     return jsonify(response)
 - 
 - 
 - def server_error_response(e):
 -     logger.exception(e)
 -     try:
 -         if e.code == 401:
 -             return get_json_result(code=401, message=repr(e))
 -     except BaseException:
 -         pass
 -     if len(e.args) > 1:
 -         return get_json_result(
 -             code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
 -     return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
 - 
 - 
 - def error_response(response_code, message=None):
 -     if message is None:
 -         message = HTTP_STATUS_CODES.get(response_code, 'Unknown Error')
 - 
 -     return Response(json.dumps({
 -         'message': message,
 -         'code': response_code,
 -     }), status=response_code, mimetype='application/json')
 - 
 - 
 - def validate_request(*args, **kwargs):
 -     def wrapper(func):
 -         @wraps(func)
 -         def decorated_function(*_args, **_kwargs):
 -             input_arguments = flask_request.json or flask_request.form.to_dict()
 -             no_arguments = []
 -             error_arguments = []
 -             for arg in args:
 -                 if arg not in input_arguments:
 -                     no_arguments.append(arg)
 -             for k, v in kwargs.items():
 -                 config_value = input_arguments.get(k, None)
 -                 if config_value is None:
 -                     no_arguments.append(k)
 -                 elif isinstance(v, (tuple, list)):
 -                     if config_value not in v:
 -                         error_arguments.append((k, set(v)))
 -                 elif config_value != v:
 -                     error_arguments.append((k, v))
 -             if no_arguments or error_arguments:
 -                 error_string = ""
 -                 if no_arguments:
 -                     error_string += "required argument are missing: {}; ".format(
 -                         ",".join(no_arguments))
 -                 if error_arguments:
 -                     error_string += "required argument values: {}".format(
 -                         ",".join(["{}={}".format(a[0], a[1]) for a in error_arguments]))
 -                 return get_json_result(
 -                     code=RetCode.ARGUMENT_ERROR, message=error_string)
 -             return func(*_args, **_kwargs)
 - 
 -         return decorated_function
 - 
 -     return wrapper
 - 
 - 
 - def is_localhost(ip):
 -     return ip in {'127.0.0.1', '::1', '[::1]', 'localhost'}
 - 
 - 
 - def send_file_in_mem(data, filename):
 -     if not isinstance(data, (str, bytes)):
 -         data = json_dumps(data)
 -     if isinstance(data, str):
 -         data = data.encode('utf-8')
 - 
 -     f = BytesIO()
 -     f.write(data)
 -     f.seek(0)
 - 
 -     return send_file(f, as_attachment=True, attachment_filename=filename)
 - 
 - 
 - def get_json_result(code=RetCode.SUCCESS, message='success', data=None):
 -     response = {"code": code, "message": message, "data": data}
 -     return jsonify(response)
 - 
 - def apikey_required(func):
 -     @wraps(func)
 -     def decorated_function(*args, **kwargs):
 -         token = flask_request.headers.get('Authorization').split()[1]
 -         objs = APIToken.query(token=token)
 -         if not objs:
 -             return build_error_result(
 -                 message='API-KEY is invalid!', code=RetCode.FORBIDDEN
 -             )
 -         kwargs['tenant_id'] = objs[0].tenant_id
 -         return func(*args, **kwargs)
 - 
 -     return decorated_function
 - 
 - 
 - def build_error_result(code=RetCode.FORBIDDEN, message='success'):
 -     response = {"code": code, "message": message}
 -     response = jsonify(response)
 -     response.status_code = code
 -     return response
 - 
 - 
 - def construct_response(code=RetCode.SUCCESS,
 -                        message='success', data=None, auth=None):
 -     result_dict = {"code": code, "message": message, "data": data}
 -     response_dict = {}
 -     for key, value in result_dict.items():
 -         if value is None and key != "code":
 -             continue
 -         else:
 -             response_dict[key] = value
 -     response = make_response(jsonify(response_dict))
 -     if auth:
 -         response.headers["Authorization"] = auth
 -     response.headers["Access-Control-Allow-Origin"] = "*"
 -     response.headers["Access-Control-Allow-Method"] = "*"
 -     response.headers["Access-Control-Allow-Headers"] = "*"
 -     response.headers["Access-Control-Allow-Headers"] = "*"
 -     response.headers["Access-Control-Expose-Headers"] = "Authorization"
 -     return response
 - 
 - 
 - def construct_result(code=RetCode.DATA_ERROR, message='data is missing'):
 -     import re
 -     result_dict = {"code": code, "message": re.sub(r"rag", "seceum", message, flags=re.IGNORECASE)}
 -     response = {}
 -     for key, value in result_dict.items():
 -         if value is None and key != "code":
 -             continue
 -         else:
 -             response[key] = value
 -     return jsonify(response)
 - 
 - 
 - def construct_json_result(code=RetCode.SUCCESS, message='success', data=None):
 -     if data is None:
 -         return jsonify({"code": code, "message": message})
 -     else:
 -         return jsonify({"code": code, "message": message, "data": data})
 - 
 - 
 - def construct_error_response(e):
 -     logger.exception(e)
 -     try:
 -         if e.code == 401:
 -             return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
 -     except BaseException:
 -         pass
 -     if len(e.args) > 1:
 -         return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
 -     return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
 - 
 - 
 - def token_required(func):
 -     @wraps(func)
 -     def decorated_function(*args, **kwargs):
 -         authorization_list=flask_request.headers.get('Authorization').split()
 -         if len(authorization_list) < 2:
 -             return get_json_result(data=False,message="Please check your authorization format.")
 -         token = authorization_list[1]
 -         objs = APIToken.query(token=token)
 -         if not objs:
 -             return get_json_result(
 -                 data=False, message='Token is not valid!', code=RetCode.AUTHENTICATION_ERROR
 -             )
 -         kwargs['tenant_id'] = objs[0].tenant_id
 -         return func(*args, **kwargs)
 - 
 -     return decorated_function
 - 
 - 
 - def get_result(code=RetCode.SUCCESS, message="", data=None):
 -     if code == 0:
 -         if data is not None:
 -             response = {"code": code, "data": data}
 -         else:
 -             response = {"code": code}
 -     else:
 -         response = {"code": code, "message": message}
 -     return jsonify(response)
 - 
 - 
 - def get_error_data_result(message='Sorry! Data missing!', code=RetCode.DATA_ERROR,
 -                           ):
 -     import re
 -     result_dict = {
 -         "code": code,
 -         "message": re.sub(
 -             r"rag",
 -             "seceum",
 -             message,
 -             flags=re.IGNORECASE)}
 -     response = {}
 -     for key, value in result_dict.items():
 -         if value is None and key != "code":
 -             continue
 -         else:
 -             response[key] = value
 -     return jsonify(response)
 - 
 - 
 - def generate_confirmation_token(tenent_id):
 -     serializer = URLSafeTimedSerializer(tenent_id)
 -     return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
 - 
 - 
 - def valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method):
 -     if valid_parameter(permission,valid_permission):
 -         return valid_parameter(permission,valid_permission)
 -     if valid_parameter(language,valid_language):
 -         return valid_parameter(language,valid_language)
 -     if valid_parameter(chunk_method,valid_chunk_method):
 -         return valid_parameter(chunk_method,valid_chunk_method)
 - 
 - def valid_parameter(parameter,valid_values):
 -     if parameter and parameter not in valid_values:
 -        return get_error_data_result(f"'{parameter}' is not in {valid_values}")
 - 
 - def get_parser_config(chunk_method,parser_config):
 -     if parser_config:
 -         return parser_config
 -     if not chunk_method:
 -         chunk_method = "naive"
 -     key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"use_raptor": False}},
 -                  "qa":{"raptor":{"use_raptor":False}},
 -                  "resume":None,
 -                  "manual":{"raptor":{"use_raptor":False}},
 -                  "table":None,
 -                  "paper":{"raptor":{"use_raptor":False}},
 -                  "book":{"raptor":{"use_raptor":False}},
 -                  "laws":{"raptor":{"use_raptor":False}},
 -                  "presentation":{"raptor":{"use_raptor":False}},
 -                  "one":None,
 -                  "knowledge_graph":{"chunk_token_num":8192,"delimiter":"\\n!?;。;!?","entity_types":["organization","person","location","event","time"]},
 -                  "email":None,
 -                  "picture":None}
 -     parser_config=key_mapping[chunk_method]
 -     return parser_config
 
 
  |