| if not sensitive_word_avoidance_dict: | if not sensitive_word_avoidance_dict: | ||||
| return None | return None | ||||
| if 'enabled' in sensitive_word_avoidance_dict and sensitive_word_avoidance_dict['enabled']: | |||||
| if sensitive_word_avoidance_dict.get('enabled'): | |||||
| return SensitiveWordAvoidanceEntity( | return SensitiveWordAvoidanceEntity( | ||||
| type=sensitive_word_avoidance_dict.get('type'), | type=sensitive_word_avoidance_dict.get('type'), | ||||
| config=sensitive_word_avoidance_dict.get('config'), | config=sensitive_word_avoidance_dict.get('config'), |
| """ | """ | ||||
| file_upload_dict = config.get('file_upload') | file_upload_dict = config.get('file_upload') | ||||
| if file_upload_dict: | if file_upload_dict: | ||||
| if 'image' in file_upload_dict and file_upload_dict['image']: | |||||
| if file_upload_dict.get('image'): | |||||
| if 'enabled' in file_upload_dict['image'] and file_upload_dict['image']['enabled']: | if 'enabled' in file_upload_dict['image'] and file_upload_dict['image']['enabled']: | ||||
| image_config = { | image_config = { | ||||
| 'number_limits': file_upload_dict['image']['number_limits'], | 'number_limits': file_upload_dict['image']['number_limits'], |
| more_like_this = False | more_like_this = False | ||||
| more_like_this_dict = config.get('more_like_this') | more_like_this_dict = config.get('more_like_this') | ||||
| if more_like_this_dict: | if more_like_this_dict: | ||||
| if 'enabled' in more_like_this_dict and more_like_this_dict['enabled']: | |||||
| if more_like_this_dict.get('enabled'): | |||||
| more_like_this = True | more_like_this = True | ||||
| return more_like_this | return more_like_this |
| show_retrieve_source = False | show_retrieve_source = False | ||||
| retriever_resource_dict = config.get('retriever_resource') | retriever_resource_dict = config.get('retriever_resource') | ||||
| if retriever_resource_dict: | if retriever_resource_dict: | ||||
| if 'enabled' in retriever_resource_dict and retriever_resource_dict['enabled']: | |||||
| if retriever_resource_dict.get('enabled'): | |||||
| show_retrieve_source = True | show_retrieve_source = True | ||||
| return show_retrieve_source | return show_retrieve_source |
| speech_to_text = False | speech_to_text = False | ||||
| speech_to_text_dict = config.get('speech_to_text') | speech_to_text_dict = config.get('speech_to_text') | ||||
| if speech_to_text_dict: | if speech_to_text_dict: | ||||
| if 'enabled' in speech_to_text_dict and speech_to_text_dict['enabled']: | |||||
| if speech_to_text_dict.get('enabled'): | |||||
| speech_to_text = True | speech_to_text = True | ||||
| return speech_to_text | return speech_to_text |
| suggested_questions_after_answer = False | suggested_questions_after_answer = False | ||||
| suggested_questions_after_answer_dict = config.get('suggested_questions_after_answer') | suggested_questions_after_answer_dict = config.get('suggested_questions_after_answer') | ||||
| if suggested_questions_after_answer_dict: | if suggested_questions_after_answer_dict: | ||||
| if 'enabled' in suggested_questions_after_answer_dict and suggested_questions_after_answer_dict['enabled']: | |||||
| if suggested_questions_after_answer_dict.get('enabled'): | |||||
| suggested_questions_after_answer = True | suggested_questions_after_answer = True | ||||
| return suggested_questions_after_answer | return suggested_questions_after_answer |
| text_to_speech = False | text_to_speech = False | ||||
| text_to_speech_dict = config.get('text_to_speech') | text_to_speech_dict = config.get('text_to_speech') | ||||
| if text_to_speech_dict: | if text_to_speech_dict: | ||||
| if 'enabled' in text_to_speech_dict and text_to_speech_dict['enabled']: | |||||
| if text_to_speech_dict.get('enabled'): | |||||
| text_to_speech = TextToSpeechEntity( | text_to_speech = TextToSpeechEntity( | ||||
| enabled=text_to_speech_dict.get('enabled'), | enabled=text_to_speech_dict.get('enabled'), | ||||
| voice=text_to_speech_dict.get('voice'), | voice=text_to_speech_dict.get('voice'), |
| conversation = self._get_conversation_by_user(app_model, args.get('conversation_id'), user) | conversation = self._get_conversation_by_user(app_model, args.get('conversation_id'), user) | ||||
| # parse files | # parse files | ||||
| files = args['files'] if 'files' in args and args['files'] else [] | |||||
| files = args['files'] if args.get('files') else [] | |||||
| message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | ||||
| file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False) | file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False) | ||||
| if file_extra_config: | if file_extra_config: |
| ) | ) | ||||
| # parse files | # parse files | ||||
| files = args['files'] if 'files' in args and args['files'] else [] | |||||
| files = args['files'] if args.get('files') else [] | |||||
| message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | ||||
| file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict()) | file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict()) | ||||
| if file_extra_config: | if file_extra_config: |
| ) | ) | ||||
| # parse files | # parse files | ||||
| files = args['files'] if 'files' in args and args['files'] else [] | |||||
| files = args['files'] if args.get('files') else [] | |||||
| message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | ||||
| file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict()) | file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict()) | ||||
| if file_extra_config: | if file_extra_config: |
| ) | ) | ||||
| # parse files | # parse files | ||||
| files = args['files'] if 'files' in args and args['files'] else [] | |||||
| files = args['files'] if args.get('files') else [] | |||||
| message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | ||||
| file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict()) | file_extra_config = FileUploadConfigManager.convert(override_model_config_dict or app_model_config.to_dict()) | ||||
| if file_extra_config: | if file_extra_config: |
| inputs = args['inputs'] | inputs = args['inputs'] | ||||
| # parse files | # parse files | ||||
| files = args['files'] if 'files' in args and args['files'] else [] | |||||
| files = args['files'] if args.get('files') else [] | |||||
| message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | message_file_parser = MessageFileParser(tenant_id=app_model.tenant_id, app_id=app_model.id) | ||||
| file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False) | file_extra_config = FileUploadConfigManager.convert(workflow.features_dict, is_vision=False) | ||||
| if file_extra_config: | if file_extra_config: |
| enabled=False, | enabled=False, | ||||
| created_by=self._user_id, | created_by=self._user_id, | ||||
| ) | ) | ||||
| if 'answer' in doc.metadata and doc.metadata['answer']: | |||||
| if doc.metadata.get('answer'): | |||||
| segment_document.answer = doc.metadata.pop('answer', '') | segment_document.answer = doc.metadata.pop('answer', '') | ||||
| db.session.add(segment_document) | db.session.add(segment_document) | ||||
| else: | else: | ||||
| segment_document.content = doc.page_content | segment_document.content = doc.page_content | ||||
| if 'answer' in doc.metadata and doc.metadata['answer']: | |||||
| if doc.metadata.get('answer'): | |||||
| segment_document.answer = doc.metadata.pop('answer', '') | segment_document.answer = doc.metadata.pop('answer', '') | ||||
| segment_document.index_node_hash = doc.metadata['doc_hash'] | segment_document.index_node_hash = doc.metadata['doc_hash'] | ||||
| segment_document.word_count = len(doc.page_content) | segment_document.word_count = len(doc.page_content) |
| if separator: | if separator: | ||||
| separator = separator.replace('\\n', '\n') | separator = separator.replace('\\n', '\n') | ||||
| if 'chunk_overlap' in segmentation and segmentation['chunk_overlap']: | |||||
| if segmentation.get('chunk_overlap'): | |||||
| chunk_overlap = segmentation['chunk_overlap'] | chunk_overlap = segmentation['chunk_overlap'] | ||||
| else: | else: | ||||
| chunk_overlap = 0 | chunk_overlap = 0 |
| """ | """ | ||||
| Code block mode wrapper for invoking large language model | Code block mode wrapper for invoking large language model | ||||
| """ | """ | ||||
| if 'response_format' in model_parameters and model_parameters['response_format']: | |||||
| if model_parameters.get('response_format'): | |||||
| stop = stop or [] | stop = stop or [] | ||||
| # chat model | # chat model | ||||
| self._transform_chat_json_prompts( | self._transform_chat_json_prompts( | ||||
| "max_retries": 1, | "max_retries": 1, | ||||
| } | } | ||||
| if 'anthropic_api_url' in credentials and credentials['anthropic_api_url']: | |||||
| if credentials.get('anthropic_api_url'): | |||||
| credentials['anthropic_api_url'] = credentials['anthropic_api_url'].rstrip('/') | credentials['anthropic_api_url'] = credentials['anthropic_api_url'].rstrip('/') | ||||
| credentials_kwargs['base_url'] = credentials['anthropic_api_url'] | credentials_kwargs['base_url'] = credentials['anthropic_api_url'] | ||||
| # save stop reason temporarily | # save stop reason temporarily | ||||
| stop_reason = '' | stop_reason = '' | ||||
| for choice in choices: | for choice in choices: | ||||
| if 'finish_reason' in choice and choice['finish_reason']: | |||||
| if choice.get('finish_reason'): | |||||
| stop_reason = choice['finish_reason'] | stop_reason = choice['finish_reason'] | ||||
| if len(choice['delta']['content']) == 0: | if len(choice['delta']['content']) == 0: |
| if 'top_p' in model_parameters and type(model_parameters['top_p']) == float: | if 'top_p' in model_parameters and type(model_parameters['top_p']) == float: | ||||
| extra_kwargs['top_p'] = model_parameters['top_p'] | extra_kwargs['top_p'] = model_parameters['top_p'] | ||||
| if 'plugin_web_search' in model_parameters and model_parameters['plugin_web_search']: | |||||
| if model_parameters.get('plugin_web_search'): | |||||
| extra_kwargs['plugins'] = [ | extra_kwargs['plugins'] = [ | ||||
| 'plugin_web_search' | 'plugin_web_search' | ||||
| ] | ] | ||||
| self._handle_error(code, msg) | self._handle_error(code, msg) | ||||
| # final chunk | # final chunk | ||||
| if data['reply'] or 'usage' in data and data['usage']: | |||||
| if data['reply'] or data.get('usage'): | |||||
| total_tokens = data['usage']['total_tokens'] | total_tokens = data['usage']['total_tokens'] | ||||
| minimax_message = MinimaxMessage( | minimax_message = MinimaxMessage( | ||||
| role=MinimaxMessage.Role.ASSISTANT.value, | role=MinimaxMessage.Role.ASSISTANT.value, |
| "max_retries": 1, | "max_retries": 1, | ||||
| } | } | ||||
| if 'openai_api_base' in credentials and credentials['openai_api_base']: | |||||
| if credentials.get('openai_api_base'): | |||||
| credentials['openai_api_base'] = credentials['openai_api_base'].rstrip('/') | credentials['openai_api_base'] = credentials['openai_api_base'].rstrip('/') | ||||
| credentials_kwargs['base_url'] = credentials['openai_api_base'] + '/v1' | credentials_kwargs['base_url'] = credentials['openai_api_base'] + '/v1' | ||||
| completion_usage += len(token_ids) | completion_usage += len(token_ids) | ||||
| message = OpenLLMGenerateMessage(content=text, role=OpenLLMGenerateMessage.Role.ASSISTANT.value) | message = OpenLLMGenerateMessage(content=text, role=OpenLLMGenerateMessage.Role.ASSISTANT.value) | ||||
| if 'finish_reason' in choice and choice['finish_reason']: | |||||
| if choice.get('finish_reason'): | |||||
| finish_reason = choice['finish_reason'] | finish_reason = choice['finish_reason'] | ||||
| prompt_token_ids = data['prompt_token_ids'] | prompt_token_ids = data['prompt_token_ids'] | ||||
| message.stop_reason = finish_reason | message.stop_reason = finish_reason |
| document_score_list = {} | document_score_list = {} | ||||
| for item in all_documents: | for item in all_documents: | ||||
| if 'score' in item.metadata and item.metadata['score']: | |||||
| if item.metadata.get('score'): | |||||
| document_score_list[item.metadata['doc_id']] = item.metadata['score'] | document_score_list[item.metadata['doc_id']] = item.metadata['score'] | ||||
| document_context_list = [] | document_context_list = [] |
| if not base_url: | if not base_url: | ||||
| return self.create_text_message('Please input base_url') | return self.create_text_message('Please input base_url') | ||||
| if 'model' in tool_parameters and tool_parameters['model']: | |||||
| if tool_parameters.get('model'): | |||||
| self.runtime.credentials['model'] = tool_parameters['model'] | self.runtime.credentials['model'] = tool_parameters['model'] | ||||
| model = self.runtime.credentials.get('model', None) | model = self.runtime.credentials.get('model', None) |
| if 'success' not in response_data['queryresult'] or response_data['queryresult']['success'] != True: | if 'success' not in response_data['queryresult'] or response_data['queryresult']['success'] != True: | ||||
| query_result = response_data.get('queryresult', {}) | query_result = response_data.get('queryresult', {}) | ||||
| if 'error' in query_result and query_result['error']: | |||||
| if query_result.get('error'): | |||||
| if 'msg' in query_result['error']: | if 'msg' in query_result['error']: | ||||
| if query_result['error']['msg'] == 'Invalid appid': | if query_result['error']['msg'] == 'Invalid appid': | ||||
| raise ToolProviderCredentialValidationError('Invalid appid') | raise ToolProviderCredentialValidationError('Invalid appid') |
| document_score_list = {} | document_score_list = {} | ||||
| for item in all_documents: | for item in all_documents: | ||||
| if 'score' in item.metadata and item.metadata['score']: | |||||
| if item.metadata.get('score'): | |||||
| document_score_list[item.metadata['doc_id']] = item.metadata['score'] | document_score_list[item.metadata['doc_id']] = item.metadata['score'] | ||||
| document_context_list = [] | document_context_list = [] |
| document_score_list = {} | document_score_list = {} | ||||
| if dataset.indexing_technique != "economy": | if dataset.indexing_technique != "economy": | ||||
| for item in documents: | for item in documents: | ||||
| if 'score' in item.metadata and item.metadata['score']: | |||||
| if item.metadata.get('score'): | |||||
| document_score_list[item.metadata['doc_id']] = item.metadata['score'] | document_score_list[item.metadata['doc_id']] = item.metadata['score'] | ||||
| document_context_list = [] | document_context_list = [] | ||||
| index_node_ids = [document.metadata['doc_id'] for document in documents] | index_node_ids = [document.metadata['doc_id'] for document in documents] |
| } | } | ||||
| # Populate article fields from readability fields where present | # Populate article fields from readability fields where present | ||||
| if input_json: | if input_json: | ||||
| if "title" in input_json and input_json["title"]: | |||||
| if input_json.get("title"): | |||||
| article_json["title"] = input_json["title"] | article_json["title"] = input_json["title"] | ||||
| if "byline" in input_json and input_json["byline"]: | |||||
| if input_json.get("byline"): | |||||
| article_json["byline"] = input_json["byline"] | article_json["byline"] = input_json["byline"] | ||||
| if "date" in input_json and input_json["date"]: | |||||
| if input_json.get("date"): | |||||
| article_json["date"] = input_json["date"] | article_json["date"] = input_json["date"] | ||||
| if "content" in input_json and input_json["content"]: | |||||
| if input_json.get("content"): | |||||
| article_json["content"] = input_json["content"] | article_json["content"] = input_json["content"] | ||||
| article_json["plain_content"] = plain_content(article_json["content"], False, False) | article_json["plain_content"] = plain_content(article_json["content"], False, False) | ||||
| article_json["plain_text"] = extract_text_blocks_as_plain_text(article_json["plain_content"]) | article_json["plain_text"] = extract_text_blocks_as_plain_text(article_json["plain_content"]) | ||||
| if "textContent" in input_json and input_json["textContent"]: | |||||
| if input_json.get("textContent"): | |||||
| article_json["plain_text"] = input_json["textContent"] | article_json["plain_text"] = input_json["textContent"] | ||||
| article_json["plain_text"] = re.sub(r'\n\s*\n', '\n', article_json["plain_text"]) | article_json["plain_text"] = re.sub(r'\n\s*\n', '\n', article_json["plain_text"]) | ||||
| if all_documents: | if all_documents: | ||||
| document_score_list = {} | document_score_list = {} | ||||
| for item in all_documents: | for item in all_documents: | ||||
| if 'score' in item.metadata and item.metadata['score']: | |||||
| if item.metadata.get('score'): | |||||
| document_score_list[item.metadata['doc_id']] = item.metadata['score'] | document_score_list[item.metadata['doc_id']] = item.metadata['score'] | ||||
| index_node_ids = [document.metadata['doc_id'] for document in all_documents] | index_node_ids = [document.metadata['doc_id'] for document in all_documents] |
| "I001", # unsorted-imports | "I001", # unsorted-imports | ||||
| "I002", # missing-required-import | "I002", # missing-required-import | ||||
| "UP", # pyupgrade rules | "UP", # pyupgrade rules | ||||
| "RUF019", # unnecessary-key-check | |||||
| ] | ] | ||||
| ignore = [ | ignore = [ | ||||
| "F403", # undefined-local-with-import-star | "F403", # undefined-local-with-import-star |
| if not app: | if not app: | ||||
| raise NotFound("App not found") | raise NotFound("App not found") | ||||
| if 'message_id' in args and args['message_id']: | |||||
| if args.get('message_id'): | |||||
| message_id = str(args['message_id']) | message_id = str(args['message_id']) | ||||
| # get message info | # get message info | ||||
| message = db.session.query(Message).filter( | message = db.session.query(Message).filter( |
| elif args['mode'] == 'channel': | elif args['mode'] == 'channel': | ||||
| filters.append(App.mode == AppMode.CHANNEL.value) | filters.append(App.mode == AppMode.CHANNEL.value) | ||||
| if 'name' in args and args['name']: | |||||
| if args.get('name'): | |||||
| name = args['name'][:30] | name = args['name'][:30] | ||||
| filters.append(App.name.ilike(f'%{name}%')) | filters.append(App.name.ilike(f'%{name}%')) | ||||
| if 'tag_ids' in args and args['tag_ids']: | |||||
| if args.get('tag_ids'): | |||||
| target_ids = TagService.get_target_ids_by_tag_ids('app', | target_ids = TagService.get_target_ids_by_tag_ids('app', | ||||
| tenant_id, | tenant_id, | ||||
| args['tag_ids']) | args['tag_ids']) |
| documents = [] | documents = [] | ||||
| batch = time.strftime('%Y%m%d%H%M%S') + str(random.randint(100000, 999999)) | batch = time.strftime('%Y%m%d%H%M%S') + str(random.randint(100000, 999999)) | ||||
| if 'original_document_id' in document_data and document_data["original_document_id"]: | |||||
| if document_data.get("original_document_id"): | |||||
| document = DocumentService.update_document_with_dataset_id(dataset, document_data, account) | document = DocumentService.update_document_with_dataset_id(dataset, document_data, account) | ||||
| documents.append(document) | documents.append(document) | ||||
| else: | else: | ||||
| if document.display_status != 'available': | if document.display_status != 'available': | ||||
| raise ValueError("Document is not available") | raise ValueError("Document is not available") | ||||
| # update document name | # update document name | ||||
| if 'name' in document_data and document_data['name']: | |||||
| if document_data.get('name'): | |||||
| document.name = document_data['name'] | document.name = document_data['name'] | ||||
| # save process rule | # save process rule | ||||
| if 'process_rule' in document_data and document_data['process_rule']: | |||||
| if document_data.get('process_rule'): | |||||
| process_rule = document_data["process_rule"] | process_rule = document_data["process_rule"] | ||||
| if process_rule["mode"] == "custom": | if process_rule["mode"] == "custom": | ||||
| dataset_process_rule = DatasetProcessRule( | dataset_process_rule = DatasetProcessRule( | ||||
| db.session.commit() | db.session.commit() | ||||
| document.dataset_process_rule_id = dataset_process_rule.id | document.dataset_process_rule_id = dataset_process_rule.id | ||||
| # update document data source | # update document data source | ||||
| if 'data_source' in document_data and document_data['data_source']: | |||||
| if document_data.get('data_source'): | |||||
| file_name = '' | file_name = '' | ||||
| data_source_info = {} | data_source_info = {} | ||||
| if document_data["data_source"]["type"] == "upload_file": | if document_data["data_source"]["type"] == "upload_file": | ||||
| embedding_model.model | embedding_model.model | ||||
| ) | ) | ||||
| dataset_collection_binding_id = dataset_collection_binding.id | dataset_collection_binding_id = dataset_collection_binding.id | ||||
| if 'retrieval_model' in document_data and document_data['retrieval_model']: | |||||
| if document_data.get('retrieval_model'): | |||||
| retrieval_model = document_data['retrieval_model'] | retrieval_model = document_data['retrieval_model'] | ||||
| else: | else: | ||||
| default_retrieval_model = { | default_retrieval_model = { | ||||
| and ('process_rule' not in args and not args['process_rule']): | and ('process_rule' not in args and not args['process_rule']): | ||||
| raise ValueError("Data source or Process rule is required") | raise ValueError("Data source or Process rule is required") | ||||
| else: | else: | ||||
| if 'data_source' in args and args['data_source']: | |||||
| if args.get('data_source'): | |||||
| DocumentService.data_source_args_validate(args) | DocumentService.data_source_args_validate(args) | ||||
| if 'process_rule' in args and args['process_rule']: | |||||
| if args.get('process_rule'): | |||||
| DocumentService.process_rule_args_validate(args) | DocumentService.process_rule_args_validate(args) | ||||
| @classmethod | @classmethod | ||||
| if segment.content == content: | if segment.content == content: | ||||
| if document.doc_form == 'qa_model': | if document.doc_form == 'qa_model': | ||||
| segment.answer = args['answer'] | segment.answer = args['answer'] | ||||
| if 'keywords' in args and args['keywords']: | |||||
| if args.get('keywords'): | |||||
| segment.keywords = args['keywords'] | segment.keywords = args['keywords'] | ||||
| segment.enabled = True | segment.enabled = True | ||||
| segment.disabled_at = None | segment.disabled_at = None |