浏览代码

fix update segment keyword with same content (#12908)

tags/0.15.2
Jyong 9 个月前
父节点
当前提交
162a8c4393
没有帐户链接到提交者的电子邮件
共有 1 个文件被更改,包括 13 次插入和 8 次删除
  1. api/services/dataset_service.py(13 次插入,8 次删除)

+ 13
- 8
api/services/dataset_service.py 查看文件

import random import random
import time import time
import uuid import uuid
from collections import Counter
from typing import Any, Optional from typing import Any, Optional


from flask_login import current_user # type: ignore from flask_login import current_user # type: ignore
segment.answer = args.answer segment.answer = args.answer
segment.word_count += len(args.answer) if args.answer else 0 segment.word_count += len(args.answer) if args.answer else 0
word_count_change = segment.word_count - word_count_change word_count_change = segment.word_count - word_count_change
keyword_changed = False
if args.keywords: if args.keywords:
segment.keywords = args.keywords
if Counter(segment.keywords) != Counter(args.keywords):
segment.keywords = args.keywords
keyword_changed = True
segment.enabled = True segment.enabled = True
segment.disabled_at = None segment.disabled_at = None
segment.disabled_by = None segment.disabled_by = None
document.word_count = max(0, document.word_count + word_count_change) document.word_count = max(0, document.word_count + word_count_change)
db.session.add(document) db.session.add(document)
# update segment index task # update segment index task
if args.enabled:
VectorService.create_segments_vector(
[args.keywords] if args.keywords else None,
[segment],
dataset,
document.doc_form,
)
if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks: if document.doc_form == IndexType.PARENT_CHILD_INDEX and args.regenerate_child_chunks:
# regenerate child chunks # regenerate child chunks
# get embedding model instance # get embedding model instance
VectorService.generate_child_chunks( VectorService.generate_child_chunks(
segment, document, dataset, embedding_model_instance, processing_rule, True segment, document, dataset, embedding_model_instance, processing_rule, True
) )
elif document.doc_form in (IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX):
if args.enabled or keyword_changed:
VectorService.create_segments_vector(
[args.keywords] if args.keywords else None,
[segment],
dataset,
document.doc_form,
)
else: else:
segment_hash = helper.generate_text_hash(content) segment_hash = helper.generate_text_hash(content)
tokens = 0 tokens = 0

正在加载...
取消
保存