FROM ubuntu:22.04 AS base

ENV TZ="Asia/Taipei"

# System build dependencies plus Python 3.11.
RUN apt-get update && apt-get install -yq \
    build-essential \
    curl \
    git \
    libncursesw5-dev \
    libssl-dev \
    libsqlite3-dev \
    libgdbm-dev \
    libc6-dev \
    libbz2-dev \
    software-properties-common \
    python3.11 python3.11-dev python3-pip

# Use the Baidu PyPI mirror for faster installs inside the target network.
RUN pip3 config set global.index-url https://mirror.baidu.com/pypi/simple
RUN pip3 config set global.trusted-host mirror.baidu.com
RUN pip3 install --upgrade pip
RUN pip3 install torch==2.0.1
RUN pip3 install torch-model-archiver==0.8.2
RUN pip3 install torchvision==0.15.2

WORKDIR /docgpt
COPY requirements.txt .
ENV PYTHONPATH=/docgpt/

from abc import ABC
from openai import OpenAI
import os
import base64
from io import BytesIO

class Base(ABC):
    def describe(self, image, max_tokens=300):
        raise NotImplementedError("Please implement the describe method!")


class GptV4(Base):
    def __init__(self):
        # The API key is read from the OPENAPI_KEY environment variable.
        self.client = OpenAI(api_key=os.environ["OPENAPI_KEY"])

    def describe(self, image, max_tokens=300):
        # Serialize the PIL image to an in-memory JPEG; fall back to PNG,
        # e.g. for images with an alpha channel.
        buffered = BytesIO()
        try:
            image.save(buffered, format="JPEG")
        except Exception:
            buffered = BytesIO()  # discard any partially written JPEG data
            image.save(buffered, format="PNG")
        b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        res = self.client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            # Prompt (Chinese): "Please describe the image in
                            # detail in Chinese: time, place, people, events,
                            # the people's mood, etc."
                            "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等。",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{b64}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        return res.choices[0].message.content.strip()

class QWen(Base):
    def chat(self, system, history, gen_conf):
        from http import HTTPStatus
        from dashscope import Generation
        from dashscope.api_entities.dashscope_response import Role
        # export DASHSCOPE_API_KEY=YOUR_DASHSCOPE_API_KEY
        # Prepend the system prompt to the conversation history and forward
        # any generation parameters (e.g. temperature) to the API call.
        messages = [{'role': Role.SYSTEM, 'content': system}] + history
        response = Generation.call(
            Generation.Models.qwen_turbo,
            messages=messages,
            result_format='message',
            **gen_conf
        )
        if response.status_code == HTTPStatus.OK:
            return response.output.choices[0]['message']['content']
        return response.message
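
# Minimal usage sketch (an assumption, not part of the original module): the
# `describe` implementations expect a PIL image; the file path, system prompt,
# and generation parameters below are hypothetical placeholders.
if __name__ == "__main__":
    from PIL import Image

    img = Image.open("sample.jpg")  # hypothetical test image
    # GptV4 requires OPENAPI_KEY to be set in the environment (see __init__).
    caption = GptV4().describe(img, max_tokens=300)
    print(caption)

    # QWen.chat takes a system prompt, a message history in the DashScope
    # message format, and a dict of generation parameters.
    answer = QWen().chat(
        system="You are a helpful assistant.",
        history=[{"role": "user", "content": "Summarize the caption: " + caption}],
        gen_conf={"temperature": 0.7},
    )
    print(answer)
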
Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 106184.91it/s]
----------- Model Configuration -----------
Model Arch: GFL
Transform Order:
--transform op: Resize
--transform op: NormalizeImage
--transform op: Permute
--transform op: PadStride
--------------------------------------------
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge']` instead.
Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
WARNING:root:The files are stored in /opt/home/kevinhu/docgpt/, please check it!