Implement sender LLM OCR and request remarks

2025-08-29 20:13:38 +08:00 · 2025-01-08 13:38:39 +08:00 · 2025-01-08 13:38:39 +08:00 · 674a7ec592
commit 674a7ec592
parent d5477a1e1a
11 changed files with 174 additions and 62 deletions
--- a/docmanager_backend/config/management/commands/start_watcher.py
+++ b/docmanager_backend/config/management/commands/start_watcher.py
@ -69,79 +69,84 @@ class PDFHandler(FileSystemEventHandler):
            with fitz.open(file_path) as doc:
                num_pages = len(doc)
-                for page_num in range(num_pages):
+                # Perform OCR only on the first page
-                    page = doc[page_num]
+                page = doc[0]
-                    pix = page.get_pixmap(matrix=(1.2, 1.2))
+                pix = page.get_pixmap(matrix=(1.2, 1.2))
-                    # Convert pixmap to bytes
+                # Convert pixmap to bytes
-                    img_bytes = pix.tobytes()
+                img_bytes = pix.tobytes()
-                    # Create a BytesIO object
+                # Create a BytesIO object
-                    img_buffer = BytesIO(img_bytes)
+                img_buffer = BytesIO(img_bytes)
-                    # Create a PIL Image object from the bytes
+                # Create a PIL Image object from the bytes
-                    img = Image.open(img_buffer)
+                img = Image.open(img_buffer)
-                    # Perform OCR
+                # Perform OCR
-                    text = pytesseract.image_to_string(img).strip()
+                text = pytesseract.image_to_string(img).strip()
-                    # Try to pass image to the Ollama image recognition API first
+                # Try to pass image to the Ollama image recognition API first
-                    try:
+                try:
-                        class DocumentCategory(BaseModel):
+                    class DocumentCategory(BaseModel):
-                            category: str = "other"
+                        category: str = "other"
-                            explanation: Optional[str] = None
+                        sent_from: str = "N/A"
                        explanation: Optional[str] = None
-                        client = Client(
+                    client = Client(
-                            host=get_secret("OLLAMA_URL"),
+                        host=get_secret("OLLAMA_URL"),
-                            auth=httpx.BasicAuth(
+                        auth=httpx.BasicAuth(
-                                username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD")) if get_secret("OLLAMA_USE_AUTH") else None,
+                            username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD")) if get_secret("OLLAMA_USE_AUTH") else None,
-                        )
+                    )
-                        encoded_image = base64.b64encode(
+                    encoded_image = base64.b64encode(
-                            img_buffer.getvalue()).decode()
+                        img_buffer.getvalue()).decode()
-                        possible_categories = set((Document.objects.all().values_list(
+                    possible_categories = set((Document.objects.all().values_list(
-                            "document_type", flat=True), "Documented Procedures Manual", "Form", "Special Order"))
+                        "document_type", flat=True), "Documented Procedures Manual", "Form", "Special Order", "Memorandum"))
-                        prompt = f"""
+                    prompt = f"""
-                            Read the text from the image and provide a category. Return as JSON.
+                        Read the text from the image and provide a category. Return as JSON.
-                            Possible categories are: {possible_categories}. You are free to create a new one if none are suitable.
+                        Possible categories are: {possible_categories}. You are free to create a new one if none are suitable.
                            """
                        response = client.chat(
                            model=get_secret("OLLAMA_MODEL"),
                            messages=[
                                {"role": "user",
                                 "content": prompt,
                                 "images": [encoded_image]},
                            ],
                            format=DocumentCategory.model_json_schema(),
                            options={
                                "temperature": 0
                            },
-                        )
+                        If the document is of type Special Order or Memorandum, provide the sender of the document. Possible senders are Vice President, President, Chancellor.
                        provide N/A.
                        """
                    response = client.chat(
                        model=get_secret("OLLAMA_MODEL"),
                        messages=[
                            {"role": "user",
                                "content": prompt,
                                "images": [encoded_image]},
                        ],
                        format=DocumentCategory.model_json_schema(),
                        options={
                            "temperature": 0
                        },
-                        DocumentCategory.model_validate_json(
+                    )
                            response.message.content)
                        result = json.loads(response.message.content)
                        document_type = result.get("category")
-                    # If that fails, just use regular OCR read the title as a dirty fix/fallback
+                    DocumentCategory.model_validate_json(
-                    except Exception as e:
+                        response.message.content)
-                        self.logger.warning(f"Error! {e}")
+                    result = json.loads(response.message.content)
-                        self.logger.warning(
+                    document_type = result.get("category")
-                            "Ollama OCR offload failed. Falling back to default OCR")
+                    sent_from = result.get("sent_from")
                        lines = text.split("\n")
-                        for line in lines:
+                # If that fails, just use regular OCR read the title as a dirty fix/fallback
-                            if line.strip():
+                except Exception as e:
-                                document_type = line.strip().lower()
+                    self.logger.warning(f"Error! {e}")
-                                break
+                    self.logger.warning(
                        "Ollama OCR offload failed. Falling back to default OCR")
                    lines = text.split("\n")
-                        if not document_type:
+                    for line in lines:
-                            document_type = "other"
+                        if line.strip():
                            document_type = line.strip().lower()
                            break
-                    metadata += text
+                    if not document_type:
                        document_type = "other"
                metadata += text
            # Open the file for instance creation
            DOCUMENT, created = Document.objects.get_or_create(
@ -158,8 +163,10 @@ class PDFHandler(FileSystemEventHandler):
                    name=filename, content=File(open(file_path, "rb")))
                self.logger.info(
                    f"Document '{filename}' created successfully with type '{
-                        document_type}'."
+                        document_type}'. sent_from: {sent_from}"
                )
                DOCUMENT.sent_from = sent_from
                DOCUMENT.save()
            else:
                self.logger.info(f"Document '{filename}' already exists.")
--- a/docmanager_backend/document_requests/migrations/0003_documentrequest_denied_remarks.py
+++ b/docmanager_backend/document_requests/migrations/0003_documentrequest_denied_remarks.py
@ -0,0 +1,18 @@
 # Generated by Django 5.1.3 on 2025-01-08 04:29
 from django.db import migrations, models
 class Migration(migrations.Migration):
    """Add the nullable ``denied_remarks`` text field to ``DocumentRequest``."""

    # Must be applied after the migration that introduced the questionnaire
    # field on the same app.
    dependencies = [
        ("document_requests", "0002_documentrequest_questionnaire"),
    ]
    operations = [
        # The new field is optional (blank=True, null=True).
        # NOTE(review): per the Django docs, max_length on a TextField is not
        # enforced at the database level -- confirm the 512 limit is
        # validated elsewhere if it must be a hard limit.
        migrations.AddField(
            model_name="documentrequest",
            name="denied_remarks",
            field=models.TextField(blank=True, max_length=512, null=True),
        ),
    ]
--- a/docmanager_backend/document_requests/migrations/0004_rename_denied_remarks_documentrequest_remarks.py
+++ b/docmanager_backend/document_requests/migrations/0004_rename_denied_remarks_documentrequest_remarks.py
@ -0,0 +1,18 @@
 # Generated by Django 5.1.3 on 2025-01-08 04:51
 from django.db import migrations
 class Migration(migrations.Migration):
    """Rename ``DocumentRequest.denied_remarks`` to ``remarks``."""

    # Depends on the migration that created the ``denied_remarks`` field
    # being renamed here.
    dependencies = [
        ("document_requests", "0003_documentrequest_denied_remarks"),
    ]
    operations = [
        # Only the field name changes; no field definition is supplied, so
        # the field's type and options are left as they were.
        migrations.RenameField(
            model_name="documentrequest",
            old_name="denied_remarks",
            new_name="remarks",
        ),
    ]
--- a/docmanager_backend/document_requests/models.py
+++ b/docmanager_backend/document_requests/models.py
@ -27,6 +27,8 @@ class DocumentRequest(models.Model):
        ("denied", "Denied"),
    )
    remarks = models.TextField(max_length=512, blank=True, null=True)
    status = models.CharField(
        max_length=32, choices=STATUS_CHOICES, default="pending")
--- a/docmanager_backend/document_requests/serializers.py
+++ b/docmanager_backend/document_requests/serializers.py
@ -101,6 +101,7 @@ class DocumentRequestSerializer(serializers.ModelSerializer):
            "purpose",
            "date_requested",
            "documents",
            "remarks",
            "status",
        ]
        read_only_fields = [
@ -112,6 +113,7 @@ class DocumentRequestSerializer(serializers.ModelSerializer):
            "purpose",
            "date_requested",
            "documents",
            "remarks,"
            "status",
        ]
@ -146,6 +148,7 @@ class FullDocumentRequestSerializer(serializers.ModelSerializer):
            "purpose",
            "date_requested",
            "documents",
            "remarks",
            "status",
        ]
        read_only_fields = [
@ -167,27 +170,41 @@ class DocumentRequestUpdateSerializer(serializers.ModelSerializer):
    class Meta:
        model = DocumentRequest
-        fields = ["id", "status"]
+        fields = ["id", "status", "remarks"]
-        read_only_fields = ["id", "status"]
+        read_only_fields = ["id"]
    def update(self, instance, validated_data):
        print(validated_data)
        if instance.status == "denied" or instance.status == "approved":
            raise serializers.ValidationError(
                {
                    "error": "Already approved/denied requests cannot be updated. You should instead create a new request and approve it from there"
                }
            )
        elif "status" not in validated_data:
            raise serializers.ValidationError(
                {
                    "error": "No status value update provided"
                }
            )
        elif validated_data["status"] == instance.status:
            raise serializers.ValidationError(
                {"error": "Request form status provided is the same as current status"}
            )
-
+        elif validated_data["status"] == "denied" and "remarks" not in validated_data:
            raise serializers.ValidationError(
                {"error": "Request denial requires remarks"}
            )
        representation = super().update(instance, validated_data)
        # Send an email on request status update
        try:
            email = RequestUpdateEmail()
            email.context = {"request_status": instance.status}
            if instance.status == "denied":
                email.context = {"remarks": instance.remarks}
            else:
                email.context = {"remarks": "N/A"}
            email.send(to=[instance.requester.email])
        except:
            # Silence out errors if email sending fails
--- a/docmanager_backend/documents/migrations/0003_document_memorandum_from.py
+++ b/docmanager_backend/documents/migrations/0003_document_memorandum_from.py
@ -0,0 +1,18 @@
 # Generated by Django 5.1.3 on 2025-01-08 04:39
 from django.db import migrations, models
 class Migration(migrations.Migration):
    """Add the nullable ``memorandum_from`` char field to ``Document``."""

    # Must be applied after the migration that altered ``document_type``.
    dependencies = [
        ("documents", "0002_alter_document_document_type"),
    ]
    operations = [
        # Optional field (blank=True, null=True) limited to 128 characters.
        migrations.AddField(
            model_name="document",
            name="memorandum_from",
            field=models.CharField(blank=True, max_length=128, null=True),
        ),
    ]
--- a/docmanager_backend/documents/migrations/0004_rename_memorandum_from_document_sent_from.py
+++ b/docmanager_backend/documents/migrations/0004_rename_memorandum_from_document_sent_from.py
@ -0,0 +1,18 @@
 # Generated by Django 5.1.3 on 2025-01-08 04:44
 from django.db import migrations
 class Migration(migrations.Migration):
    """Rename ``Document.memorandum_from`` to ``sent_from``."""

    # Depends on the migration that created the ``memorandum_from`` field
    # being renamed here.
    dependencies = [
        ("documents", "0003_document_memorandum_from"),
    ]
    operations = [
        # Only the field name changes; no field definition is supplied, so
        # the field's type and options are left as they were.
        migrations.RenameField(
            model_name="document",
            old_name="memorandum_from",
            new_name="sent_from",
        ),
    ]
--- a/docmanager_backend/documents/models.py
+++ b/docmanager_backend/documents/models.py
@ -9,6 +9,9 @@ class Document(models.Model):
    document_type = models.CharField(
        max_length=128, null=False, blank=False
    )
    sent_from = models.CharField(
        max_length=128, null=True, blank=True
    )
    number_pages = models.IntegerField(null=False, blank=False)
    ocr_metadata = models.TextField(null=True, blank=True)
--- a/docmanager_backend/documents/serializers.py
+++ b/docmanager_backend/documents/serializers.py
@ -53,6 +53,7 @@ class DocumentSerializer(serializers.ModelSerializer):
            "document_type",
            "number_pages",
            "ocr_metadata",
            "sent_from",
            "date_uploaded",
        ]
        read_only_fields = [
@ -61,6 +62,7 @@ class DocumentSerializer(serializers.ModelSerializer):
            "document_type",
            "number_pages",
            "ocr_metadata",
            "sent_from",
            "date_uploaded",
        ]
@ -81,6 +83,7 @@ class DocumentFileSerializer(serializers.ModelSerializer):
            "number_pages",
            "ocr_metadata",
            "date_uploaded",
            "sent_from",
            "file",
        ]
        read_only_fields = [
@ -90,5 +93,6 @@ class DocumentFileSerializer(serializers.ModelSerializer):
            "number_pages",
            "ocr_metadata",
            "date_uploaded",
            "sent_from",
            "file",
        ]
--- a/docmanager_backend/emails/templates.py
+++ b/docmanager_backend/emails/templates.py
@ -8,6 +8,7 @@ class RequestUpdateEmail(email.BaseEmailMessage):
    def get_context_data(self):
        """Build the template context for the request-update email.

        Starts from the parent class's context, makes sure the
        ``request_status`` and ``remarks`` keys are present (``None`` when
        the parent context does not already hold them), adds the frontend
        ``url``, and finally overlays ``self.context`` so values stored
        there take precedence over everything set here.
        """
        ctx = super().get_context_data()
        # Re-assigning through .get() keeps an existing value and fills in
        # None for a key that is missing.
        for key in ("request_status", "remarks"):
            ctx[key] = ctx.get(key)
        ctx["url"] = FRONTEND_URL
        # Applied last: entries in self.context override the defaults above.
        ctx.update(self.context)
        return ctx
--- a/docmanager_backend/emails/templates/request_approved.html
+++ b/docmanager_backend/emails/templates/request_approved.html
@ -7,6 +7,8 @@
 {% block text_body %}
  {% blocktrans %}You're receiving this email because your document request has been {{ request_status }}.{% endblocktrans %}
  {% blocktrans %}Remarks: {{ remarks }}{% endblocktrans %}
  {% trans 'Please visit the site to check your request:' %}
  {{ url|safe }}
@ -18,6 +20,10 @@
    {% blocktrans %}You're receiving this email because your document request has been {{ request_status }}.{% endblocktrans %}
  </p>
  <p>
    {% blocktrans %}Remarks: {{ remarks }}{% endblocktrans %}
  </p>
  <p>
    {% trans 'Please visit the site to check your request:' %}
  </p>