Add directory watcher service

This commit is contained in:
Keannu Christian Bernasol 2024-11-27 00:32:28 +08:00
parent 957272cd07
commit 5bc170f519
6 changed files with 164 additions and 8 deletions

View file

@ -0,0 +1,26 @@
# Generated by Django 5.1.3 on 2024-11-26 15:12
from django.db import migrations, models
# (stored value, human-readable label) pairs accepted by ``document_type``.
_DOCUMENT_TYPE_CHOICES = [
    ("memorandum", "Memorandum"),
    ("hoa", "HOA"),
    ("documented procedures manual", "Documented Procedures Manual"),
    ("other", "Other"),
]


class Migration(migrations.Migration):
    """Alter ``document.document_type`` to use the choice list above."""

    # Depends on migration 0003 of the ``documents`` app.
    dependencies = [
        ("documents", "0003_remove_document_metadata_document_ocr_metadata"),
    ]

    operations = [
        migrations.AlterField(
            model_name="document",
            name="document_type",
            field=models.CharField(
                max_length=32,
                choices=_DOCUMENT_TYPE_CHOICES,
            ),
        ),
    ]

View file

@ -10,7 +10,9 @@ class Document(models.Model):
DOCUMENT_TYPE_CHOICES = (
("memorandum", "Memorandum"),
("hoa", "HOA"),
# TODO: Update this list of document types
("documented procedures manual", "Documented Procedures Manual"),
("other", "Other"),
)
document_type = models.CharField(
max_length=32, choices=DOCUMENT_TYPE_CHOICES, null=False, blank=False

View file

@ -1,3 +1,6 @@
from io import BytesIO
from documents.models import Document
from django.db.models.signals import post_save
from django.dispatch import receiver
from config.settings import MEDIA_ROOT
@ -9,18 +12,25 @@ from .models import Document
def _extract_ocr_text(pdf_path):
    """Return the OCR text of every page of the document at *pdf_path*.

    Each page is rendered to an in-memory image (no temporary files are
    written) and passed to Tesseract. Per-page results are stripped and
    joined with newlines so that words on adjacent pages do not run together.
    Pages that yield no text are skipped.
    """
    page_texts = []
    with fitz.open(pdf_path) as doc:
        # Render at 1.2x zoom on both axes before running OCR.
        zoom = fitz.Matrix(1.2, 1.2)
        for page in doc:
            pix = page.get_pixmap(matrix=zoom)
            # Decode the encoded pixmap straight from memory and release the
            # image as soon as OCR for this page is done.
            with Image.open(BytesIO(pix.tobytes())) as img:
                text = pytesseract.image_to_string(img).strip()
            if text:
                page_texts.append(text)
    return "\n".join(page_texts)


@receiver(post_save, sender=Document)
def document_post_save(sender, instance, **kwargs):
    """Fill in ``ocr_metadata`` for a saved ``Document`` that has none yet.

    Args:
        sender: The model class that sent the signal (``Document``).
        instance: The ``Document`` that was just saved.
        **kwargs: Remaining ``post_save`` signal arguments (unused).
    """
    if instance.ocr_metadata:
        return

    metadata = _extract_ocr_text(os.path.join(MEDIA_ROOT, instance.file.name))

    # Saving below fires post_save again. If OCR produced no text the field
    # would still be empty and this receiver would recurse forever, so only
    # persist (and re-trigger) when there is something to store.
    if not metadata:
        return

    instance.ocr_metadata = metadata
    instance.save()