mirror of
https://github.com/lemeow125/DocManagerBackend.git
synced 2025-01-19 01:23:02 +08:00
Implement sender LLM OCR and request remarks
This commit is contained in:
parent
d5477a1e1a
commit
674a7ec592
11 changed files with 174 additions and 62 deletions
|
@ -69,8 +69,8 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
with fitz.open(file_path) as doc:
|
with fitz.open(file_path) as doc:
|
||||||
num_pages = len(doc)
|
num_pages = len(doc)
|
||||||
|
|
||||||
for page_num in range(num_pages):
|
# Perform OCR only on the first page
|
||||||
page = doc[page_num]
|
page = doc[0]
|
||||||
pix = page.get_pixmap(matrix=(1.2, 1.2))
|
pix = page.get_pixmap(matrix=(1.2, 1.2))
|
||||||
|
|
||||||
# Convert pixmap to bytes
|
# Convert pixmap to bytes
|
||||||
|
@ -89,6 +89,7 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
try:
|
try:
|
||||||
class DocumentCategory(BaseModel):
|
class DocumentCategory(BaseModel):
|
||||||
category: str = "other"
|
category: str = "other"
|
||||||
|
sent_from: str = "N/A"
|
||||||
explanation: Optional[str] = None
|
explanation: Optional[str] = None
|
||||||
|
|
||||||
client = Client(
|
client = Client(
|
||||||
|
@ -101,11 +102,14 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
img_buffer.getvalue()).decode()
|
img_buffer.getvalue()).decode()
|
||||||
|
|
||||||
possible_categories = set((Document.objects.all().values_list(
|
possible_categories = set((Document.objects.all().values_list(
|
||||||
"document_type", flat=True), "Documented Procedures Manual", "Form", "Special Order"))
|
"document_type", flat=True), "Documented Procedures Manual", "Form", "Special Order", "Memorandum"))
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
Read the text from the image and provide a category. Return as JSON.
|
Read the text from the image and provide a category. Return as JSON.
|
||||||
|
|
||||||
Possible categories are: {possible_categories}. You are free to create a new one if none are suitable.
|
Possible categories are: {possible_categories}. You are free to create a new one if none are suitable.
|
||||||
|
|
||||||
|
If the document is of type Special Order or Memorandum, provide the sender of the document. Possible senders are Vice President, President, Chancellor.
|
||||||
|
Otherwise, provide N/A.
|
||||||
"""
|
"""
|
||||||
response = client.chat(
|
response = client.chat(
|
||||||
model=get_secret("OLLAMA_MODEL"),
|
model=get_secret("OLLAMA_MODEL"),
|
||||||
|
@ -125,6 +129,7 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
response.message.content)
|
response.message.content)
|
||||||
result = json.loads(response.message.content)
|
result = json.loads(response.message.content)
|
||||||
document_type = result.get("category")
|
document_type = result.get("category")
|
||||||
|
sent_from = result.get("sent_from")
|
||||||
|
|
||||||
# If that fails, just use regular OCR to read the title as a dirty fix/fallback
|
# If that fails, just use regular OCR to read the title as a dirty fix/fallback
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -158,8 +163,10 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
name=filename, content=File(open(file_path, "rb")))
|
name=filename, content=File(open(file_path, "rb")))
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f"Document '{filename}' created successfully with type '{
|
f"Document '{filename}' created successfully with type '{
|
||||||
document_type}'."
|
document_type}'. sent_from: {sent_from}"
|
||||||
)
|
)
|
||||||
|
DOCUMENT.sent_from = sent_from
|
||||||
|
DOCUMENT.save()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.logger.info(f"Document '{filename}' already exists.")
|
self.logger.info(f"Document '{filename}' already exists.")
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by Django 5.1.3 on 2025-01-08 04:29
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("document_requests", "0002_documentrequest_questionnaire"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="documentrequest",
|
||||||
|
name="denied_remarks",
|
||||||
|
field=models.TextField(blank=True, max_length=512, null=True),
|
||||||
|
),
|
||||||
|
]
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by Django 5.1.3 on 2025-01-08 04:51
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("document_requests", "0003_documentrequest_denied_remarks"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.RenameField(
|
||||||
|
model_name="documentrequest",
|
||||||
|
old_name="denied_remarks",
|
||||||
|
new_name="remarks",
|
||||||
|
),
|
||||||
|
]
|
|
@ -27,6 +27,8 @@ class DocumentRequest(models.Model):
|
||||||
("denied", "Denied"),
|
("denied", "Denied"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
remarks = models.TextField(max_length=512, blank=True, null=True)
|
||||||
|
|
||||||
status = models.CharField(
|
status = models.CharField(
|
||||||
max_length=32, choices=STATUS_CHOICES, default="pending")
|
max_length=32, choices=STATUS_CHOICES, default="pending")
|
||||||
|
|
||||||
|
|
|
@ -101,6 +101,7 @@ class DocumentRequestSerializer(serializers.ModelSerializer):
|
||||||
"purpose",
|
"purpose",
|
||||||
"date_requested",
|
"date_requested",
|
||||||
"documents",
|
"documents",
|
||||||
|
"remarks",
|
||||||
"status",
|
"status",
|
||||||
]
|
]
|
||||||
read_only_fields = [
|
read_only_fields = [
|
||||||
|
@ -112,6 +113,7 @@ class DocumentRequestSerializer(serializers.ModelSerializer):
|
||||||
"purpose",
|
"purpose",
|
||||||
"date_requested",
|
"date_requested",
|
||||||
"documents",
|
"documents",
|
||||||
|
"remarks,"
|
||||||
"status",
|
"status",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -146,6 +148,7 @@ class FullDocumentRequestSerializer(serializers.ModelSerializer):
|
||||||
"purpose",
|
"purpose",
|
||||||
"date_requested",
|
"date_requested",
|
||||||
"documents",
|
"documents",
|
||||||
|
"remarks",
|
||||||
"status",
|
"status",
|
||||||
]
|
]
|
||||||
read_only_fields = [
|
read_only_fields = [
|
||||||
|
@ -167,27 +170,41 @@ class DocumentRequestUpdateSerializer(serializers.ModelSerializer):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = DocumentRequest
|
model = DocumentRequest
|
||||||
fields = ["id", "status"]
|
fields = ["id", "status", "remarks"]
|
||||||
read_only_fields = ["id", "status"]
|
read_only_fields = ["id"]
|
||||||
|
|
||||||
def update(self, instance, validated_data):
|
def update(self, instance, validated_data):
|
||||||
|
print(validated_data)
|
||||||
if instance.status == "denied" or instance.status == "approved":
|
if instance.status == "denied" or instance.status == "approved":
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(
|
||||||
{
|
{
|
||||||
"error": "Already approved/denied requests cannot be updated. You should instead create a new request and approve it from there"
|
"error": "Already approved/denied requests cannot be updated. You should instead create a new request and approve it from there"
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
elif "status" not in validated_data:
|
||||||
|
raise serializers.ValidationError(
|
||||||
|
{
|
||||||
|
"error": "No status value update provided"
|
||||||
|
}
|
||||||
|
)
|
||||||
elif validated_data["status"] == instance.status:
|
elif validated_data["status"] == instance.status:
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(
|
||||||
{"error": "Request form status provided is the same as current status"}
|
{"error": "Request form status provided is the same as current status"}
|
||||||
)
|
)
|
||||||
|
elif validated_data["status"] == "denied" and "remarks" not in validated_data:
|
||||||
|
raise serializers.ValidationError(
|
||||||
|
{"error": "Request denial requires remarks"}
|
||||||
|
)
|
||||||
representation = super().update(instance, validated_data)
|
representation = super().update(instance, validated_data)
|
||||||
|
|
||||||
# Send an email on request status update
|
# Send an email on request status update
|
||||||
try:
|
try:
|
||||||
email = RequestUpdateEmail()
|
email = RequestUpdateEmail()
|
||||||
email.context = {"request_status": instance.status}
|
email.context = {"request_status": instance.status}
|
||||||
|
if instance.status == "denied":
|
||||||
|
email.context = {"remarks": instance.remarks}
|
||||||
|
else:
|
||||||
|
email.context = {"remarks": "N/A"}
|
||||||
email.send(to=[instance.requester.email])
|
email.send(to=[instance.requester.email])
|
||||||
except:
|
except:
|
||||||
# Silence out errors if email sending fails
|
# Silence out errors if email sending fails
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by Django 5.1.3 on 2025-01-08 04:39
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("documents", "0002_alter_document_document_type"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="document",
|
||||||
|
name="memorandum_from",
|
||||||
|
field=models.CharField(blank=True, max_length=128, null=True),
|
||||||
|
),
|
||||||
|
]
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by Django 5.1.3 on 2025-01-08 04:44
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("documents", "0003_document_memorandum_from"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.RenameField(
|
||||||
|
model_name="document",
|
||||||
|
old_name="memorandum_from",
|
||||||
|
new_name="sent_from",
|
||||||
|
),
|
||||||
|
]
|
|
@ -9,6 +9,9 @@ class Document(models.Model):
|
||||||
document_type = models.CharField(
|
document_type = models.CharField(
|
||||||
max_length=128, null=False, blank=False
|
max_length=128, null=False, blank=False
|
||||||
)
|
)
|
||||||
|
sent_from = models.CharField(
|
||||||
|
max_length=128, null=True, blank=True
|
||||||
|
)
|
||||||
number_pages = models.IntegerField(null=False, blank=False)
|
number_pages = models.IntegerField(null=False, blank=False)
|
||||||
ocr_metadata = models.TextField(null=True, blank=True)
|
ocr_metadata = models.TextField(null=True, blank=True)
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,7 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||||
"document_type",
|
"document_type",
|
||||||
"number_pages",
|
"number_pages",
|
||||||
"ocr_metadata",
|
"ocr_metadata",
|
||||||
|
"sent_from",
|
||||||
"date_uploaded",
|
"date_uploaded",
|
||||||
]
|
]
|
||||||
read_only_fields = [
|
read_only_fields = [
|
||||||
|
@ -61,6 +62,7 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||||
"document_type",
|
"document_type",
|
||||||
"number_pages",
|
"number_pages",
|
||||||
"ocr_metadata",
|
"ocr_metadata",
|
||||||
|
"sent_from",
|
||||||
"date_uploaded",
|
"date_uploaded",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -81,6 +83,7 @@ class DocumentFileSerializer(serializers.ModelSerializer):
|
||||||
"number_pages",
|
"number_pages",
|
||||||
"ocr_metadata",
|
"ocr_metadata",
|
||||||
"date_uploaded",
|
"date_uploaded",
|
||||||
|
"sent_from",
|
||||||
"file",
|
"file",
|
||||||
]
|
]
|
||||||
read_only_fields = [
|
read_only_fields = [
|
||||||
|
@ -90,5 +93,6 @@ class DocumentFileSerializer(serializers.ModelSerializer):
|
||||||
"number_pages",
|
"number_pages",
|
||||||
"ocr_metadata",
|
"ocr_metadata",
|
||||||
"date_uploaded",
|
"date_uploaded",
|
||||||
|
"sent_from",
|
||||||
"file",
|
"file",
|
||||||
]
|
]
|
||||||
|
|
|
@ -8,6 +8,7 @@ class RequestUpdateEmail(email.BaseEmailMessage):
|
||||||
def get_context_data(self):
|
def get_context_data(self):
|
||||||
context = super().get_context_data()
|
context = super().get_context_data()
|
||||||
context["request_status"] = context.get("request_status")
|
context["request_status"] = context.get("request_status")
|
||||||
|
context["remarks"] = context.get("remarks")
|
||||||
context["url"] = FRONTEND_URL
|
context["url"] = FRONTEND_URL
|
||||||
context.update(self.context)
|
context.update(self.context)
|
||||||
return context
|
return context
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
{% block text_body %}
|
{% block text_body %}
|
||||||
{% blocktrans %}You're receiving this email because your document request has been {{ request_status }}.{% endblocktrans %}
|
{% blocktrans %}You're receiving this email because your document request has been {{ request_status }}.{% endblocktrans %}
|
||||||
|
|
||||||
|
{% blocktrans %}Remarks: {{ remarks }}{% endblocktrans %}
|
||||||
|
|
||||||
{% trans 'Please visit the site to check your request:' %}
|
{% trans 'Please visit the site to check your request:' %}
|
||||||
{{ url|safe }}
|
{{ url|safe }}
|
||||||
|
|
||||||
|
@ -18,6 +20,10 @@
|
||||||
{% blocktrans %}You're receiving this email because your document request has been {{ request_status }}.{% endblocktrans %}
|
{% blocktrans %}You're receiving this email because your document request has been {{ request_status }}.{% endblocktrans %}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
{% blocktrans %}Remarks: {{ remarks }}{% endblocktrans %}
|
||||||
|
</p>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
{% trans 'Please visit the site to check your request:' %}
|
{% trans 'Please visit the site to check your request:' %}
|
||||||
</p>
|
</p>
|
||||||
|
|
Loading…
Reference in a new issue