mirror of
https://github.com/lemeow125/DocManagerBackend.git
synced 2025-04-20 08:51:31 +08:00
Compare commits
No commits in common. "37f656e3a8f605bfd3e969d9f6b5ba371f1cade8" and "9b78fdd9ae325010f0ab9af41691dd0b5e8e3647" have entirely different histories.
37f656e3a8
...
9b78fdd9ae
10 changed files with 11 additions and 113 deletions
|
@ -1,10 +0,0 @@
|
||||||
media/
|
|
||||||
static/
|
|
||||||
documentation/
|
|
||||||
.env
|
|
||||||
.venv/
|
|
||||||
.vscode/
|
|
||||||
.git/
|
|
||||||
.gitignore
|
|
||||||
.woodpecker/
|
|
||||||
**/__pycache__/
|
|
|
@ -10,7 +10,7 @@ COPY scripts/ /app/scripts/
|
||||||
RUN chmod +x /app/scripts/start.sh
|
RUN chmod +x /app/scripts/start.sh
|
||||||
|
|
||||||
# Install packages
|
# Install packages
|
||||||
RUN apt update && apt install -y graphviz libgraphviz-dev graphviz-dev tesseract-ocr
|
RUN apt update && apt install -y graphviz libgraphviz-dev graphviz-dev
|
||||||
RUN pip3 install --upgrade pip && pip3 install --no-cache-dir -r requirements.txt
|
RUN pip3 install --upgrade pip && pip3 install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
# Expose port 8000 for the web server
|
# Expose port 8000 for the web server
|
||||||
|
|
|
@ -4,7 +4,3 @@ from django.apps import AppConfig
|
||||||
class DocumentsConfig(AppConfig):
|
class DocumentsConfig(AppConfig):
|
||||||
default_auto_field = "django.db.models.BigAutoField"
|
default_auto_field = "django.db.models.BigAutoField"
|
||||||
name = "documents"
|
name = "documents"
|
||||||
|
|
||||||
def ready(self) -> None:
|
|
||||||
import documents.signals
|
|
||||||
return super().ready()
|
|
||||||
|
|
|
@ -1,25 +0,0 @@
|
||||||
# Generated by Django 5.1.3 on 2024-11-24 05:17
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
("documents", "0001_initial"),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.AddField(
|
|
||||||
model_name="document",
|
|
||||||
name="metadata",
|
|
||||||
field=models.TextField(null=True),
|
|
||||||
),
|
|
||||||
migrations.AlterField(
|
|
||||||
model_name="document",
|
|
||||||
name="document_type",
|
|
||||||
field=models.CharField(
|
|
||||||
choices=[("memorandum", "Memorandum"), ("hoa", "HOA")], max_length=32
|
|
||||||
),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -1,22 +0,0 @@
|
||||||
# Generated by Django 5.1.3 on 2024-11-24 06:04
|
|
||||||
|
|
||||||
from django.db import migrations, models
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
("documents", "0002_document_metadata_alter_document_document_type"),
|
|
||||||
]
|
|
||||||
|
|
||||||
operations = [
|
|
||||||
migrations.RemoveField(
|
|
||||||
model_name="document",
|
|
||||||
name="metadata",
|
|
||||||
),
|
|
||||||
migrations.AddField(
|
|
||||||
model_name="document",
|
|
||||||
name="ocr_metadata",
|
|
||||||
field=models.TextField(blank=True, null=True),
|
|
||||||
),
|
|
||||||
]
|
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils.timezone import now
|
from django.utils.timezone import now
|
||||||
import uuid
|
import uuid
|
||||||
|
@ -8,15 +7,17 @@ class Document(models.Model):
|
||||||
name = models.CharField(max_length=100)
|
name = models.CharField(max_length=100)
|
||||||
|
|
||||||
DOCUMENT_TYPE_CHOICES = (
|
DOCUMENT_TYPE_CHOICES = (
|
||||||
("memorandum", "Memorandum"),
|
("pdf", "PDF"),
|
||||||
("hoa", "HOA"),
|
("image", "Image"),
|
||||||
# TODO: Update this list on types of documents
|
("video", "Video"),
|
||||||
|
("doc", "Word Document"),
|
||||||
|
("excel", "Excel Document"),
|
||||||
|
("ppt", "Powerpoint Document"),
|
||||||
)
|
)
|
||||||
document_type = models.CharField(
|
document_type = models.CharField(
|
||||||
max_length=32, choices=DOCUMENT_TYPE_CHOICES, null=False, blank=False
|
max_length=32, choices=DOCUMENT_TYPE_CHOICES, null=False, blank=False
|
||||||
)
|
)
|
||||||
number_pages = models.IntegerField(null=False, blank=False)
|
number_pages = models.IntegerField(null=False, blank=False)
|
||||||
ocr_metadata = models.TextField(null=True, blank=True)
|
|
||||||
|
|
||||||
def upload_to(instance, filename):
|
def upload_to(instance, filename):
|
||||||
_, extension = filename.split(".")
|
_, extension = filename.split(".")
|
||||||
|
|
|
@ -36,13 +36,12 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Document
|
model = Document
|
||||||
fields = ["id", "name", "document_type",
|
fields = ["id", "name", "document_type",
|
||||||
"number_pages", "ocr_metadata", "date_uploaded"]
|
"number_pages", "date_uploaded"]
|
||||||
read_only_fields = [
|
read_only_fields = [
|
||||||
"id",
|
"id",
|
||||||
"name",
|
"name",
|
||||||
"document_type",
|
"document_type",
|
||||||
"number_pages",
|
"number_pages",
|
||||||
"ocr_metadata",
|
|
||||||
"date_uploaded",
|
"date_uploaded",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -60,17 +59,15 @@ class DocumentFileSerializer(serializers.ModelSerializer):
|
||||||
"id",
|
"id",
|
||||||
"name",
|
"name",
|
||||||
"document_type",
|
"document_type",
|
||||||
"number_pages",
|
|
||||||
"ocr_metadata",
|
|
||||||
"date_uploaded",
|
|
||||||
"file",
|
"file",
|
||||||
|
"number_pages",
|
||||||
|
"date_uploaded",
|
||||||
]
|
]
|
||||||
read_only_fields = [
|
read_only_fields = [
|
||||||
"id",
|
"id",
|
||||||
"name",
|
"name",
|
||||||
"document_type",
|
"document_type",
|
||||||
"number_pages",
|
"number_pages",
|
||||||
"ocr_metadata",
|
|
||||||
"date_uploaded",
|
"date_uploaded",
|
||||||
"file",
|
"file",
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
from django.db.models.signals import post_save
|
|
||||||
from django.dispatch import receiver
|
|
||||||
from config.settings import MEDIA_ROOT
|
|
||||||
import os
|
|
||||||
import fitz
|
|
||||||
import pytesseract
|
|
||||||
from PIL import Image
|
|
||||||
from .models import Document
|
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save, sender=Document)
|
|
||||||
def domain_post_save(sender, instance, **kwargs):
|
|
||||||
if not instance.ocr_metadata:
|
|
||||||
metadata = ""
|
|
||||||
with fitz.open(os.path.join(MEDIA_ROOT, instance.file.name)) as doc:
|
|
||||||
mat = fitz.Matrix(1.2, 1.2)
|
|
||||||
for page in doc:
|
|
||||||
pix = page.get_pixmap(matrix=mat)
|
|
||||||
output = f'{page.number}.jpg'
|
|
||||||
pix.save(output)
|
|
||||||
res = str(pytesseract.image_to_string(Image.open(output)))
|
|
||||||
os.remove(output)
|
|
||||||
metadata += res
|
|
||||||
|
|
||||||
instance.ocr_metadata = metadata
|
|
||||||
instance.save()
|
|
Binary file not shown.
Before Width: | Height: | Size: 132 KiB After Width: | Height: | Size: 132 KiB |
|
@ -11,7 +11,6 @@ defusedxml==0.8.0rc2
|
||||||
Django==5.1.3
|
Django==5.1.3
|
||||||
django-cleanup==9.0.0
|
django-cleanup==9.0.0
|
||||||
django-cors-headers==4.6.0
|
django-cors-headers==4.6.0
|
||||||
django-extensions==3.2.3
|
|
||||||
django-rest-framework==0.1.0
|
django-rest-framework==0.1.0
|
||||||
django-unfold==0.41.0
|
django-unfold==0.41.0
|
||||||
djangorestframework==3.15.2
|
djangorestframework==3.15.2
|
||||||
|
@ -19,28 +18,18 @@ djangorestframework-simplejwt==5.3.1
|
||||||
djoser==2.3.1
|
djoser==2.3.1
|
||||||
drf-spectacular==0.27.2
|
drf-spectacular==0.27.2
|
||||||
drf-spectacular-sidecar==2024.11.1
|
drf-spectacular-sidecar==2024.11.1
|
||||||
filelock==3.16.1
|
|
||||||
fsspec==2024.10.0
|
|
||||||
gunicorn==23.0.0
|
gunicorn==23.0.0
|
||||||
idna==3.10
|
idna==3.10
|
||||||
inflection==0.5.1
|
inflection==0.5.1
|
||||||
Jinja2==3.1.4
|
|
||||||
jsonschema==4.23.0
|
jsonschema==4.23.0
|
||||||
jsonschema-specifications==2024.10.1
|
jsonschema-specifications==2024.10.1
|
||||||
MarkupSafe==2.1.5
|
|
||||||
mpmath==1.3.0
|
|
||||||
mypy-extensions==1.0.0
|
mypy-extensions==1.0.0
|
||||||
networkx==3.4.2
|
|
||||||
oauthlib==3.2.2
|
oauthlib==3.2.2
|
||||||
packaging==24.2
|
packaging==24.2
|
||||||
pathspec==0.12.1
|
pathspec==0.12.1
|
||||||
pillow==11.0.0
|
|
||||||
platformdirs==4.3.6
|
platformdirs==4.3.6
|
||||||
pycparser==2.22
|
pycparser==2.22
|
||||||
pyflakes==3.2.0
|
|
||||||
PyJWT==2.10.0
|
PyJWT==2.10.0
|
||||||
PyMuPDF==1.24.14
|
|
||||||
pytesseract==0.3.13
|
|
||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
python3-openid==3.2.0
|
python3-openid==3.2.0
|
||||||
PyYAML==6.0.2
|
PyYAML==6.0.2
|
||||||
|
@ -48,14 +37,12 @@ referencing==0.35.1
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
requests-oauthlib==2.0.0
|
requests-oauthlib==2.0.0
|
||||||
rpds-py==0.21.0
|
rpds-py==0.21.0
|
||||||
setuptools==70.2.0
|
|
||||||
social-auth-app-django==5.4.2
|
social-auth-app-django==5.4.2
|
||||||
social-auth-core==4.5.4
|
social-auth-core==4.5.4
|
||||||
sqlparse==0.5.2
|
sqlparse==0.5.2
|
||||||
sympy==1.13.1
|
|
||||||
typing_extensions==4.12.2
|
|
||||||
tzdata==2024.2
|
tzdata==2024.2
|
||||||
uritemplate==4.1.1
|
uritemplate==4.1.1
|
||||||
urllib3==2.2.3
|
urllib3==2.2.3
|
||||||
whitenoise==6.8.2
|
whitenoise==6.8.2
|
||||||
|
django-extensions==3.2.3
|
||||||
pygraphviz==1.14; platform_system == 'Linux'
|
pygraphviz==1.14; platform_system == 'Linux'
|
Loading…
Add table
Reference in a new issue