diff --git a/.env.sample b/.env.sample index 72470fd..362394d 100644 --- a/.env.sample +++ b/.env.sample @@ -20,11 +20,4 @@ ADMIN_EMAIL = 'admin@test.com' ADMIN_PASSWORD = '' # To insert test data or not (UNUSED) -TEST_DATA = "True" - -# Ollama for Categorization -OLLAMA_URL = "localhost:11434" -OLLAMA_USE_AUTH = "False" -OLLAMA_MODEL = "knoopx/mobile-vlm:3b-fp16" -OLLAMA_USERNAME = "" -OLLAMA_PASSWORD = "" \ No newline at end of file +TEST_DATA = "True" \ No newline at end of file diff --git a/docmanager_backend/accounts/permissions.py b/docmanager_backend/accounts/permissions.py index e2be857..5e45e97 100644 --- a/docmanager_backend/accounts/permissions.py +++ b/docmanager_backend/accounts/permissions.py @@ -8,8 +8,7 @@ class IsStaff(BasePermission): def has_permission(self, request, view): return bool( - request.user and request.user.role in ( - "head", "admin", "planning", "staff") + request.user and request.user.role in ("head", "admin", "planning", "staff") ) @@ -19,7 +18,7 @@ class IsPlanning(BasePermission): """ def has_permission(self, request, view): - return bool(request.user and request.user.role == "planning") + return bool(request.user and request.user.role in ("head", "admin", "planning")) class IsHead(BasePermission): diff --git a/docmanager_backend/accounts/serializers.py b/docmanager_backend/accounts/serializers.py index 80384a0..32ede3c 100644 --- a/docmanager_backend/accounts/serializers.py +++ b/docmanager_backend/accounts/serializers.py @@ -6,7 +6,7 @@ from rest_framework.settings import api_settings class CustomUserSerializer(serializers.ModelSerializer): - birthday = serializers.DateField(format="%Y-%m-%d") + birthday = serializers.DateField(format="%m-%d-%Y") class Meta: model = CustomUser diff --git a/docmanager_backend/accounts/signals.py b/docmanager_backend/accounts/signals.py index 6baddc5..bb53c71 100644 --- a/docmanager_backend/accounts/signals.py +++ b/docmanager_backend/accounts/signals.py @@ -10,48 +10,19 @@ from .models import CustomUser def create_admin_user(sender, **kwargs): # Programatically creates the administrator account if sender.name == "accounts": - users = [{ - "email": get_secret("ADMIN_EMAIL"), - "role": "head", - "admin": True, - }, { - "email": "staff@test.com", - "role": "staff", - "admin": False, - }, { - "email": "planning@test.com", - "role": "planning", - "admin": False, - }, { - "email": "client@test.com", - "role": "client", - "admin": False, - },] - for user in users: - USER = CustomUser.objects.filter( - email=user["email"]).first() - if not USER: - if user["admin"]: - USER = CustomUser.objects.create_superuser( - username=user["email"], - email=user["email"], - password=get_secret("ADMIN_PASSWORD"), - sex="male", - birthday=localdate(now()), - role=user["role"] - ) - else: - USER = CustomUser.objects.create_user( - username=user["email"], - email=user["email"], - password=get_secret("ADMIN_PASSWORD"), - sex="male", - birthday=localdate(now()), - role=user["role"] + ADMIN_USER = CustomUser.objects.filter( + email=get_secret("ADMIN_EMAIL")).first() + if not ADMIN_USER: + ADMIN_USER = CustomUser.objects.create_superuser( + username=get_secret("ADMIN_EMAIL"), + email=get_secret("ADMIN_EMAIL"), + password=get_secret("ADMIN_PASSWORD"), + sex="male", + birthday=localdate(now()), + ) - ) - print(f"Created {user['role']} account: {USER.email}") + print("Created administrator account:", ADMIN_USER.email) - USER.first_name = f"DEBUG_USER:{USER.email}" - USER.is_active = True - USER.save() + ADMIN_USER.first_name = "Administrator" + ADMIN_USER.is_active = True + ADMIN_USER.save() diff --git a/docmanager_backend/config/management/commands/start_watcher.py b/docmanager_backend/config/management/commands/start_watcher.py index 2fe93fe..656afcb 100644 --- a/docmanager_backend/config/management/commands/start_watcher.py +++ b/docmanager_backend/config/management/commands/start_watcher.py @@ -1,7 +1,4 @@ -from ollama import ChatResponse -import base64 -import httpx -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from io import BytesIO from documents.models import Document @@ -13,11 +10,9 @@ from config.settings import MEDIA_ROOT from watchdog.observers import Observer from watchdog.events import FileSystemEventHandler from documents.models import Document -from config.settings import get_secret from django.core.files import File import logging import time -from ollama import Client class PDFHandler(FileSystemEventHandler): @@ -41,26 +36,8 @@ class PDFHandler(FileSystemEventHandler): def process_pdf(self, file_path): try: - # Get the original filename and directory - original_filename = os.path.basename(file_path) - original_dir = os.path.dirname(file_path) - - # Check if the filename contains spaces - if " " in original_filename: - # Create the new filename by replacing spaces - new_filename = original_filename.replace(" ", "_") - - # Construct the new full file path - new_file_path = os.path.join(original_dir, new_filename) - - # Rename the file - os.rename(file_path, new_file_path) - - # Update the filename and file_path variables - filename = new_filename - file_path = new_file_path - else: - filename = original_filename + filename = os.path.basename(file_path) + filename = str(filename).replace(" ", "") metadata = "" document_type = "" @@ -83,70 +60,20 @@ class PDFHandler(FileSystemEventHandler): # Perform OCR text = pytesseract.image_to_string(img).strip() - # Get document category - # Try to pass image to the Ollama image recognition API first - try: - client = Client( - host=get_secret("OLLAMA_URL"), - auth=httpx.BasicAuth( - username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD")) if get_secret("OLLAMA_USE_AUTH") else None - ) + lines = text.split("\n") - encoded_image = base64.b64encode( - img_buffer.getvalue()).decode() - - attempts = 0 - while True: - if attempts >= 3: - raise Exception( - "Unable to categorize using Ollama API") - attempts += 1 - - content = f""" - Read the text from the image and provide a category. - - Possible categories are: Announcement, Manual, Form - - Respond only with the category. No explanations are necessary. - """ - - response: ChatResponse = client.chat( - model=get_secret("OLLAMA_MODEL"), - messages=[ - {"role": "user", "content": content, - "images": [encoded_image]}, - ], - ) - - document_type = response["message"]["content"].replace( - "*", "").replace(".", "") - - # A few safety checks if the model does not follow through with output instructions - if len(document_type) > 16: - self.logger.warning( - f"Ollama API gave incorrect document category: {response['message']['content']}. Retrying...") + for line in lines: + if line.strip(): + document_type = line.strip().lower() break - - # If that fails, just use regular OCR read the title as a dirty fix/fallback - except Exception as e: - self.logger.warning(f"Error! {e}") - self.logger.warning( - "Ollama OCR offloading failed. Falling back to default OCR") - lines = text.split("\n") - - for line in lines: - if line.strip(): - document_type = line.strip().lower() - break - - if not document_type: - document_type = "other" + if not document_type: + document_type = "other" metadata += text # Open the file for instance creation DOCUMENT, created = Document.objects.get_or_create( - name=filename.replace(".pdf", ""), + name=filename, defaults={ "number_pages": num_pages, "ocr_metadata": metadata, @@ -158,7 +85,8 @@ class PDFHandler(FileSystemEventHandler): DOCUMENT.file.save( name=filename, content=File(open(file_path, "rb"))) self.logger.info( - f"Document '{filename}' created successfully with type '{document_type}'." + f"Document '{filename}' created successfully with type '{ + document_type}'." ) else: diff --git a/docmanager_backend/documents/admin.py b/docmanager_backend/documents/admin.py index 356fe3e..4e56f79 100644 --- a/docmanager_backend/documents/admin.py +++ b/docmanager_backend/documents/admin.py @@ -8,4 +8,4 @@ from .models import Document class DocumentAdmin(ModelAdmin): model = Document search_fields = ["id", "name", "document_type"] - list_display = ["id", "name", "document_type", "date_uploaded"] + list_display = ["id", "name", "document_type"] diff --git a/docmanager_backend/documents/views.py b/docmanager_backend/documents/views.py index 9b7d1f1..0bcb527 100644 --- a/docmanager_backend/documents/views.py +++ b/docmanager_backend/documents/views.py @@ -51,7 +51,7 @@ class DocumentListView(generics.ListAPIView): http_method_names = ["get"] serializer_class = DocumentSerializer - queryset = Document.objects.all().order_by("-date_uploaded") + queryset = Document.objects.all() pagination_class = PageNumberPagination permission_classes = [IsAuthenticated] diff --git a/docmanager_backend/emails/templates/request_approved.html b/docmanager_backend/emails/templates/request_approved.html index 2b74432..783c865 100644 --- a/docmanager_backend/emails/templates/request_approved.html +++ b/docmanager_backend/emails/templates/request_approved.html @@ -10,7 +10,7 @@ {% trans 'Please visit the site to check your request:' %} {{ url|safe }} - {% trans 'For hardcopy requests, please proceed to the USTP office to avail of your requested copies. Hardcopy requests are valid only within 1 month of requesting.' %} + {% trans 'For hardcopy requests, please proceed to the USTP office to avail of your requested copies:' %} {% endblock %} {% block html_body %} @@ -27,6 +27,6 @@

- {% trans 'For hardcopy requests, please proceed to the USTP office to avail of your requested copies. Hardcopy requests are valid only within 1 month of requesting.' %} + {% trans 'For hardcopy requests, please proceed to the USTP office to avail of your requested copies:' %}

{% endblock %}