Add Ollama integration

This commit is contained in:
Keannu Bernasol 2024-12-07 02:44:45 +08:00
parent 9289166c0e
commit f39f5966d6
2 changed files with 87 additions and 9 deletions

View file

@ -21,3 +21,8 @@ ADMIN_PASSWORD = ''
# To insert test data or not (UNUSED) # To insert test data or not (UNUSED)
TEST_DATA = "True" TEST_DATA = "True"
# Ollama server URL and basic-auth credentials used for document categorization
OLLAMA_URL = ""
OLLAMA_USERNAME = ""
OLLAMA_PASSWORD = ""

View file

@ -1,3 +1,6 @@
from ollama import ChatResponse
import base64
import httpx
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from io import BytesIO from io import BytesIO
@ -10,9 +13,11 @@ from config.settings import MEDIA_ROOT
from watchdog.observers import Observer from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler from watchdog.events import FileSystemEventHandler
from documents.models import Document from documents.models import Document
from config.settings import get_secret
from django.core.files import File from django.core.files import File
import logging import logging
import time import time
from ollama import Client
class PDFHandler(FileSystemEventHandler): class PDFHandler(FileSystemEventHandler):
@ -36,8 +41,26 @@ class PDFHandler(FileSystemEventHandler):
def process_pdf(self, file_path): def process_pdf(self, file_path):
try: try:
filename = os.path.basename(file_path) # Get the original filename and directory
filename = str(filename).replace(" ", "") original_filename = os.path.basename(file_path)
original_dir = os.path.dirname(file_path)
# Check if the filename contains spaces
if " " in original_filename:
# Create the new filename by replacing spaces
new_filename = original_filename.replace(" ", "_")
# Construct the new full file path
new_file_path = os.path.join(original_dir, new_filename)
# Rename the file
os.rename(file_path, new_file_path)
# Update the filename and file_path variables
filename = new_filename
file_path = new_file_path
else:
filename = original_filename
metadata = "" metadata = ""
document_type = "" document_type = ""
@ -60,12 +83,62 @@ class PDFHandler(FileSystemEventHandler):
# Perform OCR # Perform OCR
text = pytesseract.image_to_string(img).strip() text = pytesseract.image_to_string(img).strip()
# Get document category
# Try to pass image to the Ollama image recognition API first
try:
client = Client(
host=get_secret("OLLAMA_URL"),
auth=httpx.BasicAuth(
username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD"))
)
encoded_image = base64.b64encode(
img_buffer.getvalue()).decode()
attempts = 0
while True:
if attempts >= 3:
raise Exception(
"Unable to categorize using Ollama API")
attempts += 1
content = f"""
Read the text from the image and provide a category.
Possible categories are: Announcement, Manual, Form
Respond only with the category. No explanations are necessary.
"""
response: ChatResponse = client.chat(
model="llama3.2-vision",
messages=[
{"role": "user", "content": content,
"images": [encoded_image]},
],
)
document_type = response["message"]["content"].split(":")[
0].replace("*", "").replace(".", "")
# Sanity check in case the model ignores the output instructions: a valid category is short
if len(document_type) > 16:
self.logger.warning(
f"Ollama API gave incorrect document category: {response["message"]["content"]}. Retrying...")
break
# If that fails, fall back to the regular OCR text and use its first non-empty line (the title) as a dirty fix
except Exception as e:
self.logger.warning(f"Error! {e}")
self.logger.warning(
"Ollama OCR offloading failed. Falling back to default OCR")
lines = text.split("\n") lines = text.split("\n")
for line in lines: for line in lines:
if line.strip(): if line.strip():
document_type = line.strip().lower() document_type = line.strip().lower()
break break
if not document_type: if not document_type:
document_type = "other" document_type = "other"