Add Ollama integration

This commit is contained in:
Keannu Bernasol 2024-12-07 02:44:45 +08:00
parent 9289166c0e
commit f39f5966d6
2 changed files with 87 additions and 9 deletions

View file

@ -20,4 +20,9 @@ ADMIN_EMAIL = 'admin@test.com'
ADMIN_PASSWORD = ''
# Whether to insert test data (UNUSED)
TEST_DATA = "True"
TEST_DATA = "True"
# Ollama for Categorization
OLLAMA_URL = ""
OLLAMA_USERNAME = ""
OLLAMA_PASSWORD = ""

View file

@ -1,3 +1,6 @@
from ollama import ChatResponse
import base64
import httpx
from django.core.management.base import BaseCommand, CommandError
from io import BytesIO
@ -10,9 +13,11 @@ from config.settings import MEDIA_ROOT
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from documents.models import Document
from config.settings import get_secret
from django.core.files import File
import logging
import time
from ollama import Client
class PDFHandler(FileSystemEventHandler):
@ -36,8 +41,26 @@ class PDFHandler(FileSystemEventHandler):
def process_pdf(self, file_path):
try:
filename = os.path.basename(file_path)
filename = str(filename).replace(" ", "")
# Get the original filename and directory
original_filename = os.path.basename(file_path)
original_dir = os.path.dirname(file_path)
# Check if the filename contains spaces
if " " in original_filename:
# Create the new filename by replacing spaces
new_filename = original_filename.replace(" ", "_")
# Construct the new full file path
new_file_path = os.path.join(original_dir, new_filename)
# Rename the file
os.rename(file_path, new_file_path)
# Update the filename and file_path variables
filename = new_filename
file_path = new_file_path
else:
filename = original_filename
metadata = ""
document_type = ""
@ -60,14 +83,64 @@ class PDFHandler(FileSystemEventHandler):
# Perform OCR
text = pytesseract.image_to_string(img).strip()
lines = text.split("\n")
# Get document category
# Try to pass image to the Ollama image recognition API first
try:
client = Client(
host=get_secret("OLLAMA_URL"),
auth=httpx.BasicAuth(
username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD"))
)
for line in lines:
if line.strip():
document_type = line.strip().lower()
encoded_image = base64.b64encode(
img_buffer.getvalue()).decode()
attempts = 0
while True:
if attempts >= 3:
raise Exception(
"Unable to categorize using Ollama API")
attempts += 1
content = f"""
Read the text from the image and provide a category.
Possible categories are: Announcement, Manual, Form
Respond only with the category. No explanations are necessary.
"""
response: ChatResponse = client.chat(
model="llama3.2-vision",
messages=[
{"role": "user", "content": content,
"images": [encoded_image]},
],
)
document_type = response["message"]["content"].split(":")[
0].replace("*", "").replace(".", "")
# A few safety checks in case the model does not follow the output instructions
if len(document_type) > 16:
self.logger.warning(
f"Ollama API gave incorrect document category: {response["message"]["content"]}. Retrying...")
break
if not document_type:
document_type = "other"
# If that fails, fall back to regular OCR and use the first non-empty line (the title) as the category; this is a dirty fix
except Exception as e:
self.logger.warning(f"Error! {e}")
self.logger.warning(
"Ollama OCR offloading failed. Falling back to default OCR")
lines = text.split("\n")
for line in lines:
if line.strip():
document_type = line.strip().lower()
break
if not document_type:
document_type = "other"
metadata += text