mirror of
https://github.com/lemeow125/DocManagerBackend.git
synced 2025-01-19 01:23:02 +08:00
Add Ollama integration
This commit is contained in:
parent
9289166c0e
commit
f39f5966d6
2 changed files with 87 additions and 9 deletions
|
@ -21,3 +21,8 @@ ADMIN_PASSWORD = ''
|
||||||
|
|
||||||
# To insert test data or not (UNUSED)
|
# To insert test data or not (UNUSED)
|
||||||
TEST_DATA = "True"
|
TEST_DATA = "True"
|
||||||
|
|
||||||
|
# Ollama for Categorization
|
||||||
|
OLLAMA_URL = ""
|
||||||
|
OLLAMA_USERNAME = ""
|
||||||
|
OLLAMA_PASSWORD = ""
|
|
@ -1,3 +1,6 @@
|
||||||
|
from ollama import ChatResponse
|
||||||
|
import base64
|
||||||
|
import httpx
|
||||||
from django.core.management.base import BaseCommand, CommandError
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
@ -10,9 +13,11 @@ from config.settings import MEDIA_ROOT
|
||||||
from watchdog.observers import Observer
|
from watchdog.observers import Observer
|
||||||
from watchdog.events import FileSystemEventHandler
|
from watchdog.events import FileSystemEventHandler
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from config.settings import get_secret
|
||||||
from django.core.files import File
|
from django.core.files import File
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
from ollama import Client
|
||||||
|
|
||||||
|
|
||||||
class PDFHandler(FileSystemEventHandler):
|
class PDFHandler(FileSystemEventHandler):
|
||||||
|
@ -36,8 +41,26 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
|
|
||||||
def process_pdf(self, file_path):
|
def process_pdf(self, file_path):
|
||||||
try:
|
try:
|
||||||
filename = os.path.basename(file_path)
|
# Get the original filename and directory
|
||||||
filename = str(filename).replace(" ", "")
|
original_filename = os.path.basename(file_path)
|
||||||
|
original_dir = os.path.dirname(file_path)
|
||||||
|
|
||||||
|
# Check if the filename contains spaces
|
||||||
|
if " " in original_filename:
|
||||||
|
# Create the new filename by replacing spaces
|
||||||
|
new_filename = original_filename.replace(" ", "_")
|
||||||
|
|
||||||
|
# Construct the new full file path
|
||||||
|
new_file_path = os.path.join(original_dir, new_filename)
|
||||||
|
|
||||||
|
# Rename the file
|
||||||
|
os.rename(file_path, new_file_path)
|
||||||
|
|
||||||
|
# Update the filename and file_path variables
|
||||||
|
filename = new_filename
|
||||||
|
file_path = new_file_path
|
||||||
|
else:
|
||||||
|
filename = original_filename
|
||||||
metadata = ""
|
metadata = ""
|
||||||
document_type = ""
|
document_type = ""
|
||||||
|
|
||||||
|
@ -60,12 +83,62 @@ class PDFHandler(FileSystemEventHandler):
|
||||||
# Perform OCR
|
# Perform OCR
|
||||||
text = pytesseract.image_to_string(img).strip()
|
text = pytesseract.image_to_string(img).strip()
|
||||||
|
|
||||||
|
# Get document category
|
||||||
|
# Try to pass image to the Ollama image recognition API first
|
||||||
|
try:
|
||||||
|
client = Client(
|
||||||
|
host=get_secret("OLLAMA_URL"),
|
||||||
|
auth=httpx.BasicAuth(
|
||||||
|
username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD"))
|
||||||
|
)
|
||||||
|
|
||||||
|
encoded_image = base64.b64encode(
|
||||||
|
img_buffer.getvalue()).decode()
|
||||||
|
|
||||||
|
attempts = 0
|
||||||
|
while True:
|
||||||
|
if attempts >= 3:
|
||||||
|
raise Exception(
|
||||||
|
"Unable to categorize using Ollama API")
|
||||||
|
attempts += 1
|
||||||
|
|
||||||
|
content = f"""
|
||||||
|
Read the text from the image and provide a category.
|
||||||
|
|
||||||
|
Possible categories are: Announcement, Manual, Form
|
||||||
|
|
||||||
|
Respond only with the category. No explanations are necessary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
response: ChatResponse = client.chat(
|
||||||
|
model="llama3.2-vision",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": content,
|
||||||
|
"images": [encoded_image]},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
document_type = response["message"]["content"].split(":")[
|
||||||
|
0].replace("*", "").replace(".", "")
|
||||||
|
|
||||||
|
# A few safety checks in case the model does not follow the output instructions
|
||||||
|
if len(document_type) > 16:
|
||||||
|
self.logger.warning(
|
||||||
|
f"Ollama API gave incorrect document category: {response["message"]["content"]}. Retrying...")
|
||||||
|
break
|
||||||
|
|
||||||
|
# If that fails, just use regular OCR to read the title as a dirty fix/fallback
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error! {e}")
|
||||||
|
self.logger.warning(
|
||||||
|
"Ollama OCR offloading failed. Falling back to default OCR")
|
||||||
lines = text.split("\n")
|
lines = text.split("\n")
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if line.strip():
|
if line.strip():
|
||||||
document_type = line.strip().lower()
|
document_type = line.strip().lower()
|
||||||
break
|
break
|
||||||
|
|
||||||
if not document_type:
|
if not document_type:
|
||||||
document_type = "other"
|
document_type = "other"
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue