mirror of
https://github.com/lemeow125/DocManagerBackend.git
synced 2025-01-18 17:13:00 +08:00
Add Ollama integration
This commit is contained in:
parent
9289166c0e
commit
f39f5966d6
2 changed files with 87 additions and 9 deletions
|
@ -21,3 +21,8 @@ ADMIN_PASSWORD = ''
|
|||
|
||||
# Whether to insert test data (UNUSED)
|
||||
TEST_DATA = "True"
|
||||
|
||||
# Ollama for Categorization
|
||||
OLLAMA_URL = ""
|
||||
OLLAMA_USERNAME = ""
|
||||
OLLAMA_PASSWORD = ""
|
|
@ -1,3 +1,6 @@
|
|||
from ollama import ChatResponse
|
||||
import base64
|
||||
import httpx
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from io import BytesIO
|
||||
|
@ -10,9 +13,11 @@ from config.settings import MEDIA_ROOT
|
|||
from watchdog.observers import Observer
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from documents.models import Document
|
||||
from config.settings import get_secret
|
||||
from django.core.files import File
|
||||
import logging
|
||||
import time
|
||||
from ollama import Client
|
||||
|
||||
|
||||
class PDFHandler(FileSystemEventHandler):
|
||||
|
@ -36,8 +41,26 @@ class PDFHandler(FileSystemEventHandler):
|
|||
|
||||
def process_pdf(self, file_path):
|
||||
try:
|
||||
filename = os.path.basename(file_path)
|
||||
filename = str(filename).replace(" ", "")
|
||||
# Get the original filename and directory
|
||||
original_filename = os.path.basename(file_path)
|
||||
original_dir = os.path.dirname(file_path)
|
||||
|
||||
# Check if the filename contains spaces
|
||||
if " " in original_filename:
|
||||
# Create the new filename by replacing spaces
|
||||
new_filename = original_filename.replace(" ", "_")
|
||||
|
||||
# Construct the new full file path
|
||||
new_file_path = os.path.join(original_dir, new_filename)
|
||||
|
||||
# Rename the file
|
||||
os.rename(file_path, new_file_path)
|
||||
|
||||
# Update the filename and file_path variables
|
||||
filename = new_filename
|
||||
file_path = new_file_path
|
||||
else:
|
||||
filename = original_filename
|
||||
metadata = ""
|
||||
document_type = ""
|
||||
|
||||
|
@ -60,14 +83,64 @@ class PDFHandler(FileSystemEventHandler):
|
|||
# Perform OCR
|
||||
text = pytesseract.image_to_string(img).strip()
|
||||
|
||||
lines = text.split("\n")
|
||||
# Get document category
|
||||
# Try to pass image to the Ollama image recognition API first
|
||||
try:
|
||||
client = Client(
|
||||
host=get_secret("OLLAMA_URL"),
|
||||
auth=httpx.BasicAuth(
|
||||
username=get_secret("OLLAMA_USERNAME"), password=get_secret("OLLAMA_PASSWORD"))
|
||||
)
|
||||
|
||||
for line in lines:
|
||||
if line.strip():
|
||||
document_type = line.strip().lower()
|
||||
encoded_image = base64.b64encode(
|
||||
img_buffer.getvalue()).decode()
|
||||
|
||||
attempts = 0
|
||||
while True:
|
||||
if attempts >= 3:
|
||||
raise Exception(
|
||||
"Unable to categorize using Ollama API")
|
||||
attempts += 1
|
||||
|
||||
content = f"""
|
||||
Read the text from the image and provide a category.
|
||||
|
||||
Possible categories are: Announcement, Manual, Form
|
||||
|
||||
Respond only with the category. No explanations are necessary.
|
||||
"""
|
||||
|
||||
response: ChatResponse = client.chat(
|
||||
model="llama3.2-vision",
|
||||
messages=[
|
||||
{"role": "user", "content": content,
|
||||
"images": [encoded_image]},
|
||||
],
|
||||
)
|
||||
|
||||
document_type = response["message"]["content"].split(":")[
|
||||
0].replace("*", "").replace(".", "")
|
||||
|
||||
# A few safety checks in case the model does not follow the output instructions
|
||||
if len(document_type) > 16:
|
||||
self.logger.warning(
|
||||
f"Ollama API gave incorrect document category: {response["message"]["content"]}. Retrying...")
|
||||
break
|
||||
if not document_type:
|
||||
document_type = "other"
|
||||
|
||||
# If that fails, fall back to regular OCR and use the first non-empty line (the title) as the category, as a quick-and-dirty fallback
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error! {e}")
|
||||
self.logger.warning(
|
||||
"Ollama OCR offloading failed. Falling back to default OCR")
|
||||
lines = text.split("\n")
|
||||
|
||||
for line in lines:
|
||||
if line.strip():
|
||||
document_type = line.strip().lower()
|
||||
break
|
||||
|
||||
if not document_type:
|
||||
document_type = "other"
|
||||
|
||||
metadata += text
|
||||
|
||||
|
|
Loading…
Reference in a new issue