Files
natureinpots_community/plugins/media/tasks.py
2025-07-09 01:05:45 -05:00

146 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# File: plugins/media/tasks.py
import os
import shutil
import zipfile
from werkzeug.utils import secure_filename
from PIL import Image, UnidentifiedImageError
from celery.schedules import crontab
from flask import current_app
from app import db
from app.celery_app import celery
from plugins.media.models import Media, ZipJob
# ─── Constants ────────────────────────────────────────────────────────────────
# File extensions accepted inside an uploaded ZIP batch.
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
DOC_EXTS = {'.pdf', '.txt', '.csv'}
# Upper bound on entries per ZIP, to cap extraction work per job.
MAX_ZIP_FILES = 1000
# Maximum pixel count (w * h) an image may have — 8000 x 8000.
MAX_PIXELS = 8000 * 8000
def validate_image(path):
    """
    Return True if *path* is a readable image no larger than MAX_PIXELS.

    ``Image.verify()`` consumes the underlying file and leaves the image
    object unusable, so the size is read from a second, fresh handle.
    Returns False (never raises) for unreadable or oversized files.
    """
    try:
        with Image.open(path) as img:
            img.verify()
        # Re-open for the dimensions; the post-verify() object cannot be
        # reused. The original used Image.open(path).size without closing
        # the file, leaking a handle per call — fixed via context manager.
        with Image.open(path) as img:
            width, height = img.size
        return (width * height) <= MAX_PIXELS
    except (UnidentifiedImageError, OSError):
        # OSError is the modern name; IOError is merely its alias.
        return False
@celery.task(
    bind=True,
    name='plugins.media.tasks.process_zip',
    queue='media'
)
def process_zip(self, job_id, zip_path):
    """
    Unpack and validate a user-uploaded ZIP batch.

    Extracts every member of *zip_path* into a sibling ``<zip>_contents``
    directory, validating each entry by extension and content: images go
    through validate_image(), PDFs must start with ``%PDF-``, and the
    remaining document types (.txt/.csv) must decode as UTF-8. The ZipJob
    row identified by *job_id* is updated to 'done' on success or 'failed'
    (with ``error`` set) on the first bad entry. The extraction directory
    is always removed afterwards.
    """
    job = ZipJob.query.get(job_id)
    if job is None:
        # Job row vanished (e.g. deleted before the worker picked it up);
        # there is nothing to record status against. Previously this
        # crashed with AttributeError on ``job.status``.
        return
    job.status = 'processing'
    db.session.commit()

    extract_dir = f"{zip_path}_contents"
    try:
        with zipfile.ZipFile(zip_path) as zf:
            names = zf.namelist()
            if len(names) > MAX_ZIP_FILES:
                raise ValueError('ZIP contains too many files.')
            os.makedirs(extract_dir, exist_ok=True)
            for member in names:
                # secure_filename() flattens path separators and strips
                # unsafe characters, so any member name it alters (nested
                # dirs, ../ traversal, odd chars) is rejected outright —
                # this doubles as zip-slip protection.
                safe = secure_filename(member)
                if safe != member:
                    raise ValueError(f'Illegal filename {member}')
                _, ext = os.path.splitext(safe.lower())
                if ext not in IMAGE_EXTS | DOC_EXTS:
                    raise ValueError(f'Unsupported type {ext}')
                target = os.path.join(extract_dir, safe)
                with zf.open(member) as src, open(target, 'wb') as dst:
                    dst.write(src.read())
                if ext in IMAGE_EXTS:
                    if not validate_image(target):
                        raise ValueError(f'Bad image: {member}')
                elif ext == '.pdf':
                    with open(target, 'rb') as f:
                        header = f.read(5)
                    if header != b'%PDF-':
                        raise ValueError(f'Bad PDF: {member}')
                else:
                    with open(target, 'rb') as f:
                        sample = f.read(1024)
                    try:
                        sample.decode('utf-8')
                    except UnicodeDecodeError as exc:
                        # A multibyte character split exactly at the
                        # 1024-byte read boundary is not corruption (the
                        # original wrongly failed such files); only treat
                        # errors before the final partial sequence as bad.
                        if exc.start < len(sample) - 3:
                            raise ValueError(f'Bad text file: {member}')
        job.status = 'done'
    except Exception as e:
        job.status = 'failed'
        job.error = str(e)
    finally:
        db.session.commit()
        if os.path.isdir(extract_dir):
            shutil.rmtree(extract_dir)
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """
    Register the daily media-prune beat entry (runs at 02:00 every day).
    """
    daily_at_2am = crontab(hour=2, minute=0)
    sender.add_periodic_task(
        daily_at_2am,
        prune_orphans.s(),
        name='media_prune',
        queue='media',
    )
@celery.task(
    name='plugins.media.tasks.prune_orphans',
    queue='media'
)
def prune_orphans():
    """
    Mark orphaned Media records and move their files to static/orphaned/.

    A Media row counts as orphaned when it is still 'active' but no longer
    linked to a plant, growlog, or related record. Each matching file is
    moved under ``static/orphaned/``, the row is updated via
    ``mark_orphaned()`` with its new URL, and the change is committed once
    at the end. Missing files are logged and skipped.
    """
    orphan_dir = os.path.join(current_app.root_path, 'static', 'orphaned')
    os.makedirs(orphan_dir, exist_ok=True)
    candidates = Media.query.filter(
        Media.status == 'active',
        Media.plant_id.is_(None),
        Media.growlog_id.is_(None),
        Media.related_id.is_(None)
    ).all()
    for m in candidates:
        src_rel = m.file_url.lstrip('/')
        src_abs = os.path.join(current_app.root_path, src_rel)
        if not os.path.isfile(src_abs):
            current_app.logger.warning(f"Orphan prune: file not found {src_abs}")
            continue
        filename = os.path.basename(src_abs)
        dest_abs = os.path.join(orphan_dir, filename)
        # Robustness: two orphans can share a basename; suffix the Media id
        # rather than silently overwriting the earlier file.
        if os.path.exists(dest_abs):
            stem, ext = os.path.splitext(filename)
            filename = f"{stem}_{m.id}{ext}"
            dest_abs = os.path.join(orphan_dir, filename)
        shutil.move(src_abs, dest_abs)
        # BUG FIX: the original built the URL with a literal placeholder
        # ("/static/orphaned/(unknown)") instead of the moved file's name,
        # so every orphaned row pointed at a nonexistent path.
        new_url = f"/static/orphaned/{filename}"
        m.mark_orphaned(new_url)
        current_app.logger.info(
            f"Orphaned media #{m.id}: moved {src_rel} -> {new_url}"
        )
    db.session.commit()
def init_media_tasks(celery_app):
    """
    Entry point called by the JSON-driven plugin loader.

    Deliberately a no-op: Celery beat scheduling is wired up through the
    ``on_after_configure`` signal instead, so this only logs that the
    loader invoked it (which keeps tasks_init from erroring).
    """
    celery_app.logger.info("[Media] init_media_tasks called (noop)")