# File: plugins/media/tasks.py
import logging
import os
import shutil
import zipfile

from werkzeug.utils import secure_filename
from PIL import Image, UnidentifiedImageError
from celery.schedules import crontab
from flask import current_app

from app import db
from app.celery_app import celery
from plugins.media.models import Media, ZipJob

logger = logging.getLogger(__name__)

# ─── Constants ────────────────────────────────────────────────────────────────
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
DOC_EXTS = {'.pdf', '.txt', '.csv'}
MAX_ZIP_FILES = 1000
MAX_PIXELS = 8000 * 8000  # reject decompression-bomb images above 64 megapixels


def validate_image(path):
    """Return True if the file is a readable image within the pixel budget."""
    try:
        with Image.open(path) as img:
            img.verify()
        # verify() leaves the Image object unusable, so reopen it to read the size.
        with Image.open(path) as img:
            w, h = img.size
        return (w * h) <= MAX_PIXELS
    except (UnidentifiedImageError, IOError):
        return False


@celery.task(
    bind=True,
    name='plugins.media.tasks.process_zip',
    queue='media'
)
def process_zip(self, job_id, zip_path):
    """
    Unpack and validate a user-uploaded ZIP batch.
    """
    job = ZipJob.query.get(job_id)
    job.status = 'processing'
    db.session.commit()

    extract_dir = f"{zip_path}_contents"
    try:
        with zipfile.ZipFile(zip_path) as zf:
            names = zf.namelist()
            if len(names) > MAX_ZIP_FILES:
                raise ValueError('ZIP contains too many files.')

            os.makedirs(extract_dir, exist_ok=True)
            for member in names:
                # secure_filename() strips path separators, so any nested or
                # otherwise unsafe entry name (zip-slip) fails this check.
                safe = secure_filename(member)
                if safe != member:
                    raise ValueError(f'Illegal filename {member}')

                _, ext = os.path.splitext(safe.lower())
                if ext not in IMAGE_EXTS | DOC_EXTS:
                    raise ValueError(f'Unsupported type {ext}')

                target = os.path.join(extract_dir, safe)
                with zf.open(member) as src, open(target, 'wb') as dst:
                    dst.write(src.read())

                if ext in IMAGE_EXTS:
                    if not validate_image(target):
                        raise ValueError(f'Bad image: {member}')
                elif ext == '.pdf':
                    with open(target, 'rb') as f:
                        header = f.read(5)
                    if header != b'%PDF-':
                        raise ValueError(f'Bad PDF: {member}')
                else:
                    # Plain-text types: a UnicodeDecodeError here is caught
                    # below and marks the job as failed.
                    with open(target, 'rb') as f:
                        f.read(1024).decode('utf-8')

        job.status = 'done'
    except Exception as e:
        job.status = 'failed'
        job.error = str(e)
    finally:
        db.session.commit()
        if os.path.isdir(extract_dir):
            shutil.rmtree(extract_dir)


@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """
    Schedule the periodic media prune job every day at 2am.
    """
    sender.add_periodic_task(
        crontab(hour=2, minute=0),
        prune_orphans.s(),
        name='media_prune',
        queue='media'
    )


@celery.task(
    name='plugins.media.tasks.prune_orphans',
    queue='media'
)
def prune_orphans():
    """
    Mark orphaned Media records, move their files to /static/orphaned/,
    and log the change in the DB.
    """
    orphan_dir = os.path.join(current_app.root_path, 'static', 'orphaned')
    os.makedirs(orphan_dir, exist_ok=True)

    # Active media rows that are no longer attached to any parent record.
    candidates = Media.query.filter(
        Media.status == 'active',
        Media.plant_id.is_(None),
        Media.growlog_id.is_(None),
        Media.related_id.is_(None)
    ).all()

    for m in candidates:
        src_rel = m.file_url.lstrip('/')
        src_abs = os.path.join(current_app.root_path, src_rel)
        if not os.path.isfile(src_abs):
            current_app.logger.warning(f"Orphan prune: file not found {src_abs}")
            continue

        filename = os.path.basename(src_abs)
        dest_abs = os.path.join(orphan_dir, filename)
        shutil.move(src_abs, dest_abs)

        new_url = f"/static/orphaned/{filename}"
        m.mark_orphaned(new_url)
        current_app.logger.info(
            f"Orphaned media #{m.id}: moved {src_rel} → {new_url}"
        )

    db.session.commit()


def init_media_tasks(celery_app):
    """
    Called by the JSON-driven loader so tasks_init no longer errors.
    Celery scheduling is handled via on_after_configure.
    """
    # Celery app instances have no .logger attribute; use the module logger.
    logger.info("[Media] init_media_tasks called (no-op)")
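

# ─── Usage sketch ─────────────────────────────────────────────────────────────
# A minimal illustration of how an upload view might enqueue process_zip.
# The route, form field name, and UPLOAD_DIR below are assumptions made for
# the example, not part of this plugin:
#
#     @app.route('/media/upload-zip', methods=['POST'])
#     def upload_zip():
#         f = request.files['archive']
#         zip_path = os.path.join(UPLOAD_DIR, secure_filename(f.filename))
#         f.save(zip_path)
#         job = ZipJob(status='queued')
#         db.session.add(job)
#         db.session.commit()
#         process_zip.delay(job.id, zip_path)  # picked up by the 'media' queue
#         return {'job_id': job.id}, 202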