# Listing metadata: 146 lines, 4.4 KiB, Python
# File: plugins/media/tasks.py
|
||
|
||
import codecs
import logging
import os
import shutil
import zipfile

from celery.schedules import crontab
from flask import current_app
from PIL import Image, UnidentifiedImageError
from werkzeug.utils import secure_filename

from app import db
from app.celery_app import celery
from plugins.media.models import Media, ZipJob
|
||
|
||
# ─── Constants ────────────────────────────────────────────────────────────────
# Lower-case file extensions (with the leading dot) that process_zip routes
# to image validation.
IMAGE_EXTS = {
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.webp',
}

# Lower-case file extensions accepted as documents: '.pdf' gets a header
# check, the others get a UTF-8 probe.
DOC_EXTS = {
    '.pdf',
    '.txt',
    '.csv',
}

# Largest number of entries an uploaded ZIP may contain.
MAX_ZIP_FILES = 1_000

# Largest accepted image area (width * height), i.e. 8000 x 8000 pixels.
MAX_PIXELS = 8000 ** 2
|
||
|
||
|
||
def validate_image(path):
    """Return True if *path* is a readable image no larger than MAX_PIXELS.

    The file is opened once: the dimensions are read from the header and
    checked against ``MAX_PIXELS`` first, then Pillow's ``verify()`` is run
    to detect broken files.  The single ``with`` block guarantees the
    underlying file handle is closed on every path.

    Args:
        path: Filesystem path of the image to check.

    Returns:
        bool: True when the image is identified, passes ``verify()`` and
        ``width * height <= MAX_PIXELS``; False otherwise.
    """
    try:
        with Image.open(path) as img:
            width, height = img.size
            # Reject oversized images before spending time verifying them.
            if width * height > MAX_PIXELS:
                return False
            # verify() leaves the image object unusable afterwards, which is
            # why the size is read before this call.
            img.verify()
        return True
    except (
        UnidentifiedImageError,
        OSError,
        # Some Pillow plugins signal a corrupt file with SyntaxError.
        SyntaxError,
        # Raised by Image.open for images far beyond Pillow's own pixel cap.
        Image.DecompressionBombError,
    ):
        return False
|
||
|
||
|
||
def _starts_with_valid_utf8(path, probe_size=1024):
    """Return True if the first *probe_size* bytes of *path* are valid UTF-8.

    A multi-byte character cut in half by the probe boundary is tolerated;
    a truncated sequence at the real end of the file is not.
    """
    with open(path, 'rb') as fh:
        chunk = fh.read(probe_size)
        at_eof = not fh.read(1)

    decoder = codecs.getincrementaldecoder('utf-8')()
    try:
        # final=False lets an incomplete trailing sequence pass, which is
        # only legitimate when more bytes follow the probe window.
        decoder.decode(chunk, final=at_eof)
    except UnicodeDecodeError:
        return False
    return True


@celery.task(
    bind=True,
    name='plugins.media.tasks.process_zip',
    queue='media'
)
def process_zip(self, job_id, zip_path):
    """
    Unpack and validate a user-uploaded ZIP batch.

    Every member must have a name that ``secure_filename`` leaves unchanged
    and an extension in ``IMAGE_EXTS`` or ``DOC_EXTS``.  Members are
    extracted into ``<zip_path>_contents`` and checked: images through
    ``validate_image``, PDFs by their ``%PDF-`` header, and all other
    documents by a UTF-8 probe of their first 1024 bytes.  The extraction
    directory is always removed before the task returns.

    Args:
        self: Bound Celery task instance (unused by the body).
        job_id: Primary key of the ``ZipJob`` row tracking this upload.
        zip_path: Filesystem path of the uploaded ZIP archive.

    Raises:
        ValueError: If no ``ZipJob`` with *job_id* exists.  Validation
            failures are not raised; they are stored on the job as
            ``status='failed'`` with the message in ``error``.
    """
    job = ZipJob.query.get(job_id)
    if job is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'ZipJob {job_id} not found')

    job.status = 'processing'
    db.session.commit()

    extract_dir = f"{zip_path}_contents"
    allowed_exts = IMAGE_EXTS | DOC_EXTS  # loop-invariant, build once
    try:
        with zipfile.ZipFile(zip_path) as zf:
            names = zf.namelist()
            if len(names) > MAX_ZIP_FILES:
                raise ValueError('ZIP contains too many files.')

            os.makedirs(extract_dir, exist_ok=True)
            for member in names:
                # Any name that secure_filename would alter (path
                # separators, '..', unusual characters) is rejected.
                safe = secure_filename(member)
                if safe != member:
                    raise ValueError(f'Illegal filename {member}')

                _, ext = os.path.splitext(safe.lower())
                if ext not in allowed_exts:
                    raise ValueError(f'Unsupported type {ext}')

                target = os.path.join(extract_dir, safe)
                with zf.open(member) as src, open(target, 'wb') as dst:
                    # Stream in chunks so a large member is never held
                    # fully in memory.
                    # NOTE(review): there is still no cap on uncompressed
                    # size; consider checking zf.getinfo(member).file_size.
                    shutil.copyfileobj(src, dst)

                if ext in IMAGE_EXTS:
                    if not validate_image(target):
                        raise ValueError(f'Bad image: {member}')
                elif ext == '.pdf':
                    with open(target, 'rb') as f:
                        header = f.read(5)
                    if header != b'%PDF-':
                        raise ValueError(f'Bad PDF: {member}')
                elif not _starts_with_valid_utf8(target):
                    raise ValueError(f'Bad text file: {member}')

        job.status = 'done'
    except Exception as e:
        # Task boundary: record any failure on the job instead of letting
        # it escape the worker.
        job.status = 'failed'
        job.error = str(e)
    finally:
        # Clean up first (never raises) so a failing commit cannot leave
        # extracted files behind.
        shutil.rmtree(extract_dir, ignore_errors=True)
        db.session.commit()
|
||
|
||
|
||
# on_after_finalize is used rather than on_after_configure: Celery documents
# that on_after_configure is sent too early for tasks declared outside the
# module where the app itself is created, which is the case for this file.
@celery.on_after_finalize.connect
def setup_periodic_tasks(sender, **kwargs):
    """
    Schedule the periodic media prune job every day at 2am.

    Args:
        sender: The Celery app emitting the signal; the schedule entry is
            registered on it.
        **kwargs: Extra signal arguments (ignored).
    """
    sender.add_periodic_task(
        crontab(hour=2, minute=0),
        prune_orphans.s(),
        name='media_prune',
        queue='media'
    )
|
||
|
||
|
||
def _unique_orphan_name(orphan_dir, filename, media_id):
    """Return a name based on *filename* that does not yet exist in *orphan_dir*."""
    if not os.path.exists(os.path.join(orphan_dir, filename)):
        return filename

    # Collision: disambiguate with the media id, then a counter if needed.
    stem, ext = os.path.splitext(filename)
    candidate = f"{stem}_{media_id}{ext}"
    counter = 1
    while os.path.exists(os.path.join(orphan_dir, candidate)):
        candidate = f"{stem}_{media_id}_{counter}{ext}"
        counter += 1
    return candidate


@celery.task(
    name='plugins.media.tasks.prune_orphans',
    queue='media'
)
def prune_orphans():
    """
    Mark orphaned Media records and move their files to /static/orphaned/.

    A record is a candidate when its status is 'active' and its
    ``plant_id``, ``growlog_id`` and ``related_id`` are all NULL.  For each
    candidate whose file exists under the application root, the file is
    moved into ``static/orphaned`` and ``Media.mark_orphaned`` is called
    with the new URL.

    Records are processed independently: a missing file or a failed move is
    logged and skipped, and each successful move is committed immediately so
    the database does not lag behind the filesystem if a later record fails.
    """
    orphan_dir = os.path.join(current_app.root_path, 'static', 'orphaned')
    os.makedirs(orphan_dir, exist_ok=True)

    candidates = Media.query.filter(
        Media.status == 'active',
        Media.plant_id.is_(None),
        Media.growlog_id.is_(None),
        Media.related_id.is_(None)
    ).all()

    for m in candidates:
        if not m.file_url:
            current_app.logger.warning(
                f"Orphan prune: media #{m.id} has no file_url"
            )
            continue

        # file_url is root-relative; strip the leading slash so join() does
        # not discard root_path.
        src_rel = m.file_url.lstrip('/')
        src_abs = os.path.join(current_app.root_path, src_rel)
        if not os.path.isfile(src_abs):
            current_app.logger.warning(f"Orphan prune: file not found {src_abs}")
            continue

        # Files from different folders can share a basename; never overwrite
        # a file that is already in the orphan directory.
        filename = _unique_orphan_name(
            orphan_dir, os.path.basename(src_abs), m.id
        )
        dest_abs = os.path.join(orphan_dir, filename)
        try:
            shutil.move(src_abs, dest_abs)
        except OSError:
            current_app.logger.exception(
                f"Orphan prune: could not move {src_abs}"
            )
            continue

        new_url = f"/static/orphaned/{filename}"
        # presumably updates the record's status and file_url; verify
        # against Media.mark_orphaned.
        m.mark_orphaned(new_url)
        db.session.commit()

        current_app.logger.info(
            f"Orphaned media #{m.id}: moved {src_rel} → {new_url}"
        )
|
||
|
||
|
||
def init_media_tasks(celery_app):
    """
    Entry point called by the JSON-driven loader; intentionally a no-op.

    Celery scheduling is handled via the signal-connected
    ``setup_periodic_tasks`` handler, so this function only logs that it
    was invoked.

    Args:
        celery_app: Object passed in by the loader.  Its ``logger``
            attribute is used when present; otherwise the message goes to
            this module's standard logger, so the call cannot fail on an
            object without a ``logger`` attribute.
    """
    log = getattr(celery_app, 'logger', None)
    if log is None:
        log = logging.getLogger(__name__)
    log.info("[Media] init_media_tasks called (no‐op)")
|