"""Celery task that unpacks an uploaded ZIP archive and validates its members."""
import codecs
import os
import shutil
import zipfile

from werkzeug.utils import secure_filename
from PIL import Image, UnidentifiedImageError

from app import db
from plugins.media.models import ZipJob
# Celery application instance exposed by the utility plugin.
from plugins.utility.celery import celery_app

# Constants
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif'}
DOC_EXTS = {'.pdf', '.txt', '.csv'}
MAX_ZIP_FILES = 1000
MAX_PIXELS = 8000 * 8000

# Union computed once instead of on every archive member.
_ALLOWED_EXTS = IMAGE_EXTS | DOC_EXTS
# Number of leading bytes inspected by the text (txt/csv) check.
_TEXT_SNIFF_BYTES = 1024


def validate_image(path):
    """Return True if *path* is a verifiable image of at most MAX_PIXELS pixels.

    Args:
        path: Filesystem path of the image to check.

    Returns:
        False when Pillow cannot identify or verify the file, or when
        width * height exceeds MAX_PIXELS; True otherwise.
    """
    try:
        with Image.open(path) as img:
            # The size is known once the file is opened; read it before
            # verify(), after which the image object must not be reused.
            width, height = img.size
            img.verify()
    except (
        UnidentifiedImageError,
        OSError,
        # Some Pillow plugins report corrupt data with SyntaxError.
        SyntaxError,
        # Raised by Pillow for images far beyond its own pixel limit.
        Image.DecompressionBombError,
    ):
        return False
    return width * height <= MAX_PIXELS


def _check_utf8_prefix(path, limit=_TEXT_SNIFF_BYTES):
    """Raise UnicodeDecodeError if the first *limit* bytes are not UTF-8."""
    with open(path, 'rb') as fh:
        head = fh.read(limit)
    # When the read stopped at the limit, a trailing multi-byte character
    # may simply be cut in half; only treat it as an error if the file
    # really ends there.
    reached_eof = len(head) < limit
    codecs.getincrementaldecoder('utf-8')().decode(head, final=reached_eof)


def _validate_extracted(target, ext, member):
    """Run the content check matching *ext* on the extracted file *target*."""
    if ext in IMAGE_EXTS:
        # Images get the image check only; they must not fall through to
        # the PDF or text checks below.
        if not validate_image(target):
            raise ValueError(f'Bad image: {member}')
    elif ext == '.pdf':
        with open(target, 'rb') as fh:
            if fh.read(5) != b'%PDF-':
                raise ValueError(f'Bad PDF: {member}')
    else:
        # txt/csv: simple UTF-8 check
        _check_utf8_prefix(target)


def _extract_member(zf, member, extract_dir):
    """Extract one archive member into *extract_dir* and validate it."""
    safe = secure_filename(member)
    # Any name that secure_filename would alter (path separators, unsafe
    # characters, ...) is rejected outright rather than renamed.
    if safe != member:
        raise ValueError(f'Illegal filename {member}')

    _, ext = os.path.splitext(safe.lower())
    if ext not in _ALLOWED_EXTS:
        raise ValueError(f'Unsupported type {ext}')

    target = os.path.join(extract_dir, safe)
    with zf.open(member) as src, open(target, 'wb') as dst:
        # Stream in chunks so a large member is never held fully in memory.
        shutil.copyfileobj(src, dst)

    _validate_extracted(target, ext, member)


@celery_app.task(bind=True)
def process_zip(self, job_id, zip_path):
    """Extract and validate every member of the ZIP at *zip_path*.

    Files are written to ``zip_path + '_contents'``. The ZipJob row
    identified by *job_id* is set to 'processing', then to 'done' on
    success or 'failed' (with ``job.error`` holding the message) on any
    error.

    Args:
        job_id: Primary key of the ZipJob row tracking this run.
        zip_path: Filesystem path of the uploaded archive.

    Raises:
        ValueError: If no ZipJob with *job_id* exists.
    """
    job = ZipJob.query.get(job_id)
    if job is None:
        # Without a row there is nowhere to record status; fail loudly
        # instead of with an opaque AttributeError on None.
        raise ValueError(f'ZipJob {job_id} does not exist')

    job.status = 'processing'
    db.session.commit()

    extract_dir = zip_path + '_contents'
    try:
        with zipfile.ZipFile(zip_path) as zf:
            names = zf.namelist()
            if len(names) > MAX_ZIP_FILES:
                raise ValueError('ZIP contains too many files.')
            os.makedirs(extract_dir, exist_ok=True)
            for member in names:
                _extract_member(zf, member, extract_dir)
        job.status = 'done'
    except Exception as e:
        # Task boundary: every failure is recorded on the job, not raised.
        job.status = 'failed'
        job.error = str(e)
    finally:
        db.session.commit()