# File: plugins/utility/tasks.py

# Standard library
import csv
import os
import zipfile
import tempfile
import shutil
import io
from datetime import datetime

# Third-party
from celery.utils.log import get_task_logger
from celery.exceptions import Retry
from flask import current_app
from werkzeug.datastructures import FileStorage

# Application
from app import db
from app.neo4j_utils import get_neo4j_handler
from app.celery_app import celery

# Plugins
from plugins.plant.models import (
    Plant,
    PlantCommonName,
    PlantScientificName,
    PlantOwnershipLog,
)
from plugins.utility.models import ImportBatch
from plugins.media.routes import _process_upload_file

logger = get_task_logger(__name__)


@celery.task(name="plugins.utility.tasks.import_text_data", bind=True)
def import_text_data(self, filepath, import_type, batch_id):
    """
    Celery task entrypoint for both ZIP and CSV imports.

    Args:
        filepath: path to the uploaded .zip or .csv file.
        import_type: "zip" or "csv".
        batch_id: ImportBatch.id whose status is updated as the task runs.

    On failure the batch is marked 'failed' with the error message recorded,
    and the task is retried after 60 seconds.
    """
    batch = ImportBatch.query.get(batch_id)
    if batch is None:
        # The batch row was deleted or never created — nothing to update,
        # and retrying would never succeed.
        logger.error("ImportBatch %s not found; aborting import", batch_id)
        return
    try:
        # Mark as started so callers polling the batch can see progress.
        batch.status = 'started'
        db.session.commit()

        # ZIP import: extract to a scratch dir, import, then always clean
        # up both the uploaded archive and the scratch dir.
        if import_type == "zip":
            tmpdir = tempfile.mkdtemp()
            try:
                with zipfile.ZipFile(filepath) as zf:
                    zf.extractall(tmpdir)
                _do_import_zip(tmpdir, batch)
            finally:
                os.remove(filepath)
                shutil.rmtree(tmpdir, ignore_errors=True)

        # CSV import (reviewed rows, plants only — no media)
        elif import_type == "csv":
            _do_import_csv(filepath, batch)

        # mark as complete
        batch.status = 'complete'
        db.session.commit()
    except Exception as exc:
        logger.exception("Import failed")
        # The session may be in a failed/aborted state if the exception
        # came from the DB layer; roll back first so the status update
        # below can actually commit.
        db.session.rollback()
        batch.status = 'failed'
        batch.error = str(exc)
        db.session.commit()
        raise self.retry(exc=exc, countdown=60)


def _do_import_zip(tmpdir, batch):
    """
    Perform the plants.csv + media.csv import from tmpdir and log into Neo4j.

    Expects tmpdir to contain:
      - plants.csv  (columns: UUID, Name, Scientific Name, Type, Vendor Name,
                     Price, Mother UUID, Notes, Short ID)
      - media.csv   (columns: Plant UUID, Image Path, Uploaded At, Source Type)
      - images/...  (files referenced by media.csv's Image Path)
    """
    # 1) read plants.csv
    plant_path = os.path.join(tmpdir, "plants.csv")
    with open(plant_path, newline="", encoding="utf-8-sig") as pf:
        plant_rows = list(csv.DictReader(pf))

    # 2) insert plants
    neo = get_neo4j_handler()
    try:
        plant_map = {}  # plant UUID -> Plant.id for rows inserted in this run
        for row in plant_rows:
            # Get-or-create the common name.
            common = PlantCommonName.query.filter_by(name=row["Name"]).first()
            if not common:
                common = PlantCommonName(name=row["Name"])
                db.session.add(common)
                db.session.flush()

            # Get-or-create the scientific name, tied to the common name.
            scientific = PlantScientificName.query.filter_by(
                name=row["Scientific Name"]
            ).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=row["Scientific Name"],
                    common_id=common.id
                )
                db.session.add(scientific)
                db.session.flush()

            # Only set the mother FK when the mother was inserted earlier in
            # this same file (rows are assumed parent-before-child); the full
            # lineage is still recorded in Neo4j below either way.
            raw_mu = row.get("Mother UUID") or None
            mu_for_insert = raw_mu if raw_mu in plant_map else None

            p = Plant(
                uuid=row["UUID"],
                common_id=common.id,
                scientific_id=scientific.id,
                plant_type=row["Type"],
                owner_id=batch.user_id,
                vendor_name=row["Vendor Name"] or None,
                price=float(row["Price"]) if row["Price"] else None,
                mother_uuid=mu_for_insert,
                notes=row["Notes"] or None,
                short_id=row.get("Short ID") or None,
                data_verified=True
            )
            db.session.add(p)
            db.session.flush()
            plant_map[p.uuid] = p.id

            # Record the importing user as the (verified) owner.
            log = PlantOwnershipLog(
                plant_id=p.id,
                user_id=batch.user_id,
                date_acquired=datetime.utcnow(),
                transferred=False,
                is_verified=True
            )
            db.session.add(log)

            # Mirror the plant (and its parentage, if any) into Neo4j.
            neo.create_plant_node(p.uuid, row["Name"])
            if raw_mu:
                neo.create_lineage(child_uuid=p.uuid, parent_uuid=raw_mu)

        db.session.commit()

        # 3) import media.csv
        media_path = os.path.join(tmpdir, "media.csv")
        with open(media_path, newline="", encoding="utf-8-sig") as mf:
            media_rows = list(csv.DictReader(mf))

        for mrow in media_rows:
            pid = plant_map.get(mrow["Plant UUID"])
            if not pid:
                # Media row references a plant not in this import; skip it.
                continue

            # Image Path is stored as ".../uploads/<subpath>"; resolve the
            # part after "uploads/" against the extracted images/ directory.
            subpath = mrow["Image Path"].split('uploads/', 1)[-1]
            src = os.path.join(tmpdir, "images", subpath)
            if not os.path.isfile(src):
                continue

            with open(src, "rb") as f:
                fs = io.BytesIO(f.read())
            file_storage = FileStorage(
                stream=fs,
                filename=os.path.basename(subpath),
                content_type='image/jpeg'
            )
            media = _process_upload_file(
                file=file_storage,
                uploader_id=batch.user_id,
                plugin="plant",
                related_id=pid,
                plant_id=pid
            )
            # Preserve the original upload timestamp and source annotation.
            media.uploaded_at = datetime.fromisoformat(mrow["Uploaded At"])
            media.caption = mrow["Source Type"]
            db.session.add(media)

        db.session.commit()
    finally:
        # Close the Neo4j handler even if the import failed partway.
        neo.close()


def _do_import_csv(filepath, batch):
    """
    Perform a reviewed-CSV import (only plants, no media) from filepath.

    Expected columns: uuid, name, scientific_name, plant_type, mother_uuid.
    Existing plants (matched by uuid) are left untouched; only missing ones
    are created. Every row is mirrored into Neo4j.
    """
    neo = get_neo4j_handler()
    try:
        # Stream the file directly instead of copying it into memory;
        # the handle is closed by the context manager.
        with open(filepath, newline="", encoding='utf-8-sig') as fh:
            reader = csv.DictReader(fh)
            for row in reader:
                uuid_val = row.get("uuid", "").strip()
                name = row.get("name", "").strip()
                sci_name = row.get("scientific_name", "").strip()
                plant_type = row.get("plant_type", "").strip() or "plant"
                mother_uuid = row.get("mother_uuid", "").strip() or None

                # Get-or-create the common name.
                common = PlantCommonName.query.filter_by(name=name).first()
                if not common:
                    common = PlantCommonName(name=name)
                    db.session.add(common)
                    db.session.flush()

                # Get-or-create the scientific name.
                scientific = PlantScientificName.query.filter_by(
                    name=sci_name
                ).first()
                if not scientific:
                    scientific = PlantScientificName(
                        name=sci_name,
                        common_id=common.id
                    )
                    db.session.add(scientific)
                    db.session.flush()

                # Create the plant only if this uuid is new.
                plant = Plant.query.filter_by(uuid=uuid_val).first()
                if not plant:
                    plant = Plant(
                        uuid=uuid_val,
                        common_id=common.id,
                        scientific_id=scientific.id,
                        plant_type=plant_type,
                        owner_id=batch.user_id,
                        mother_uuid=mother_uuid,
                        data_verified=True
                    )
                    db.session.add(plant)
                    db.session.flush()

                # Mirror into Neo4j (node + lineage edge when a mother is given).
                neo.create_plant_node(plant.uuid, common.name)
                if mother_uuid:
                    neo.create_lineage(
                        child_uuid=plant.uuid,
                        parent_uuid=mother_uuid
                    )

        db.session.commit()
    finally:
        # Close the Neo4j handler even if the import failed partway.
        neo.close()