# natureinpots_community/plugins/utility/tasks.py

# File: plugins/utility/tasks.py

# Standard library
import csv
import os
import zipfile
import tempfile
import shutil
import io
from datetime import datetime

# Third-party
from celery.utils.log import get_task_logger
from celery.exceptions import Retry
from flask import current_app
from werkzeug.datastructures import FileStorage

# Application
from app import db
from app.neo4j_utils import get_neo4j_handler
from app.celery_app import celery

# Plugins
from plugins.plant.models import (
    Plant,
    PlantCommonName,
    PlantScientificName,
    PlantOwnershipLog,
)
from plugins.utility.models import ImportBatch
from plugins.media.routes import _process_upload_file

logger = get_task_logger(__name__)


@celery.task(name="plugins.utility.tasks.import_text_data", bind=True)
def import_text_data(self, filepath, import_type, batch_id):
    """
    Celery task entrypoint for both ZIP and CSV imports.

    filepath: path to uploaded .zip or .csv
    import_type: "zip" or "csv"
    batch_id: ImportBatch.id to update status

    The batch status is set to 'started', then to 'complete' on success.
    On any exception the session is rolled back, the batch is marked
    'failed' with the error text, and the task is re-queued with a
    60 second countdown.

    Raises:
        ValueError: if no ImportBatch row exists for batch_id.
    """
    batch = ImportBatch.query.get(batch_id)
    if batch is None:
        # Without a batch there is no status to update and no owner for the
        # imported rows; fail with a clear message instead of AttributeError.
        raise ValueError(f"ImportBatch {batch_id!r} not found")

    def _discard_upload():
        # Best-effort removal of the uploaded archive; a failure here must
        # not mask the outcome of the import itself.
        try:
            os.remove(filepath)
        except OSError:
            logger.warning("Could not remove uploaded file %s", filepath)

    try:
        # mark as started
        batch.status = 'started'
        db.session.commit()

        # ZIP import
        if import_type == "zip":
            tmpdir = tempfile.mkdtemp()
            try:
                with zipfile.ZipFile(filepath) as zf:
                    zf.extractall(tmpdir)
                _do_import_zip(tmpdir, batch)
            finally:
                shutil.rmtree(tmpdir, ignore_errors=True)
            # Delete the archive only after a successful import so that a
            # retry of a failed attempt can still read it.
            _discard_upload()

        # CSV import (reviewed rows)
        elif import_type == "csv":
            _do_import_csv(filepath, batch)

        else:
            # An unrecognised type must not be reported as a completed import.
            raise ValueError(f"Unsupported import_type: {import_type!r}")

        # mark as complete
        batch.status = 'complete'
        db.session.commit()

    except Exception as exc:
        logger.exception("Import failed")
        # A failed flush/commit leaves the session unusable until it is
        # rolled back; do that before recording the failure.
        db.session.rollback()
        batch.status = 'failed'
        batch.error  = str(exc)
        db.session.commit()

        # Once no further retry will run, the archive is no longer needed.
        max_retries = self.max_retries
        if (
            import_type == "zip"
            and max_retries is not None
            and self.request.retries >= max_retries
        ):
            _discard_upload()

        raise self.retry(exc=exc, countdown=60)


def _do_import_zip(tmpdir, batch):
    """
    Perform the plants.csv + media.csv import from tmpdir and log into Neo4j.

    tmpdir: directory holding the extracted archive (plants.csv, media.csv
            and an images/ tree)
    batch:  ImportBatch whose user_id becomes owner/uploader of the new rows

    Plants are committed before media is processed. The Neo4j handler is
    closed even when the import raises.
    """
    # 1) read plants.csv
    plant_path = os.path.join(tmpdir, "plants.csv")
    with open(plant_path, newline="", encoding="utf-8-sig") as pf:
        plant_rows = list(csv.DictReader(pf))

    neo = get_neo4j_handler()
    try:
        # 2) insert plants
        plant_map = {}
        for row in plant_rows:
            common = PlantCommonName.query.filter_by(name=row["Name"]).first()
            if not common:
                common = PlantCommonName(name=row["Name"])
                db.session.add(common)
                db.session.flush()
            scientific = PlantScientificName.query.filter_by(
                name=row["Scientific Name"]
            ).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=row["Scientific Name"],
                    common_id=common.id
                )
                db.session.add(scientific)
                db.session.flush()
            raw_mu = row.get("Mother UUID") or None
            # Only reference a mother that was already inserted earlier in
            # this same file; otherwise the column is left empty.
            mu_for_insert = raw_mu if raw_mu in plant_map else None
            p = Plant(
                uuid=row["UUID"],
                common_id=common.id,
                scientific_id=scientific.id,
                plant_type=row["Type"],
                owner_id=batch.user_id,
                vendor_name=row["Vendor Name"] or None,
                price=float(row["Price"]) if row["Price"] else None,
                mother_uuid=mu_for_insert,
                notes=row["Notes"] or None,
                short_id=row.get("Short ID") or None,
                data_verified=True
            )
            db.session.add(p)
            db.session.flush()
            plant_map[p.uuid] = p.id
            log = PlantOwnershipLog(
                plant_id=p.id,
                user_id=batch.user_id,
                date_acquired=datetime.utcnow(),
                transferred=False,
                is_verified=True
            )
            db.session.add(log)
            neo.create_plant_node(p.uuid, row["Name"])
            if raw_mu:
                neo.create_lineage(child_uuid=p.uuid, parent_uuid=raw_mu)
        db.session.commit()

        # 3) import media.csv
        media_path = os.path.join(tmpdir, "media.csv")
        with open(media_path, newline="", encoding="utf-8-sig") as mf:
            media_rows = list(csv.DictReader(mf))

        # Resolved once so every image path can be checked against it.
        images_root = os.path.realpath(os.path.join(tmpdir, "images"))

        for mrow in media_rows:
            puuid = mrow["Plant UUID"]
            pid   = plant_map.get(puuid)
            if not pid:
                continue
            subpath = mrow["Image Path"].split('uploads/', 1)[-1]
            src     = os.path.realpath(os.path.join(images_root, subpath))
            # The path comes from the uploaded archive: an absolute path or
            # ".." segments must not escape the extracted images directory.
            if not src.startswith(images_root + os.sep):
                logger.warning("Skipping image outside archive: %r", subpath)
                continue
            if not os.path.isfile(src):
                continue
            with open(src, "rb") as f:
                fs = io.BytesIO(f.read())
            file_storage = FileStorage(
                stream=fs,
                filename=os.path.basename(subpath),
                content_type='image/jpeg'
            )
            media = _process_upload_file(
                file=file_storage,
                uploader_id=batch.user_id,
                plugin="plant",
                related_id=pid,
                plant_id=pid
            )
            media.uploaded_at = datetime.fromisoformat(mrow["Uploaded At"])
            media.caption     = mrow["Source Type"]
            db.session.add(media)
        db.session.commit()
    finally:
        # Release the Neo4j handler on success and on failure alike.
        neo.close()


def _do_import_csv(filepath, batch):
    """
    Perform a reviewed-CSV import (only plants, no media) from filepath.

    filepath: path to the CSV; columns read are uuid, name, scientific_name,
              plant_type and mother_uuid
    batch:    ImportBatch whose user_id becomes owner of newly created plants

    Rows without a uuid are skipped. Plants whose uuid already exists are
    not re-created, but their Neo4j node/lineage calls are still issued.
    The Neo4j handler is closed even when the import raises.
    """
    # Read everything up front so the file handle is closed before any
    # database or Neo4j work starts.
    with open(filepath, newline="", encoding='utf-8-sig') as fh:
        rows = list(csv.DictReader(fh))

    def _cell(row, key):
        # DictReader yields None for columns missing from a short row, so
        # row.get(key, "") alone is not enough to guarantee a string.
        return (row.get(key) or "").strip()

    neo = get_neo4j_handler()
    try:
        for row in rows:
            uuid_val    = _cell(row, "uuid")
            name        = _cell(row, "name")
            sci_name    = _cell(row, "scientific_name")
            plant_type  = _cell(row, "plant_type") or "plant"
            mother_uuid = _cell(row, "mother_uuid") or None

            if not uuid_val:
                # A plant cannot be identified without its uuid.
                logger.warning("Skipping CSV row without uuid: %r", row)
                continue

            common = PlantCommonName.query.filter_by(name=name).first()
            if not common:
                common = PlantCommonName(name=name)
                db.session.add(common)
                db.session.flush()
            scientific = PlantScientificName.query.filter_by(name=sci_name).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=sci_name,
                    common_id=common.id
                )
                db.session.add(scientific)
                db.session.flush()

            plant = Plant.query.filter_by(uuid=uuid_val).first()
            if not plant:
                plant = Plant(
                    uuid=uuid_val,
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=plant_type,
                    owner_id=batch.user_id,
                    mother_uuid=mother_uuid,
                    data_verified=True
                )
                db.session.add(plant)
                db.session.flush()

            neo.create_plant_node(plant.uuid, common.name)
            if mother_uuid:
                neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)

        db.session.commit()
    finally:
        # Release the Neo4j handler on success and on failure alike.
        neo.close()