# plugins/importer/routes.py
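"""Routes for the importer plugin.

Two import flows are exposed here:
  * ZIP import: a sub-app export containing plants.csv, media.csv,
    metadata.txt (carrying an export_id), and an images/ tree.
  * Standalone CSV import: rows are staged in the session, confirmed on
    /importer/review, then written to MySQL and mirrored into Neo4j.
"""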
import csv
import io
import uuid
import difflib
import os
import zipfile
import tempfile
from datetime import datetime
from flask import (
    Blueprint, request, render_template, redirect, flash,
    session, url_for, current_app
)
from flask_login import login_required, current_user
from flask_wtf.csrf import generate_csrf
from app.neo4j_utils import get_neo4j_handler
from plugins.plant.models import (
    db,
    Plant,
    PlantCommonName,
    PlantScientificName,
    PlantOwnershipLog,
)
from plugins.media.models import Media
from plugins.importer.models import ImportBatch # tracks which exports have been imported
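# Note: this module only relies on ImportBatch exposing export_id, user_id,
# and imported_at (see steps 5-6 of the ZIP flow below); any other columns
# the model defines are not touched here.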
bp = Blueprint(
    'importer',
    __name__,
    template_folder='templates',
    url_prefix='/importer'
)

@bp.route("/", methods=["GET"])
@login_required
def index():
    # When someone hits /importer/, redirect to /importer/upload
    return redirect(url_for("importer.upload"))

# ────────────────────────────────────────────────────────────────────────────────
# Required headers for your sub-app export ZIP
PLANT_HEADERS = [
    "UUID", "Type", "Name", "Scientific Name",
    "Vendor Name", "Price", "Mother UUID", "Notes"
]
MEDIA_HEADERS = [
    "Plant UUID", "Image Path", "Uploaded At", "Source Type"
]
# Headers for standalone CSV review flow
REQUIRED_HEADERS = {"uuid", "plant_type", "name", "scientific_name", "mother_uuid"}
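
# Expected ZIP layout (as consumed by the ZIP flow below):
#
#   plants.csv      - header row must match PLANT_HEADERS exactly
#   media.csv       - header row must match MEDIA_HEADERS exactly
#   metadata.txt    - must contain a line of the form "export_id,<value>"
#   images/...      - files referenced by media.csv "Image Path" values,
#                     addressed by the portion of the path after "uploads/"
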
@bp.route("/upload", methods=["GET", "POST"])
@login_required
def upload():
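    """Accept a ZIP export or a standalone CSV upload.

    ZIP uploads are imported immediately (plants, ownership logs, media
    files, and Neo4j nodes/lineage). CSV uploads are staged in the session
    and redirected to the review step for confirmation.
    """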
    if request.method == "POST":
        file = request.files.get("file")
        if not file or not file.filename:
            flash("No file selected", "error")
            return redirect(request.url)
        filename = file.filename.lower().strip()

        # ── ZIP Import Flow ───────────────────────────────────────────────────
        if filename.endswith(".zip"):
            # 1) Save upload to disk
            tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
            file.save(tmp_zip.name)
            tmp_zip.close()

            # 2) Open as ZIP
            try:
                z = zipfile.ZipFile(tmp_zip.name)
            except zipfile.BadZipFile:
                os.remove(tmp_zip.name)
                flash("Uploaded file is not a valid ZIP.", "danger")
                return redirect(request.url)

            # 3) Ensure both CSVs
            names = z.namelist()
            if "plants.csv" not in names or "media.csv" not in names:
                os.remove(tmp_zip.name)
                flash("ZIP must contain both plants.csv and media.csv", "danger")
                return redirect(request.url)
            # 4) Read export_id from metadata.txt
            export_id = None
            if "metadata.txt" in names:
                meta = z.read("metadata.txt").decode("utf-8", "ignore")
                for line in meta.splitlines():
                    if line.startswith("export_id,"):
                        export_id = line.split(",", 1)[1].strip()
                        break
            if not export_id:
                os.remove(tmp_zip.name)
                flash("metadata.txt is missing or does not contain an export_id", "danger")
                return redirect(request.url)

            # 5) Skip if already imported
            if ImportBatch.query.filter_by(export_id=export_id, user_id=current_user.id).first():
                os.remove(tmp_zip.name)
                flash("This export has already been imported.", "info")
                return redirect(request.url)
            # 6) Record import batch
            batch = ImportBatch(
                export_id=export_id,
                user_id=current_user.id,
                imported_at=datetime.utcnow()
            )
            db.session.add(batch)
            db.session.commit()

            # 7) Extract into temp dir
            tmpdir = tempfile.mkdtemp()
            z.extractall(tmpdir)
            # 8) Validate plants.csv
            plant_path = os.path.join(tmpdir, "plants.csv")
            with open(plant_path, newline="", encoding="utf-8-sig") as pf:
                reader = csv.DictReader(pf)
                if reader.fieldnames != PLANT_HEADERS:
                    missing = set(PLANT_HEADERS) - set(reader.fieldnames or [])
                    extra = set(reader.fieldnames or []) - set(PLANT_HEADERS)
                    os.remove(tmp_zip.name)
                    flash(f"plants.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
                    return redirect(request.url)
                plant_rows = list(reader)

            # 9) Validate media.csv
            media_path = os.path.join(tmpdir, "media.csv")
            with open(media_path, newline="", encoding="utf-8-sig") as mf:
                mreader = csv.DictReader(mf)
                if mreader.fieldnames != MEDIA_HEADERS:
                    missing = set(MEDIA_HEADERS) - set(mreader.fieldnames or [])
                    # compare against media.csv's own header row, not plants.csv's
                    extra = set(mreader.fieldnames or []) - set(MEDIA_HEADERS)
                    os.remove(tmp_zip.name)
                    flash(f"media.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
                    return redirect(request.url)
                media_rows = list(mreader)
            # 10) Import plants + Neo4j
            neo = get_neo4j_handler()
            added_plants = 0
            for row in plant_rows:
                common = PlantCommonName.query.filter_by(name=row["Name"]).first()
                if not common:
                    common = PlantCommonName(name=row["Name"])
                    db.session.add(common)
                    db.session.flush()

                scientific = PlantScientificName.query.filter_by(name=row["Scientific Name"]).first()
                if not scientific:
                    scientific = PlantScientificName(
                        name=row["Scientific Name"],
                        common_id=common.id
                    )
                    db.session.add(scientific)
                    db.session.flush()

                p = Plant(
                    uuid=row["UUID"],
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=row["Type"],
                    owner_id=current_user.id,
                    data_verified=True
                )
                db.session.add(p)
                db.session.flush()

                log = PlantOwnershipLog(
                    plant_id=p.id,
                    user_id=current_user.id,
                    date_acquired=datetime.utcnow(),
                    transferred=False,
                    is_verified=True
                )
                db.session.add(log)

                neo.create_plant_node(p.uuid, row["Name"])
                if row.get("Mother UUID"):
                    neo.create_lineage(child_uuid=p.uuid, parent_uuid=row["Mother UUID"])
                added_plants += 1
            # 11) Import media files (by Plant UUID)
            added_media = 0
            for mrow in media_rows:
                plant_uuid = mrow["Plant UUID"]
                plant_obj = Plant.query.filter_by(uuid=plant_uuid).first()
                if not plant_obj:
                    continue

                # derive subpath inside ZIP by stripping "uploads/";
                # skip rows whose path does not contain "uploads/" at all
                if "uploads/" not in mrow["Image Path"]:
                    continue
                subpath = mrow["Image Path"].split("uploads/", 1)[1]
                src = os.path.join(tmpdir, "images", subpath)
                if not os.path.isfile(src):
                    continue

                dest_dir = os.path.join(
                    current_app.static_folder, "uploads",
                    str(current_user.id), str(plant_obj.id)
                )
                os.makedirs(dest_dir, exist_ok=True)
                ext = os.path.splitext(src)[1]
                fname = f"{uuid.uuid4().hex}{ext}"
                dst = os.path.join(dest_dir, fname)
                with open(src, "rb") as sf, open(dst, "wb") as df:
                    df.write(sf.read())

                media = Media(
                    user_id=current_user.id,
                    plant_id=plant_obj.id,
                    original_filename=os.path.basename(src),
                    path=f"uploads/{current_user.id}/{plant_obj.id}/{fname}",
                    uploaded_at=datetime.fromisoformat(mrow["Uploaded At"]),
                    source_type=mrow["Source Type"]
                )
                db.session.add(media)
                added_media += 1
            # 12) Finalize & cleanup
            db.session.commit()
            neo.close()
            os.remove(tmp_zip.name)
            flash(f"Imported {added_plants} plants and {added_media} images.", "success")
            return redirect(request.url)
        # ── Standalone CSV Review Flow ─────────────────────────────────────
        if filename.endswith(".csv"):
            try:
                stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
                reader = csv.DictReader(stream)
            except Exception:
                flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
                return redirect(request.url)

            headers = set(reader.fieldnames or [])
            missing = REQUIRED_HEADERS - headers
            if missing:
                flash(f"Missing required CSV headers: {missing}", "error")
                return redirect(request.url)

            session["pending_rows"] = []
            review_list = []
            all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
            all_sci = {s.name.lower(): s for s in PlantScientificName.query.all()}

            for row in reader:
                uuid_raw = row.get("uuid", "")
                uuid_val = uuid_raw.strip().strip('"')
                name_raw = row.get("name", "")
                name = name_raw.strip()
                sci_raw = row.get("scientific_name", "")
                sci_name = sci_raw.strip()
                plant_type = row.get("plant_type", "").strip() or "plant"
                mother_raw = row.get("mother_uuid", "")
                mother_uuid = mother_raw.strip().strip('"')

                if not (uuid_val and name and plant_type):
                    continue

                suggestions = difflib.get_close_matches(
                    sci_name.lower(),
                    list(all_sci.keys()),
                    n=1, cutoff=0.8
                )
                suggested = (all_sci[suggestions[0]].name
                             if suggestions and suggestions[0] != sci_name.lower()
                             else None)

                item = {
                    "uuid": uuid_val,
                    "name": name,
                    "sci_name": sci_name,
                    "suggested": suggested,
                    "plant_type": plant_type,
                    "mother_uuid": mother_uuid
                }
                review_list.append(item)
                session["pending_rows"].append(item)

            session["review_list"] = review_list
            return redirect(url_for("importer.review"))

        flash("Unsupported file type. Please upload a ZIP or CSV.", "danger")
        return redirect(request.url)

    # GET → render the upload form
    return render_template("importer/upload.html", csrf_token=generate_csrf())

@bp.route("/review", methods=["GET", "POST"])
@login_required
def review():
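    """Confirm rows staged by the standalone CSV flow.

    On POST, each staged row is written to MySQL (creating common and
    scientific names as needed), logged in PlantOwnershipLog, and mirrored
    into Neo4j; accepted name suggestions (confirm_<uuid> form fields)
    override the submitted scientific name.
    """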
    rows = session.get("pending_rows", [])
    review_list = session.get("review_list", [])

    if request.method == "POST":
        neo = get_neo4j_handler()
        added = 0
        all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
        all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}

        for row in rows:
            uuid_val = row.get("uuid")
            name = row.get("name")
            sci_name = row.get("sci_name")
            suggested = row.get("suggested")
            plant_type = row.get("plant_type")
            mother_uuid = row.get("mother_uuid")
            accepted = request.form.get(f"confirm_{uuid_val}")

            common = PlantCommonName.query.filter_by(name=name).first()
            if not common:
                common = PlantCommonName(name=name)
                db.session.add(common)
                db.session.flush()
                all_common[common.name.lower()] = common

            use_name = suggested if (suggested and accepted) else sci_name
            scientific = PlantScientificName.query.filter_by(name=use_name).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=use_name,
                    common_id=common.id
                )
                db.session.add(scientific)
                db.session.flush()
                all_scientific[scientific.name.lower()] = scientific

            # verified unless a suggestion was offered and not accepted
            verified = bool(not suggested or accepted)
            plant = Plant.query.filter_by(uuid=uuid_val).first()
            if not plant:
                plant = Plant(
                    uuid=uuid_val,
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=plant_type,
                    owner_id=current_user.id,
                    data_verified=verified
                )
                db.session.add(plant)
                db.session.flush()

                log = PlantOwnershipLog(
                    plant_id=plant.id,
                    user_id=current_user.id,
                    date_acquired=datetime.utcnow(),
                    transferred=False,
                    is_verified=verified
                )
                db.session.add(log)
                added += 1

            neo.create_plant_node(plant.uuid, plant.common.name)
            if mother_uuid:
                neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)

        db.session.commit()
        neo.close()
        flash(f"{added} plants added (MySQL) and Neo4j updated.", "success")

        session.pop("pending_rows", None)
        session.pop("review_list", None)
        return redirect(url_for("importer.upload"))

    return render_template(
        "importer/review.html",
        review_list=review_list,
        csrf_token=generate_csrf()
    )
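
# Template contract (inferred from the form handling above): review.html must
# render one "confirm_<uuid>" input per suggested rename and POST back to this
# route with the CSRF token; upload.html only needs a file field named "file".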