more

2025-06-09 00:25:02 -05:00
parent 45448e783f
commit d442cad0bb
19 changed files with 688 additions and 117 deletions
--- a/plugins/importer/models.py
+++ b/plugins/importer/models.py
@ -0,0 +1,15 @@
+from datetime import datetime
+from plugins.plant.models import db
+
+class ImportBatch(db.Model):
+    """One export archive that a given user has imported.
+
+    A row pairs an export identifier with the importing user; the unique
+    constraint below allows each (export_id, user_id) pair at most once.
+    """
+    __tablename__ = 'import_batches'

+    # surrogate primary key
+    id = db.Column(db.Integer, primary_key=True)
+    # identifier of the export this batch came from (at most 64 characters)
+    export_id = db.Column(db.String(64), nullable=False)
+    # id of the importing user; indexed, but no foreign key is declared here
+    user_id   = db.Column(db.Integer, nullable=False, index=True)
+    # naive UTC timestamp, filled in via datetime.utcnow when not supplied
+    imported_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)

+    __table_args__ = (
+        # ensure a given user can’t import the same export twice
+        db.UniqueConstraint('export_id', 'user_id', name='uix_export_user'),
+    )
--- a/plugins/importer/routes.py
+++ b/plugins/importer/routes.py
@ -2,10 +2,17 @@

 import csv
 import io
+import uuid
 import difflib
+import os
+import zipfile
+import tempfile
+import shutil

 from datetime import datetime
-from flask import Blueprint, request, render_template, redirect, flash, session, url_for
+from flask import (
+    Blueprint, request, render_template, redirect, flash,
+    session, url_for, current_app
+)
 from flask_login import login_required, current_user
 from flask_wtf.csrf import generate_csrf

@ -15,106 +22,278 @@ from plugins.plant.models import (
    Plant,
    PlantCommonName,
    PlantScientificName,
-    PlantOwnershipLog
+    PlantOwnershipLog,
 )
+from plugins.media.models import Media
+from plugins.importer.models import ImportBatch  # tracks which exports have been imported

-bp = Blueprint('importer', __name__, template_folder='templates', url_prefix='/import')
-
-# ────────────────────────────────────────────────────────────────────────────────
-# Redirect “/import/” → “/import/upload”
-# ────────────────────────────────────────────────────────────────────────────────
+bp = Blueprint(
+    'importer',
+    __name__,
+    template_folder='templates',
+    url_prefix='/importer'
+)

@bp.route("/", methods=["GET"])
@login_required
 def index():
+    """Redirect the blueprint root to the upload view (login required)."""
-    # When someone hits /import, send them to /import/upload
+    # When someone hits /importer/, redirect to /importer/upload
    return redirect(url_for("importer.upload"))

+# ────────────────────────────────────────────────────────────────────────────────
+# Required headers for your sub-app export ZIP
+PLANT_HEADERS = [
+    "UUID","Type","Name","Scientific Name",
+    "Vendor Name","Price","Mother UUID","Notes"
+]
+MEDIA_HEADERS = [
+    "Plant UUID","Image Path","Uploaded At","Source Type"
+]

-# ────────────────────────────────────────────────────────────────────────────────
-# Required CSV headers for import
-# ────────────────────────────────────────────────────────────────────────────────
+# Headers for standalone CSV review flow
 REQUIRED_HEADERS = {"uuid", "plant_type", "name", "scientific_name", "mother_uuid"}

-
@bp.route("/upload", methods=["GET", "POST"])
@login_required
 def upload():
    if request.method == "POST":
        file = request.files.get("file")
-        if not file:
+        if not file or not file.filename:
            flash("No file selected", "error")
            return redirect(request.url)

-        # Decode as UTF-8-SIG to strip any BOM, then parse with csv.DictReader
-        try:
-            stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
-            reader = csv.DictReader(stream)
-        except Exception:
-            flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
-            return redirect(request.url)
+        filename = file.filename.lower().strip()

-        headers = set(reader.fieldnames or [])
-        missing = REQUIRED_HEADERS - headers
-        if missing:
-            flash(f"Missing required CSV headers: {missing}", "error")
-            return redirect(request.url)
+        # ── ZIP Import Flow ───────────────────────────────────────────────────
+        if filename.endswith(".zip"):
+            # 1) Save upload to disk
+            tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
+            file.save(tmp_zip.name)
+            tmp_zip.close()

-        # Prepare session storage for the rows under review
-        session["pending_rows"] = []
-        review_list = []
+            # 2) Open as ZIP
+            try:
+                z = zipfile.ZipFile(tmp_zip.name)
+            except zipfile.BadZipFile:
+                os.remove(tmp_zip.name)
+                flash("Uploaded file is not a valid ZIP.", "danger")
+                return redirect(request.url)

-        # Preload existing common/scientific names (lowercased keys for fuzzy matching)
-        all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
-        all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}
+            # 3) Ensure both CSVs
+            names = z.namelist()
+            if "plants.csv" not in names or "media.csv" not in names:
+                os.remove(tmp_zip.name)
+                flash("ZIP must contain both plants.csv and media.csv", "danger")
+                return redirect(request.url)

-        for row in reader:
-            uuid_raw    = row.get("uuid", "")
-            uuid        = uuid_raw.strip().strip('"')
+            # 4) Read export_id from metadata.txt
+            export_id = None
+            if "metadata.txt" in names:
+                meta = z.read("metadata.txt").decode("utf-8", "ignore")
+                for line in meta.splitlines():
+                    if line.startswith("export_id,"):
+                        export_id = line.split(",", 1)[1].strip()
+                        break
+            if not export_id:
+                os.remove(tmp_zip.name)
+                flash("metadata.txt missing or missing export_id", "danger")
+                return redirect(request.url)

-            name_raw    = row.get("name", "")
-            name        = name_raw.strip()
+            # 5) Skip if already imported
+            if ImportBatch.query.filter_by(export_id=export_id, user_id=current_user.id).first():
+                os.remove(tmp_zip.name)
+                flash("This export has already been imported.", "info")
+                return redirect(request.url)

-            sci_raw     = row.get("scientific_name", "")
-            sci_name    = sci_raw.strip()
-
-            plant_type  = row.get("plant_type", "").strip() or "plant"
-
-            mother_raw  = row.get("mother_uuid", "")
-            mother_uuid = mother_raw.strip().strip('"')
-
-            # Skip any row where required fields are missing
-            if not (uuid and name and plant_type):
-                continue
-
-            # ─── If the scientific name doesn’t match exactly, suggest a close match ─────
-            # Only suggest if the “closest key” differs from the raw input:
-            suggestions = difflib.get_close_matches(
-                sci_name.lower(),
-                list(all_scientific.keys()),
-                n=1,
-                cutoff=0.8
+            # 6) Record import batch
+            # Only stage the batch here; it is persisted by the final commit
+            # in step 12 together with the imported rows. Committing at this
+            # point would mark the export as imported even when a later step
+            # (header validation, row import) rejects or aborts it, and the
+            # "already imported" check in step 5 would then refuse any retry.
+            # NOTE(review): relies on the request-scoped session discarding
+            # uncommitted work on the early-return paths — confirm for this app.
+            batch = ImportBatch(
+                export_id=export_id,
+                user_id=current_user.id,
+                imported_at=datetime.utcnow()
            )
-            if suggestions and suggestions[0] != sci_name.lower():
-                suggested = all_scientific[suggestions[0]].name
-            else:
-                suggested = None
+            db.session.add(batch)

-            review_item = {
-                "uuid":        uuid,
-                "name":        name,
-                "sci_name":    sci_name,
-                "suggested":   suggested,
-                "plant_type":  plant_type,
-                "mother_uuid": mother_uuid
-            }
-            review_list.append(review_item)
-            session["pending_rows"].append(review_item)
+            # 7) Extract into temp dir
+            tmpdir = tempfile.mkdtemp()
+            z.extractall(tmpdir)

-        session["review_list"] = review_list
-        return redirect(url_for("importer.review"))
+            # 8) Validate plants.csv
+            plant_path = os.path.join(tmpdir, "plants.csv")
+            with open(plant_path, newline="", encoding="utf-8-sig") as pf:
+                reader = csv.DictReader(pf)
+                if reader.fieldnames != PLANT_HEADERS:
+                    missing = set(PLANT_HEADERS) - set(reader.fieldnames or [])
+                    extra   = set(reader.fieldnames or []) - set(PLANT_HEADERS)
+                    os.remove(tmp_zip.name)
+                    flash(f"plants.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
+                    return redirect(request.url)
+                plant_rows = list(reader)

-    # GET → show upload form
+            # 9) Validate media.csv
+            # The header row must match MEDIA_HEADERS exactly (names and order).
+            media_path = os.path.join(tmpdir, "media.csv")
+            with open(media_path, newline="", encoding="utf-8-sig") as mf:
+                mreader = csv.DictReader(mf)
+                if mreader.fieldnames != MEDIA_HEADERS:
+                    # Diff against media.csv's own header row; the plants.csv
+                    # reader must not be consulted here.
+                    found   = set(mreader.fieldnames or [])
+                    missing = set(MEDIA_HEADERS) - found
+                    extra   = found - set(MEDIA_HEADERS)
+                    os.remove(tmp_zip.name)
+                    flash(f"media.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
+                    return redirect(request.url)
+                media_rows = list(mreader)
+
+            # 10) Import plants + Neo4j
+            neo = get_neo4j_handler()
+            added_plants = 0
+            for row in plant_rows:
+                common = PlantCommonName.query.filter_by(name=row["Name"]).first()
+                if not common:
+                    common = PlantCommonName(name=row["Name"])
+                    db.session.add(common)
+                    db.session.flush()
+
+                scientific = PlantScientificName.query.filter_by(name=row["Scientific Name"]).first()
+                if not scientific:
+                    scientific = PlantScientificName(
+                        name=row["Scientific Name"],
+                        common_id=common.id
+                    )
+                    db.session.add(scientific)
+                    db.session.flush()
+
+                p = Plant(
+                    uuid=row["UUID"],
+                    common_id=common.id,
+                    scientific_id=scientific.id,
+                    plant_type=row["Type"],
+                    owner_id=current_user.id,
+                    data_verified=True
+                )
+                db.session.add(p)
+                db.session.flush()
+
+                log = PlantOwnershipLog(
+                    plant_id=p.id,
+                    user_id=current_user.id,
+                    date_acquired=datetime.utcnow(),
+                    transferred=False,
+                    is_verified=True
+                )
+                db.session.add(log)
+
+                neo.create_plant_node(p.uuid, row["Name"])
+                if row.get("Mother UUID"):
+                    neo.create_lineage(child_uuid=p.uuid, parent_uuid=row["Mother UUID"])
+
+                added_plants += 1
+
+            # 11) Import media files (by Plant UUID)
+            added_media = 0
+            # Canonical directory that every referenced image must live under.
+            images_root = os.path.realpath(os.path.join(tmpdir, "images"))
+            for mrow in media_rows:
+                plant_uuid = mrow["Plant UUID"]
+                plant_obj = Plant.query.filter_by(uuid=plant_uuid).first()
+                if not plant_obj:
+                    continue

+                # derive subpath inside ZIP by stripping "uploads/"; a row whose
+                # path has no "uploads/" segment (or no path at all) cannot be
+                # located, so it is skipped instead of raising IndexError
+                _, marker, subpath = (mrow["Image Path"] or "").partition("uploads/")
+                if not marker:
+                    continue
+                src = os.path.realpath(os.path.join(images_root, subpath))
+                # media.csv is user-supplied: ignore any path (absolute, or
+                # containing "../") that resolves outside the extracted images
+                # directory, so server files are never copied into uploads
+                if not src.startswith(images_root + os.sep):
+                    continue
+                if not os.path.isfile(src):
+                    continue

+                dest_dir = os.path.join(
+                    current_app.static_folder, "uploads",
+                    str(current_user.id), str(plant_obj.id)
+                )
+                os.makedirs(dest_dir, exist_ok=True)

+                # store under a fresh random name, keeping only the extension
+                ext = os.path.splitext(src)[1]
+                fname = f"{uuid.uuid4().hex}{ext}"
+                dst = os.path.join(dest_dir, fname)
+                with open(src, "rb") as sf, open(dst, "wb") as df:
+                    df.write(sf.read())

+                media = Media(
+                    user_id=current_user.id,
+                    plant_id=plant_obj.id,
+                    original_filename=os.path.basename(src),
+                    path=f"uploads/{current_user.id}/{plant_obj.id}/{fname}",
+                    uploaded_at=datetime.fromisoformat(mrow["Uploaded At"]),
+                    source_type=mrow["Source Type"]
+                )
+                db.session.add(media)
+                added_media += 1
+
+            # 12) Finalize & cleanup
+            db.session.commit()
+            neo.close()
+            # Close the archive handle before deleting the file it was opened
+            # from, then drop the extraction directory so imports do not
+            # accumulate temporary files on disk.
+            z.close()
+            os.remove(tmp_zip.name)
+            shutil.rmtree(tmpdir, ignore_errors=True)

+            flash(f"Imported {added_plants} plants and {added_media} images.", "success")
+            return redirect(request.url)
+
+        # ── Standalone CSV Review Flow ─────────────────────────────────────
+        if filename.endswith(".csv"):
+            try:
+                stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
+                reader = csv.DictReader(stream)
+            except Exception:
+                flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
+                return redirect(request.url)
+
+            headers = set(reader.fieldnames or [])
+            missing = REQUIRED_HEADERS - headers
+            if missing:
+                flash(f"Missing required CSV headers: {missing}", "error")
+                return redirect(request.url)
+
+            session["pending_rows"] = []
+            review_list = []
+
+            all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
+            all_sci    = {s.name.lower(): s for s in PlantScientificName.query.all()}
+
+            for row in reader:
+                uuid_raw    = row.get("uuid", "")
+                uuid_val    = uuid_raw.strip().strip('"')
+                name_raw    = row.get("name", "")
+                name        = name_raw.strip()
+                sci_raw     = row.get("scientific_name", "")
+                sci_name    = sci_raw.strip()
+                plant_type  = row.get("plant_type", "").strip() or "plant"
+                mother_raw  = row.get("mother_uuid", "")
+                mother_uuid = mother_raw.strip().strip('"')
+
+                if not (uuid_val and name and plant_type):
+                    continue
+
+                suggestions = difflib.get_close_matches(
+                    sci_name.lower(),
+                    list(all_sci.keys()),
+                    n=1, cutoff=0.8
+                )
+                suggested = (all_sci[suggestions[0]].name
+                             if suggestions and suggestions[0] != sci_name.lower()
+                             else None)
+
+                item = {
+                    "uuid":        uuid_val,
+                    "name":        name,
+                    "sci_name":    sci_name,
+                    "suggested":   suggested,
+                    "plant_type":  plant_type,
+                    "mother_uuid": mother_uuid
+                }
+                review_list.append(item)
+                session["pending_rows"].append(item)
+
+            session["review_list"] = review_list
+            return redirect(url_for("importer.review"))
+
+        flash("Unsupported file type. Please upload a ZIP or CSV.", "danger")
+        return redirect(request.url)
+
+    # GET → render the upload form
    return render_template("importer/upload.html", csrf_token=generate_csrf())


@ -125,100 +304,76 @@ def review():
    review_list = session.get("review_list", [])

    if request.method == "POST":
-        neo   = get_neo4j_handler()
-        added = 0
+        neo     = get_neo4j_handler()
+        added   = 0

-        # Re-load preload maps to avoid NameError if used below
-        all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
+        all_common     = {c.name.lower(): c for c in PlantCommonName.query.all()}
        all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}

        for row in rows:
-            uuid        = row.get("uuid")
+            uuid_val    = row.get("uuid")
            name        = row.get("name")
            sci_name    = row.get("sci_name")
            suggested   = row.get("suggested")
            plant_type  = row.get("plant_type")
            mother_uuid = row.get("mother_uuid")

-            # Check if user clicked "confirm" for a suggested scientific name
-            accepted_key = f"confirm_{uuid}"
-            accepted = request.form.get(accepted_key)
+            accepted = request.form.get(f"confirm_{uuid_val}")

-            # ─── MySQL: PlantCommonName ────────────────────────────────────────────────
            common = PlantCommonName.query.filter_by(name=name).first()
            if not common:
                common = PlantCommonName(name=name)
                db.session.add(common)
                db.session.flush()
-                all_common[common.name.lower()] = common
-            else:
-                all_common[common.name.lower()] = common
+            all_common[common.name.lower()] = common

-            # ─── MySQL: PlantScientificName ───────────────────────────────────────────
-            sci_to_use = suggested if (suggested and accepted) else sci_name
-            scientific = PlantScientificName.query.filter_by(name=sci_to_use).first()
+            use_name   = suggested if (suggested and accepted) else sci_name
+            scientific = PlantScientificName.query.filter_by(name=use_name).first()
            if not scientific:
                scientific = PlantScientificName(
-                    name      = sci_to_use,
+                    name      = use_name,
                    common_id = common.id
                )
                db.session.add(scientific)
                db.session.flush()
-                all_scientific[scientific.name.lower()] = scientific
-            else:
-                all_scientific[scientific.name.lower()] = scientific
+            all_sci    = all_scientific[scientific.name.lower()] = scientific

-            # ─── Decide if this plant’s data is “verified” by the user ────────────────
-            data_verified = False
-            if (not suggested) or (suggested and accepted):
-                data_verified = True
+            verified = not suggested or (suggested and accepted)

-            # ─── MySQL: Plant record ─────────────────────────────────────────────────
-            plant = Plant.query.filter_by(uuid=uuid).first()
+            plant = Plant.query.filter_by(uuid=uuid_val).first()
            if not plant:
                plant = Plant(
-                    uuid           = uuid,
+                    uuid           = uuid_val,
                    common_id      = common.id,
                    scientific_id  = scientific.id,
                    plant_type     = plant_type,
                    owner_id       = current_user.id,
-                    data_verified  = data_verified
+                    data_verified  = verified
                )
                db.session.add(plant)
-                db.session.flush()  # so plant.id is now available
-
+                db.session.flush()
                log = PlantOwnershipLog(
                    plant_id      = plant.id,
                    user_id       = current_user.id,
                    date_acquired = datetime.utcnow(),
                    transferred   = False,
-                    is_verified   = data_verified
+                    is_verified   = verified
                )
                db.session.add(log)
                added += 1
-            else:
-                # Skip duplicates if the same UUID already exists
-                pass

-            # ─── Neo4j: ensure the Plant node exists ─────────────────────────────────
-            neo.create_plant_node(uuid, name)
-
-            # ─── Neo4j: create a LINEAGE relationship if mother_uuid was provided ─────
+            neo.create_plant_node(plant.uuid, plant.common.name)
            if mother_uuid:
-                # Replace the old call with the correct method name:
-                neo.create_lineage(child_uuid=uuid, parent_uuid=mother_uuid)
+                neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)

-        # Commit all MySQL changes at once
        db.session.commit()
        neo.close()

-        flash(f"{added} plants added (MySQL) and Neo4j nodes/relationships created.", "success")
-
+        flash(f"{added} plants added (MySQL) and Neo4j updated.", "success")
        session.pop("pending_rows", None)
        session.pop("review_list", None)
        return redirect(url_for("importer.upload"))

-    # GET → re-render the review page with the same review_list
    return render_template(
        "importer/review.html",
        review_list=review_list,
--- a/plugins/importer/templates/importer/upload.html
+++ b/plugins/importer/templates/importer/upload.html
@ -31,7 +31,7 @@
                class="form-control"
                id="file"
                name="file"
-                accept=".csv"
+                accept=".csv,.zip"
                required
            >
            <div class="form-text">