# plugins/importer/routes.py
"""Importer blueprint.

Two import flows for plant data:

* **ZIP flow** — a sub-app export archive containing ``plants.csv``,
  ``media.csv``, image files under ``images/``, and a ``metadata.txt``
  carrying an ``export_id`` used for de-duplication via ``ImportBatch``.
* **Standalone CSV flow** — a bare CSV that is parsed into the session
  and confirmed row-by-row on the ``/review`` page.

Both flows write plants to MySQL (SQLAlchemy) and mirror nodes/lineage
into Neo4j.
"""

import csv
import difflib
import io
import os
import shutil
import tempfile
import uuid
import zipfile
from datetime import datetime

from flask import (
    Blueprint, request, render_template, redirect, flash,
    session, url_for, current_app
)
from flask_login import login_required, current_user
from flask_wtf.csrf import generate_csrf

from app.neo4j_utils import get_neo4j_handler
from plugins.plant.models import (
    db,
    Plant,
    PlantCommonName,
    PlantScientificName,
    PlantOwnershipLog,
)
from plugins.media.models import Media
from plugins.importer.models import ImportBatch  # tracks which exports have been imported

bp = Blueprint(
    'importer',
    __name__,
    template_folder='templates',
    url_prefix='/importer'
)

# ────────────────────────────────────────────────────────────────────────────
# Required headers for the sub-app export ZIP (exact order is enforced).
PLANT_HEADERS = [
    "UUID", "Type", "Name", "Scientific Name",
    "Vendor Name", "Price", "Mother UUID", "Notes"
]
MEDIA_HEADERS = [
    "Plant UUID", "Image Path", "Uploaded At", "Source Type"
]

# Headers for the standalone CSV review flow (order-insensitive).
REQUIRED_HEADERS = {"uuid", "plant_type", "name", "scientific_name", "mother_uuid"}


@bp.route("/", methods=["GET"])
@login_required
def index():
    """Redirect /importer/ to the upload form."""
    return redirect(url_for("importer.upload"))


def _import_plant_rows(neo, plant_rows):
    """Insert plants from validated ``plants.csv`` rows.

    Creates missing common/scientific names, the ``Plant`` row, an
    ownership-log entry, and the Neo4j node (+ lineage edge when a
    "Mother UUID" is present).  Flushes but does not commit.

    Returns the number of plants added.
    """
    added_plants = 0
    for row in plant_rows:
        common = PlantCommonName.query.filter_by(name=row["Name"]).first()
        if not common:
            common = PlantCommonName(name=row["Name"])
            db.session.add(common)
            db.session.flush()  # need common.id below

        scientific = PlantScientificName.query.filter_by(
            name=row["Scientific Name"]
        ).first()
        if not scientific:
            scientific = PlantScientificName(
                name=row["Scientific Name"],
                common_id=common.id
            )
            db.session.add(scientific)
            db.session.flush()

        p = Plant(
            uuid=row["UUID"],
            common_id=common.id,
            scientific_id=scientific.id,
            plant_type=row["Type"],
            owner_id=current_user.id,
            data_verified=True  # sub-app exports are treated as verified
        )
        db.session.add(p)
        db.session.flush()  # need p.id for the ownership log

        db.session.add(PlantOwnershipLog(
            plant_id=p.id,
            user_id=current_user.id,
            date_acquired=datetime.utcnow(),
            transferred=False,
            is_verified=True
        ))

        neo.create_plant_node(p.uuid, row["Name"])
        if row.get("Mother UUID"):
            neo.create_lineage(child_uuid=p.uuid, parent_uuid=row["Mother UUID"])
        added_plants += 1
    return added_plants


def _import_media_rows(media_rows, tmpdir):
    """Copy extracted images into static storage and create ``Media`` rows.

    Rows referencing unknown plants, paths without an ``uploads/``
    segment, or files missing from the archive are skipped silently
    (best-effort import).  Returns the number of images added.
    """
    added_media = 0
    for mrow in media_rows:
        plant_obj = Plant.query.filter_by(uuid=mrow["Plant UUID"]).first()
        if not plant_obj:
            continue

        # Derive the archive sub-path by stripping everything up to
        # "uploads/".  Guard against paths that lack the marker —
        # previously this raised IndexError and aborted the import.
        parts = mrow["Image Path"].split('uploads/', 1)
        if len(parts) < 2:
            continue
        src = os.path.join(tmpdir, "images", parts[1])
        if not os.path.isfile(src):
            continue

        dest_dir = os.path.join(
            current_app.static_folder, "uploads",
            str(current_user.id), str(plant_obj.id)
        )
        os.makedirs(dest_dir, exist_ok=True)

        # Random filename keeps us collision-free within dest_dir.
        ext = os.path.splitext(src)[1]
        fname = f"{uuid.uuid4().hex}{ext}"
        shutil.copyfile(src, os.path.join(dest_dir, fname))

        db.session.add(Media(
            user_id=current_user.id,
            plant_id=plant_obj.id,
            original_filename=os.path.basename(src),
            path=f"uploads/{current_user.id}/{plant_obj.id}/{fname}",
            uploaded_at=datetime.fromisoformat(mrow["Uploaded At"]),
            source_type=mrow["Source Type"]
        ))
        added_media += 1
    return added_media


def _import_zip(file):
    """Handle the sub-app export ZIP flow; returns a redirect response.

    Validates the archive, de-duplicates on ``export_id``, then imports
    plants and media.  The ``ImportBatch`` record is committed only
    together with the imported data — committing it up-front (as this
    code once did) permanently blocked retries after a failed
    validation.
    """
    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    file.save(tmp_zip.name)
    tmp_zip.close()
    tmpdir = None
    try:
        try:
            z = zipfile.ZipFile(tmp_zip.name)
        except zipfile.BadZipFile:
            flash("Uploaded file is not a valid ZIP.", "danger")
            return redirect(request.url)

        names = z.namelist()
        if "plants.csv" not in names or "media.csv" not in names:
            flash("ZIP must contain both plants.csv and media.csv", "danger")
            return redirect(request.url)

        # Read export_id from metadata.txt ("export_id,<value>" line).
        export_id = None
        if "metadata.txt" in names:
            meta = z.read("metadata.txt").decode("utf-8", "ignore")
            for line in meta.splitlines():
                if line.startswith("export_id,"):
                    export_id = line.split(",", 1)[1].strip()
                    break
        if not export_id:
            flash("metadata.txt missing or missing export_id", "danger")
            return redirect(request.url)

        # Skip archives we have already imported for this user.
        if ImportBatch.query.filter_by(
            export_id=export_id, user_id=current_user.id
        ).first():
            flash("This export has already been imported.", "info")
            return redirect(request.url)

        # Stage the batch record; committed with the data at the end.
        db.session.add(ImportBatch(
            export_id=export_id,
            user_id=current_user.id,
            imported_at=datetime.utcnow()
        ))

        tmpdir = tempfile.mkdtemp()
        # NOTE(review): extractall() trusts member names — a hostile ZIP
        # could escape tmpdir via "../" paths; consider validating
        # member paths before extraction.
        z.extractall(tmpdir)

        # Validate plants.csv headers (exact order required).
        with open(os.path.join(tmpdir, "plants.csv"),
                  newline="", encoding="utf-8-sig") as pf:
            reader = csv.DictReader(pf)
            if reader.fieldnames != PLANT_HEADERS:
                missing = set(PLANT_HEADERS) - set(reader.fieldnames or [])
                extra = set(reader.fieldnames or []) - set(PLANT_HEADERS)
                db.session.rollback()  # discard the staged ImportBatch
                flash(f"plants.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
                return redirect(request.url)
            plant_rows = list(reader)

        # Validate media.csv headers.
        with open(os.path.join(tmpdir, "media.csv"),
                  newline="", encoding="utf-8-sig") as mf:
            mreader = csv.DictReader(mf)
            if mreader.fieldnames != MEDIA_HEADERS:
                missing = set(MEDIA_HEADERS) - set(mreader.fieldnames or [])
                # Fixed: previously computed from the plants.csv reader,
                # so the reported "extra" columns were wrong.
                extra = set(mreader.fieldnames or []) - set(MEDIA_HEADERS)
                db.session.rollback()
                flash(f"media.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
                return redirect(request.url)
            media_rows = list(mreader)

        neo = get_neo4j_handler()
        try:
            added_plants = _import_plant_rows(neo, plant_rows)
            added_media = _import_media_rows(media_rows, tmpdir)
            db.session.commit()
        finally:
            neo.close()

        flash(f"Imported {added_plants} plants and {added_media} images.", "success")
        return redirect(request.url)
    finally:
        # Clean up the uploaded archive and the extraction dir on every
        # path — previously both leaked on success and several errors.
        os.remove(tmp_zip.name)
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)


def _start_csv_review(file):
    """Parse a standalone CSV into the session and redirect to /review.

    Each valid row becomes a review item; close scientific-name matches
    (difflib, cutoff 0.8) are offered as suggestions to confirm.
    """
    try:
        stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
        reader = csv.DictReader(stream)
    except Exception:
        flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
        return redirect(request.url)

    missing = REQUIRED_HEADERS - set(reader.fieldnames or [])
    if missing:
        flash(f"Missing required CSV headers: {missing}", "error")
        return redirect(request.url)

    all_sci = {s.name.lower(): s for s in PlantScientificName.query.all()}

    review_list = []
    for row in reader:
        uuid_val = row.get("uuid", "").strip().strip('"')
        name = row.get("name", "").strip()
        sci_name = row.get("scientific_name", "").strip()
        plant_type = row.get("plant_type", "").strip() or "plant"
        mother_uuid = row.get("mother_uuid", "").strip().strip('"')

        if not (uuid_val and name and plant_type):
            continue  # skip rows missing required fields

        matches = difflib.get_close_matches(
            sci_name.lower(), list(all_sci.keys()), n=1, cutoff=0.8
        )
        suggested = (
            all_sci[matches[0]].name
            if matches and matches[0] != sci_name.lower()
            else None
        )
        review_list.append({
            "uuid": uuid_val,
            "name": name,
            "sci_name": sci_name,
            "suggested": suggested,
            "plant_type": plant_type,
            "mother_uuid": mother_uuid
        })

    session["pending_rows"] = review_list
    session["review_list"] = review_list
    return redirect(url_for("importer.review"))


@bp.route("/upload", methods=["GET", "POST"])
@login_required
def upload():
    """Upload endpoint: dispatch to the ZIP or CSV flow by extension."""
    if request.method == "POST":
        file = request.files.get("file")
        if not file or not file.filename:
            flash("No file selected", "error")
            return redirect(request.url)

        filename = file.filename.lower().strip()
        if filename.endswith(".zip"):
            return _import_zip(file)
        if filename.endswith(".csv"):
            return _start_csv_review(file)

        flash("Unsupported file type. Please upload a ZIP or CSV.", "danger")
        return redirect(request.url)

    # GET → render the upload form
    return render_template("importer/upload.html", csrf_token=generate_csrf())


@bp.route("/review", methods=["GET", "POST"])
@login_required
def review():
    """Confirm session-staged CSV rows and persist them.

    POST reads per-row ``confirm_<uuid>`` checkboxes to decide whether
    a suggested scientific-name correction is accepted; rows with an
    unaccepted suggestion are stored with ``data_verified=False``.
    """
    rows = session.get("pending_rows", [])
    review_list = session.get("review_list", [])

    if request.method == "POST":
        neo = get_neo4j_handler()
        added = 0
        for row in rows:
            uuid_val = row.get("uuid")
            name = row.get("name")
            sci_name = row.get("sci_name")
            suggested = row.get("suggested")
            plant_type = row.get("plant_type")
            mother_uuid = row.get("mother_uuid")
            accepted = request.form.get(f"confirm_{uuid_val}")

            common = PlantCommonName.query.filter_by(name=name).first()
            if not common:
                common = PlantCommonName(name=name)
                db.session.add(common)
                db.session.flush()

            # Use the suggestion only when the reviewer ticked its box.
            use_name = suggested if (suggested and accepted) else sci_name
            scientific = PlantScientificName.query.filter_by(name=use_name).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=use_name,
                    common_id=common.id
                )
                db.session.add(scientific)
                db.session.flush()

            # Verified unless a suggestion was offered and declined.
            verified = not suggested or (suggested and accepted)

            plant = Plant.query.filter_by(uuid=uuid_val).first()
            if not plant:
                plant = Plant(
                    uuid=uuid_val,
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=plant_type,
                    owner_id=current_user.id,
                    data_verified=verified
                )
                db.session.add(plant)
                db.session.flush()
                db.session.add(PlantOwnershipLog(
                    plant_id=plant.id,
                    user_id=current_user.id,
                    date_acquired=datetime.utcnow(),
                    transferred=False,
                    is_verified=verified
                ))
                added += 1

            neo.create_plant_node(plant.uuid, plant.common.name)
            if mother_uuid:
                neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)

        db.session.commit()
        neo.close()

        flash(f"{added} plants added (MySQL) and Neo4j updated.", "success")
        session.pop("pending_rows", None)
        session.pop("review_list", None)
        return redirect(url_for("importer.upload"))

    return render_template(
        "importer/review.html",
        review_list=review_list,
        csrf_token=generate_csrf()
    )