# plugins/importer/routes.py
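"""Routes for the importer plugin.

Two import flows are exposed here:
  * ZIP import: a sub-app export containing plants.csv, media.csv,
    metadata.txt (carrying an export_id), and an images/ tree.
  * Standalone CSV import: rows are staged in the session, confirmed on
    /importer/review, then written to MySQL and mirrored into Neo4j.
"""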
import csv
import io
import uuid
import difflib
import os
import zipfile
import tempfile
from datetime import datetime
from flask import (
    Blueprint, request, render_template, redirect, flash,
    session, url_for, current_app
)
from flask_login import login_required, current_user
from flask_wtf.csrf import generate_csrf
from app.neo4j_utils import get_neo4j_handler
from plugins.plant.models import (
    db,
    Plant,
    PlantCommonName,
    PlantScientificName,
    PlantOwnershipLog,
)
from plugins.media.models import Media
from plugins.importer.models import ImportBatch # tracks which exports have been imported
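# Note: this module only relies on ImportBatch exposing export_id, user_id,
# and imported_at (see steps 5-6 of the ZIP flow below); any other columns
# the model defines are not touched here.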
bp = Blueprint(
    'importer',
    __name__,
    template_folder='templates',
    url_prefix='/importer'
)

@bp.route("/", methods=["GET"])
@login_required
def index():
    # When someone hits /importer/, redirect to /importer/upload
    return redirect(url_for("importer.upload"))

# ────────────────────────────────────────────────────────────────────────────────
# Required headers for your sub-app export ZIP
PLANT_HEADERS = [
    "UUID", "Type", "Name", "Scientific Name",
    "Vendor Name", "Price", "Mother UUID", "Notes"
]
MEDIA_HEADERS = [
    "Plant UUID", "Image Path", "Uploaded At", "Source Type"
]
# Headers for standalone CSV review flow
REQUIRED_HEADERS = {"uuid", "plant_type", "name", "scientific_name", "mother_uuid"}
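
# Expected ZIP layout (as consumed by the ZIP flow below):
#
#   plants.csv      - header row must match PLANT_HEADERS exactly
#   media.csv       - header row must match MEDIA_HEADERS exactly
#   metadata.txt    - must contain a line of the form "export_id,<value>"
#   images/...      - files referenced by media.csv "Image Path" values,
#                     addressed by the portion of the path after "uploads/"
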
@bp.route("/upload", methods=["GET", "POST"])
@login_required
def upload():
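    """Accept a ZIP export or a standalone CSV upload.

    ZIP uploads are imported immediately (plants, ownership logs, media
    files, and Neo4j nodes/lineage). CSV uploads are staged in the session
    and redirected to the review step for confirmation.
    """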
    if request.method == "POST":
        file = request.files.get("file")
        if not file or not file.filename:
            flash("No file selected", "error")
            return redirect(request.url)
        filename = file.filename.lower().strip()

        # ── ZIP Import Flow ───────────────────────────────────────────────────
        if filename.endswith(".zip"):
            # 1) Save upload to disk
            tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
            file.save(tmp_zip.name)
            tmp_zip.close()

            # 2) Open as ZIP
            try:
                z = zipfile.ZipFile(tmp_zip.name)
            except zipfile.BadZipFile:
                os.remove(tmp_zip.name)
                flash("Uploaded file is not a valid ZIP.", "danger")
                return redirect(request.url)

            # 3) Ensure both CSVs
            names = z.namelist()
            if "plants.csv" not in names or "media.csv" not in names:
                os.remove(tmp_zip.name)
                flash("ZIP must contain both plants.csv and media.csv", "danger")
                return redirect(request.url)
            # 4) Read export_id from metadata.txt
            export_id = None
            if "metadata.txt" in names:
                meta = z.read("metadata.txt").decode("utf-8", "ignore")
                for line in meta.splitlines():
                    if line.startswith("export_id,"):
                        export_id = line.split(",", 1)[1].strip()
                        break
            if not export_id:
                os.remove(tmp_zip.name)
                flash("metadata.txt is missing or does not contain an export_id", "danger")
                return redirect(request.url)

            # 5) Skip if already imported
            if ImportBatch.query.filter_by(export_id=export_id, user_id=current_user.id).first():
                os.remove(tmp_zip.name)
                flash("This export has already been imported.", "info")
                return redirect(request.url)
            # 6) Record import batch
            batch = ImportBatch(
                export_id=export_id,
                user_id=current_user.id,
                imported_at=datetime.utcnow()
            )
            db.session.add(batch)
            db.session.commit()

            # 7) Extract into temp dir
            tmpdir = tempfile.mkdtemp()
            z.extractall(tmpdir)
            # 8) Validate plants.csv
            plant_path = os.path.join(tmpdir, "plants.csv")
            with open(plant_path, newline="", encoding="utf-8-sig") as pf:
                reader = csv.DictReader(pf)
                if reader.fieldnames != PLANT_HEADERS:
                    missing = set(PLANT_HEADERS) - set(reader.fieldnames or [])
                    extra = set(reader.fieldnames or []) - set(PLANT_HEADERS)
                    os.remove(tmp_zip.name)
                    flash(f"plants.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
                    return redirect(request.url)
                plant_rows = list(reader)

            # 9) Validate media.csv
            media_path = os.path.join(tmpdir, "media.csv")
            with open(media_path, newline="", encoding="utf-8-sig") as mf:
                mreader = csv.DictReader(mf)
                if mreader.fieldnames != MEDIA_HEADERS:
                    missing = set(MEDIA_HEADERS) - set(mreader.fieldnames or [])
                    # compare against media.csv's own header row, not plants.csv's
                    extra = set(mreader.fieldnames or []) - set(MEDIA_HEADERS)
                    os.remove(tmp_zip.name)
                    flash(f"media.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
                    return redirect(request.url)
                media_rows = list(mreader)
            # 10) Import plants + Neo4j
            neo = get_neo4j_handler()
            added_plants = 0
            for row in plant_rows:
                common = PlantCommonName.query.filter_by(name=row["Name"]).first()
                if not common:
                    common = PlantCommonName(name=row["Name"])
                    db.session.add(common)
                    db.session.flush()

                scientific = PlantScientificName.query.filter_by(name=row["Scientific Name"]).first()
                if not scientific:
                    scientific = PlantScientificName(
                        name=row["Scientific Name"],
                        common_id=common.id
                    )
                    db.session.add(scientific)
                    db.session.flush()

                p = Plant(
                    uuid=row["UUID"],
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=row["Type"],
                    owner_id=current_user.id,
                    data_verified=True
                )
                db.session.add(p)
                db.session.flush()

                log = PlantOwnershipLog(
                    plant_id=p.id,
                    user_id=current_user.id,
                    date_acquired=datetime.utcnow(),
                    transferred=False,
                    is_verified=True
                )
                db.session.add(log)

                neo.create_plant_node(p.uuid, row["Name"])
                if row.get("Mother UUID"):
                    neo.create_lineage(child_uuid=p.uuid, parent_uuid=row["Mother UUID"])
                added_plants += 1
            # 11) Import media files (by Plant UUID)
            added_media = 0
            for mrow in media_rows:
                plant_uuid = mrow["Plant UUID"]
                plant_obj = Plant.query.filter_by(uuid=plant_uuid).first()
                if not plant_obj:
                    continue

                # derive subpath inside ZIP by stripping "uploads/";
                # skip rows whose path does not contain "uploads/" at all
                if "uploads/" not in mrow["Image Path"]:
                    continue
                subpath = mrow["Image Path"].split("uploads/", 1)[1]
                src = os.path.join(tmpdir, "images", subpath)
                if not os.path.isfile(src):
                    continue

                dest_dir = os.path.join(
                    current_app.static_folder, "uploads",
                    str(current_user.id), str(plant_obj.id)
                )
                os.makedirs(dest_dir, exist_ok=True)
                ext = os.path.splitext(src)[1]
                fname = f"{uuid.uuid4().hex}{ext}"
                dst = os.path.join(dest_dir, fname)
                with open(src, "rb") as sf, open(dst, "wb") as df:
                    df.write(sf.read())

                media = Media(
                    user_id=current_user.id,
                    plant_id=plant_obj.id,
                    original_filename=os.path.basename(src),
                    path=f"uploads/{current_user.id}/{plant_obj.id}/{fname}",
                    uploaded_at=datetime.fromisoformat(mrow["Uploaded At"]),
                    source_type=mrow["Source Type"]
                )
                db.session.add(media)
                added_media += 1
            # 12) Finalize & cleanup
            db.session.commit()
            neo.close()
            os.remove(tmp_zip.name)
            flash(f"Imported {added_plants} plants and {added_media} images.", "success")
            return redirect(request.url)
        # ── Standalone CSV Review Flow ─────────────────────────────────────
        if filename.endswith(".csv"):
            try:
                stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
                reader = csv.DictReader(stream)
            except Exception:
                flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
                return redirect(request.url)

            headers = set(reader.fieldnames or [])
            missing = REQUIRED_HEADERS - headers
            if missing:
                flash(f"Missing required CSV headers: {missing}", "error")
                return redirect(request.url)

            session["pending_rows"] = []
            review_list = []
            all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
            all_sci = {s.name.lower(): s for s in PlantScientificName.query.all()}

            for row in reader:
                uuid_raw = row.get("uuid", "")
                uuid_val = uuid_raw.strip().strip('"')
                name_raw = row.get("name", "")
                name = name_raw.strip()
                sci_raw = row.get("scientific_name", "")
                sci_name = sci_raw.strip()
                plant_type = row.get("plant_type", "").strip() or "plant"
                mother_raw = row.get("mother_uuid", "")
                mother_uuid = mother_raw.strip().strip('"')

                if not (uuid_val and name and plant_type):
                    continue

                suggestions = difflib.get_close_matches(
                    sci_name.lower(),
                    list(all_sci.keys()),
                    n=1, cutoff=0.8
                )
                suggested = (all_sci[suggestions[0]].name
                             if suggestions and suggestions[0] != sci_name.lower()
                             else None)

                item = {
                    "uuid": uuid_val,
                    "name": name,
                    "sci_name": sci_name,
                    "suggested": suggested,
                    "plant_type": plant_type,
                    "mother_uuid": mother_uuid
                }
                review_list.append(item)
                session["pending_rows"].append(item)

            session["review_list"] = review_list
            return redirect(url_for("importer.review"))

        flash("Unsupported file type. Please upload a ZIP or CSV.", "danger")
        return redirect(request.url)

    # GET → render the upload form
    return render_template("importer/upload.html", csrf_token=generate_csrf())

@bp.route("/review", methods=["GET", "POST"])
@login_required
def review():
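    """Confirm rows staged by the standalone CSV flow.

    On POST, each staged row is written to MySQL (creating common and
    scientific names as needed), logged in PlantOwnershipLog, and mirrored
    into Neo4j; accepted name suggestions (confirm_<uuid> form fields)
    override the submitted scientific name.
    """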
    rows = session.get("pending_rows", [])
    review_list = session.get("review_list", [])

    if request.method == "POST":
        neo = get_neo4j_handler()
        added = 0
        all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
        all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}

        for row in rows:
            uuid_val = row.get("uuid")
            name = row.get("name")
            sci_name = row.get("sci_name")
            suggested = row.get("suggested")
            plant_type = row.get("plant_type")
            mother_uuid = row.get("mother_uuid")
            accepted = request.form.get(f"confirm_{uuid_val}")

            common = PlantCommonName.query.filter_by(name=name).first()
            if not common:
                common = PlantCommonName(name=name)
                db.session.add(common)
                db.session.flush()
                all_common[common.name.lower()] = common

            use_name = suggested if (suggested and accepted) else sci_name
            scientific = PlantScientificName.query.filter_by(name=use_name).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=use_name,
                    common_id=common.id
                )
                db.session.add(scientific)
                db.session.flush()
                all_scientific[scientific.name.lower()] = scientific

            # verified unless a suggestion was offered and not accepted
            verified = bool(not suggested or accepted)
            plant = Plant.query.filter_by(uuid=uuid_val).first()
            if not plant:
                plant = Plant(
                    uuid=uuid_val,
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=plant_type,
                    owner_id=current_user.id,
                    data_verified=verified
                )
                db.session.add(plant)
                db.session.flush()

                log = PlantOwnershipLog(
                    plant_id=plant.id,
                    user_id=current_user.id,
                    date_acquired=datetime.utcnow(),
                    transferred=False,
                    is_verified=verified
                )
                db.session.add(log)
                added += 1

            neo.create_plant_node(plant.uuid, plant.common.name)
            if mother_uuid:
                neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)

        db.session.commit()
        neo.close()
        flash(f"{added} plants added (MySQL) and Neo4j updated.", "success")

        session.pop("pending_rows", None)
        session.pop("review_list", None)
        return redirect(url_for("importer.upload"))

    return render_template(
        "importer/review.html",
        review_list=review_list,
        csrf_token=generate_csrf()
    )
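
# Template contract (inferred from the form handling above): review.html must
# render one "confirm_<uuid>" input per suggested rename and POST back to this
# route with the CSRF token; upload.html only needs a file field named "file".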