# plugins/importer/routes.py
import csv
import io
import uuid
import difflib
import os
import zipfile
import tempfile

from datetime import datetime
from flask import (
    Blueprint, request, render_template, redirect, flash,
    session, url_for, current_app
)
from flask_login import login_required, current_user
from flask_wtf.csrf import generate_csrf

from app.neo4j_utils import get_neo4j_handler
from plugins.plant.models import (
    db,
    Plant,
    PlantCommonName,
    PlantScientificName,
    PlantOwnershipLog,
)
from plugins.media.models import Media
from plugins.importer.models import ImportBatch  # tracks which exports have been imported
# Blueprint for the importer plugin; every route below is served under /importer.
bp = Blueprint(
    'importer',
    __name__,
    template_folder='templates',
    url_prefix='/importer'
)
@bp.route("/", methods=["GET"])
|
|
@login_required
|
|
def index():
|
|
# When someone hits /importer/, redirect to /importer/upload
|
|
return redirect(url_for("importer.upload"))
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────────
# Required headers for your sub-app export ZIP.
# plants.csv must carry exactly these columns, in this order.
PLANT_HEADERS = [
    "UUID","Type","Name","Scientific Name",
    "Vendor Name","Price","Mother UUID","Notes"
]
# media.csv must carry exactly these columns, in this order.
MEDIA_HEADERS = [
    "Plant UUID","Image Path","Uploaded At","Source Type"
]

# Headers for standalone CSV review flow (order-insensitive; checked as a set).
REQUIRED_HEADERS = {"uuid", "plant_type", "name", "scientific_name", "mother_uuid"}
@bp.route("/upload", methods=["GET", "POST"])
|
|
@login_required
|
|
def upload():
|
|
if request.method == "POST":
|
|
file = request.files.get("file")
|
|
if not file or not file.filename:
|
|
flash("No file selected", "error")
|
|
return redirect(request.url)
|
|
|
|
filename = file.filename.lower().strip()
|
|
|
|
# ── ZIP Import Flow ───────────────────────────────────────────────────
|
|
if filename.endswith(".zip"):
|
|
# 1) Save upload to disk
|
|
tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
|
|
file.save(tmp_zip.name)
|
|
tmp_zip.close()
|
|
|
|
# 2) Open as ZIP
|
|
try:
|
|
z = zipfile.ZipFile(tmp_zip.name)
|
|
except zipfile.BadZipFile:
|
|
os.remove(tmp_zip.name)
|
|
flash("Uploaded file is not a valid ZIP.", "danger")
|
|
return redirect(request.url)
|
|
|
|
# 3) Ensure both CSVs
|
|
names = z.namelist()
|
|
if "plants.csv" not in names or "media.csv" not in names:
|
|
os.remove(tmp_zip.name)
|
|
flash("ZIP must contain both plants.csv and media.csv", "danger")
|
|
return redirect(request.url)
|
|
|
|
# 4) Read export_id from metadata.txt
|
|
export_id = None
|
|
if "metadata.txt" in names:
|
|
meta = z.read("metadata.txt").decode("utf-8", "ignore")
|
|
for line in meta.splitlines():
|
|
if line.startswith("export_id,"):
|
|
export_id = line.split(",", 1)[1].strip()
|
|
break
|
|
if not export_id:
|
|
os.remove(tmp_zip.name)
|
|
flash("metadata.txt missing or missing export_id", "danger")
|
|
return redirect(request.url)
|
|
|
|
# 5) Skip if already imported
|
|
if ImportBatch.query.filter_by(export_id=export_id, user_id=current_user.id).first():
|
|
os.remove(tmp_zip.name)
|
|
flash("This export has already been imported.", "info")
|
|
return redirect(request.url)
|
|
|
|
# 6) Record import batch
|
|
batch = ImportBatch(
|
|
export_id=export_id,
|
|
user_id=current_user.id,
|
|
imported_at=datetime.utcnow()
|
|
)
|
|
db.session.add(batch)
|
|
db.session.commit()
|
|
|
|
# 7) Extract into temp dir
|
|
tmpdir = tempfile.mkdtemp()
|
|
z.extractall(tmpdir)
|
|
|
|
# 8) Validate plants.csv
|
|
plant_path = os.path.join(tmpdir, "plants.csv")
|
|
with open(plant_path, newline="", encoding="utf-8-sig") as pf:
|
|
reader = csv.DictReader(pf)
|
|
if reader.fieldnames != PLANT_HEADERS:
|
|
missing = set(PLANT_HEADERS) - set(reader.fieldnames or [])
|
|
extra = set(reader.fieldnames or []) - set(PLANT_HEADERS)
|
|
os.remove(tmp_zip.name)
|
|
flash(f"plants.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
|
|
return redirect(request.url)
|
|
plant_rows = list(reader)
|
|
|
|
# 9) Validate media.csv
|
|
media_path = os.path.join(tmpdir, "media.csv")
|
|
with open(media_path, newline="", encoding="utf-8-sig") as mf:
|
|
mreader = csv.DictReader(mf)
|
|
if mreader.fieldnames != MEDIA_HEADERS:
|
|
missing = set(MEDIA_HEADERS) - set(mreader.fieldnames or [])
|
|
extra = set(reader.fieldnames or []) - set(MEDIA_HEADERS)
|
|
os.remove(tmp_zip.name)
|
|
flash(f"media.csv header mismatch. Missing: {missing}, Extra: {extra}", "danger")
|
|
return redirect(request.url)
|
|
media_rows = list(mreader)
|
|
|
|
# 10) Import plants + Neo4j
|
|
neo = get_neo4j_handler()
|
|
added_plants = 0
|
|
for row in plant_rows:
|
|
common = PlantCommonName.query.filter_by(name=row["Name"]).first()
|
|
if not common:
|
|
common = PlantCommonName(name=row["Name"])
|
|
db.session.add(common)
|
|
db.session.flush()
|
|
|
|
scientific = PlantScientificName.query.filter_by(name=row["Scientific Name"]).first()
|
|
if not scientific:
|
|
scientific = PlantScientificName(
|
|
name=row["Scientific Name"],
|
|
common_id=common.id
|
|
)
|
|
db.session.add(scientific)
|
|
db.session.flush()
|
|
|
|
p = Plant(
|
|
uuid=row["UUID"],
|
|
common_id=common.id,
|
|
scientific_id=scientific.id,
|
|
plant_type=row["Type"],
|
|
owner_id=current_user.id,
|
|
data_verified=True
|
|
)
|
|
db.session.add(p)
|
|
db.session.flush()
|
|
|
|
log = PlantOwnershipLog(
|
|
plant_id=p.id,
|
|
user_id=current_user.id,
|
|
date_acquired=datetime.utcnow(),
|
|
transferred=False,
|
|
is_verified=True
|
|
)
|
|
db.session.add(log)
|
|
|
|
neo.create_plant_node(p.uuid, row["Name"])
|
|
if row.get("Mother UUID"):
|
|
neo.create_lineage(child_uuid=p.uuid, parent_uuid=row["Mother UUID"])
|
|
|
|
added_plants += 1
|
|
|
|
# 11) Import media files (by Plant UUID)
|
|
added_media = 0
|
|
for mrow in media_rows:
|
|
plant_uuid = mrow["Plant UUID"]
|
|
plant_obj = Plant.query.filter_by(uuid=plant_uuid).first()
|
|
if not plant_obj:
|
|
continue
|
|
|
|
# derive subpath inside ZIP by stripping "uploads/"
|
|
subpath = mrow["Image Path"].split('uploads/', 1)[1]
|
|
src = os.path.join(tmpdir, "images", subpath)
|
|
if not os.path.isfile(src):
|
|
continue
|
|
|
|
dest_dir = os.path.join(
|
|
current_app.static_folder, "uploads",
|
|
str(current_user.id), str(plant_obj.id)
|
|
)
|
|
os.makedirs(dest_dir, exist_ok=True)
|
|
|
|
ext = os.path.splitext(src)[1]
|
|
fname = f"{uuid.uuid4().hex}{ext}"
|
|
dst = os.path.join(dest_dir, fname)
|
|
with open(src, "rb") as sf, open(dst, "wb") as df:
|
|
df.write(sf.read())
|
|
|
|
media = Media(
|
|
user_id=current_user.id,
|
|
plant_id=plant_obj.id,
|
|
original_filename=os.path.basename(src),
|
|
path=f"uploads/{current_user.id}/{plant_obj.id}/{fname}",
|
|
uploaded_at=datetime.fromisoformat(mrow["Uploaded At"]),
|
|
source_type=mrow["Source Type"]
|
|
)
|
|
db.session.add(media)
|
|
added_media += 1
|
|
|
|
# 12) Finalize & cleanup
|
|
db.session.commit()
|
|
neo.close()
|
|
os.remove(tmp_zip.name)
|
|
|
|
flash(f"Imported {added_plants} plants and {added_media} images.", "success")
|
|
return redirect(request.url)
|
|
|
|
# ── Standalone CSV Review Flow ─────────────────────────────────────
|
|
if filename.endswith(".csv"):
|
|
try:
|
|
stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
|
|
reader = csv.DictReader(stream)
|
|
except Exception:
|
|
flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
|
|
return redirect(request.url)
|
|
|
|
headers = set(reader.fieldnames or [])
|
|
missing = REQUIRED_HEADERS - headers
|
|
if missing:
|
|
flash(f"Missing required CSV headers: {missing}", "error")
|
|
return redirect(request.url)
|
|
|
|
session["pending_rows"] = []
|
|
review_list = []
|
|
|
|
all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
|
|
all_sci = {s.name.lower(): s for s in PlantScientificName.query.all()}
|
|
|
|
for row in reader:
|
|
uuid_raw = row.get("uuid", "")
|
|
uuid_val = uuid_raw.strip().strip('"')
|
|
name_raw = row.get("name", "")
|
|
name = name_raw.strip()
|
|
sci_raw = row.get("scientific_name", "")
|
|
sci_name = sci_raw.strip()
|
|
plant_type = row.get("plant_type", "").strip() or "plant"
|
|
mother_raw = row.get("mother_uuid", "")
|
|
mother_uuid = mother_raw.strip().strip('"')
|
|
|
|
if not (uuid_val and name and plant_type):
|
|
continue
|
|
|
|
suggestions = difflib.get_close_matches(
|
|
sci_name.lower(),
|
|
list(all_sci.keys()),
|
|
n=1, cutoff=0.8
|
|
)
|
|
suggested = (all_sci[suggestions[0]].name
|
|
if suggestions and suggestions[0] != sci_name.lower()
|
|
else None)
|
|
|
|
item = {
|
|
"uuid": uuid_val,
|
|
"name": name,
|
|
"sci_name": sci_name,
|
|
"suggested": suggested,
|
|
"plant_type": plant_type,
|
|
"mother_uuid": mother_uuid
|
|
}
|
|
review_list.append(item)
|
|
session["pending_rows"].append(item)
|
|
|
|
session["review_list"] = review_list
|
|
return redirect(url_for("importer.review"))
|
|
|
|
flash("Unsupported file type. Please upload a ZIP or CSV.", "danger")
|
|
return redirect(request.url)
|
|
|
|
# GET → render the upload form
|
|
return render_template("importer/upload.html", csrf_token=generate_csrf())
|
|
|
|
|
|
@bp.route("/review", methods=["GET", "POST"])
|
|
@login_required
|
|
def review():
|
|
rows = session.get("pending_rows", [])
|
|
review_list = session.get("review_list", [])
|
|
|
|
if request.method == "POST":
|
|
neo = get_neo4j_handler()
|
|
added = 0
|
|
|
|
all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
|
|
all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}
|
|
|
|
for row in rows:
|
|
uuid_val = row.get("uuid")
|
|
name = row.get("name")
|
|
sci_name = row.get("sci_name")
|
|
suggested = row.get("suggested")
|
|
plant_type = row.get("plant_type")
|
|
mother_uuid = row.get("mother_uuid")
|
|
|
|
accepted = request.form.get(f"confirm_{uuid_val}")
|
|
|
|
common = PlantCommonName.query.filter_by(name=name).first()
|
|
if not common:
|
|
common = PlantCommonName(name=name)
|
|
db.session.add(common)
|
|
db.session.flush()
|
|
all_common[common.name.lower()] = common
|
|
|
|
use_name = suggested if (suggested and accepted) else sci_name
|
|
scientific = PlantScientificName.query.filter_by(name=use_name).first()
|
|
if not scientific:
|
|
scientific = PlantScientificName(
|
|
name = use_name,
|
|
common_id = common.id
|
|
)
|
|
db.session.add(scientific)
|
|
db.session.flush()
|
|
all_sci = all_scientific[scientific.name.lower()] = scientific
|
|
|
|
verified = not suggested or (suggested and accepted)
|
|
|
|
plant = Plant.query.filter_by(uuid=uuid_val).first()
|
|
if not plant:
|
|
plant = Plant(
|
|
uuid = uuid_val,
|
|
common_id = common.id,
|
|
scientific_id = scientific.id,
|
|
plant_type = plant_type,
|
|
owner_id = current_user.id,
|
|
data_verified = verified
|
|
)
|
|
db.session.add(plant)
|
|
db.session.flush()
|
|
log = PlantOwnershipLog(
|
|
plant_id = plant.id,
|
|
user_id = current_user.id,
|
|
date_acquired = datetime.utcnow(),
|
|
transferred = False,
|
|
is_verified = verified
|
|
)
|
|
db.session.add(log)
|
|
added += 1
|
|
|
|
neo.create_plant_node(plant.uuid, plant.common.name)
|
|
if mother_uuid:
|
|
neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)
|
|
|
|
db.session.commit()
|
|
neo.close()
|
|
|
|
flash(f"{added} plants added (MySQL) and Neo4j updated.", "success")
|
|
session.pop("pending_rows", None)
|
|
session.pop("review_list", None)
|
|
return redirect(url_for("importer.upload"))
|
|
|
|
return render_template(
|
|
"importer/review.html",
|
|
review_list=review_list,
|
|
csrf_token=generate_csrf()
|
|
)
|