more
This commit is contained in:
15
plugins/importer/models.py
Normal file
15
plugins/importer/models.py
Normal file
@ -0,0 +1,15 @@
|
||||
from datetime import datetime
|
||||
from plugins.plant.models import db
|
||||
|
||||
class ImportBatch(db.Model):
    """Record of one export archive imported by one user.

    A row is written per (export_id, user_id) pair; the unique constraint
    below makes the database reject a second import of the same export by
    the same user.
    """

    __tablename__ = 'import_batches'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Identifier of the export being imported (at most 64 characters).
    export_id = db.Column(db.String(64), nullable=False)
    # Id of the importing user; indexed for per-user lookups.
    user_id = db.Column(db.Integer, nullable=False, index=True)
    # Naive UTC timestamp, filled in at insert time when not supplied.
    imported_at = db.Column(
        db.DateTime,
        nullable=False,
        default=datetime.utcnow,
    )

    __table_args__ = (
        # Ensure a given user can't import the same export twice.
        db.UniqueConstraint(
            'export_id',
            'user_id',
            name='uix_export_user',
        ),
    )
|
@ -2,10 +2,17 @@
|
||||
|
||||
import csv
|
||||
import io
|
||||
import uuid
|
||||
import difflib
|
||||
import os
|
||||
import zipfile
|
||||
import tempfile
|
||||
|
||||
from datetime import datetime
|
||||
from flask import Blueprint, request, render_template, redirect, flash, session, url_for
|
||||
from flask import (
|
||||
Blueprint, request, render_template, redirect, flash,
|
||||
session, url_for, current_app
|
||||
)
|
||||
from flask_login import login_required, current_user
|
||||
from flask_wtf.csrf import generate_csrf
|
||||
|
||||
@ -15,106 +22,278 @@ from plugins.plant.models import (
|
||||
Plant,
|
||||
PlantCommonName,
|
||||
PlantScientificName,
|
||||
PlantOwnershipLog
|
||||
PlantOwnershipLog,
|
||||
)
|
||||
from plugins.media.models import Media
|
||||
from plugins.importer.models import ImportBatch # tracks which exports have been imported
|
||||
|
||||
# Blueprint for the importer plugin; every route below is mounted under
# the "/importer" URL prefix and resolves templates from ./templates.
bp = Blueprint(
    "importer",
    __name__,
    template_folder="templates",
    url_prefix="/importer",
)
|
||||
|
||||
@bp.route("/", methods=["GET"])
@login_required
def index():
    """Redirect the blueprint root (/importer/) to the upload page."""
    upload_url = url_for("importer.upload")
    return redirect(upload_url)
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────────
|
||||
# Required headers for your sub-app export ZIP
|
||||
# Exact, ordered header row expected in plants.csv inside an export ZIP.
# Kept as a list because it is compared directly against
# csv.DictReader.fieldnames (also a list).
PLANT_HEADERS = [
    "UUID",
    "Type",
    "Name",
    "Scientific Name",
    "Vendor Name",
    "Price",
    "Mother UUID",
    "Notes",
]

# Exact, ordered header row expected in media.csv inside an export ZIP.
MEDIA_HEADERS = [
    "Plant UUID",
    "Image Path",
    "Uploaded At",
    "Source Type",
]

# ────────────────────────────────────────────────────────────────────────────────
# Headers for standalone CSV review flow (order-insensitive, extras allowed)
REQUIRED_HEADERS = {
    "uuid",
    "plant_type",
    "name",
    "scientific_name",
    "mother_uuid",
}
|
||||
|
||||
|
||||
def _load_csv_rows(path, expected_headers, label):
    """Read a CSV file whose header row must equal ``expected_headers`` exactly.

    Returns ``(rows, None)`` on success, or ``(None, message)`` when the
    header row differs; ``message`` names the missing and extra columns.
    """
    with open(path, newline="", encoding="utf-8-sig") as handle:
        reader = csv.DictReader(handle)
        found = reader.fieldnames
        if found != expected_headers:
            missing = set(expected_headers) - set(found or [])
            extra = set(found or []) - set(expected_headers)
            return None, (
                f"{label} header mismatch. Missing: {missing}, Extra: {extra}"
            )
        return list(reader), None


def _read_export_id(archive, names):
    """Return the export id from metadata.txt, or None if absent or empty."""
    if "metadata.txt" not in names:
        return None
    meta = archive.read("metadata.txt").decode("utf-8", "ignore")
    for line in meta.splitlines():
        if line.startswith("export_id,"):
            # Only the first matching line counts; an empty value is
            # treated the same as a missing one.
            return line.split(",", 1)[1].strip() or None
    return None


def _import_plant_rows(neo, plant_rows):
    """Create Plant rows (plus names, ownership log, Neo4j nodes); return count."""
    added = 0
    for row in plant_rows:
        common = PlantCommonName.query.filter_by(name=row["Name"]).first()
        if not common:
            common = PlantCommonName(name=row["Name"])
            db.session.add(common)
            db.session.flush()  # populate common.id

        scientific = PlantScientificName.query.filter_by(
            name=row["Scientific Name"]
        ).first()
        if not scientific:
            scientific = PlantScientificName(
                name=row["Scientific Name"],
                common_id=common.id,
            )
            db.session.add(scientific)
            db.session.flush()  # populate scientific.id

        plant = Plant(
            uuid=row["UUID"],
            common_id=common.id,
            scientific_id=scientific.id,
            plant_type=row["Type"],
            owner_id=current_user.id,
            data_verified=True,
        )
        db.session.add(plant)
        db.session.flush()  # populate plant.id for the ownership log

        db.session.add(
            PlantOwnershipLog(
                plant_id=plant.id,
                user_id=current_user.id,
                date_acquired=datetime.utcnow(),
                transferred=False,
                is_verified=True,
            )
        )

        neo.create_plant_node(plant.uuid, row["Name"])
        if row.get("Mother UUID"):
            neo.create_lineage(
                child_uuid=plant.uuid, parent_uuid=row["Mother UUID"]
            )

        added += 1
    return added


def _import_media_rows(tmpdir, media_rows):
    """Copy images referenced by media.csv into static uploads; return count."""
    images_root = os.path.realpath(os.path.join(tmpdir, "images"))
    added = 0
    for mrow in media_rows:
        plant_obj = Plant.query.filter_by(uuid=mrow["Plant UUID"]).first()
        if not plant_obj:
            continue

        # Derive the path inside the ZIP by stripping everything up to and
        # including "uploads/". Rows without that marker are skipped instead
        # of raising IndexError.
        _, marker, subpath = mrow["Image Path"].partition("uploads/")
        if not marker:
            continue

        # The CSV is untrusted: refuse paths ("..", absolute) that resolve
        # outside the extracted images directory.
        src = os.path.realpath(os.path.join(images_root, subpath))
        if not src.startswith(images_root + os.sep) or not os.path.isfile(src):
            continue

        # Parse before copying so a malformed timestamp does not leave an
        # orphaned file in the uploads folder.
        uploaded_at = datetime.fromisoformat(mrow["Uploaded At"])

        dest_dir = os.path.join(
            current_app.static_folder, "uploads",
            str(current_user.id), str(plant_obj.id)
        )
        os.makedirs(dest_dir, exist_ok=True)

        ext = os.path.splitext(src)[1]
        fname = f"{uuid.uuid4().hex}{ext}"
        dst = os.path.join(dest_dir, fname)
        with open(src, "rb") as sf, open(dst, "wb") as df:
            df.write(sf.read())

        db.session.add(
            Media(
                user_id=current_user.id,
                plant_id=plant_obj.id,
                original_filename=os.path.basename(src),
                path=f"uploads/{current_user.id}/{plant_obj.id}/{fname}",
                uploaded_at=uploaded_at,
                source_type=mrow["Source Type"],
            )
        )
        added += 1
    return added


def _import_archive(archive, tmpdir):
    """Validate an opened export ZIP and import its plants and media.

    Returns the redirect response for the upload view. Extraction goes into
    ``tmpdir``, which the caller owns and removes.
    """
    names = archive.namelist()
    if "plants.csv" not in names or "media.csv" not in names:
        flash("ZIP must contain both plants.csv and media.csv", "danger")
        return redirect(request.url)

    export_id = _read_export_id(archive, names)
    if not export_id:
        flash("metadata.txt missing or missing export_id", "danger")
        return redirect(request.url)

    already_imported = ImportBatch.query.filter_by(
        export_id=export_id, user_id=current_user.id
    ).first()
    if already_imported:
        flash("This export has already been imported.", "info")
        return redirect(request.url)

    archive.extractall(tmpdir)

    plant_rows, problem = _load_csv_rows(
        os.path.join(tmpdir, "plants.csv"), PLANT_HEADERS, "plants.csv"
    )
    if problem:
        flash(problem, "danger")
        return redirect(request.url)

    media_rows, problem = _load_csv_rows(
        os.path.join(tmpdir, "media.csv"), MEDIA_HEADERS, "media.csv"
    )
    if problem:
        flash(problem, "danger")
        return redirect(request.url)

    neo = get_neo4j_handler()
    try:
        # The batch marker is committed together with the imported rows, so
        # a failed or rejected import never blocks a later retry.
        db.session.add(
            ImportBatch(
                export_id=export_id,
                user_id=current_user.id,
                imported_at=datetime.utcnow(),
            )
        )
        added_plants = _import_plant_rows(neo, plant_rows)
        added_media = _import_media_rows(tmpdir, media_rows)
        db.session.commit()
    except Exception:
        # Discard the partial SQL work, then let the error propagate.
        db.session.rollback()
        raise
    finally:
        neo.close()

    flash(f"Imported {added_plants} plants and {added_media} images.", "success")
    return redirect(request.url)


def _handle_zip_upload(file):
    """Spool the uploaded ZIP to disk, import it, and always clean up."""
    tmp_zip = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    # Close our handle first; the upload is written to the path by name.
    tmp_zip.close()
    try:
        file.save(tmp_zip.name)
        try:
            archive = zipfile.ZipFile(tmp_zip.name)
        except zipfile.BadZipFile:
            flash("Uploaded file is not a valid ZIP.", "danger")
            return redirect(request.url)

        # Both the archive handle and the extraction directory are released
        # on every exit path, including exceptions.
        with archive, tempfile.TemporaryDirectory() as tmpdir:
            return _import_archive(archive, tmpdir)
    finally:
        os.remove(tmp_zip.name)


def _handle_csv_upload(file):
    """Parse a standalone CSV and stash its rows in the session for review."""
    try:
        stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
    except (UnicodeDecodeError, OSError):
        flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
        return redirect(request.url)

    reader = csv.DictReader(stream)
    missing = REQUIRED_HEADERS - set(reader.fieldnames or [])
    if missing:
        flash(f"Missing required CSV headers: {missing}", "error")
        return redirect(request.url)

    # Lower-cased scientific names -> model, used for fuzzy suggestions.
    all_sci = {s.name.lower(): s for s in PlantScientificName.query.all()}
    known_names = list(all_sci)

    review_list = []
    for row in reader:
        # "or ''" guards against short rows, where DictReader yields None.
        uuid_val = (row.get("uuid") or "").strip().strip('"')
        name = (row.get("name") or "").strip()
        sci_name = (row.get("scientific_name") or "").strip()
        plant_type = (row.get("plant_type") or "").strip() or "plant"
        mother_uuid = (row.get("mother_uuid") or "").strip().strip('"')

        # Skip any row where required fields are missing.
        if not (uuid_val and name and plant_type):
            continue

        # Suggest a close match only when it differs from what was typed.
        sci_key = sci_name.lower()
        suggestions = difflib.get_close_matches(
            sci_key, known_names, n=1, cutoff=0.8
        )
        if suggestions and suggestions[0] != sci_key:
            suggested = all_sci[suggestions[0]].name
        else:
            suggested = None

        review_list.append({
            "uuid": uuid_val,
            "name": name,
            "sci_name": sci_name,
            "suggested": suggested,
            "plant_type": plant_type,
            "mother_uuid": mother_uuid,
        })

    # Assign whole lists so the session registers the change.
    session["pending_rows"] = list(review_list)
    session["review_list"] = review_list
    return redirect(url_for("importer.review"))


@bp.route("/upload", methods=["GET", "POST"])
@login_required
def upload():
    """Upload endpoint for plant imports.

    GET renders the upload form. POST dispatches on the file extension:
    ``.zip`` runs the full export-archive import (plants, media, Neo4j),
    ``.csv`` parses the rows and redirects to the review page. Any other
    extension, or a missing file, flashes an error and redirects back here.
    """
    if request.method == "POST":
        file = request.files.get("file")
        if not file or not file.filename:
            flash("No file selected", "error")
            return redirect(request.url)

        filename = file.filename.lower().strip()

        if filename.endswith(".zip"):
            return _handle_zip_upload(file)

        if filename.endswith(".csv"):
            return _handle_csv_upload(file)

        flash("Unsupported file type. Please upload a ZIP or CSV.", "danger")
        return redirect(request.url)

    # GET: render the upload form
    return render_template("importer/upload.html", csrf_token=generate_csrf())
|
||||
|
||||
|
||||
@ -125,100 +304,76 @@ def review():
|
||||
review_list = session.get("review_list", [])
|
||||
|
||||
if request.method == "POST":
|
||||
neo = get_neo4j_handler()
|
||||
added = 0
|
||||
neo = get_neo4j_handler()
|
||||
added = 0
|
||||
|
||||
# Re-load preload maps to avoid NameError if used below
|
||||
all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
|
||||
all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
|
||||
all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}
|
||||
|
||||
for row in rows:
|
||||
uuid = row.get("uuid")
|
||||
uuid_val = row.get("uuid")
|
||||
name = row.get("name")
|
||||
sci_name = row.get("sci_name")
|
||||
suggested = row.get("suggested")
|
||||
plant_type = row.get("plant_type")
|
||||
mother_uuid = row.get("mother_uuid")
|
||||
|
||||
# Check if user clicked "confirm" for a suggested scientific name
|
||||
accepted_key = f"confirm_{uuid}"
|
||||
accepted = request.form.get(accepted_key)
|
||||
accepted = request.form.get(f"confirm_{uuid_val}")
|
||||
|
||||
# ─── MySQL: PlantCommonName ────────────────────────────────────────────────
|
||||
common = PlantCommonName.query.filter_by(name=name).first()
|
||||
if not common:
|
||||
common = PlantCommonName(name=name)
|
||||
db.session.add(common)
|
||||
db.session.flush()
|
||||
all_common[common.name.lower()] = common
|
||||
else:
|
||||
all_common[common.name.lower()] = common
|
||||
all_common[common.name.lower()] = common
|
||||
|
||||
# ─── MySQL: PlantScientificName ───────────────────────────────────────────
|
||||
sci_to_use = suggested if (suggested and accepted) else sci_name
|
||||
scientific = PlantScientificName.query.filter_by(name=sci_to_use).first()
|
||||
use_name = suggested if (suggested and accepted) else sci_name
|
||||
scientific = PlantScientificName.query.filter_by(name=use_name).first()
|
||||
if not scientific:
|
||||
scientific = PlantScientificName(
|
||||
name = sci_to_use,
|
||||
name = use_name,
|
||||
common_id = common.id
|
||||
)
|
||||
db.session.add(scientific)
|
||||
db.session.flush()
|
||||
all_scientific[scientific.name.lower()] = scientific
|
||||
else:
|
||||
all_scientific[scientific.name.lower()] = scientific
|
||||
all_sci = all_scientific[scientific.name.lower()] = scientific
|
||||
|
||||
# ─── Decide if this plant’s data is “verified” by the user ────────────────
|
||||
data_verified = False
|
||||
if (not suggested) or (suggested and accepted):
|
||||
data_verified = True
|
||||
verified = not suggested or (suggested and accepted)
|
||||
|
||||
# ─── MySQL: Plant record ─────────────────────────────────────────────────
|
||||
plant = Plant.query.filter_by(uuid=uuid).first()
|
||||
plant = Plant.query.filter_by(uuid=uuid_val).first()
|
||||
if not plant:
|
||||
plant = Plant(
|
||||
uuid = uuid,
|
||||
uuid = uuid_val,
|
||||
common_id = common.id,
|
||||
scientific_id = scientific.id,
|
||||
plant_type = plant_type,
|
||||
owner_id = current_user.id,
|
||||
data_verified = data_verified
|
||||
data_verified = verified
|
||||
)
|
||||
db.session.add(plant)
|
||||
db.session.flush() # so plant.id is now available
|
||||
|
||||
db.session.flush()
|
||||
log = PlantOwnershipLog(
|
||||
plant_id = plant.id,
|
||||
user_id = current_user.id,
|
||||
date_acquired = datetime.utcnow(),
|
||||
transferred = False,
|
||||
is_verified = data_verified
|
||||
is_verified = verified
|
||||
)
|
||||
db.session.add(log)
|
||||
added += 1
|
||||
else:
|
||||
# Skip duplicates if the same UUID already exists
|
||||
pass
|
||||
|
||||
# ─── Neo4j: ensure the Plant node exists ─────────────────────────────────
|
||||
neo.create_plant_node(uuid, name)
|
||||
|
||||
# ─── Neo4j: create a LINEAGE relationship if mother_uuid was provided ─────
|
||||
neo.create_plant_node(plant.uuid, plant.common.name)
|
||||
if mother_uuid:
|
||||
# Replace the old call with the correct method name:
|
||||
neo.create_lineage(child_uuid=uuid, parent_uuid=mother_uuid)
|
||||
neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)
|
||||
|
||||
# Commit all MySQL changes at once
|
||||
db.session.commit()
|
||||
neo.close()
|
||||
|
||||
flash(f"{added} plants added (MySQL) and Neo4j nodes/relationships created.", "success")
|
||||
|
||||
flash(f"{added} plants added (MySQL) and Neo4j updated.", "success")
|
||||
session.pop("pending_rows", None)
|
||||
session.pop("review_list", None)
|
||||
return redirect(url_for("importer.upload"))
|
||||
|
||||
# GET → re-render the review page with the same review_list
|
||||
return render_template(
|
||||
"importer/review.html",
|
||||
review_list=review_list,
|
||||
|
@ -31,7 +31,7 @@
|
||||
class="form-control"
|
||||
id="file"
|
||||
name="file"
|
||||
accept=".csv"
|
||||
accept=".csv,.zip"
|
||||
required
|
||||
>
|
||||
<div class="form-text">
|
||||
|
Reference in New Issue
Block a user