lots of changes

This commit is contained in:
2025-06-06 02:00:05 -05:00
parent 6cf2fdec61
commit 9daee50a3a
33 changed files with 1478 additions and 260 deletions

View File

@ -4,6 +4,7 @@ import csv
import io
import difflib
from datetime import datetime
from flask import Blueprint, request, render_template, redirect, flash, session, url_for
from flask_login import login_required, current_user
from flask_wtf.csrf import generate_csrf
@ -11,109 +12,109 @@ from flask_wtf.csrf import generate_csrf
from app.neo4j_utils import get_neo4j_handler
from plugins.plant.models import (
db,
Plant, PlantCommonName, PlantScientificName, PlantOwnershipLog
Plant,
PlantCommonName,
PlantScientificName,
PlantOwnershipLog
)
bp = Blueprint("importer", __name__, template_folder="templates", url_prefix="/import")
bp = Blueprint('importer', __name__, template_folder='templates', url_prefix='/import')
REQUIRED_HEADERS = {"uuid", "plant_type", "name"}
# ────────────────────────────────────────────────────────────────────────────────
# Redirect “/import/” → “/import/upload”
# ────────────────────────────────────────────────────────────────────────────────
@bp.route("/", methods=["GET"])
@login_required
def index():
    """Redirect the blueprint root to the CSV upload page.

    The blueprint is mounted under ``/import``, so a request for ``/import/``
    is forwarded to the ``importer.upload`` endpoint.
    """
    upload_url = url_for("importer.upload")
    return redirect(upload_url)
@bp.route("/", methods=["GET", "POST"])
# ────────────────────────────────────────────────────────────────────────────────
# Required CSV headers for import
#
# Only these three columns are mandatory. ``scientific_name`` and
# ``mother_uuid`` are optional: the upload form advertises them as optional
# and the row parser reads both with an empty-string default, so listing them
# here would reject CSV files that legitimately omit those columns.
# ────────────────────────────────────────────────────────────────────────────────
REQUIRED_HEADERS = {"uuid", "plant_type", "name"}
@bp.route("/upload", methods=["GET", "POST"])
@login_required
def upload():
    """Show the CSV upload form (GET) or stage an uploaded CSV for review (POST).

    On POST the uploaded file is decoded as UTF-8 (a leading BOM is
    tolerated) and parsed with ``csv.DictReader``.  The header row is checked
    against the module-level ``REQUIRED_HEADERS``; every row that has a
    ``uuid`` and a ``name`` is then stored in the session under both
    ``pending_rows`` and ``review_list`` and the user is redirected to
    ``importer.review``.

    Any failure (no file, undecodable or malformed CSV, missing headers)
    flashes an ``error`` message and redirects back to this page.
    """
    if request.method != "POST":
        # GET → show upload form
        return render_template("importer/upload.html", csrf_token=generate_csrf())

    file = request.files.get("file")
    if not file:
        flash("No file selected", "error")
        return redirect(request.url)

    # Decode as UTF-8-SIG to strip any BOM, then parse with csv.DictReader.
    # DictReader is lazy, so the header row and the data rows are consumed
    # inside the ``try``: malformed CSV only raises once it is actually read.
    try:
        stream = io.StringIO(file.stream.read().decode("utf-8-sig"))
        reader = csv.DictReader(stream)
        headers = set(reader.fieldnames or [])
        rows = list(reader)
    except (UnicodeDecodeError, csv.Error):
        flash("Failed to read CSV file. Ensure it is valid UTF-8.", "error")
        return redirect(request.url)

    missing = REQUIRED_HEADERS - headers
    if missing:
        # Sorted so the message is stable (set ordering is not).
        flash(f"Missing required CSV headers: {', '.join(sorted(missing))}", "error")
        return redirect(request.url)

    # Preload existing scientific names (lowercased keys for fuzzy matching)
    all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}

    staged_rows = []
    for row in rows:
        plant_uuid = _clean_cell(row, "uuid", unquote=True)
        name = _clean_cell(row, "name")
        sci_name = _clean_cell(row, "scientific_name")
        plant_type = _clean_cell(row, "plant_type") or "plant"
        mother_uuid = _clean_cell(row, "mother_uuid", unquote=True)

        # Skip any row where required fields are missing
        if not (plant_uuid and name and plant_type):
            continue

        staged_rows.append({
            "uuid": plant_uuid,
            "name": name,
            "sci_name": sci_name,
            # If the scientific name doesn't match exactly, suggest a close match
            "suggested": _suggest_scientific_name(sci_name, all_scientific),
            "plant_type": plant_type,
            "mother_uuid": mother_uuid,
        })

    # Both keys hold the same rows; each row appears once per list.
    # NOTE(review): the whole CSV is kept in the session — if the app uses
    # Flask's default cookie-backed session, large files may exceed the
    # cookie size limit. Confirm which session backend is configured.
    session["pending_rows"] = list(staged_rows)
    session["review_list"] = staged_rows
    return redirect(url_for("importer.review"))


def _clean_cell(row, key, *, unquote=False):
    """Return ``row[key]`` with surrounding whitespace removed.

    A missing key or a ``None`` value (``csv.DictReader`` fills short rows
    with ``None``) yields ``""``.  With ``unquote=True`` any stray leading or
    trailing double quotes are stripped as well.
    """
    value = (row.get(key) or "").strip()
    return value.strip('"') if unquote else value


def _suggest_scientific_name(sci_name, known_scientific):
    """Return the stored scientific name closest to *sci_name*, or ``None``.

    ``known_scientific`` maps lowercased names to objects exposing ``.name``.
    ``None`` is returned when *sci_name* is blank, already matches a known
    name exactly (case-insensitively), or has no match at or above the 0.8
    similarity cutoff.
    """
    key = sci_name.lower()
    if not key or key in known_scientific:
        return None
    close = difflib.get_close_matches(key, known_scientific, n=1, cutoff=0.8)
    return known_scientific[close[0]].name if close else None
@ -127,107 +128,97 @@ def review():
neo = get_neo4j_handler()
added = 0
# —————————————————————————————————————————————
# (1) CREATE MySQL records & MERGE every Neo4j node
# —————————————————————————————————————————————
# Re-load preload maps to avoid NameError if used below
all_common = {c.name.lower(): c for c in PlantCommonName.query.all()}
all_scientific = {s.name.lower(): s for s in PlantScientificName.query.all()}
for row in rows:
uuid_raw = row["uuid"]
uuid = uuid_raw.strip().strip('"')
uuid = row.get("uuid")
name = row.get("name")
sci_name = row.get("sci_name")
suggested = row.get("suggested")
plant_type = row.get("plant_type")
mother_uuid = row.get("mother_uuid")
name_raw = row["name"]
name = name_raw.strip()
# Check if user clicked "confirm" for a suggested scientific name
accepted_key = f"confirm_{uuid}"
accepted = request.form.get(accepted_key)
sci_raw = row["sci_name"]
sci_name = sci_raw.strip()
plant_type = row["plant_type"].strip()
mother_raw = row["mother_uuid"]
mother_uuid = mother_raw.strip().strip('"')
suggested = row.get("suggested_scientific_name")
# ——— MySQL: PlantCommonName ———
# ─── MySQL: PlantCommonName ────────────────────────────────────────────────
common = PlantCommonName.query.filter_by(name=name).first()
if not common:
common = PlantCommonName(name=name)
db.session.add(common)
db.session.flush()
all_common[common.name.lower()] = common
else:
all_common[common.name.lower()] = common
# ——— MySQL: PlantScientificName ———
accepted = request.form.get(f"confirm_{uuid}")
# ─── MySQL: PlantScientificName ───────────────────────────────────────────
sci_to_use = suggested if (suggested and accepted) else sci_name
scientific = PlantScientificName.query.filter_by(name=sci_to_use).first()
if not scientific:
scientific = PlantScientificName(name=sci_to_use, common_id=common.id)
scientific = PlantScientificName(
name = sci_to_use,
common_id = common.id
)
db.session.add(scientific)
db.session.flush()
all_scientific[scientific.name.lower()] = scientific
else:
all_scientific[scientific.name.lower()] = scientific
# ——— MySQL: Plant row ———
# ─── Decide if this plant's data is “verified” by the user ────────────────
data_verified = False
if (not suggested) or (suggested and accepted):
data_verified = True
# ─── MySQL: Plant record ─────────────────────────────────────────────────
plant = Plant.query.filter_by(uuid=uuid).first()
if not plant:
plant = Plant(
uuid=uuid,
common_id=common.id,
scientific_id=scientific.id,
plant_type=plant_type,
owner_id=current_user.id,
is_verified=bool(accepted)
uuid = uuid,
common_id = common.id,
scientific_id = scientific.id,
plant_type = plant_type,
owner_id = current_user.id,
data_verified = data_verified
)
db.session.add(plant)
db.session.flush() # so plant.id is available immediately
added += 1
db.session.flush() # so plant.id is now available
# ——— MySQL: Create initial ownership log entry ———
log = PlantOwnershipLog(
plant_id = plant.id,
user_id = current_user.id,
date_acquired = datetime.utcnow(),
transferred = False,
is_verified = bool(accepted)
is_verified = data_verified
)
db.session.add(log)
added += 1
else:
# Skip duplicates if the same UUID already exists
pass
# ——— Neo4j: ensure a node exists for this plant UUID ———
# ─── Neo4j: ensure the Plant node exists ─────────────────────────────────
neo.create_plant_node(uuid, name)
# Commit MySQL so that all Plant/OwnershipLog rows exist
db.session.commit()
# —————————————————————————————————————————————
# (2) CREATE Neo4j LINEAGE relationships (child → parent). (Unchanged)
# —————————————————————————————————————————————
for row in rows:
child_raw = row.get("uuid", "")
child_uuid = child_raw.strip().strip('"')
mother_raw = row.get("mother_uuid", "")
mother_uuid = mother_raw.strip().strip('"')
print(
f"[DEBUG] row → child_raw={child_raw!r}, child_uuid={child_uuid!r}; "
f"mother_raw={mother_raw!r}, mother_uuid={mother_uuid!r}"
)
# ─── Neo4j: create a LINEAGE relationship if mother_uuid was provided ─────
if mother_uuid:
neo.create_plant_node(mother_uuid, name="Unknown")
neo.create_lineage(child_uuid, mother_uuid)
else:
print(f"[DEBUG] Skipping LINEAGE creation for child {child_uuid!r} (no mother_uuid)")
# (Optional) Check two known UUIDs
neo.debug_check_node("8b1059c8-8dd3-487a-af19-1eb548788e87")
neo.debug_check_node("2ee2e0e7-69de-4d8f-abfe-4ed973c3d760")
# Replace the old call with the correct method name:
neo.create_lineage(child_uuid=uuid, parent_uuid=mother_uuid)
# Commit all MySQL changes at once
db.session.commit()
neo.close()
flash(f"{added} plants added (MySQL) + Neo4j nodes/relations created.", "success")
flash(f"{added} plants added (MySQL) and Neo4j nodes/relationships created.", "success")
session.pop("pending_rows", None)
session.pop("review_list", None)
return redirect(url_for("importer.upload"))
# GET → re-render the review page with the same review_list
return render_template(
"importer/review.html",
review_list=review_list,

View File

@ -1,17 +1,39 @@
{% extends "core_ui/base.html" %}
{% block title %}Review Matches{% endblock %}
{% block title %}Review Suggested Matches{% endblock %}
{% block content %}
<div class="container py-4">
<h2 class="mb-4">🔍 Review Suggested Matches</h2>
<form method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token }}">
{% if review_list %}
<p class="text-muted mb-3">Confirm the suggested scientific name replacements below. Only confirmed matches will override user input.</p>
<table class="table table-bordered table-sm align-middle">
<p>
Confirm the suggested scientific-name replacements below.
Only checked boxes (“Confirm”) will override the raw user input.
</p>
{# Display flash messages (error, success, etc.) #}
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
{% for category, message in messages %}
<div
class="alert alert-{{ 'danger' if category == 'error' else category }} alert-dismissible fade show"
role="alert"
>
{{ message }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endfor %}
{% endif %}
{% endwith %}
{% if review_list and review_list|length > 0 %}
<form method="POST">
{# Hidden CSRF token #}
<input type="hidden" name="csrf_token" value="{{ csrf_token }}">
<table class="table table-striped">
<thead>
<tr>
<th>Common Name</th>
<th>User Input</th>
<th>User Input (Scientific Name)</th>
<th>Suggested Match</th>
<th>Confirm</th>
</tr>
@ -19,20 +41,32 @@
<tbody>
{% for row in review_list %}
<tr>
<td>{{ row.common_name }}</td>
<td><code>{{ row.user_input }}</code></td>
<td><code>{{ row.suggested_name }}</code></td>
<td>{{ row.name }}</td>
<td>{{ row.sci_name }}</td>
<td>{{ row.suggested or '-' }}</td>
<td>
<input type="checkbox" name="confirm_{{ row.uuid }}" value="1">
{% if row.suggested %}
<input
type="checkbox"
name="confirm_{{ row.uuid }}"
aria-label="Confirm suggested match for {{ row.uuid }}"
>
{% else %}
&mdash;
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<p>No matches found that need confirmation.</p>
{% endif %}
<button type="submit" class="btn btn-primary mt-3">Finalize Import</button>
</form>
<button type="submit" class="btn btn-success">Confirm &amp; Import</button>
<a href="{{ url_for('importer.upload') }}" class="btn btn-secondary ms-2">Cancel</a>
</form>
{% else %}
<div class="alert alert-info">
No rows to review. <a href="{{ url_for('importer.upload') }}">Upload another CSV?</a>
</div>
{% endif %}
</div>
{% endblock %}

View File

@ -1,28 +1,45 @@
{% extends "core_ui/base.html" %}
{% block title %}CSV Import{% endblock %}
{% block content %}
<div class="container py-4">
<h2 class="mb-4">📤 Import Plant Data</h2>
{# Display flash messages (error, success, etc.) #}
{% with messages = get_flashed_messages(with_categories=true) %}
{% if messages %}
{% for category, message in messages %}
<div class="alert alert-{{ 'danger' if category == 'error' else category }} alert-dismissible fade show" role="alert">
<div
class="alert alert-{{ 'danger' if category == 'error' else category }} alert-dismissible fade show"
role="alert"
>
{{ message }}
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div>
{% endfor %}
{% endif %}
{% endwith %}
<form method="POST" enctype="multipart/form-data">
{# Hidden CSRF token #}
<input type="hidden" name="csrf_token" value="{{ csrf_token }}">
<div class="mb-3">
<label for="file" class="form-label">Choose CSV File</label>
<input type="file" class="form-control" id="file" name="file" required>
<input
type="file"
class="form-control"
id="file"
name="file"
accept=".csv"
required
>
<div class="form-text">
Required: <code>uuid</code>, <code>plant_type</code>, <code>name</code><br>
Optional: <code>scientific_name</code>, <code>mother_uuid</code>
Required columns: <code>uuid</code>, <code>plant_type</code>, <code>name</code><br>
Optional columns: <code>scientific_name</code>, <code>mother_uuid</code>
</div>
</div>
<button type="submit" class="btn btn-success">Upload</button>
</form>
</div>