219 lines
6.8 KiB
Python
219 lines
6.8 KiB
Python
# File: plugins/utility/tasks.py
|
||
|
||
# Standard library
|
||
import csv
|
||
import os
|
||
import zipfile
|
||
import tempfile
|
||
import shutil
|
||
import io
|
||
from datetime import datetime
|
||
|
||
# Third-party
|
||
from celery.utils.log import get_task_logger
|
||
from celery.exceptions import Retry
|
||
from flask import current_app
|
||
from werkzeug.datastructures import FileStorage
|
||
|
||
# Application
|
||
from app import db
|
||
from app.neo4j_utils import get_neo4j_handler
|
||
from app.celery_app import celery
|
||
|
||
# Plugins
|
||
from plugins.plant.models import (
|
||
Plant,
|
||
PlantCommonName,
|
||
PlantScientificName,
|
||
PlantOwnershipLog,
|
||
)
|
||
from plugins.utility.models import ImportBatch
|
||
from plugins.media.routes import _process_upload_file
|
||
|
||
# Module-level logger obtained from Celery's get_task_logger, named after
# this module and shared by the import task and its helpers below.
logger = get_task_logger(__name__)
|
||
|
||
|
||
@celery.task(name="plugins.utility.tasks.import_text_data", bind=True)
def import_text_data(self, filepath, import_type, batch_id):
    """
    Celery task entrypoint for both ZIP and CSV imports.

    Marks the ImportBatch as 'started', runs the matching importer and then
    marks the batch 'complete'. On any failure the batch is marked 'failed'
    with the error text and the task is re-queued through ``self.retry``
    with a 60 second countdown.

    filepath:    path to uploaded .zip or .csv
    import_type: "zip" or "csv"
    batch_id:    ImportBatch.id to update status

    Raises:
        ValueError: if no ImportBatch exists for ``batch_id`` (raised
            directly, not retried). An unsupported ``import_type`` also
            produces a ValueError, but that one is recorded on the batch and
            goes through the normal retry path.
    """
    batch = ImportBatch.query.get(batch_id)
    if batch is None:
        # Without a batch row there is no status to update, so fail with a
        # clear message instead of an AttributeError on ``None`` later on.
        logger.error("ImportBatch %s not found; cannot import %s", batch_id, filepath)
        raise ValueError(f"ImportBatch {batch_id!r} not found")

    try:
        # mark as started
        batch.status = 'started'
        db.session.commit()

        # ZIP import
        if import_type == "zip":
            tmpdir = tempfile.mkdtemp()
            try:
                with zipfile.ZipFile(filepath) as zf:
                    zf.extractall(tmpdir)
                _do_import_zip(tmpdir, batch)
            finally:
                # Clean the extraction directory first so it is removed even
                # when deleting the upload itself fails.
                shutil.rmtree(tmpdir, ignore_errors=True)
                # NOTE(review): the upload is deleted on failure as well, so
                # the retry scheduled below will not find the file again --
                # confirm whether deletion should happen only on success.
                try:
                    os.remove(filepath)
                except OSError:
                    # Do not let a cleanup problem mask the import's outcome.
                    logger.warning(
                        "Could not remove uploaded file %s", filepath, exc_info=True
                    )

        # CSV import (reviewed rows)
        elif import_type == "csv":
            _do_import_csv(filepath, batch)

        else:
            # Previously an unknown type fell through and the batch was
            # marked 'complete' although nothing had been imported.
            raise ValueError(f"Unsupported import_type: {import_type!r}")

        # mark as complete
        batch.status = 'complete'
        db.session.commit()

    except Exception as exc:
        logger.exception("Import failed")
        # A failed flush/commit leaves the session in an error state; roll it
        # back first, otherwise the status update below would itself raise.
        db.session.rollback()
        batch.status = 'failed'
        batch.error = str(exc)
        db.session.commit()
        raise self.retry(exc=exc, countdown=60)
|
||
|
||
|
||
def _do_import_zip(tmpdir, batch):
    """
    Perform the plants.csv + media.csv import from tmpdir and log into Neo4j.

    tmpdir: directory holding the extracted archive; plants.csv and
            media.csv are read from its top level and image files from its
            ``images`` sub-directory.
    batch:  ImportBatch whose ``user_id`` is used as owner of the plants and
            uploader of the media.

    Plants are committed before media is processed. Media rows that refer to
    an unknown plant, to a missing file, or to a path outside the extracted
    ``images`` directory are skipped. The Neo4j handler is closed even when
    the import fails part-way.
    """
    # 1) read plants.csv
    plant_path = os.path.join(tmpdir, "plants.csv")
    with open(plant_path, newline="", encoding="utf-8-sig") as pf:
        plant_rows = list(csv.DictReader(pf))

    neo = get_neo4j_handler()
    try:
        # 2) insert plants
        plant_map = {}  # plant UUID -> database id, for rows imported in this run
        for row in plant_rows:
            common = PlantCommonName.query.filter_by(name=row["Name"]).first()
            if not common:
                common = PlantCommonName(name=row["Name"])
                db.session.add(common)
                db.session.flush()

            scientific = PlantScientificName.query.filter_by(
                name=row["Scientific Name"]
            ).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=row["Scientific Name"],
                    common_id=common.id,
                )
                db.session.add(scientific)
                db.session.flush()

            raw_mu = row.get("Mother UUID") or None
            # Only store the mother reference on the row when the mother was
            # already inserted earlier in this same import.
            mu_for_insert = raw_mu if raw_mu in plant_map else None

            p = Plant(
                uuid=row["UUID"],
                common_id=common.id,
                scientific_id=scientific.id,
                plant_type=row["Type"],
                owner_id=batch.user_id,
                vendor_name=row["Vendor Name"] or None,
                price=float(row["Price"]) if row["Price"] else None,
                mother_uuid=mu_for_insert,
                notes=row["Notes"] or None,
                short_id=row.get("Short ID") or None,
                data_verified=True,
            )
            db.session.add(p)
            db.session.flush()  # assigns p.id
            plant_map[p.uuid] = p.id

            log = PlantOwnershipLog(
                plant_id=p.id,
                user_id=batch.user_id,
                date_acquired=datetime.utcnow(),
                transferred=False,
                is_verified=True,
            )
            db.session.add(log)

            neo.create_plant_node(p.uuid, row["Name"])
            if raw_mu:
                # The graph edge uses the raw mother UUID, whether or not the
                # mother was part of this import.
                neo.create_lineage(child_uuid=p.uuid, parent_uuid=raw_mu)

        db.session.commit()

        # 3) import media.csv
        media_path = os.path.join(tmpdir, "media.csv")
        with open(media_path, newline="", encoding="utf-8-sig") as mf:
            media_rows = list(csv.DictReader(mf))

        # Resolved once; every image must live underneath this directory.
        images_root = os.path.realpath(os.path.join(tmpdir, "images"))

        for mrow in media_rows:
            puuid = mrow["Plant UUID"]
            pid = plant_map.get(puuid)
            if not pid:
                continue

            subpath = mrow["Image Path"].split('uploads/', 1)[-1]
            src = os.path.realpath(os.path.join(images_root, subpath))
            # "Image Path" comes from the uploaded archive. An absolute path
            # or ".." segments would otherwise let the import read arbitrary
            # files from the server, so reject anything outside images_root.
            if not src.startswith(images_root + os.sep):
                logger.warning(
                    "Skipping media with path outside archive: %r", mrow["Image Path"]
                )
                continue
            if not os.path.isfile(src):
                continue

            with open(src, "rb") as f:
                fs = io.BytesIO(f.read())

            file_storage = FileStorage(
                stream=fs,
                filename=os.path.basename(subpath),
                content_type='image/jpeg',
            )
            media = _process_upload_file(
                file=file_storage,
                uploader_id=batch.user_id,
                plugin="plant",
                related_id=pid,
                plant_id=pid,
            )
            media.uploaded_at = datetime.fromisoformat(mrow["Uploaded At"])
            media.caption = mrow["Source Type"]
            db.session.add(media)

        db.session.commit()
    finally:
        # Release the Neo4j handler on success and on failure alike.
        neo.close()
|
||
|
||
|
||
def _do_import_csv(filepath, batch):
    """
    Perform a reviewed-CSV import (only plants, no media) from filepath.

    filepath: path to a CSV with the columns ``uuid``, ``name``,
              ``scientific_name``, ``plant_type`` and ``mother_uuid``.
    batch:    ImportBatch whose ``user_id`` becomes the owner of newly
              created plants.

    Existing plants (matched by UUID) are not modified in the database, but a
    Neo4j node, and a lineage edge when a mother UUID is given, is written
    for every processed row. Rows without a UUID are skipped. The Neo4j
    handler is closed even when the import fails part-way.
    """
    # Read everything up front so the file handle is closed before any
    # database work starts.
    with open(filepath, newline="", encoding="utf-8-sig") as fh:
        rows = list(csv.DictReader(fh))

    def _cell(row, key):
        # DictReader fills fields missing from a short row with None, so
        # ``row.get(key, "")`` can still return None; normalise to "".
        return (row.get(key) or "").strip()

    neo = get_neo4j_handler()
    try:
        for row in rows:
            uuid_val = _cell(row, "uuid")
            name = _cell(row, "name")
            sci_name = _cell(row, "scientific_name")
            plant_type = _cell(row, "plant_type") or "plant"
            mother_uuid = _cell(row, "mother_uuid") or None

            if not uuid_val:
                # A plant without a UUID cannot be looked up or linked later.
                logger.warning("Skipping CSV row without uuid: %r", row)
                continue

            common = PlantCommonName.query.filter_by(name=name).first()
            if not common:
                common = PlantCommonName(name=name)
                db.session.add(common)
                db.session.flush()

            scientific = PlantScientificName.query.filter_by(name=sci_name).first()
            if not scientific:
                scientific = PlantScientificName(
                    name=sci_name,
                    common_id=common.id,
                )
                db.session.add(scientific)
                db.session.flush()

            plant = Plant.query.filter_by(uuid=uuid_val).first()
            if not plant:
                plant = Plant(
                    uuid=uuid_val,
                    common_id=common.id,
                    scientific_id=scientific.id,
                    plant_type=plant_type,
                    owner_id=batch.user_id,
                    mother_uuid=mother_uuid,
                    data_verified=True,
                )
                db.session.add(plant)
                db.session.flush()

            neo.create_plant_node(plant.uuid, common.name)
            if mother_uuid:
                neo.create_lineage(child_uuid=plant.uuid, parent_uuid=mother_uuid)

        db.session.commit()
    finally:
        # Release the Neo4j handler on success and on failure alike.
        neo.close()
|