from typing import List
from geonature.core.imports.checks.errors import ImportCodeError
from geonature.core.imports.models import BibFields, Entity, TImports
import sqlalchemy as sa
from sqlalchemy.orm import aliased
from geonature.utils.env import db
from geonature.core.imports.checks.sql.utils import report_erroneous_rows
__all__ = [
"set_id_parent_from_destination",
"set_parent_line_no",
"check_no_parent_entity",
"check_erroneous_parent_entities",
]
[docs]
def set_id_parent_from_destination(
imprt: TImports,
parent_entity: Entity,
child_entity: Entity,
id_field: BibFields,
fields: List[BibFields],
) -> None:
"""
Complete the id_parent column in the transient table of an import when the parent already exists in the destination table.
Parameters
----------
imprt : TImports
The import to update.
parent_entity : Entity
The entity of the parent.
child_entity : Entity
The entity of the child.
id_field : BibFields
The field containing the id of the parent.
fields : List[BibFields]
The fields to use for matching the child with its parent in the destination table.
"""
transient_table = imprt.destination.get_transient_table()
parent_destination = parent_entity.get_destination_table()
for field in fields:
if field is None:
continue
db.session.execute(
sa.update(transient_table)
.where(
transient_table.c.id_import == imprt.id_import,
transient_table.c[child_entity.validity_column].isnot(None),
)
# We need to complete the id_parent only for child not on the same row than a parent
.where(transient_table.c[parent_entity.validity_column].is_(None))
# finding parent row:
.where(transient_table.c[field.dest_column] == parent_destination.c[field.dest_column])
.values({id_field.dest_column: parent_destination.c[id_field.dest_column]})
)
[docs]
def set_parent_line_no(
imprt: TImports,
parent_entity: Entity,
child_entity: Entity,
id_parent: BibFields,
parent_line_no: BibFields,
fields: List[BibFields],
) -> None:
"""
Set parent_line_no on child entities when:
- no parent entity on same line
- parent entity is valid
- looking for parent entity through each given field in fields
Parameters
----------
imprt : TImports
The import to update.
parent_entity : Entity
The entity of the parent.
child_entity : Entity
The entity of the child.
id_parent : BibFields
The field containing the id of the parent.
parent_line_no : BibFields
The field containing the line number of the parent.
fields : List[BibFields]
The fields to use for matching the child with its parent in the destination table.
"""
transient_child = imprt.destination.get_transient_table()
transient_parent = aliased(transient_child, name="transient_parent")
for field in fields:
if field is None:
continue
db.session.execute(
sa.update(transient_child)
.where(
transient_child.c.id_import == imprt.id_import,
transient_child.c[child_entity.validity_column].isnot(None),
)
# We need to complete the parent_line_no only for child not on the same row than a parent
.where(transient_child.c[parent_entity.validity_column].is_(None))
# finding parent row:
.where(
transient_parent.c.id_import == imprt.id_import,
transient_parent.c[parent_entity.validity_column].isnot(None),
transient_parent.c[field.dest_column] == transient_child.c[field.dest_column],
)
.values({parent_line_no: transient_parent.c.line_no})
)
[docs]
def check_no_parent_entity(
imprt: TImports,
parent_entity: Entity,
child_entity: Entity,
id_parent: BibFields,
parent_line_no: BibFields,
) -> None:
"""
Station may be referenced:
- on the same line (station_validity is not None)
- by id_parent (parent already exists in destination)
- by parent_line_no (new parent from another line of the imported file - see set_parent_line_no)
Parameters
----------
imprt : TImports
The import to check.
parent_entity : Entity
The entity of the parent.
child_entity : Entity
The entity of the child.
id_parent : BibFields
The field containing the id of the parent.
parent_line_no : BibFields
The field containing the line number of the parent.
"""
transient_table = imprt.destination.get_transient_table()
report_erroneous_rows(
imprt,
child_entity,
error_type=ImportCodeError.NO_PARENT_ENTITY,
error_column=id_parent,
whereclause=sa.and_(
# Complains for missing parent only for valid child, as parent may be missing
# because of erroneous uuid required to find the parent.
transient_table.c[child_entity.validity_column].is_(True),
transient_table.c[parent_entity.validity_column].is_(None), # no parent on same line
transient_table.c[id_parent].is_(None), # no parent in destination
transient_table.c[parent_line_no].is_(None), # no parent on another line
),
)
[docs]
def check_erroneous_parent_entities(
imprt: TImports, parent_entity: Entity, child_entity: Entity, parent_line_no: BibFields
) -> None:
"""
Check for erroneous (not valid) parent entities in the transient table of an import.
Parameters
----------
imprt : TImports
The import to check.
parent_entity : Entity
The entity of the parent.
child_entity : Entity
The entity of the child.
parent_line_no : BibFields
The field containing the line number of the parent.
Notes
-----
# Note: if child entity reference parent entity by id_parent, this means the parent
# entity is already in destination table so obviously valid.
The error codes are:
- ERRONEOUS_PARENT_ENTITY: the parent on the same line is not valid.
"""
transient_child = imprt.destination.get_transient_table()
transient_parent = aliased(transient_child)
report_erroneous_rows(
imprt,
child_entity,
error_type=ImportCodeError.ERRONEOUS_PARENT_ENTITY,
error_column="",
whereclause=sa.and_(
transient_child.c[child_entity.validity_column].isnot(None),
sa.or_(
# parent is on the same line
transient_child.c[parent_entity.validity_column].is_(False),
sa.and_( # parent is on another line referenced by parent_line_no
transient_parent.c.id_import == transient_child.c.id_import,
transient_parent.c.line_no == transient_child.c[parent_line_no],
transient_parent.c[parent_entity.validity_column].is_(False),
),
),
),
)