Code source de geonature.core.gn_meta.mtd.xml_parser

import datetime
import json

from flask import current_app
from lxml import etree as ET

from geonature.utils.config import config
from geonature.core.gn_meta.models import TAcquisitionFramework


# Prefix prepended to every tag name looked up by the helpers of this module,
# taken from the application configuration.
namespace = config["XML_NAMESPACE"]

# Single lxml parser instance shared by all the ``ET.fromstring`` calls below.
_xml_parser = ET.XMLParser(
    ns_clean=True,
    recover=True,
    encoding="utf-8",
)
def get_tag_content(parent, tag_name, default_value=None):
    """
    Return the text content of a child tag of an XML node.

    The default value is returned when the parent node is missing, when the
    tag does not exist under the parent, or when the tag has no text.

    Params:
        parent (etree Element or None): the node in which the tag is searched
        tag_name (str): the name of the tag, without the namespace prefix
            (the module-level ``namespace`` is prepended before the lookup)
        default_value (any): the value returned if the tag content is not available

    Return:
        any: the tag text or the default value
    """
    # Callers commonly pass the result of a previous ``find()``, which is None
    # when the node is absent: treat it like a missing tag instead of crashing.
    if parent is None:
        return default_value
    tag = parent.find(namespace + tag_name)
    if tag is None or not tag.text:
        return default_value
    return tag.text
def parse_actors_xml(actors):
    """
    Extract the actors described by the children of an XML actor node.

    Param:
        actors (etree Element): node whose children each describe one actor;
            may be None

    Returns:
        list: one dict per child node with the keys "name", "uuid_organism",
        "organism", "actor_role" and "email" (None for each missing tag);
        an empty list when ``actors`` is None
    """
    if actors is None:
        return []

    # (key of the returned dict, XML tag holding the value)
    fields = (
        ("name", "nomPrenom"),
        ("uuid_organism", "idOrganisme"),
        ("organism", "organisme"),
        ("actor_role", "roleActeur"),
        ("email", "mail"),
    )
    return [
        {key: get_tag_content(node, tag) for key, tag in fields}
        for node in actors
    ]
def parse_acquisition_framwork_xml(xml):
    """
    Parse the XML description of an acquisition framework given as a string.

    The first ``CadreAcquisition`` node found below the document root is
    handed over to ``parse_acquisition_framework``.

    Return:
        dict: the parsed acquisition framework
    """
    document = ET.fromstring(xml, parser=_xml_parser)
    af_path = "".join((".//", namespace, "CadreAcquisition"))
    af_node = document.find(af_path)
    return parse_acquisition_framework(af_node)
def parse_acquisition_framework(ca):
    """
    Build the acquisition framework data from a ``CadreAcquisition`` XML node.

    Optional parts of the document (``libelle``, ``ReferenceTemporelle``,
    ``attributsAdditionnels``, actor nodes) may be absent: the matching
    values are then left to their defaults instead of raising.

    Params:
        ca (etree Element): the ``CadreAcquisition`` node

    Return:
        dict: the parsed values, with the keys
        "unique_acquisition_framework_id", "acquisition_framework_name",
        "acquisition_framework_desc", "acquisition_framework_start_date",
        "acquisition_framework_end_date", "meta_create_date",
        "meta_update_date", "id_digitizer" and "actors"
    """
    # We extract all the required informations from the different tags of the XML file
    ca_uuid = get_tag_content(ca, "identifiantCadre")

    # The name is cut according to the length declared on the model column.
    # Nothing is cut when the tag is missing or the column declares no length.
    ca_name_max_length = TAcquisitionFramework.acquisition_framework_name.property.columns[
        0
    ].type.length
    ca_name = get_tag_content(ca, "libelle")
    if ca_name is not None and ca_name_max_length is not None:
        ca_name = ca_name[: ca_name_max_length - 1]

    ca_desc = get_tag_content(ca, "description", default_value="")
    ca_create_date = get_tag_content(ca, "dateCreationMtd", default_value=datetime.datetime.now())
    ca_update_date = get_tag_content(ca, "dateMiseAJourMtd")

    # Start/end dates live under ReferenceTemporelle, which may be missing
    date_info = ca.find(namespace + "ReferenceTemporelle")
    if date_info is not None:
        ca_start_date = get_tag_content(
            date_info, "dateLancement", default_value=datetime.datetime.now()
        )
        ca_end_date = get_tag_content(date_info, "dateCloture")
    else:
        ca_start_date = datetime.datetime.now()
        ca_end_date = None

    # We extract the ID of the user to assign it the JDD as an id_digitizer
    ca_id_digitizer = None
    attributs_additionnels_node = ca.find(namespace + "attributsAdditionnels")
    if attributs_additionnels_node is not None:
        for attr in attributs_additionnels_node:
            if get_tag_content(attr, "nomAttribut") == "ID_CREATEUR":
                ca_id_digitizer = get_tag_content(attr, "valeurAttribut")

    # We search for all the Contact nodes :
    # - Main contact in acteurPrincipal node
    # - Funder in acteurAutre node
    # - Project owner in acteurAutre node
    # - Project manager in acteurAutre node
    # ``findall`` returns an empty list when the tag is absent, so no prior
    # presence test is needed (testing the text of the container tag would
    # skip the actors of a document written without indentation).
    all_actors = []
    for contact_tag in ("acteurPrincipal", "acteurAutre"):
        for actor_node in ca.findall(namespace + contact_tag):
            all_actors.extend(parse_actors_xml(actor_node))

    return {
        "unique_acquisition_framework_id": ca_uuid,
        "acquisition_framework_name": ca_name,
        "acquisition_framework_desc": ca_desc,
        "acquisition_framework_start_date": ca_start_date,
        "acquisition_framework_end_date": ca_end_date,
        "meta_create_date": ca_create_date,
        "meta_update_date": ca_update_date,
        "id_digitizer": ca_id_digitizer,
        "actors": all_actors,
    }
def parse_jdd_xml(xml):
    """
    Parse the XML description of datasets (JDD) given as a string.

    Every ``JeuDeDonnees`` node found below the document root is converted
    to a dict. When ``current_app.config["MTD"]["ID_INSTANCE_FILTER"]`` is
    set, only the datasets whose ``ID_INSTANCE`` additional attribute equals
    this value are kept.

    Optional parts of the document (domain flags, ``libelle``,
    ``attributsAdditionnels``, ``BaseProduction``, contact nodes) may be
    absent: the matching values are then left to their defaults instead of
    raising.

    Return:
        list: a list of dict of the JDD in the xml
    """
    root = ET.fromstring(xml, parser=_xml_parser)
    jdd_list = []
    for jdd in root.findall(".//" + namespace + "JeuDeDonnees"):
        # We extract all the required informations from the different tags of the XML file
        jdd_uuid = get_tag_content(jdd, "identifiantJdd")
        ca_uuid = get_tag_content(jdd, "identifiantCadre")
        dataset_name = get_tag_content(jdd, "libelle")
        dataset_shortname = get_tag_content(jdd, "libelleCourt", default_value="")
        dataset_desc = get_tag_content(jdd, "description", default_value="")
        # The domain flags are decoded with json.loads, which only accepts
        # text: the default must be the JSON literal "false", not the bool False.
        terrestrial_domain = json.loads(
            get_tag_content(jdd, "domaineTerrestre", default_value="false")
        )
        marine_domain = json.loads(get_tag_content(jdd, "domaineMarin", default_value="false"))
        data_type = get_tag_content(jdd, "typeDonnees")
        create_date = get_tag_content(jdd, "dateCreation", default_value=datetime.datetime.now())
        update_date = get_tag_content(jdd, "dateRevision")

        # We extract the ID of the user to assign it the JDD as an id_digitizer
        id_digitizer = None
        id_instance = None
        code_statut_donnees_source = None
        attributs_additionnels_node = jdd.find(namespace + "attributsAdditionnels")
        if attributs_additionnels_node is not None:
            for attr in attributs_additionnels_node:
                attr_name = get_tag_content(attr, "nomAttribut")
                if attr_name == "ID_CREATEUR":
                    id_digitizer = get_tag_content(attr, "valeurAttribut")
                elif attr_name == "ID_INSTANCE":
                    id_instance = get_tag_content(attr, "valeurAttribut")
                elif attr_name == "CODE_STATUT_DONNEES_SOURCE":
                    code_statut_donnees_source = get_tag_content(attr, "valeurAttribut")

        # We search for all the Contact nodes :
        # - Main contact in pointContactPF node
        # - JDD provider in pointContactJdd node
        # - JDD builder in pointContactJdd node
        # - Database contact in contactBaseProduction node
        all_actors = []
        for contact_tag in ("pointContactPF", "pointContactJdd", "contactBaseProduction"):
            # contactBaseProduction is nested in BaseProduction, the other
            # contact tags are direct children of the dataset node
            if contact_tag == "contactBaseProduction":
                contact_node = jdd.find(namespace + "BaseProduction")
            else:
                contact_node = jdd
            if contact_node is None:
                continue
            # ``findall`` returns an empty list when the tag is absent, so no
            # prior presence test is needed (testing the text of the container
            # tag would skip the actors of a document written without indentation).
            for actor_node in contact_node.findall(namespace + contact_tag):
                all_actors.extend(parse_actors_xml(actor_node))

        # A name of 256 characters or more is shortened and the full name is
        # kept at the beginning of the description.
        name_too_long = dataset_name is not None and len(dataset_name) >= 256

        # We build the JDD data from all the variables collected from the XML file
        current_jdd = {
            "unique_dataset_id": jdd_uuid,
            "uuid_acquisition_framework": ca_uuid,
            "dataset_name": f"{dataset_name[:253]}..." if name_too_long else dataset_name,
            "dataset_shortname": dataset_shortname,
            "dataset_desc": (
                f"Nom complet du jeu de données dans MTD : {dataset_name}\n {dataset_desc}"
                if name_too_long
                else dataset_desc
            ),
            "keywords": None,
            "terrestrial_domain": terrestrial_domain,
            "marine_domain": marine_domain,
            "cd_nomenclature_data_type": data_type,
            "id_digitizer": id_digitizer,
            "cd_nomenclature_data_origin": code_statut_donnees_source,
            "actors": all_actors,
            "meta_create_date": create_date,
            "meta_update_date": update_date,
        }

        # filter with id_instance: without a configured filter every dataset is kept
        id_instance_filter = current_app.config["MTD"]["ID_INSTANCE_FILTER"]
        if not id_instance_filter or id_instance == str(id_instance_filter):
            jdd_list.append(current_jdd)
    return jdd_list