Source code for niche_vlaanderen.validation

import logging
import warnings
from collections import defaultdict
from pathlib import Path

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("ignore", category=UserWarning)
    import geopandas as gpd

import numpy as np
import pandas as pd

from niche_vlaanderen.niche import Niche
from niche_vlaanderen.codetables import package_resource

logger = logging.getLogger(__name__)


class NicheValidationException(Exception):
    msg = "Error using Niche Overlay"


[docs] class NicheValidation(object): """Creates a new NicheValidation object Overlays a vegetation map with niche in order to obtain the accuracy of the model. Parameters: niche: niche_vlaanderen.Niche Niche object containing predicted variables types according to niche. The model must have run prior to the overlay map: Path Path to a file containing the vegetation map in one of the formats supported by fiona, eg shape. must contain these attributes: * HABx: vegetation type * pHABx: proportion of the vegetation type different vegetation types can be supplied, eg HAB1, HAB2, ... for every HABx variable a proportion must be given. mapping_file: Path | None optional file containing the mapping between habitat (HAB) code on the vegetation map and Niche vegetation types. The default mapping is a mapping of the BWK map (biologische waarderingskaart), and is part of the package at niche_vlaanderen/system_tables/hab_nich_join.csv If a mapping is provided by the user, it must contain a HAB and a NICHE column. Other columns are ignored. upscale: int optional upscale value that is used to increase the resolution of the niche rasters while doing the overlay. instead of using only the cell center, the comparison between the vegetation map and the niche model is done at upscale*upscale places in the cell. Defaults to 5. More details in https://inbo.github.io/niche_vlaanderen/validation.html id: str (optional) field to use as id for the provided map file. This id will be used in the overlay. If no id is supplied, the shape_index (row number) of the vector file will be used, starting from 0. """ def __init__(self, niche, map, mapping_file=None, upscale=5, id=None): if type(niche) is Niche: self.niche = niche else: raise ValueError( f"pass a valid Niche object - type of niche object is {type(niche)}" ) self.filename_map = map self.map = gpd.read_file(map) # the mapping columns contain are the field in the shapefile that contain a field starting with HAB{1-9} # (case insensitive) columns = self.map.columns vegetation_columns = columns[columns.str.upper().str.match("HAB[0-9]")] proportion_index = { veg: list(columns.str.upper()).index(f"PHAB{veg}") for veg in vegetation_columns.str[-1] } self.proportion_columns = { i: columns[proportion_index[i]] for i in proportion_index } if mapping_file is None: mapping_file = package_resource(["system_tables"], "hab_niche_join.csv") mapping = pd.read_csv(mapping_file) # drop any row containing niche vegetation 0 mapping = mapping[mapping["NICHE"] != 0] # convert to wide format mapping["col"] = mapping.groupby("HAB").cumcount() mapping["col"] = "NICHE_C" + (mapping["col"] + 1).astype("str") self.mapping = mapping.pivot( index="HAB", columns="col", values="NICHE" ).reset_index() niche_columns = self.map.columns[self.map.columns.str.startswith("NICH_")] self.map = self.map.drop(columns=niche_columns) self.map["area_shape"] = self.map.area / 10000 for hab_column in vegetation_columns: for nich_column in self.mapping.columns[ self.mapping.columns.str.startswith("NICHE_C") ]: logger.debug(f"habi: {hab_column}; nichj: {nich_column}") source = self.mapping[["HAB", nich_column]].rename( columns={ "HAB": hab_column, nich_column: f"NICH_{hab_column[-1]}_{nich_column[-1]}", } ) self.map = pd.merge(self.map, source, on=hab_column, how="left") self._niche_columns = self.map.columns[self.map.columns.str.startswith("NICH")] self.id = id self.overlay(upscale=upscale) def __repr__(self): o = "# Niche Validation object\n" o += f"map: {self.filename_map}\n" o += f"niche object: {self.niche.name}" return o
[docs] def overlay(self, upscale=5): """Overlays the map and the niche object""" # Remove any existing "NICH" columns present_vegetation_types = np.unique(self.map[self._niche_columns]) present_vegetation_types = present_vegetation_types[ ~np.isnan(present_vegetation_types) ] logger.debug(f"present niche types: {present_vegetation_types}") if len(present_vegetation_types) == 0: raise NicheValidationException( "No Niche vegetation present in validation map" ) # get potential presence # upscale niche rasters to that before calculating statistics self.potential_presence = self.niche.zonal_stats( self.map, outside=False, vegetation_types=present_vegetation_types, upscale=upscale, ) self.potential_presence = self.potential_presence.pivot( columns=["vegetation"], index=["presence", "shape_id"] ) self._tables = [ "area_pot", "area_nonpot", "area_nonpot_phab", "area_pot_perc_phab", "area_effective", "area_pot_perc", "veg_present", ] self.area_pot = self.potential_presence.loc["no data"]["area_ha"] * np.nan self.area_nonpot = self.potential_presence.loc["no data"]["area_ha"] * np.nan self.area_effective = self.potential_presence.loc["no data"]["area_ha"] * np.nan self.area_pot_perc = self.potential_presence.loc["no data"]["area_ha"] * np.nan self.area_pot_perc_phab = self.area_pot_perc * np.nan self.area_nonpot_phab = self.area_pot_perc * np.nan self.veg_present = self.area_pot_perc * 0 # Only if actual present: (pHAB * present) / (present + not present) for i, row in self.map.iterrows(): logger.debug(f"row: {row.area_shape}") # different mappings can exist which lead to the same vegetation # type. First we aggregate them in shape_veg which contains # the niche vegetation and the sum of its pHAB shape_veg = defaultdict(lambda: 0) for veg in self._niche_columns: if np.isfinite(row[veg]) and row[veg] != 0: pHab = row[self.proportion_columns[veg[5]]] shape_veg[int(row[veg])] += pHab for veg in shape_veg: pHab = shape_veg[veg] area_pot = ( self.potential_presence.loc["present"].loc[i].loc["area_ha"][veg] ) self.area_pot.loc[i, veg] = area_pot area_nonpot = ( self.potential_presence.loc["not present"] .loc[i] .loc["area_ha"][veg] ) self.area_nonpot.loc[i, veg] = area_nonpot area_effective = pHab * (area_pot + area_nonpot) / 100 self.area_effective.loc[i, veg] = area_effective if (area_pot + area_nonpot) == 0: warnings.warn( f"No overlap between potential vegetation map and " f"shape_id {i}" ) else: # vegetation type is present (actual presence) # used in polygon count self.veg_present.loc[i, veg] = 1 # aggregate statistics self.area_pot_perc = 100 * self.area_pot / (self.area_pot + self.area_nonpot) self.area_pot_perc_phab = np.minimum( 100 * self.area_pot / self.area_effective, 100 ) self.area_nonpot_phab = np.maximum(0, self.area_effective - self.area_pot) # Set id column for every polygon based table if self.id is not None: for table in self._tables: setattr(self, table, getattr(self, table).set_index(self.map[self.id])) # create summary table per vegetation type summary = {} summary["area_effective"] = self.area_effective.sum() summary["nonpot"] = self.area_nonpot.sum() summary["nonpot_phab"] = self.area_nonpot_phab.sum() summary["pot"] = self.area_pot.sum() summary["polygon_count"] = self.veg_present.sum() summary["score"] = 100 * summary["pot"] / (summary["pot"] + summary["nonpot"]) summary["score_phab"] = ( 100 * summary["pot"] / (summary["pot"] + summary["nonpot_phab"]) ) self.summary = pd.DataFrame(summary)
[docs] def write(self, path, overwrite_files=False): """Write the result of the validation to a folder path: string | pathlib.Path Output folder to which files will be written. Parent directory must already exist. overwrite_files: bool Overwrite files when saving. Note writing will fail if any of the files to be written already exists. """ path = Path(path) if path.exists() and not overwrite_files: if not path.is_dir(): raise NicheValidationException(f"path {path} is not an empty folder") if any(path.iterdir()): raise NicheValidationException(f"path {path} exists and is not empty") path.mkdir(parents=True, exist_ok=True) # save individual tables for t in self._tables: getattr(self, t).dropna(axis=0, how="all").to_csv(path / (t + ".csv")) self.potential_presence.to_csv(path / "potential_presence.csv") self.summary.to_csv(path / "summary.csv") with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) warnings.filterwarnings("ignore", category=FutureWarning) self.joined_map().to_file(path / "validation.gpkg")
[docs] def joined_map(self): """Create a geopandas dataframe with all tables joined""" merged = self.map if self.id: merged = merged.set_index(self.id) for table in self._tables: merged = merged.join(getattr(self, table).add_prefix(f"{table}_")) return merged