Source code for pywhip.reporters

# -*- coding: utf-8 -*-

from collections import defaultdict

try:
    from collections.abc import Mapping
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import Mapping


class WhipReportException(Exception):
    """Raised when the reporting of the errors contains errors"""
class SpecificationErrorHandler(Mapping):
    """Class handler for field-rule entity reporting

    Attributes
    ----------
    constraint : str
        The constraint linked to the specification (field-rule
        combination), expressed as string
    _samples : defaultdict(set)
        Dictionary with ``(value, message)`` tuples as keys, i.e. the wrong
        data value and the message of the error, and the corresponding row
        identifiers as values.

    Notes
    -----
    The :class:`~pywhip.reporters.SpecificationErrorHandler` class is
    basically an enriched dictionary (using :term:`mapping`), directly
    building on top of a :class:`~collections.defaultdict` with the (wrong)
    values as keys and a :class:`set` as values to add (unique) rows for
    which that value occurs.

    Because the underlying store is a :class:`~collections.defaultdict`,
    looking up a key that is not present yet creates it with an empty
    :class:`set` instead of raising a :class:`KeyError`. Entries that never
    received a row identifier are ignored when building the report.
    """

    # TODO: add control that keys are tuples only (__setitem__)

    def __init__(self, constraint):
        self._samples = defaultdict(set)
        self.constraint = constraint

    def __getitem__(self, key):
        # Unknown keys are inserted with an empty set by the defaultdict,
        # so the returned set can be filled in place by the caller.
        return self._samples[key]

    def __iter__(self):
        return iter(self._samples)

    def __len__(self):
        return len(self._samples)

    def _recorded_samples(self):
        """List the (key, row identifiers) pairs having at least one row"""
        return [(key, row_ids) for key, row_ids in self._samples.items()
                if row_ids]

    def _unique_value_messages(self):
        """Check if all values are linked to a single message"""
        keys = [key for key, _ in self._recorded_samples()]
        return len({key[0] for key in keys}) == len(keys)

    def _failed_rows(self):
        """Overview of the failed row identifiers"""
        return set().union(*self._samples.values())

    def build_error_report(self, total_rows_count, top_n):
        """Convert defaultdict to regular dict for json reporting

        Parameters
        -----------
        total_rows_count : int
            Total rows of the current document working with, used to
            calculate passed rows as well
        top_n : int
            Number of samples (ordered on the number of rows) to retain for
            reporting purposes

        Returns
        -------
        dict
            Report with the keys ``constraint``, ``passed_rows``,
            ``failed_rows`` and ``samples``.

        Raises
        ------
        WhipReportException
            When the same value is linked to more than one message.

        Notes
        -----
        :meth:`~pywhip.reporters.SpecificationErrorHandler.build_error_report`
        combines the information contained by the
        :attr:`~pywhip.reporters.SpecificationErrorHandler._samples`
        attribute, for example::

            {
                ("07241981", "string format ...") : [2, 3, 5, 6],
                ("value", "message as provided by error") : [1, 2, 6,]
            }

        together with the other attributes into a json-style report::

            {"constraint": "%Y-%m-%d, %Y-%m, %Y",
             "failed_rows": 23,
             "passed_rows": 3,
             "samples": {
                "07241981": {
                    "failed_rows": 4,
                    "first_row": 2,
                    "message": "string format ..."
                    },
                "value": {
                    "failed_rows": n_rows,
                    "first_row": minimum of row identifiers,
                    "message": "message as provided by error"
                    }
                }
            }

        Keys without any row identifier (created by a mere lookup) are left
        out of the samples, as there is no first row to report for them.
        """
        if not self._unique_value_messages():
            raise WhipReportException("Not all value-message "
                                      "combinations unique!")

        # Stable sort: samples with an equal number of rows keep the order
        # in which they were first encountered.
        ranked = sorted(self._recorded_samples(),
                        key=lambda item: len(item[1]), reverse=True)

        samples = {}
        for (value, message), row_ids in ranked[:top_n]:
            samples[value] = {'message': message,
                              'first_row': min(row_ids),
                              'failed_rows': len(row_ids)}

        failed_rows_count = len(self._failed_rows())
        return {'constraint': self.constraint,
                'passed_rows': total_rows_count - failed_rows_count,
                'failed_rows': failed_rows_count,
                'samples': samples}