Source code for dabble.statistics

# Copyright 2022 AI Singapore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Calculates the cumulative average, minimum, and maximum of a single variable
of interest over time.
"""

import operator
from typing import Any, Dict, Optional, Union

from peekingduck.pipeline.nodes.abstract_node import AbstractNode
from peekingduck.pipeline.nodes.dabble.statisticsv1 import utils

# Maps each comparison token to its ``operator`` function. Two-character
# tokens are listed ahead of the one-character token they start with (">="
# before ">", "<=" before "<") so that a regex built from these keys in
# iteration order does not read ">=" as ">" or "<=" as "<".
# ``dict`` preserves insertion order from Python 3.6 onwards.
OPS = dict(
    [
        (">=", operator.ge),
        (">", operator.gt),
        ("==", operator.eq),
        ("<=", operator.le),
        ("<", operator.lt),
    ]
)


class Node(AbstractNode):  # pylint: disable=too-many-instance-attributes
    """Calculates the cumulative average, minimum, and maximum of a single
    variable of interest (defined as ``current result`` here) over time. The
    configurations for this node offer several functions to reduce the incoming
    data type into a single ``current result`` of type :obj:`int` or
    :obj:`float`, which is valid for the current video frame. ``current result``
    is then used to recalculate the values of the cumulative average, minimum,
    and maximum for PeekingDuck's running duration thus far.

    The configuration for this node is described below using a combination of
    the `Extended BNF
    <https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_ and
    `Augmented BNF
    <https://en.wikipedia.org/wiki/Augmented_Backus%E2%80%93Naur_form>`_
    metasyntax. Concrete examples are provided later for illustration. ::

        pkd_data_type   = ? PeekingDuck built-in data types ?
                          e.g. count, large_groups, obj_attrs
        user_data_type  = ? user data types produced by custom nodes ?
                          e.g. my_var, my_attrs
        dict_key        = ? Python dictionary keys, with optional nesting ?
                          e.g. ["ids"], ["details"]["age"]
        data_type       = pkd_data_type | user_data_type
        target_attr     = data_type | data_type "[" dict_key "]"

        unary_function  = "identity" | "length" | "maximum" | "minimum"
        unary_expr      = unary_function ":" target_attr

        num_operator    = "==" | ">=" | "<=" | ">" | "<"
        num_operand     = ? Python integers or floats ?
        num_comparison  = num_operator num_operand

        str_operator    = "=="
        str_operand     = ? Python strings enclosed by single or double quotes ?
        str_comparison  = str_operator str_operand

        cond_function   = "cond_count"
        cond_expr       = cond_function ":" target_attr ( num_comparison | str_comparison )

        configuration   = unary_expr | cond_expr

    Points to note:

    * Square brackets (``[]``) are used to define ``<dict_key>``, and should
      not be used elsewhere in the configuration.
    * Operands are processed differently depending on whether they are enclosed
      by single/double quotes, or not. If enclosed, the operand is assumed to be
      of type :obj:`str` and classified as ``<str_operand>``. If not, the
      operand is classified as ``<num_operand>`` and converted into
      :obj:`float` for further processing.

    The table below illustrates how configuration choices reduce the incoming
    data type into the ``<current result>``.

    +---------------------------------------+-------------------+-------------------+-------------+
    | ``<pkd_data_type>``: value            | ``<target_attr>`` | ``<unary_expr>``  | ``<current  |
    |                                       |                   |                   | result>``   |
    | or                                    |                   | or                |             |
    |                                       |                   |                   |             |
    | ``<user_data_type>``: value           |                   | ``<cond_expr>``   |             |
    +---------------------------------------+-------------------+-------------------+-------------+
    | count: 8                              | count             | identity:         | 8           |
    |                                       |                   |                   |             |
    |                                       |                   | count             |             |
    +---------------------------------------+-------------------+-------------------+-------------+
    | obj_attrs: {                          | obj_attrs["ids"]  | length:           | 3           |
    |                                       |                   |                   |             |
    |                                       |                   | obj_attrs["ids"]  |             |
    |   ids: [1,2,4],                       +-------------------+-------------------+-------------+
    |                                       | obj_attrs         | maximum:          | 52          |
    |   details: {                          | ["details"]       |                   |             |
    |                                       | ["age"]           | obj_attrs         |             |
    |     gender: ["male","male","female"], |                   | ["details"]       |             |
    |                                       |                   | ["age"]           |             |
    |     age: [52,17,48] }}                +-------------------+-------------------+-------------+
    |                                       | obj_attrs         | cond_count:       | 2           |
    |                                       | ["details"]       |                   |             |
    |                                       | ["gender"]        | obj_attrs         |             |
    |                                       |                   | ["details"]       |             |
    |                                       |                   | ["gender"]        |             |
    |                                       |                   |                   |             |
    |                                       |                   | == "male"         |             |
    |                                       +-------------------+-------------------+-------------+
    |                                       | obj_attrs         | cond_count:       | 3           |
    |                                       | ["details"]       |                   |             |
    |                                       | ["age"]           | obj_attrs         |             |
    |                                       |                   | ["details"]       |             |
    |                                       |                   | ["age"]           |             |
    |                                       |                   |                   |             |
    |                                       |                   | < 60              |             |
    +---------------------------------------+-------------------+-------------------+-------------+

    Inputs:
        |all_input_data|

    Outputs:
        |cum_avg_data|

        Note that :term:`cum_avg` will not be updated if there are no
        detections. For example, if :term:`cum_avg` = 10 for video frame 1, and
        there are no detections in the following 500 frames, :term:`cum_avg` is
        still 10 for video frame 501.

        |cum_max_data|

        |cum_min_data|

    Configs:
        identity (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`int` or :obj:`float`, and
            returns the same value.
        length (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[Any]` or
            :obj:`Dict[str, Any]`, and returns its length.
        minimum (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[float | int]` or
            :obj:`Dict[str, float | int]`, and returns the minimum element
            within for the current frame. Not to be confused with the
            :term:`cum_min` output data type, which represents the cumulative
            minimum over time.
        maximum (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[float | int]` or
            :obj:`Dict[str, float | int]`, and returns the maximum element
            within for the current frame. Not to be confused with the
            :term:`cum_max` output data type, which represents the cumulative
            maximum over time.
        cond_count (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[float | int | str]`,
            and checks if each element in the list fulfils the condition
            described by ``<num_comparison>`` or ``<str_comparison>``. The
            number of elements that fulfil the condition are counted towards
            ``<current result>``.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs: Any) -> None:
        """Sets up the running statistics and parses the configured expression.

        Args:
            config (Optional[Dict[str, Any]]): Node configuration, forwarded
                unchanged to ``AbstractNode``.
            **kwargs (Any): Extra keyword arguments, forwarded unchanged to
                ``AbstractNode``.
        """
        super().__init__(config, node_path=__name__, **kwargs)

        # Neutral starting values: the first numeric result always replaces
        # +inf / -inf as the cumulative minimum / maximum.
        self.cum_avg, self.cum_min, self.cum_max = 0.0, float("inf"), float("-inf")
        # Number of results folded into the cumulative statistics so far.
        self.num_iter = 0
        # Most recent per-frame result; populated by run().
        self.curr: Any = None

        # The five config attributes are not assigned in this class;
        # presumably AbstractNode sets them from the node config - TODO confirm.
        all_funcs = {
            "cond_count": self.cond_count,
            "identity": self.identity,
            "length": self.length,
            "minimum": self.minimum,
            "maximum": self.maximum,
        }
        self.stats = utils.Stats(OPS)
        self.data_type, self.keys = self.stats.prepare_data(all_funcs)

    def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Calculates the average, minimum and maximum of a single variable of
        interest over time.

        Args:
            inputs (dict): Dictionary with all available keys.

        Returns:
            outputs (dict): Dictionary with keys "cum_avg", "cum_min" and
            "cum_max".
        """
        # A copy of the keys is passed so the helper cannot alter self.keys.
        self.curr = self.stats.get_curr_result(inputs[self.data_type], self.keys.copy())

        # If no detections in this frame, do not update and return stats from
        # previous detections.
        # NOTE: this is a truthiness check, so a current result of 0 is also
        # treated as "no detections" and leaves the statistics unchanged.
        if self.curr:
            self._update_stats(self.curr)

        return {
            "cum_avg": self.cum_avg,
            "cum_min": self.cum_min,
            "cum_max": self.cum_max,
        }

    def _get_config_types(self) -> Dict[str, Any]:
        """Returns dictionary mapping the node's config keys to respective types."""
        return {
            "identity": Optional[str],
            "length": Optional[str],
            "minimum": Optional[str],
            "maximum": Optional[str],
            "cond_count": Optional[str],
        }

    def _update_stats(self, curr: Union[float, int]) -> None:
        """Updates the cum_avg, cum_min and cum_max values with the current value.

        Args:
            curr (Union[float, int]): Result for the current frame.

        Raises:
            TypeError: If ``curr`` is not an ``int`` or a ``float``.
        """
        if not isinstance(curr, (float, int)):
            raise TypeError(
                "The current result has to be of type 'int' or 'float' to calculate statistics. "
                f"However, the current result here is: '{curr}' which is of type: {type(curr)}."
            )

        if curr < self.cum_min:
            self.cum_min = curr
        if curr > self.cum_max:
            self.cum_max = curr

        if self.num_iter == 0:
            self.cum_avg = curr
        else:
            # Incremental mean: rebuild the previous sum from the previous
            # average, add the new value, divide by the new sample count.
            self.cum_avg = (self.cum_avg * self.num_iter + curr) / (self.num_iter + 1)
        self.num_iter += 1