# Source code for dabble.statistics

# Copyright 2022 AI Singapore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Calculates the cumulative average, minimum, and maximum of a single variable
of interest over time.
"""

import operator
from typing import Any, Dict, Optional, Union

from peekingduck.pipeline.nodes.abstract_node import AbstractNode
from peekingduck.pipeline.nodes.dabble.statisticsv1 import utils

# Maps each comparison symbol accepted in the node configuration to the
# function from the ``operator`` module that implements it.
#
# Entry order matters: the two-character symbols come before their
# one-character prefixes so that regex doesn't read ">=" as ">" or "<=" as "<".
# The order written here is the order seen when iterating, because dictionaries
# preserve insertion order (guaranteed by the language since Python 3.7;
# CPython 3.6 already behaved this way as an implementation detail).
OPS: Dict[str, Any] = {
    ">=": operator.ge,  # keep ahead of ">"
    ">": operator.gt,
    "==": operator.eq,
    "<=": operator.le,  # keep ahead of "<"
    "<": operator.lt,
}


class Node(AbstractNode):  # pylint: disable=too-many-instance-attributes
    """Calculates the cumulative average, minimum, and maximum of a single
    variable of interest (defined as ``current result`` here) over time. The
    configurations for this node offer several functions to reduce the incoming
    data type into a single ``current result`` of type :obj:`int` or
    :obj:`float`, which is valid for the current video frame.
    ``current result`` is then used to recalculate the values of the cumulative
    average, minimum, and maximum for PeekingDuck's running duration thus far.

    The configuration for this node is described below using a combination of
    the `Extended BNF
    <https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_ and
    `Augmented BNF
    <https://en.wikipedia.org/wiki/Augmented_Backus%E2%80%93Naur_form>`_
    metasyntax. Concrete examples are provided later for illustration. ::

        pkd_data_type   = ? PeekingDuck built-in data types ?
                          e.g. count, large_groups, obj_attrs
        user_data_type  = ? user data types produced by custom nodes ?
                          e.g. my_var, my_attrs
        dict_key        = ? Python dictionary keys, with optional nesting ?
                          e.g. ["ids"], ["details"]["age"]
        data_type       = pkd_data_type | user_data_type
        target_attr     = data_type | data_type "[" dict_key "]"

        unary_function  = "identity" | "length" | "maximum" | "minimum"
        unary_expr      = unary_function ":" target_attr

        num_operator    = "==" | ">=" | "<=" | ">" | "<"
        num_operand     = ? Python integers or floats ?
        num_comparison  = num_operator num_operand

        str_operator    = "=="
        str_operand     = ? Python strings enclosed by single or double quotes ?
        str_comparison  = str_operator str_operand

        cond_function   = "cond_count"
        cond_expr       = cond_function ":" target_attr ( num_comparison | str_comparison )

        configuration   = unary_expr | cond_expr

    Points to note:

        * Square brackets (``[]``) are used to define ``<dict_key>``, and
          should not be used elsewhere in the configuration.
        * Operands are processed differently depending on whether they are
          enclosed by single/double quotes, or not. If enclosed, the operand is
          assumed to be of type :obj:`str` and classified as ``<str_operand>``.
          If not, the operand is classified as ``<num_operand>`` and converted
          into :obj:`float` for further processing.

    The table below illustrates how configuration choices reduce the incoming
    data type into the ``<current result>``.

    +---------------------------------------+-------------------+-------------------+-------------+
    | ``<pkd_data_type>``: value            | ``<target_attr>`` | ``<unary_expr>``  | ``<current  |
    |                                       |                   |                   | result>``   |
    | or                                    |                   | or                |             |
    |                                       |                   |                   |             |
    | ``<user_data_type>``: value           |                   | ``<cond_expr>``   |             |
    +---------------------------------------+-------------------+-------------------+-------------+
    | count: 8                              | count             | identity:         | 8           |
    |                                       |                   |                   |             |
    |                                       |                   | count             |             |
    +---------------------------------------+-------------------+-------------------+-------------+
    | obj_attrs: {                          | obj_attrs["ids"]  | length:           | 3           |
    |                                       |                   |                   |             |
    |                                       |                   | obj_attrs["ids"]  |             |
    |   ids: [1,2,4],                       +-------------------+-------------------+-------------+
    |                                       | obj_attrs         | maximum:          | 52          |
    |   details: {                          | ["details"]       |                   |             |
    |                                       | ["age"]           | obj_attrs         |             |
    |     gender: ["male","male","female"], |                   | ["details"]       |             |
    |                                       |                   | ["age"]           |             |
    |     age: [52,17,48] }}                +-------------------+-------------------+-------------+
    |                                       | obj_attrs         | cond_count:       | 2           |
    |                                       | ["details"]       |                   |             |
    |                                       | ["gender"]        | obj_attrs         |             |
    |                                       |                   | ["details"]       |             |
    |                                       |                   | ["gender"]        |             |
    |                                       |                   |                   |             |
    |                                       |                   | == "male"         |             |
    |                                       +-------------------+-------------------+-------------+
    |                                       | obj_attrs         | cond_count:       | 3           |
    |                                       | ["details"]       |                   |             |
    |                                       | ["age"]           | obj_attrs         |             |
    |                                       |                   | ["details"]       |             |
    |                                       |                   | ["age"]           |             |
    |                                       |                   |                   |             |
    |                                       |                   | < 60              |             |
    +---------------------------------------+-------------------+-------------------+-------------+

    Inputs:
        |all_input_data|

    Outputs:
        |cum_avg_data|

        Note that :term:`cum_avg` will not be updated if there are no
        detections. For example, if :term:`cum_avg` = 10 for video frame 1, and
        there are no detections in the following 500 frames, :term:`cum_avg` is
        still 10 for video frame 501.

        |cum_max_data|

        |cum_min_data|

    Configs:
        identity (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`int` or :obj:`float`, and
            returns the same value.
        length (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[Any]` or
            :obj:`Dict[str, Any]`, and returns its length.
        minimum (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[float | int]` or
            :obj:`Dict[str, float | int]`, and returns the minimum element
            within for the current frame. Not to be confused with the
            :term:`cum_min` output data type, which represents the cumulative
            minimum over time.
        maximum (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[float | int]` or
            :obj:`Dict[str, float | int]`, and returns the maximum element
            within for the current frame. Not to be confused with the
            :term:`cum_max` output data type, which represents the cumulative
            maximum over time.
        cond_count (:obj:`str`): **default=null** |br|
            Accepts ``<target_attr>`` of types :obj:`List[float | int | str]`,
            and checks if each element in the list fulfils the condition
            described by ``<num_comparison>`` or ``<str_comparison>``. The
            number of elements that fulfil the condition is counted towards
            ``<current result>``.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs: Any) -> None:
        """Initialises the running statistics and prepares the configured reduction.

        Args:
            config (Optional[Dict[str, Any]]): Node configuration, passed through
                to the ``AbstractNode`` constructor unchanged.
            **kwargs (Any): Extra keyword arguments, passed through to the
                ``AbstractNode`` constructor unchanged.
        """
        super().__init__(config, node_path=__name__, **kwargs)
        # cum_min / cum_max start at +inf / -inf so that the first valid result
        # always replaces them; these sentinels are what run() returns until then.
        self.cum_avg, self.cum_min, self.cum_max = 0.0, float("inf"), float("-inf")
        # Number of results that have been folded into the statistics so far.
        self.num_iter = 0
        # NOTE(review): the attributes read below are not defined in this class;
        # presumably AbstractNode sets them from the config keys declared in
        # _get_config_types() - confirm against AbstractNode.
        all_funcs = {
            "cond_count": self.cond_count,
            "identity": self.identity,
            "length": self.length,
            "minimum": self.minimum,
            "maximum": self.maximum,
        }
        self.stats = utils.Stats(OPS)
        self.data_type, self.keys = self.stats.prepare_data(all_funcs)

    def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Calculates the average, minimum and maximum of a single variable of interest over time.

        Args:
            inputs (dict): Dictionary with all available keys.

        Returns:
            outputs (dict): Dictionary with keys "cum_avg", "cum_min" and "cum_max".
        """

        # A copy of the keys is handed over so the list stored on this node is
        # never the object the helper works on.
        self.curr = self.stats.get_curr_result(inputs[self.data_type], self.keys.copy())

        # If there are no detections in this frame, do not update and return the
        # stats from previous detections. This is a truthiness check, so a
        # current result of 0 is skipped as well.
        if self.curr:
            self._update_stats(self.curr)

        return {
            "cum_avg": self.cum_avg,
            "cum_min": self.cum_min,
            "cum_max": self.cum_max,
        }

    def _get_config_types(self) -> Dict[str, Any]:
        """Returns dictionary mapping the node's config keys to respective types."""
        return {
            "identity": Optional[str],
            "length": Optional[str],
            "minimum": Optional[str],
            "maximum": Optional[str],
            "cond_count": Optional[str],
        }

    def _update_stats(self, curr: Union[float, int]) -> None:
        """Updates the cum_avg, cum_min and cum_max values with the current value.

        Args:
            curr (Union[float, int]): Result computed for the current frame.

        Raises:
            TypeError: If ``curr`` is neither an :obj:`int` nor a :obj:`float`.
        """
        if not isinstance(curr, (float, int)):
            # The first fragment ends with a space so the two sentences do not
            # run together in the rendered message.
            raise TypeError(
                "The current result has to be of type 'int' or 'float' to calculate statistics. "
                f"However, the current result here is: '{curr}' which is of type: {type(curr)}."
            )

        if curr < self.cum_min:
            self.cum_min = curr
        if curr > self.cum_max:
            self.cum_max = curr
        if self.num_iter == 0:
            # The first sample is stored as-is, so cum_avg keeps the type of curr.
            self.cum_avg = curr
        else:
            # Recover the running sum from the previous mean and sample count,
            # add the new sample, then divide by the new count.
            self.cum_avg = (self.cum_avg * self.num_iter + curr) / (self.num_iter + 1)
        self.num_iter += 1