# Copyright 2022 AI Singapore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""🕺 Fast Pose Estimation model."""
from typing import Any, Dict, Optional
import cv2
import numpy as np
from peekingduck.pipeline.nodes.abstract_node import AbstractNode
from peekingduck.pipeline.nodes.model.movenetv1 import movenet_model


class Node(AbstractNode):
"""MoveNet node that initializes a MoveNet model to detect human poses from
an image.
The MoveNet node is capable of detecting up to 6 human figures for
multipose lightning and single person for singlepose lightning/thunder. If
there are more than 6 persons in the image, multipose lightning will only
detect 6. This also applies to singlepose models, where only 1 person will
be detected in a multi persons image, do take note that detection
performance will suffer when using singlepose models on multi persons
images. 17 keypoints are estimated and the keypoint indices table can be
found :ref:`here <whole-body-keypoint-ids>`.
Inputs:
|img_data|
Outputs:
|bboxes_data|
|keypoints_data|
|keypoint_scores_data|
|keypoint_conns_data|
|bbox_labels_data|
Configs:
model_format (:obj:`str`): **{"tensorflow", "tensorrt"},
default="tensorflow"** |br|
Defines the weights format of the model.
model_type (:obj:`str`):
**{"
singlepose_lightning", "singlepose_thunder", "multipose_lightning"
}, default="multipose_lightning"** |br|
Defines the detection model for MoveNet either single or multi pose.
Lightning is smaller and faster but less accurate than Thunder
version.
weights_parent_dir (:obj:`Optional[str]`): **default = null**. |br|
Change the parent directory where weights will be stored by
replacing ``null`` with an absolute path to the desired directory.
bbox_score_threshold (:obj:`float`): **[0,1], default = 0.2** |br|
Detected bounding box confidence score threshold, only boxes above
threshold will be kept in the output.
keypoint_score_threshold (:obj:`float`): **[0,1], default = 0.3** |br|
Detected keypoints confidence score threshold, only keypoints above
threshold will be kept in output.
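
    Example:
        A minimal sketch of running this node directly in Python, assuming the
        node can be constructed standalone with keyword overrides (as with
        other PeekingDuck model nodes); the image path is a placeholder::

            import cv2

            from peekingduck.pipeline.nodes.model import movenet

            # Build the node with its default configs, overriding the model type.
            movenet_node = movenet.Node(model_type="singlepose_lightning")

            # This node expects a BGR image, as produced by OpenCV.
            frame = cv2.imread("example.jpg")
            outputs = movenet_node.run({"img": frame})
            print(outputs["bbox_labels"])  # every detection is labelled "person"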
"""

    def __init__(
        self, config: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> None:
        super().__init__(config, node_path=__name__, **kwargs)
        self.model = movenet_model.MoveNetModel(self.config)

    def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Reads the image input and returns the bboxes, keypoints, keypoint
        confidence scores, keypoint connections, and bounding box labels of
        the persons detected.

        Args:
            inputs (Dict[str, Any]): Dictionary of inputs with key "img".

        Returns:
            (Dict[str, Any]): Dictionary of outputs with keys "bboxes",
            "keypoints", "keypoint_scores", "keypoint_conns", and
            "bbox_labels".
        """
image = cv2.cvtColor(inputs["img"], cv2.COLOR_BGR2RGB)
bboxes, keypoints, keypoint_scores, keypoint_conns = self.model.predict(image)
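        # This node only detects persons, so every bounding box gets the "person" label.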
bbox_labels = np.array(["person"] * len(bboxes))
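        # Clip the normalized bounding box coordinates to the [0, 1] image bounds.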
bboxes = np.clip(bboxes, 0, 1)
return {
"bboxes": bboxes,
"bbox_labels": bbox_labels,
"keypoints": keypoints,
"keypoint_conns": keypoint_conns,
"keypoint_scores": keypoint_scores,
}

    def _get_config_types(self) -> Dict[str, Any]:
"""Returns dictionary mapping the node's config keys to respective types."""
return {
"bbox_score_threshold": float,
"keypoint_score_threshold": float,
"model_format": str,
"model_type": str,
"weights_parent_dir": Optional[str],
}