Source code for model.movenet

# Copyright 2022 AI Singapore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""🕺 Fast Pose Estimation model."""

from typing import Any, Dict, Optional

import cv2
import numpy as np

from peekingduck.pipeline.nodes.abstract_node import AbstractNode
from peekingduck.pipeline.nodes.model.movenetv1 import movenet_model


class Node(AbstractNode):
    """MoveNet node that initializes a MoveNet model to detect human poses
    from an image.

    The MoveNet node is capable of detecting up to 6 human figures for
    multipose lightning and a single person for singlepose lightning/thunder.
    If there are more than 6 persons in the image, multipose lightning will
    only detect 6. This also applies to singlepose models, where only 1
    person will be detected in a multi-person image; do take note that
    detection performance will suffer when using singlepose models on
    multi-person images. 17 keypoints are estimated and the keypoint indices
    table can be found :ref:`here <whole-body-keypoint-ids>`.

    Inputs:
        |img_data|

    Outputs:
        |bboxes_data|

        |keypoints_data|

        |keypoint_scores_data|

        |keypoint_conns_data|

        |bbox_labels_data|

    Configs:
        model_format (:obj:`str`): **{"tensorflow", "tensorrt"},
            default="tensorflow"** |br|
            Defines the weights format of the model.
        model_type (:obj:`str`): **{"singlepose_lightning",
            "singlepose_thunder", "multipose_lightning"},
            default="multipose_lightning"** |br|
            Defines the detection model for MoveNet, either single or multi
            pose. Lightning is smaller and faster but less accurate than the
            Thunder version.
        weights_parent_dir (:obj:`Optional[str]`): **default = null** |br|
            Change the parent directory where weights will be stored by
            replacing ``null`` with an absolute path to the desired
            directory.
        bbox_score_threshold (:obj:`float`): **[0, 1], default = 0.2** |br|
            Detected bounding box confidence score threshold; only boxes
            above the threshold will be kept in the output.
        keypoint_score_threshold (:obj:`float`): **[0, 1], default = 0.3** |br|
            Detected keypoint confidence score threshold; only keypoints
            above the threshold will be kept in the output.
    """

    def __init__(self, config: Dict[str, Any] = None, **kwargs: Any) -> None:
        super().__init__(config, node_path=__name__, **kwargs)
        self.model = movenet_model.MoveNetModel(self.config)

    def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Reads the image input and returns the bboxes, keypoints, keypoint
        confidence scores, keypoint connections, and bounding box labels of
        the persons detected.

        Args:
            inputs (Dict[str, Any]): Dictionary of inputs with key "img".

        Returns:
            (Dict[str, Any]): Dictionary of outputs with keys "bboxes",
            "keypoints", "keypoint_scores", "keypoint_conns", and
            "bbox_labels".
        """
        # Model expects RGB input, while PeekingDuck passes BGR images.
        image = cv2.cvtColor(inputs["img"], cv2.COLOR_BGR2RGB)
        bboxes, keypoints, keypoint_scores, keypoint_conns = self.model.predict(
            image
        )
        bbox_labels = np.array(["person"] * len(bboxes))
        bboxes = np.clip(bboxes, 0, 1)

        return {
            "bboxes": bboxes,
            "bbox_labels": bbox_labels,
            "keypoints": keypoints,
            "keypoint_conns": keypoint_conns,
            "keypoint_scores": keypoint_scores,
        }

    def _get_config_types(self) -> Dict[str, Any]:
        """Returns dictionary mapping the node's config keys to respective
        types.
        """
        return {
            "bbox_score_threshold": float,
            "keypoint_score_threshold": float,
            "model_format": str,
            "model_type": str,
            "weights_parent_dir": Optional[str],
        }
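

# A minimal usage sketch, not part of the original module: it assumes
# PeekingDuck is installed, that keyword arguments passed to ``Node``
# override the corresponding config keys, and that the MoveNet weights
# are fetched on first use. The filename "people.jpg" is a hypothetical
# placeholder for a local test image.
if __name__ == "__main__":
    demo_node = Node(model_type="multipose_lightning")
    demo_frame = cv2.imread("people.jpg")  # BGR image, as the node expects
    demo_outputs = demo_node.run({"img": demo_frame})
    # bboxes are normalized to [0, 1]; up to 6 persons are returned, each
    # with 17 estimated keypoints.
    print(demo_outputs["bboxes"])
    print(demo_outputs["keypoints"].shape)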