Source code for dabble.camera_calibration

# Copyright 2022 AI Singapore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Calculates camera coefficients to be used to
remove distortion from a wide-angle camera image.
"""

import math
import time
from pathlib import Path
from typing import Any, Dict, List

import cv2
import numpy as np
import yaml

from peekingduck.pipeline.nodes.abstract_node import AbstractNode
from peekingduck.pipeline.nodes.draw.utils.constants import BLACK, CHAMPAGNE, TOMATO

# global constants
# terminal criteria for subpixel finetuning
TERMINATION_CRITERIA = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

NUM_PICTURES = 5

# constants for positioning of boxes and text
TOP_LEFT = 0
TOP_RIGHT = 1
BOTTOM_LEFT = 2
BOTTOM_RIGHT = 3
MIDDLE = 4

# constants for _check_corners_validity
AREA_THRESHOLD = 3 / 4
CORNERS_OK = 0
IMAGE_TOO_SMALL = 1
NOT_IN_BOX = 2

# displayed messages
DEFAULT_TEXT = ["PLACE BOARD HERE"]
TOO_SMALL = ["MOVE BOARD CLOSER"]
DETECTION_SUCCESS = ["DETECTION SUCCESSFUL!", "PRESS ANY KEY TO CONTINUE."]
DETECTION_COMPLETE = ["DETECTION COMPLETE!", "PRESS ANY KEY TO EXIT."]
MAX_LEN_TEXT = "PRESS ANY KEY TO CONTINUE."

# constants for drawing
BGND_BOX_OPACITY = 0.75
BOX_WIDTH_RATIO = 1 / 3  # relative to the window width
BOX_HEIGHT_RATIO = 1 / 2  # relative to the window height
TEXT_PADDING = 5

# constants for font drawing
# 1280 is used as the reference point for the ratio
BOX_THICKNESS_RATIO = 2 / 1280
NORMAL_FONT_SCALE_RATIO = 0.84 / 1280
COUNTDOWN_FONT_SCALE_RATIO = 10 / 1280
NORMAL_FONT_THICKNESS_RATIO = 2 / 1280
COUNTDOWN_FONT_THICKNESS_RATIO = 8 / 1280


[docs]class Node(AbstractNode): """Calculates camera coefficients for `undistortion <https://docs.opencv.org/4.x/dc/dbb/tutorial_py_calibration.html>`_. To calculate your camera, first download the following checkerboard and print it out in a suitable size and attach it to a hard surface, or display it on a sufficiently large device screen, such as a computer or a tablet. For most use cases, an A4-sized checkerboard works well, but depending on the position and distance of the camera, a bigger checkerboard may be required. .. image:: /assets/api/checkerboard.png :width: 20 % Next, create an empty ``pipeline_config.yml`` in your project folder and modify it as follows: .. code-block:: yaml :linenos: nodes: - input.visual: source: 0 # change this to the camera you are using threading: True mirror_image: True - dabble.camera_calibration - output.screen Run the above pipeline with :greenbox:`peekingduck run`. If you are unfamiliar with the pipeline file and running peekingduck, you may refer to the :doc:`HelloCV tutorial </tutorials/01_hello_cv>`. |br| You should see a display of your camera with some instructions overlaid. Follow the instructions to position the checkerboard at 5 different positions in the camera. If the process is successful, the camera coefficients will be calculated and written to a file and you can start using the :mod:`augment.undistort` node. Inputs: |img_data| Outputs: |img_data| Configs: num_corners (:obj:`List[int]`): **default = [10, 7]**. |br| A list containing the number of internal corners along the vertical and horizontal axes. For example, in the given image above, the checkerboard is of size 11x8, so the number of internal corners is 10x7. If you are using the given checkerboard above, you do not need to change this parameter. scale_factor (:obj:`int`): **default = 2**. |br| Factor to scale the image by when finding chessboard corners. For example, with a scale of 4, an image of size (1080 x 1920) will be scaled down to (270 x 480) when detecting the corners. Increasing this value reduces computation time. If the node is unable to detect corners, reducing this value may help. file_path (:obj:`str`): **default = "PeekingDuck/data/camera_calibration_coeffs.yml"**. |br| Path of the YML file to store the calculated camera coefficients. """ def __init__(self, config: Dict[str, Any] = None, **kwargs: Any) -> None: super().__init__(config, node_path=__name__, **kwargs) self.file_path = Path(self.file_path) # type: ignore # check if file_path has a ".yml" extension if self.file_path.suffix != ".yml": raise ValueError("Filepath must have a '.yml' extension.") if not self.file_path.exists(): self.file_path.parent.mkdir(parents=True, exist_ok=True) grid_height = self.num_corners[0] grid_width = self.num_corners[1] # prepare all object points, like (0, 0, 0), (1, 0, 0), etc. np_mgrid = np.mgrid[0:grid_height, 0:grid_width] object_points_base = np.zeros((grid_height * grid_width, 3), np.float32) object_points_base[:, :2] = np_mgrid.T.reshape(-1, 2) # arrays to store object points and image points # points in real world self.object_points: List[np.ndarray] = [object_points_base] * NUM_PICTURES self.image_points: List[np.ndarray] = [] # points on image plane self.last_detection = time.time() self.num_detections = 0 self.display_scales: Dict["str", Any["float", "int"]] = {} def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]: # type: ignore """This node calculates the camera distortion coefficients for undistortion. Args: inputs (dict): Inputs dictionary with the key `img`. Returns: outputs (dict): Outputs dictionary with the key `img`. """ img = inputs["img"] gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) height, width = img.shape[:2] self._initialize_display_scales(width) start_point, end_point, text_pos = _get_box_info( self.num_detections, width, height ) detect_corners_success = False _draw_box(img, start_point, end_point, self.display_scales["box_thickness"]) text_to_draw = [f"{DEFAULT_TEXT[0]} ({self.num_detections+1}/{NUM_PICTURES})"] # if sufficient time has passed, attempt to detect corners if time.time() - self.last_detection >= 5: detect_corners_success, corners = self._detect_corners( height, width, gray_img ) # cv2 successfully detected the corners if detect_corners_success: corners_valid = _check_corners_validity( width, height, corners, start_point, end_point ) if corners_valid == IMAGE_TOO_SMALL: text_to_draw = TOO_SMALL if detect_corners_success and corners_valid == CORNERS_OK: self.image_points.append(corners) self.num_detections += 1 self._draw_text_and_corners(img, gray_img, corners, text_pos) # wait for user to press a key cv2.waitKey(0) self.last_detection = time.time() # if we have sufficient images, calculate the coefficients and write to a file if self.num_detections == NUM_PICTURES: calibration_data = self._calculate_coeffs( img_shape=gray_img.shape[::-1] ) self._write_coeffs(calibration_data) self._calculate_error(calibration_data) return {"pipeline_end": True} self._draw_text_and_countdown(img, text_to_draw, text_pos) return {"img": img} def _get_config_types(self) -> Dict[str, Any]: """Returns dictionary mapping the node's config keys to respective types.""" return {"num_corners": List[int], "scale_factor": int, "file_path": str} def _initialize_display_scales(self, img_width: int) -> None: """Initializes display scales if it hasn't been initialized before""" if not self.display_scales: self.display_scales = { "box_thickness": max(int(img_width * BOX_THICKNESS_RATIO), 1), "normal_font_scale": img_width * NORMAL_FONT_SCALE_RATIO, "countdown_font_scale": img_width * COUNTDOWN_FONT_SCALE_RATIO, "normal_font_thickness": int(img_width * NORMAL_FONT_THICKNESS_RATIO), "countdown_font_thickness": int( img_width * COUNTDOWN_FONT_THICKNESS_RATIO ), } def _detect_corners(self, height: int, width: int, gray_img: np.ndarray) -> tuple: """Detects corners in the image""" # downscale new_h = int(height / self.scale_factor) new_w = int(width / self.scale_factor) resized_img = cv2.resize(gray_img, (new_w, new_h), interpolation=cv2.INTER_AREA) # try to find chessboard corners detect_corners_success, corners = cv2.findChessboardCorners( image=resized_img, patternSize=self.num_corners, corners=None ) if corners is not None: corners = corners * self.scale_factor return detect_corners_success, corners def _calculate_coeffs(self, img_shape: tuple) -> tuple: """Performs calculations with detected corners""" calibration_data = cv2.calibrateCamera( objectPoints=self.object_points, imagePoints=self.image_points, imageSize=img_shape, cameraMatrix=None, distCoeffs=None, ) ( calibration_success, camera_matrix, distortion_coeffs, _, _, ) = calibration_data if calibration_success: self.logger.info("Calibration successful!") self.logger.info(f"Camera Matrix: {camera_matrix}") self.logger.info(f"Distortion Coefficients: {distortion_coeffs}") else: raise Exception("Calibration failed. Please try again.") return calibration_data def _write_coeffs(self, calibration_data: tuple) -> None: """Writes camera coefficients to a file""" (_, camera_matrix, distortion_coeffs, _, _) = calibration_data file_data = {} file_data["camera_matrix"] = camera_matrix.tolist() file_data["distortion_coeffs"] = distortion_coeffs.tolist() yaml.dump(file_data, open(self.file_path, "w"), default_flow_style=None) def _calculate_error(self, calibration_data: tuple) -> None: """Calculates re-projection error""" ( _, camera_matrix, distortion_coeffs, rotation_vec, translation_vec, ) = calibration_data mean_error = 0 for i in range(len(self.object_points)): projected_image_points, _ = cv2.projectPoints( objectPoints=self.object_points[i], rvec=rotation_vec[i], tvec=translation_vec[i], cameraMatrix=camera_matrix, distCoeffs=distortion_coeffs, ) error = cv2.norm( src1=self.image_points[i], src2=projected_image_points, normType=cv2.NORM_L2, ) error /= len(projected_image_points) mean_error += error self.logger.info(f"Total error: {mean_error / len(self.object_points)}") def _draw_text_and_corners( self, img: np.ndarray, gray_img: np.ndarray, corners: np.ndarray, text_pos: tuple, ) -> None: """Draws text and corners on image""" # improve corner accuracy corners_accurate = cv2.cornerSubPix( image=gray_img, corners=corners, winSize=(11, 11), zeroZone=(-1, -1), criteria=TERMINATION_CRITERIA, ) # draw corners and message on the image cv2.drawChessboardCorners( image=img, patternSize=self.num_corners, corners=corners_accurate, patternWasFound=True, ) if self.num_detections != NUM_PICTURES: text_to_draw = DETECTION_SUCCESS else: text_to_draw = DETECTION_COMPLETE _draw_text( img=img, texts=text_to_draw, pos_info=text_pos, font_scale=self.display_scales["normal_font_scale"], thickness=self.display_scales["normal_font_thickness"], ) # display the image cv2.imshow("PeekingDuck", img) def _draw_text_and_countdown( self, img: np.ndarray, text_to_draw: List[str], text_pos: tuple ) -> None: """Draws text and countdown on image""" _draw_text( img=img, texts=text_to_draw, pos_info=text_pos, font_scale=self.display_scales["normal_font_scale"], thickness=self.display_scales["normal_font_thickness"], ) time_to_next_detection = math.ceil(5 - time.time() + self.last_detection) if time_to_next_detection > 0: _draw_countdown( img=img, num=time_to_next_detection, font_scale=self.display_scales["countdown_font_scale"], thickness=self.display_scales["countdown_font_thickness"], )
def _get_box_info(num: int, width: int, height: int) -> tuple: """Returns start and end points of box, and position to put text""" start_points = { TOP_LEFT: (0, 0), TOP_RIGHT: (int(width * (1 - BOX_WIDTH_RATIO)), 0), BOTTOM_LEFT: (0, int(height * (1 - BOX_HEIGHT_RATIO))), BOTTOM_RIGHT: ( int(width * (1 - BOX_WIDTH_RATIO)), int(height * (1 - BOX_HEIGHT_RATIO)), ), MIDDLE: ( int(width * (1 / 2 - BOX_WIDTH_RATIO / 2)), int(height * (1 / 2 - BOX_HEIGHT_RATIO / 2)), ), } end_points = { TOP_LEFT: (int(width * BOX_WIDTH_RATIO), int(height * BOX_HEIGHT_RATIO)), TOP_RIGHT: (width, int(height * BOX_HEIGHT_RATIO)), BOTTOM_LEFT: (int(width * BOX_WIDTH_RATIO), height), BOTTOM_RIGHT: (width, height), MIDDLE: ( int(width * (1 / 2 + BOX_WIDTH_RATIO / 2)), int(height * (1 / 2 + BOX_HEIGHT_RATIO / 2)), ), } text_positions = { TOP_LEFT: (TEXT_PADDING, int(height * BOX_HEIGHT_RATIO) - TEXT_PADDING), TOP_RIGHT: ( int(width * (1 - BOX_WIDTH_RATIO)) + TEXT_PADDING, int(height * BOX_HEIGHT_RATIO) - TEXT_PADDING, ), BOTTOM_LEFT: ( TEXT_PADDING, int(height * (1 - BOX_HEIGHT_RATIO)) + TEXT_PADDING, ), BOTTOM_RIGHT: ( int(width * (1 - BOX_WIDTH_RATIO)) + TEXT_PADDING, int(height * (1 - BOX_HEIGHT_RATIO)) + TEXT_PADDING, ), MIDDLE: ( int(width * (1 / 2 - BOX_WIDTH_RATIO / 2)) + TEXT_PADDING, int(height * (1 / 2 + BOX_HEIGHT_RATIO / 2)) - TEXT_PADDING, ), } pos_types = { TOP_LEFT: BOTTOM_LEFT, TOP_RIGHT: BOTTOM_LEFT, BOTTOM_LEFT: TOP_LEFT, BOTTOM_RIGHT: TOP_LEFT, MIDDLE: BOTTOM_LEFT, } return start_points[num], end_points[num], (text_positions[num], pos_types[num]) def _check_corners_validity( width: int, height: int, corners: np.ndarray, start_point: tuple, end_point: tuple ) -> int: """Checks whether the corners are large enough and fall within the box""" min_w = width min_h = height max_w = 0 max_h = 0 for corner in corners: min_w = min(min_w, corner[0][0]) max_w = max(max_w, corner[0][0]) min_h = min(min_h, corner[0][1]) max_h = max(max_h, corner[0][1]) area = (max_w - min_w) * (max_h - min_h) # if area is less than 1/4 of the box size if area < width * BOX_WIDTH_RATIO * height * BOX_HEIGHT_RATIO / 4: return IMAGE_TOO_SMALL # if the board is completely out of the box if ( max_w < start_point[0] or end_point[0] < min_w or max_h < start_point[1] or end_point[1] < min_h ): return NOT_IN_BOX min_w_box = max(min_w, start_point[0]) max_w_box = min(max_w, end_point[0]) min_h_box = max(min_h, start_point[1]) max_h_box = min(max_h, end_point[1]) # check if at least 3 / 4 of the board area is within the box if (max_w_box - min_w_box) * (max_h_box - min_h_box) < area * AREA_THRESHOLD: return NOT_IN_BOX return CORNERS_OK def _draw_box( img: np.ndarray, start_point: tuple, end_point: tuple, box_thickness: int ) -> None: """Draws rectangle on the image""" cv2.rectangle( img=img, pt1=start_point, pt2=end_point, color=(0, 0, 0), thickness=3 * box_thickness, ) cv2.rectangle( img=img, pt1=start_point, pt2=end_point, color=CHAMPAGNE, thickness=box_thickness, ) def _draw_bgnd_box( img: np.ndarray, pt1: tuple, pt2: tuple, ) -> None: """Draws background box on image""" box_img = img.copy() # draw the rectangle cv2.rectangle(box_img, pt1, pt2, BLACK, cv2.FILLED) # apply the overlay cv2.addWeighted(box_img, BGND_BOX_OPACITY, img, 1 - BGND_BOX_OPACITY, 0, img) def _draw_text( img: np.ndarray, texts: List[str], pos_info: tuple, font_scale: float, thickness: int, ) -> None: """Draws text on the image""" pos, pos_type = pos_info text_width = 0 (_, text_height), baseline = cv2.getTextSize( text=texts[0], fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness, ) for text in texts: text_width = max( text_width, cv2.getTextSize( text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness, )[0][0], ) img_width = img.shape[1] box_width = min(text_width + 10, int(img_width * BOX_WIDTH_RATIO - 10)) if pos_type == TOP_LEFT: _draw_bgnd_box( img, (pos[0], pos[1]), ( pos[0] + box_width, pos[1] + len(texts) * (text_height + baseline + 5) + baseline + 5, ), ) pos = (pos[0] + 5, pos[1] + text_height + baseline + 5) elif pos_type == BOTTOM_LEFT: _draw_bgnd_box( img, (pos[0], pos[1] - len(texts) * (text_height + baseline + 5) - baseline - 5), (pos[0] + box_width, pos[1]), ) pos = ( pos[0] + 5, pos[1] - (text_height + baseline + 5) * (len(texts) - 1) - baseline - 5, ) for text in texts: cv2.putText( img=img, text=text, org=pos, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, color=CHAMPAGNE, thickness=thickness, lineType=cv2.LINE_AA, ) pos = (pos[0], pos[1] + text_height + baseline + 5) def _draw_countdown( img: np.ndarray, num: int, font_scale: float, thickness: int ) -> None: """Draws a countdown in the center of the screen""" height, width = img.shape[:2] text = str(num) text_width, text_height = cv2.getTextSize( text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness, )[0] pos = (int(width / 2 - text_width / 2), int(height / 2 + text_height / 2)) cv2.putText( img=img, text=text, org=pos, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, color=BLACK, thickness=thickness * 3, lineType=cv2.LINE_AA, ) cv2.putText( img=img, text=text, org=pos, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, color=TOMATO, thickness=thickness, lineType=cv2.LINE_AA, )