Source code for machinevisiontoolbox.ImageFiducials

"""
Detection of fiducial markers (ArUco, AprilTag, QR code) in images.
"""

from __future__ import annotations

import math
from abc import abstractmethod
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import cv2
import matplotlib.pyplot as plt
import numpy as np
from ansitable import ANSITable, Column
from spatialmath import SE3

from machinevisiontoolbox.ImagePointFeatures import BaseFeature2D

# https://docs.opencv.org/4.4.0/d7/d60/classcv_1_1SIFT.html


if TYPE_CHECKING:
    from machinevisiontoolbox.ImageCore import Image
    from machinevisiontoolbox._image_typing import _ImageBase


def _fiducial_dict(dict="4x4_1000"):
    tag_dict = {
        "4x4_50": cv2.aruco.DICT_4X4_50,
        "4x4_100": cv2.aruco.DICT_4X4_100,
        "4x4_250": cv2.aruco.DICT_4X4_250,
        "4x4_1000": cv2.aruco.DICT_4X4_1000,
        "5x5_50": cv2.aruco.DICT_5X5_50,
        "5x5_100": cv2.aruco.DICT_5X5_100,
        "5x5_250": cv2.aruco.DICT_5X5_250,
        "5x5_1000": cv2.aruco.DICT_5X5_1000,
        "6x6_50": cv2.aruco.DICT_6X6_50,
        "6x6_100": cv2.aruco.DICT_6X6_100,
        "6x6_250": cv2.aruco.DICT_6X6_250,
        "6x6_1000": cv2.aruco.DICT_6X6_1000,
        "7x7_50": cv2.aruco.DICT_7X7_50,
        "7x7_100": cv2.aruco.DICT_7X7_100,
        "7x7_250": cv2.aruco.DICT_7X7_250,
        "7x7_1000": cv2.aruco.DICT_7X7_1000,
        "original": cv2.aruco.DICT_ARUCO_ORIGINAL,
        "16h5": cv2.aruco.DICT_APRILTAG_16h5,
        "25h9": cv2.aruco.DICT_APRILTAG_25h9,
        "36h10": cv2.aruco.DICT_APRILTAG_36h10,
        "36h11": cv2.aruco.DICT_APRILTAG_36h11,
    }
    if isinstance(dict, str):
        return cv2.aruco.getPredefinedDictionary(dict=tag_dict[dict])
    else:
        return dict


class ImageFiducialsMixin(_ImageBase if TYPE_CHECKING else object):
    def fiducial(
        self,
        dict: str = "4x4_1000",
        K: np.ndarray | None = None,
        side: float | None = None,
    ) -> list[Fiducial]:
        """
        Find fiducial markers in image

        :param dict: marker type, defaults to "4x4_1000"
        :type dict: str, optional
        :param K: camera intrinsics, defaults to None
        :type K: ndarray(3,3), optional
        :param side: side length of the marker, defaults to None
        :type side: float, optional
        :return: markers found in image
        :rtype: list of :class:`Fiducial` instances

        Find ArUco or ApriTag markers in the scene and return a list of
        :class:`Fiducial` objects, one per marker.  If camera intrinsics are
        provided then also compute the marker pose with respect to the camera.

        ``dict`` specifies the marker family or dictionary and describes the
        number of bits in the tag and the number of usable unique tags.

        ============  ========   ===========   =====================
        dict          tag type   marker size   number of unique tags
        ============  ========   ===========   =====================
        ``4x4_50``    Aruco      4x4           50
        ``4x4_100``   Aruco      4x4           100
        ``4x4_250``   Aruco      4x4           250
        ``4x4_1000``  Aruco      4x4           1000
        ``5x5_50``    Aruco      5x5           50
        ``5x5_100``   Aruco      5x5           100
        ``5x5_250``   Aruco      5x5           250
        ``5x5_1000``  Aruco      5x5           1000
        ``6x6_50``    Aruco      6x6           50
        ``6x6_100``   Aruco      6x6           100
        ``6x6_250``   Aruco      6x6           250
        ``6x6_1000``  Aruco      6x6           1000
        ``7x7_50``    Aruco      7x7           50
        ``7x7_100``   Aruco      7x7           100
        ``7x7_250``   Aruco      7x7           250
        ``7x7_1000``  Aruco      7x7           1000
        ``original``  Aruco      ?             ?
        ``16h5``      AprilTag   4x4           30
        ``25h9``      AprilTag   5x5           35
        ``36h10``     AprilTag   6x6           ?
        ``36h11``     AprilTag   6x6           587
        ============  ========   ===========   =====================

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('tags.png')
            >>> fiducials = img.fiducial('5x5_50')
            >>> fiducials
            >>> fiducials[0].corners

        .. note:: ``side`` is the dimension of the square that contains the
            small white squares inside the black background.

        :references:
            - |RVC3|, Section 13.6.1.

        :seealso: :class:`Fiducial` :class:`ArUcoBoard`
        """

        dictionary = _fiducial_dict(dict)
        detector = cv2.aruco.ArucoDetector(dictionary)
        cornerss, ids, _ = detector.detectMarkers(image=self.mono()._A)

        # corners is a list of marker corners, one element per tag
        #  each element is 1x4x2 matrix holding corner coordinates
        #
        # ids is ndarray of shape (N,1) holding the tag ids

        fiducials = []
        if ids is None or len(ids) == 0:
            return fiducials  # no markers found
        if K is not None and side is not None:
            rvecs, tvecs, p3d = cv2.aruco.estimatePoseSingleMarkers(
                corners=cornerss, markerLength=side, cameraMatrix=K, distCoeffs=None
            )
            for id, rvec, tvec, corners in zip(ids, rvecs, tvecs, cornerss):
                fiducials.append(Fiducial(id[0], corners[0].T, K, rvec, tvec, p3d))
        else:
            for id, corners in zip(ids, cornerss):
                fiducials.append(Fiducial(id[0], corners[0].T))

        return fiducials


# --------------------- supporting classes -------------------------------- #



[docs]
class Fiducial:
    def __init__(
        self,
        id: int,
        corners: np.ndarray,
        K: np.ndarray | None = None,
        rvec: np.ndarray | None = None,
        tvec: np.ndarray | None = None,
        p3d: np.ndarray | None = None,
    ) -> None:
        """
        Properties of a visual fiducial marker

        :param id: identity of the marker
        :type id: int
        :param corners: image plane marker corners
        :type corners: ndarray(2, 4)
        :param K: camera intrinsics
        :type K: ndarray(3,3), optional
        :param rvec: translation of marker with respect to camera, as an Euler vector
        :type rvec: ndarray(3), optional
        :param tvec: translation of marker with respect to camera
        :type tvec: ndarray(3), optional

        :seealso: :meth:`id` :meth:`pose` :meth:`draw`
            :meth:`~machinevisiontoolbox.ImageFeatures.ImageFeaturesMixin.fiducial`
        """
        self._id = id
        self.corners = corners  # strip first dimensions
        self.K = K
        if tvec is not None and rvec is not None:
            self._pose = SE3(tvec) * SE3.EulerVec(rvec.flatten())
        else:
            self._pose = None
        self.rvec = rvec
        self.tvec = tvec
        self.p3d = p3d

    def __str__(self) -> str:
        """
        String representation of fiducial

        :return: Brief readable description of fidicual id and pose
        :rtype: str
        """
        s = f"id={self.id}"
        if self.pose is not None:
            s += ": " + self.pose.strline()
        return s

    def __repr__(self) -> str:
        s = f"Fiducial(id={self.id}"
        if self.pose is not None:
            s += f", pose=[{self.pose.strline()}]"
        return s + ")"

    # def plot(self, ax=None):
    #     ax = _axes_logic(ax, 2)

    @property
    def id(self) -> int:
        """
        Fiducial id

        :return: fiducial marker identity
        :rtype: int

        Returns the built in identity code of the April tag or arUco marker.
        """
        return self._id

    @property
    def pose(self) -> SE3 | None:
        """
        Fiducial pose

        :return: marker pose
        :rtype: SE3

        Returns the pose of the tag with respect to the camera.  The x- and
        y-axes are in the marker plane and the z-axis is out of the marker.

        .. note:: Accurate camera intrinsics and dimension parameters are
            required for this value to be metric.
        """
        return self._pose


[docs]
    def draw(self, image: Image, length: int = 100, thick: int = 2) -> None:
        """
        Draw marker coordinate frame into image

        :param image: image with BGR color order
        :type image: :class:`Image`
        :param length: axis length in pixels, defaults to 100
        :type length: int, optional
        :param thick: axis thickness in pixels, defaults to 2
        :type thick: int, optional
        :raises ValueError: image must have BGR color order

        Draws a coordinate frame into the image representing the pose of the
        marker.  The x-, y- and z-axes are drawn as red, green and blue line
        segments.
        """
        if not image.isbgr:
            raise ValueError("image must have BGR color order")
        cv2.drawFrameAxes(
            image=image._A,
            cameraMatrix=self.K,
            distCoeffs=np.array([]),
            rvec=self.rvec,
            tvec=self.tvec,
            length=length,
            thickness=thick,
        )



[docs]
    @classmethod
    def create(cls, dict: str, id: int, sidelength: int) -> Image:
        """
        Create a fiducial marker image

        :param dict: marker type, eg. '6x6_1000'
        :type dict: str
        :param id: marker id
        :type id: int
        :param sidelength: side length of the marker in pixels
        :type sidelength: int
        :return: ``uint8`` marker image
        :rtype: :class:`Image`

        The image is a square of size ``sidelength`` x ``sidelength`` black (value 0) pixels.
        The id pattern comprises white pixels (value 255) contained within the black
        square.  There is no white border. The pattern is generated using the OpenCV library.

        :seealso: :func:`~opencv.aruco.generateImageMarker`
        """
        from machinevisiontoolbox import Image

        img = cv2.aruco.generateImageMarker(
            dictionary=_fiducial_dict(dict), id=id, sidePixels=sidelength
        )
        return Image(img)




class FiducialCollection:
    def __init__(
        self, dict: str, ntags: int, firsttag: int = 0, name: str | None = None
    ) -> None:
        """Create a FiducialCollection object

        :param dict: marker type, eg. '6x6_1000'
        :type dict: str
        :param ntags: number of markers in the collection
        :type ntags: int
        :param name: name of the board, defaults to None
        :type name: str, optional
        :param firsttag: ID of the first tag, defaults to 0
        :type firsttag: int, optional

        This object represents a collection of fiducial markers, such as an ArUco board.
        The type of markers, ArUco or custom, is specified by the ``dict`` parameter.

        The markers on the board have ids start at ``firsttag`` and are numbered
        sequentially.  These ids are used to:
            - filter the markers, useful if there are several ArUco boards in the scene
            - create an ArUco board image

        """
        self._name = name
        self._dict = _fiducial_dict(dict)

        ids = list(range(firsttag, firsttag + ntags))
        self._ids = set(ids)
        self._ntags = ntags

    def estimatePose(
        self,
        image: Image,
        camera: Any,
        return_markers: bool = False,
        params: dict | None = None,
    ) -> Any:
        """Estimate the pose of the board

        :param image: image containing the board
        :type image: Image
        :param camera: model of the camera, including intrinsics and distortion parameters
        :type camera: :class:`CentralCamera`
        :raises ValueError: the boards pose could not be estimated
        :param return_markers: return the detected markers and their pose, defaults to False
        :type return_markers: bool, optional
        :param params: dictionary of corner detector parameters
        :type params: dict
        :return: Camera pose with respect to board origin, vector of residuals in units of pixels in marker ID order, corresponding marker IDs
        :rtype: 3-tuple of SE3, numpy.ndarray, numpy.ndarray, optionally list of :class:`Fiducial`

        Residuals are the Euclidean distance between the detected marker corners and the
        reprojected corners in the image plane, in units of pixels.  The mean and maximum
        residuals are useful for assessing the quality of the pose estimate.

        If `return_markers` is True then also return a list of :class:`Fiducial` instances,
        one per ArUco marker on the board.  This describes the marker's identity and
        the coordinates of its corners clockwise from the canonical top-left corner.

        Corner detector parameters can be passed in as a dictionary.  There are many parameters,
        see the `details here <https://docs.opencv.org/4.x/d1/dcd/structcv_1_1aruco_1_1DetectorParameters.html#aacfce88ccc0b5b494e002c9d63848760>`_.
        Most are numeric, but some are enumerated types.  For example, to enable
        sub-pixel refinement of corners use::

            board.estimatePose(image, camera, dict(cornerRefinementMethod="subpix"))
        """

        # find the markers in the image
        #  cornnerss is a list of (1,4,2) shaped arrays, each holding the corners of a marker
        #  ids is an (N,1) shaped array of marker ID
        arucoParams = cv2.aruco.DetectorParameters()
        refine_dict = {
            "none": cv2.aruco.CORNER_REFINE_NONE,
            "subpix": cv2.aruco.CORNER_REFINE_SUBPIX,
            "contour": cv2.aruco.CORNER_REFINE_CONTOUR,
            "apriltag": cv2.aruco.CORNER_REFINE_APRILTAG,
        }
        if params is not None:
            for key, item in params.items():
                if hasattr(arucoParams, key):
                    if key == "cornerRefinementMethod":
                        item = refine_dict[item]
                    setattr(arucoParams, key, item)

        detector = cv2.aruco.ArucoDetector(self._dict, arucoParams)
        cornerss, ids, rejected = detector.detectMarkers(image=image.mono()._A)

        # corners is a list of ndarray(1,4,2) of marker corners
        # ids is ndarray of shape (N,1) holding the tag ids
        # rejected is a list of rejected corners from tags whose inner code is incorrectly coded

        # filter tags by ID
        cornerss = [
            corners.T.squeeze()
            for corners, id in zip(cornerss, ids)
            if id[0] in self._ids
        ]
        ids = [id[0] for corners, id in zip(cornerss, ids) if id[0] in self._ids]
        # ids = np.reshape(ids, (-1, 1))

        # match the markers to the board
        # print(f"{len(ids)} markers found")
        objPoints, imgPoints = self.matchImagePoints(cornerss, ids)

        # objPoints is ndarray(N,1,3) of 3D points in the board frame
        # imgPoints is ndarray(N,1,2) of 2D points in the image frame
        # where N is the total number of corners found

        # solve for camera pose
        retval, rvec, tvec = cv2.solvePnP(
            objectPoints=objPoints,
            imagePoints=imgPoints,
            cameraMatrix=camera.K,
            distCoeffs=camera.distortion,
            flags=cv2.SOLVEPNP_ITERATIVE,
        )

        if not retval:
            raise ValueError("solvePnP failed")
        # print(f"rotation: {rvec.T}")
        # print(f"translation: {tvec.T}")
        self._tvec = tvec
        self._rvec = rvec

        @dataclass
        class statistics:
            RMSE: float
            Rmax: float
            residuals: np.ndarray

        # compute the reprojection error
        reprojection, _ = cv2.projectPoints(
            objectPoints=objPoints,
            rvec=rvec,
            tvec=tvec,
            cameraMatrix=camera.K,
            distCoeffs=camera.distortion,
        )
        diff = (imgPoints - reprojection).squeeze()
        residuals = np.linalg.norm(diff, axis=1)

        RMSE = math.sqrt(np.mean(residuals**2))
        Rmax = np.max(residuals)

        resid_tag = {id: r for id, r in zip(ids, residuals.reshape(-1, 4))}
        stats = statistics(RMSE, Rmax, residuals)

        T = SE3(tvec) * SE3.EulerVec(rvec.flatten())

        if return_markers:
            fiducials = []
            for id, corners in zip(ids, cornerss):
                fiducial = Fiducial(id, corners)
                residuals = resid_tag[id]
                RMSE = np.sqrt(np.mean(residuals**2))
                Rmax = residuals.max()
                fiducial.stats = statistics(RMSE, Rmax, residuals)

                fiducials.append(fiducial)
            return T, stats, ids, fiducials
        else:
            return T, stats, ids

    @abstractmethod
    def matchImagePoints(
        self, cornerss: list[np.ndarray], ids: list[int]
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Match 2D image points to 3D board points

        This is an abstract method that must be implemented by subclasses.

        :param cornerss: List of corners of fiducial
        :type cornerss: list of ndarray(2,4)
        :param ids: List of fiducial ids
        :type ids: list of int
        :return: 3D object points and 2D image points
        :rtype: ndarray(3,4), ndarray(2,4)
        """
        pass

    def draw(
        self, image: Image, camera: Any, length: float = 0.1, thick: int = 2
    ) -> None:
        """
        Draw board coordinate frame into image

        :param image: image with BGR color order
        :type image: :class:`Image`
        :param length: axis length in metric units, defaults to 0.1
        :type length: float, optional
        :param thick: axis thickness in pixels, defaults to 2
        :type thick: int, optional
        :raises ValueError: image must have BGR color order

        Draws a coordinate frame into the image representing the pose of the
        board.  The x-, y- and z-axes are drawn as red, green and blue line
        segments.

        .. note:: the ``length`` is specified in the same units as focal length and
            pixel size of the camera, and the marker dimensions, typically meters.
        """
        if not image.isbgr:
            raise ValueError("image must have BGR color order")
        cv2.drawFrameAxes(
            image=image._A,
            cameraMatrix=camera.K,
            distCoeffs=camera.distortion,
            rvec=self._rvec,
            tvec=self._tvec,
            length=length,
            thickness=thick,
        )

    def __repr__(self) -> str:
        s = f"{self.__class__.__name__}(dict={self._dict}, ntags={self._ntags}"
        if self._name is not None:
            s += f", name={self._name}"
        return s + ")"


class ArUcoBoard(FiducialCollection):
    def __init__(
        self,
        layout: tuple[int, int],
        sidelength: float,
        separation: float,
        dict: str,
        name: str | None = None,
        firsttag: int = 0,
    ) -> None:
        """Create an ArUcoBoard object

        :param layout: number of markers in the x- and y-directions
        :type layout: 2-tuple of int
        :param sidelength: Side length of each marker
        :type sidelength: float
        :param separation: White space between markers, must be the same in both directions
        :type separation: float
        :param dict: marker type, eg. '6x6_1000'
        :type dict: str
        :param name: name of the board, defaults to None
        :type name: str, optional
        :param firsttag: ID of the first tag, defaults to 0
        :type firsttag: int, optional
        :raises ValueError: if the ``layout`` is not a 2-tuple of integers

        This object represents a board of markers, such as an ArUco board.  The board comprises
        a regular grid of markers each of which has a known ``sidelength`` and ``separation``.  The grid
        has :math:`n_x \times n_y` markers in the x and y directions respectively, and
        ``layout``=:math:`(n_x, n_y)`.  The type of markers, ArUco or custom, is specified by the
        ``dict`` parameter.

        The markers on the board have ids start at ``firsttag`` and are numbered sequentially.  These ids
        are used to:
            - filter the markers, useful if there are several ArUco boards in the scene
            - create an ArUco board image

        .. note:: the dimensions must be in the same units as camera focal length and
            pixel size, typically meters.
        """
        super().__init__(dict, layout[0] * layout[1], firsttag=firsttag, name=name)

        self._layout = layout
        if len(layout) != 2:
            raise ValueError("layout must be a tuple of two integers")
        self._sidelength = sidelength
        self._separation = separation

        self._board = cv2.aruco.GridBoard(
            size=layout,
            markerLength=sidelength,
            markerSeparation=separation,
            dictionary=self._dict,
            ids=np.array(list(self._ids)),
        )

    def matchImagePoints(
        self, cornerss: list[np.ndarray], ids: list[int]
    ) -> tuple[np.ndarray, np.ndarray]:
        """Match 2D image points to 3D board points

        :param cornerss: List of corners of fiducial
        :type cornerss: list of ndarray(2,4)
        :param ids: List of fiducial ids
        :type ids: list of int
        :return: 3D object points and 2D image points
        :rtype: ndarray(M,1,3), ndarray(M,1,2), where M is the number of corners found (multiple of 4)
        """
        return self._board.matchImagePoints(
            [corners.T.reshape(1, 4, 2) for corners in cornerss], np.c_[ids]
        )

    def chart(self, filename: str | None = None, dpi: int = 100) -> Image | None:
        """Write ArUco chart to a file

        :param filename: name of the file to write chart to, defaults to returning an :class:`Image` instance
        :type filename: str
        :param dpi: dots per inch of printer, defaults to 100
        :type dpi: int, optional
        :return: :class:`Image` if ``filename`` is None
        :rtype: Image or None

        PIL is used to write the file, and can support multiple formats (specified
        by the file extension) such as PNG, PDF, etc.

        The markers have ids that start at the value given in the constructor and are
        numbered sequentially.  The markers are arranged in a grid of size given and
        increase in the x-direction (rows) first.

        If a PDF file is written the chart can be printed at 100% scale factor and will
        have the correct dimensions.  The size is of the chart is invariant to the
        ``dpi`` parameter, that simply affects the resolution of the image and file size.

        .. note:: This method assumes that the dimensions given in the constructor are in
            meters.
        """
        # dots per m
        dpm = dpi * 1000 / 25.4

        # compute size of chart in metres based on marker size and separation
        width = (
            self._layout[0] * (self._sidelength + self._separation) - self._separation
        )
        height = (
            self._layout[1] * (self._sidelength + self._separation) - self._separation
        )

        # convert to pixels
        width = int(width * dpm)
        height = int(height * dpm)

        # generate the image
        img = self._board.generateImage((width, height))

        if filename is None:
            from machinevisiontoolbox import Image

            return Image(img)
        else:
            from PIL import Image as PILImage

            img = PILImage.fromarray(img)
            img.save(filename, dpi=(dpi, dpi))
            return None


if __name__ == "__main__":
    from pathlib import Path

    import pytest

    pytest.main(
        [
            str(
                Path(__file__).parent.parent.parent
                / "tests"
                / "test_image_fiducials.py"
            ),
            "-v",
        ]
    )