Source code for machinevisiontoolbox.ImageColor

#!/usr/bin/env python

import numpy as np
import spatialmath.base.argcheck as argcheck
import cv2 as cv

from machinevisiontoolbox.base import color, name2color
from machinevisiontoolbox.base import imageio

from scipy import interpolate

[docs]class ImageColorMixin:
    """
    Image processing color operations on the Image class
    """
[docs]    def mono(self, opt='r601'):
        """
        Convert color image to monochrome

        :param opt: greyscale conversion mode, one of: 'r601' [default], 'r709',
          'value' or 'cv'
        :type opt: str, optional
        :return: monochrome image
        :rtype: :class:`Image`

        Return a greyscale image of the same width and height as the color
        image.  Various conversion options are available:

        ===========  =====================================================
        ``opt``      definition
        ===========  =====================================================
        ``'r601'``   ITU Rec. 601, Y' = 0.229 R' + 0.587 G' + 0.114 B'
        ``'r709'``   ITU Rec. 709, Y' =  0.2126 R' + 0.7152 G' + 0.0722 B'
        ``'value'``  V (value) component of HSV space 
        ``'cv'``     OpenCV colorspace() RGB to gray conversion
        ===========  =====================================================

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('flowers1.png')
            >>> img
            >>> img.mono()

        .. note:: For a monochrome image returns a reference to the :class:`Image` instance.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.7, P. Corke, Springer 2023.

        :seealso: :meth:`colorspace` :meth:`colorize`
        """

        if not self.iscolor:
            return self

        if opt == 'r601':
            mono = 0.229 * self.red() + 0.587 * self.green() + \
                0.114 * self.blue()

        elif opt == 'r709':
            mono = 0.2126 * self.red() + 0.7152 * self.green() + \
                0.0722 * self.blue()

        elif opt == 'value':
            # 'value' refers to the V in HSV space, not the CIE L*
            # the mean of the max and min of RGB values at each pixel
            mn = self.image.min(axis=2)
            mx = self.image.max(axis=2)

            mono = mn / 2 + mx / 2

        elif opt == 'cv':
            if self.isrgb:
              return self.colorspace('gray', src="rgb")
            else:
              return self.colorspace('gray', src="bgr")
        else:
            raise TypeError('unknown type for opt')

        return self.__class__(self.cast(mono.image))


[docs]    def chromaticity(self, which='RG'):
        r"""
        Create chromaticity image

        :param which: string comprising single letter color plane names, defaults to 'RG'
        :type which: str, optional
        :return: chromaticity image
        :rtype: :class:`Image` instance

        Convert a tristimulus image to a chromaticity image.  For the case of
        an RGB image and ``which='RG'``

        .. math::
            r = \frac{R}{R+G+B}, \, g = \frac{G}{R+G+B}

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('flowers1.png')
            >>> img.chromaticity()
            >>> img.chromaticity('RB')

        .. note:: The chromaticity color planes are the same as ``which`` but
          lower cased.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.5, P. Corke, Springer 2023.

        :seealso: :func:`~machinevisiontoolbox.base.color.tristim2cc`
        """
        if not self.iscolor:
            raise ValueError('cannot compute chromaticity for greyscale image')
        if self.nplanes != 3:
            raise ValueError('expecting 3 plane image')

        sum = np.sum(self.image, axis=2)
        r = self.plane(which[0]).image / sum
        g = self.plane(which[1]).image / sum

        return self.__class__(np.dstack((r, g)), colororder=which.lower(), dtype="float32")


[docs]    def colorize(self, color=[1, 1, 1], colororder='RGB', alpha=False):
        """
        Colorize a greyscale image

        :param color: base color 
        :type color: string, array_like(3)
        :param colororder: order of color channels of resulting image
        :type colororder: str, dict
        :return: color image
        :rtype: :class:`Image` instance

        The greyscale image is colorized by setting each output pixel to the product
        of ``color`` and the input pixel value.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png')
            >>> img.colorize([1, 0, 0])  # red shark
            >>> img.colorize('blue')  # blue shark

        :references:
            - Robotics, Vision & Control for Python, Section 11.3, P. Corke, Springer 2023.

        :seealso: :meth:`mono`
        """

        # TODO, colorize all in list
        if isinstance(color, str):
            color = name2color(color)
        else:
            color = argcheck.getvector(color).astype(self.dtype)
        if self.iscolor:
            raise ValueError(self.image, 'Image must be greyscale')

        # alpha can be False, True, or scalar
        if alpha is False:
            out = np.dstack((color[0] * self.image,
                             color[1] * self.image,
                             color[2] * self.image))
        else:
            if alpha is True:
              alpha = 1

            out = np.dstack((color[0] * self.image,
                           color[1] * self.image,
                           color[2] * self.image,
                           alpha * np.ones(self.shape)))

        if self.isint and np.issubdtype(color.dtype, np.floating):
            out = self.cast(out)

        return self.__class__(out, colororder=colororder)


[docs]    def kmeans_color(self, k=None, centroids=None, seed=None):
        """
        k-means color clustering

        **Training**

        :param k: number of clusters, defaults to None
        :type k: int, optional
        :param seed: random number seed, defaults to None
        :type seed: int, optional
        :return: label image, centroids and residual
        :rtype: :class:`Image`, ndarray(P,k), float

        The pixels are grouped into ``k`` clusters based on their Euclidean
        distance from ``k`` cluster centroids.  Clustering is iterative and
        the intial cluster centroids are random.

        The method returns a label image, indicating the assigned cluster for
        each input pixel, the cluster centroids and a residual.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> targets = Image.Read("tomato_124.png", dtype="float", gamma="sRGB")
            >>> ab = targets.colorspace("L*a*b*").plane("a*:b*")
            >>> targets_labels, targets_centroids, resid = ab.kmeans_color(k=3, seed=0)
            >>> targets_centroids

        **Classification**

        :param centroids: cluster centroids from training phase
        :type centroids: ndarray(P,k)
        :return: label image
        :rtype: :class:`Image`

        Pixels in the input image are assigned the label of the closest centroid.

        .. note:: The colorspace of the images could a chromaticity space to classify
          objects while ignoring brightness variation.

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.1.2, P. Corke, Springer 2023.

      :seealso: `opencv.kmeans <https://docs.opencv.org/3.4/d5/d38/group__core__cluster.html#ga9a34dc06c6ec9460e90860f15bcd2f88>`_
      """
        # TODO
        # colorspace can be RGB, rg, Lab, ab

        if seed is not None:
            cv.setRNGSeed(seed)
        
        data = self.to_float().reshape((-1, self.nplanes))
        criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)

        if k is not None:
            # perform clustering
            ret, label, centres = cv.kmeans(
                    data=data,
                    K= k,
                    bestLabels=None,
                    criteria=criteria,
                    attempts=10,
                    flags=cv.KMEANS_RANDOM_CENTERS
                )
            return self.__class__(label.reshape(self.shape[:2])), centres.T, ret
        
        elif centroids is not None:
            # assign pixels to given cluster centres
            # M x K
            k = centroids.shape[1]
            data = np.repeat(data[..., np.newaxis], k, axis=2)  # N x M x K

            # compute L2 norm over the error
            distance = np.linalg.norm(data - centroids, axis=1)  # N x K

            # now find which cluster centre gave the smallest error 
            label = np.argmin(distance, axis=1)

            return self.__class__(label.reshape(self.shape[:2]))

[docs]    def colorspace(self, dst, src=None):
        """
        Transform a color image between color representations

        :param dst: destination color space (see below)
        :type dst: str
        :param src: source color space (see below), defaults to colororder of image
        :type src: str, optional
        :return: color image in new colorspace
        :rtype: :class:`Image`

        Color space names (synonyms listed on the same line) are:

        =======================     ======================
        Color space name            Option string(s)
        =======================     ======================
        grey scale                  'grey', 'gray'
        RGB (red/green/blue)        'rgb'
        BGR (blue/green/red)        'bgr'
        CIE XYZ                     'xyz', 'xyz_709'
        YCrCb                       'ycrcb'
        HSV (hue/sat/value)         'hsv'
        HLS (hue/lightness/sat)     'hls'
        CIE L*a*b*                  'lab', 'l*a*b*'
        CIE L*u*v*                  'luv', 'l*u*v*'
        =======================     ======================

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> im = Image.Read('flowers1.png')
            >>> im.colorspace('hsv')

        .. note:: RGB images are assumed to be linear, or gamma decoded.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.7, 10.4.1, P. Corke, Springer 2023.

        :seealso: :meth:`mono` :func:`~machinevisiontoolbox.base.color.colorspace_convert`
        """

        # TODO other color cases
        # TODO check conv is valid

        # TODO conv string parsing

        # ensure floats? unsure if cv.cvtColor operates on ints
        # imf = self.to_float()

        if src is None:
            src = self.colororder_str

        # options gamma, on by default if to is RGB or BGR
        # options white on by default

        out = []
        # print('converting from', src, 'to', dst)

        out = color.colorspace_convert(self.image, src, dst)
        # print('conversion done')
        if out.ndim > 2:
            colororder = dst
            colororder = colororder.replace("*", "*:", 2)
        else:
            colororder = None

        return self.__class__(out, dtype=self.dtype, colororder=colororder)


[docs]    @classmethod
    def Overlay(cls, im1, im2, colors='rc'):
        """
        Overlay two greyscale images in different colors

        :param im1: first image
        :type im1: :class:`Image`
        :param im2: second image
        :type im2: :class:`Image`
        :param colors: colors for each image, defaults to 'rc''
        :type colors: 2-element string/list/tuple, optional
        :raises ValueError: images must be greyscale
        :return: overlaid images
        :rtype: :class:`Image`

        Two greyscale images are overlaid in different colors.  Useful for
        visualizing disparity or optical flow.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img1 = Image.Read('eiffel-1.png', mono=True)
            >>> img2 = Image.Read('eiffel-2.png', mono=True)
            >>> Image.Overlay(img1, img2)
            >>> Image.Overlay(img1, img2, 'rg')
            >>> Image.Overlay(img1, img2, ((1, 0, 0), (0, 1, 0)))

        .. note:: Images can be different size, the output image size is the 
          maximum of the dimensions of the input images.  Small dimensions are
          zero padded.  The top-left corner of both images are aligned.

        :seealso: :meth:`anaglyph` :meth:`blend` :meth:`stshow`
        """
        if im1.iscolor or im2.iscolor:
            raise ValueError('images must be greyscale')
        h = max(im1.height, im2.height)
        w = max(im1.width, im2.width)
        overlay = cls.Constant(w, h, [0, 0, 0], colororder='RGB')
        im1 = im1.colorize(colors[0]) 
        im2 = im2.colorize(colors[1])
        overlay.paste(im1, (0,0), 'add', copy=False)
        overlay.paste(im2, (0,0), 'add', copy=False)
        return overlay

[docs]    def gamma_encode(self, gamma):
        r"""
        Gamma encoding

        :param gamma: gamma value
        :type gamma: str, float
        :return: gamma encoded version of image
        :rtype: :class:`Image`

        Gamma encode the image.  This takes a linear luminance image and
        converts it to a form suitable for display on a non-linear monitor.
        ``gamma`` is either the string 'sRGB' for IEC 61966-2-1:1999 or a float:

        .. math:: \mat{Y}_{u,v} = \mat{X}_{u,v}^\gamma

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image(np.arange(8)[np.newaxis, :])  # create grey step wedge
            >>> img.gamma_encode('sRGB').disp()

        .. note::
            - ``gamma`` is the reciprocal of the value used for gamma decoding
            - Gamma encoding is typically performed in a camera with
              :math:`\gamma=0.45`.
            - For images with multiple planes, the gamma encoding is applied
              to all planes.
            - For floating point images, the pixels are assumed to be in the
              range 0 to 1.
            - For integer images,the pixels are assumed in the range 0 to
              the maximum value of their class.  Pixels are converted first to
              double, processed, then converted back to the integer class.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.7, 10.3.6, P. Corke, Springer 2023.

        :seealso: :meth:`gamma_encode` :meth:`colorspace`
        """
        out = color.gamma_encode(self.image, gamma)
        return self.__class__(out)

[docs]    def gamma_decode(self, gamma):
        r"""
        Gamma decoding

        :param gamma: gamma value
        :type gam: string or float
        :return: gamma decoded version of image
        :rtype: Image instance

        Gamma decode the image.  This takes a gamma-encoded image, as typically
        obtained from a camera or image file, and converts it to a linear
        luminance image.  ``gamma`` is either the string 'sRGB' for IEC
        61966-2-1:1999 or a float:

        .. math:: \mat{Y}_{u,v} = \mat{X}_{u,v}^\gamma

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('street.png')
            >>> linear = img.gamma_decode('sRGB')

        .. note::
            - ``gamma`` is the reciprocal of the value used for gamma encoding
            - Gamma decoding should be applied to any color image prior to
              colometric operations.
            - Gamma decoding is typically performed in the display hardware with
              :math:`\gamma=2.2`.
            - For images with multiple planes, the gamma decoding is applied
              to all planes.
            - For floating point images, the pixels are assumed to be in the
              range 0 to 1.
            - For integer images,the pixels are assumed in the range 0 to
              the maximum value of their class.  Pixels are converted first to
              double, processed, then converted back to the integer class.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.7, 10.3.6, P. Corke, Springer 2023.

        :seealso: :meth:`gamma_encode` :meth:`colorspace`
        """
        out = color.gamma_decode(self.image, gamma)
        return self.__class__(out, colororder=self.colororder)

# --------------------------------------------------------------------------- #
if __name__ == "__main__":

    import pathlib
    import os.path

    from machinevisiontoolbox import Image

    # quick visual check: overlay two greyscale views in red and cyan
    im1 = Image.Read('eiffel-1.png', mono=True)
    im2 = Image.Read('eiffel-2.png', mono=True)
    Image.Overlay(im1, im2, 'rc').disp(block=True)

    # then run the color unit tests that live in the sibling tests folder;
    # read_text() opens and closes the file rather than leaving the handle
    # from a bare open() to be collected later
    test_script = pathlib.Path(__file__).parent.parent.absolute() / "tests" / "test_color.py"
    # NOTE: the script is executed in this module's namespace, so the names
    # imported above are visible to it
    exec(test_script.read_text())  # pylint: disable=exec-used