#!/usr/bin/env python

import numpy as np
import spatialmath.base.argcheck as argcheck
import cv2 as cv

from machinevisiontoolbox.base import color, name2color
from machinevisiontoolbox.base import imageio

from scipy import interpolate

class ImageColorMixin:
    """
    Image processing color operations on the Image class
    """

    def mono(self, opt='r601'):
        """
        Convert color image to monochrome

        :param opt: greyscale conversion mode, one of: 'r601' [default],
            'r709', 'value' or 'cv'
        :type opt: str, optional
        :return: monochrome image
        :rtype: :class:`Image`

        Return a greyscale image of the same width and height as the color
        image.  Various conversion options are available:

        =========== =====================================================
        ``opt``     definition
        =========== =====================================================
        ``'r601'``  ITU Rec. 601, Y' = 0.299 R' + 0.587 G' + 0.114 B'
        ``'r709'``  ITU Rec. 709, Y' = 0.2126 R' + 0.7152 G' + 0.0722 B'
        ``'value'`` V (value) component of HSV space
        ``'cv'``    OpenCV colorspace() RGB to gray conversion
        =========== =====================================================

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('flowers1.png')
            >>> img
            >>> img.mono()

        .. note:: For a monochrome image, returns a reference to the
            :class:`Image` instance.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.7,
              P. Corke, Springer 2023.

        :seealso: :meth:`colorspace` :meth:`colorize`
        """
        if not self.iscolor:
            return self

        if opt == 'r601':
            # ITU Rec. 601 luma weighting
            mono = 0.299 * self.red() + 0.587 * self.green() + \
                   0.114 * self.blue()

        elif opt == 'r709':
            # ITU Rec. 709 luma weighting
            mono = 0.2126 * self.red() + 0.7152 * self.green() + \
                   0.0722 * self.blue()

        elif opt == 'value':
            # 'value' refers to the V in HSV space, not the CIE L*
            # the mean of the max and min of RGB values at each pixel
            mn = self.image.min(axis=2)
            mx = self.image.max(axis=2)
            mono = self.__class__(mn / 2 + mx / 2)

        elif opt == 'cv':
            if self.isrgb:
                return self.colorspace('gray', src="rgb")
            else:
                return self.colorspace('gray', src="bgr")

        else:
            raise ValueError(f"unknown greyscale conversion option: {opt}")

        return self.__class__(self.cast(mono.image))

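    # A minimal sketch of the Rec. 601 weighting applied directly to the
    # underlying NumPy array (assumes a float RGB image; equivalent to
    # mono(opt='r601'); 'flowers1.png' is the sample image used in the
    # docstring examples above):
    #
    #   rgb = Image.Read('flowers1.png', dtype='float').image
    #   luma = 0.299 * rgb[..., 0] + 0.587 * rgb[..., 1] + 0.114 * rgb[..., 2]
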
    def chromaticity(self, which='RG'):
        r"""
        Create chromaticity image

        :param which: string comprising single letter color plane names,
            defaults to 'RG'
        :type which: str, optional
        :return: chromaticity image
        :rtype: :class:`Image` instance

        Convert a tristimulus image to a chromaticity image.  For the case
        of an RGB image and ``which='RG'``

        .. math::

            r = \frac{R}{R+G+B}, \, g = \frac{G}{R+G+B}

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('flowers1.png')
            >>> img.chromaticity()
            >>> img.chromaticity('RB')

        .. note:: The chromaticity color planes are the same as ``which``
            but lower cased.

        :references:
            - Robotics, Vision & Control for Python, Section 10.2.5,
              P. Corke, Springer 2023.

        :seealso: :func:`~machinevisiontoolbox.base.color.tristim2cc`
        """
        if not self.iscolor:
            raise ValueError('cannot compute chromaticity for greyscale image')
        if self.nplanes != 3:
            raise ValueError('expecting 3 plane image')

        # sum over all planes at each pixel, used as the normalizer
        total = np.sum(self.image, axis=2)
        r = self.plane(which[0]).image / total
        g = self.plane(which[1]).image / total

        return self.__class__(np.dstack((r, g)), colororder=which.lower(),
                              dtype="float32")

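    # A quick check of the chromaticity identity r + g + b = 1, done directly
    # on the NumPy array (a sketch; assumes a float RGB image with no pure
    # black pixels, which would make the normalizer zero):
    #
    #   rgb = Image.Read('flowers1.png', dtype='float').image
    #   cc = rgb / rgb.sum(axis=2, keepdims=True)
    #   assert np.allclose(cc.sum(axis=2), 1)
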
    def colorize(self, color=[1, 1, 1], colororder='RGB', alpha=False):
        """
        Colorize a greyscale image

        :param color: base color
        :type color: string, array_like(3)
        :param colororder: order of color channels of resulting image
        :type colororder: str, dict
        :param alpha: add an alpha plane with this value, True is equivalent
            to 1, defaults to False
        :type alpha: bool, float, optional
        :return: color image
        :rtype: :class:`Image` instance

        The greyscale image is colorized by setting each output pixel to
        the product of ``color`` and the input pixel value.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png')
            >>> img.colorize([1, 0, 0])  # red shark
            >>> img.colorize('blue')  # blue shark

        :references:
            - Robotics, Vision & Control for Python, Section 11.3,
              P. Corke, Springer 2023.

        :seealso: :meth:`mono`
        """
        # TODO, colorize all in list

        if isinstance(color, str):
            color = name2color(color)
        else:
            color = argcheck.getvector(color).astype(self.dtype)
        if self.iscolor:
            raise ValueError('Image must be greyscale')

        # alpha can be False, True, or scalar
        if alpha is False:
            out = np.dstack((color[0] * self.image,
                             color[1] * self.image,
                             color[2] * self.image))
        else:
            if alpha is True:
                alpha = 1
            out = np.dstack((color[0] * self.image,
                             color[1] * self.image,
                             color[2] * self.image,
                             alpha * np.ones(self.shape)))

        if self.isint and np.issubdtype(color.dtype, np.floating):
            out = self.cast(out)

        return self.__class__(out, colororder=colororder)

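    # A sketch of the same colorizing operation done manually with NumPy
    # broadcasting (assumes a float greyscale image; 'shark1.png' is the
    # sample image used in the docstring above):
    #
    #   grey = Image.Read('shark1.png', dtype='float').image
    #   red_shark = grey[..., np.newaxis] * np.array([1.0, 0.0, 0.0])
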
    def kmeans_color(self, k=None, centroids=None, seed=None):
        """
        k-means color clustering

        **Training**

        :param k: number of clusters, defaults to None
        :type k: int, optional
        :param seed: random number seed, defaults to None
        :type seed: int, optional
        :return: label image, centroids and residual
        :rtype: :class:`Image`, ndarray(P,k), float

        The pixels are grouped into ``k`` clusters based on their Euclidean
        distance from ``k`` cluster centroids.  Clustering is iterative and
        the initial cluster centroids are random.  The method returns a
        label image, indicating the assigned cluster for each input pixel,
        the cluster centroids and a residual.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> targets = Image.Read("tomato_124.png", dtype="float", gamma="sRGB")
            >>> ab = targets.colorspace("L*a*b*").plane("a*:b*")
            >>> targets_labels, targets_centroids, resid = ab.kmeans_color(k=3, seed=0)
            >>> targets_centroids

        **Classification**

        :param centroids: cluster centroids from training phase
        :type centroids: ndarray(P,k)
        :return: label image
        :rtype: :class:`Image`

        Pixels in the input image are assigned the label of the closest
        centroid.

        .. note:: The colorspace of the images could be a chromaticity
            space to classify objects while ignoring brightness variation.

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.1.2,
              P. Corke, Springer 2023.

        :seealso: `opencv.kmeans <https://docs.opencv.org/3.4/d5/d38/group__core__cluster.html#ga9a34dc06c6ec9460e90860f15bcd2f88>`_
        """
        # TODO
        # colorspace can be RGB, rg, Lab, ab

        if seed is not None:
            cv.setRNGSeed(seed)

        # arrange pixel values as an N x P array, one row per pixel
        data = self.to_float().reshape((-1, self.nplanes))

        criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)

        if k is not None:
            # training: perform clustering
            ret, label, centres = cv.kmeans(
                data=data,
                K=k,
                bestLabels=None,
                criteria=criteria,
                attempts=10,
                flags=cv.KMEANS_RANDOM_CENTERS
            )
            return self.__class__(label.reshape(self.shape[:2])), centres.T, ret

        elif centroids is not None:
            # classification: assign pixels to the given cluster centres
            k = centroids.shape[1]                               # centroids is P x K
            data = np.repeat(data[..., np.newaxis], k, axis=2)   # N x P x K
            # compute L2 norm of the error over the plane dimension
            distance = np.linalg.norm(data - centroids, axis=1)  # N x K
            # now find which cluster centre gave the smallest error
            label = np.argmin(distance, axis=1)

            return self.__class__(label.reshape(self.shape[:2]))

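    # A sketch of the two-phase usage: train on one image, then classify the
    # pixels of a second image with the returned centroids ('tomato_124.png'
    # and 'tomato_125.png' are assumed to be sample images shipped with the
    # toolbox, as in the book examples):
    #
    #   targets = Image.Read("tomato_124.png", dtype="float", gamma="sRGB")
    #   ab = targets.colorspace("L*a*b*").plane("a*:b*")
    #   labels, centroids, resid = ab.kmeans_color(k=3, seed=0)
    #
    #   scene = Image.Read("tomato_125.png", dtype="float", gamma="sRGB")
    #   ab2 = scene.colorspace("L*a*b*").plane("a*:b*")
    #   labels2 = ab2.kmeans_color(centroids=centroids)
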
    def colorspace(self, dst, src=None):
        """
        Transform a color image between color representations

        :param dst: destination color space (see below)
        :type dst: str
        :param src: source color space (see below), defaults to colororder
            of image
        :type src: str, optional
        :return: color image in new colorspace
        :rtype: :class:`Image`

        Color space names (synonyms listed on the same line) are:

        ======================= ======================
        Color space name        Option string(s)
        ======================= ======================
        grey scale              'grey', 'gray'
        RGB (red/green/blue)    'rgb'
        BGR (blue/green/red)    'bgr'
        CIE XYZ                 'xyz', 'xyz_709'
        YCrCb                   'ycrcb'
        HSV (hue/sat/value)     'hsv'
        HLS (hue/lightness/sat) 'hls'
        CIE L*a*b*              'lab', 'l*a*b*'
        CIE L*u*v*              'luv', 'l*u*v*'
        ======================= ======================

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> im = Image.Read('flowers1.png')
            >>> im.colorspace('hsv')

        .. note:: RGB images are assumed to be linear, or gamma decoded.

        :references:
            - Robotics, Vision & Control for Python, Sections 10.2.7 and
              10.4.1, P. Corke, Springer 2023.

        :seealso: :meth:`mono`
            :func:`~machinevisiontoolbox.base.color.colorspace_convert`
        """
        # TODO other color cases
        # TODO check conv is valid
        # TODO conv string parsing

        # ensure floats? unsure if cv.cvtColor operates on ints
        # imf = self.to_float()

        if src is None:
            src = self.colororder_str

        # options gamma, on by default if to is RGB or BGR
        # options white on by default

        out = color.colorspace_convert(self.image, src, dst)

        if out.ndim > 2:
            # insert the ':' separators needed for names like 'l*a*b*'
            colororder = dst.replace("*", "*:", 2)
        else:
            colororder = None

        return self.__class__(out, dtype=self.dtype, colororder=colororder)

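    # A sketch of a typical conversion chain: decode gamma first (per the note
    # above that RGB inputs are assumed linear), then convert to CIE L*a*b*,
    # using the same color space name as the kmeans_color docstring example:
    #
    #   rgb = Image.Read('flowers1.png', dtype='float')
    #   lab = rgb.gamma_decode('sRGB').colorspace('L*a*b*')
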
    @classmethod
    def Overlay(cls, im1, im2, colors='rc'):
        """
        Overlay two greyscale images in different colors

        :param im1: first image
        :type im1: :class:`Image`
        :param im2: second image
        :type im2: :class:`Image`
        :param colors: colors for each image, defaults to 'rc'
        :type colors: 2-element string/list/tuple, optional
        :raises ValueError: images must be greyscale
        :return: overlaid images
        :rtype: :class:`Image`

        Two greyscale images are overlaid in different colors.  Useful for
        visualizing disparity or optical flow.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img1 = Image.Read('eiffel-1.png', mono=True)
            >>> img2 = Image.Read('eiffel-2.png', mono=True)
            >>> Image.Overlay(img1, img2)
            >>> Image.Overlay(img1, img2, 'rg')
            >>> Image.Overlay(img1, img2, ((1, 0, 0), (0, 1, 0)))

        .. note:: Images can be different sizes, the output image size is
            the maximum of the dimensions of the input images.  Smaller
            dimensions are zero padded.  The top-left corners of both
            images are aligned.

        :seealso: :meth:`anaglyph` :meth:`blend` :meth:`stshow`
        """
        if im1.iscolor or im2.iscolor:
            raise ValueError('images must be greyscale')

        # output canvas is the union of the two image extents
        h = max(im1.height, im2.height)
        w = max(im1.width, im2.width)

        overlay = cls.Constant(w, h, [0, 0, 0], colororder='RGB')
        im1 = im1.colorize(colors[0])
        im2 = im2.colorize(colors[1])
        overlay.paste(im1, (0, 0), 'add', copy=False)
        overlay.paste(im2, (0, 0), 'add', copy=False)

        return overlay

    def gamma_encode(self, gamma):
        r"""
        Gamma encoding

        :param gamma: gamma value
        :type gamma: str, float
        :return: gamma encoded version of image
        :rtype: :class:`Image`

        Gamma encode the image.  This takes a linear luminance image and
        converts it to a form suitable for display on a non-linear monitor.
        ``gamma`` is either the string 'sRGB' for IEC 61966-2-1:1999 or a
        float:

        .. math::

            \mat{Y}_{u,v} = \mat{X}_{u,v}^\gamma

        Example:

        .. runblock:: pycon

            >>> import numpy as np
            >>> from machinevisiontoolbox import Image
            >>> img = Image(np.arange(8)[np.newaxis, :])  # create grey step wedge
            >>> img.gamma_encode('sRGB').disp()

        .. note::
            - ``gamma`` is the reciprocal of the value used for gamma
              decoding.
            - Gamma encoding is typically performed in a camera with
              :math:`\gamma=0.45`.
            - For images with multiple planes, the gamma encoding is
              applied to all planes.
            - For floating point images, the pixels are assumed to be in
              the range 0 to 1.
            - For integer images, the pixels are assumed to be in the range
              0 to the maximum value of their class.  Pixels are converted
              first to double, processed, then converted back to the
              integer class.

        :references:
            - Robotics, Vision & Control for Python, Sections 10.2.7 and
              10.3.6, P. Corke, Springer 2023.

        :seealso: :meth:`gamma_decode` :meth:`colorspace`
        """
        out = color.gamma_encode(self.image, gamma)

        return self.__class__(out, colororder=self.colororder)

    def gamma_decode(self, gamma):
        r"""
        Gamma decoding

        :param gamma: gamma value
        :type gamma: str, float
        :return: gamma decoded version of image
        :rtype: :class:`Image`

        Gamma decode the image.  This takes a gamma-encoded image, as
        typically obtained from a camera or image file, and converts it to
        a linear luminance image.  ``gamma`` is either the string 'sRGB'
        for IEC 61966-2-1:1999 or a float:

        .. math::

            \mat{Y}_{u,v} = \mat{X}_{u,v}^\gamma

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('street.png')
            >>> linear = img.gamma_decode('sRGB')

        .. note::
            - ``gamma`` is the reciprocal of the value used for gamma
              encoding.
            - Gamma decoding should be applied to any color image prior to
              colorimetric operations.
            - Gamma decoding is typically performed in the display hardware
              with :math:`\gamma=2.2`.
            - For images with multiple planes, the gamma decoding is
              applied to all planes.
            - For floating point images, the pixels are assumed to be in
              the range 0 to 1.
            - For integer images, the pixels are assumed to be in the range
              0 to the maximum value of their class.  Pixels are converted
              first to double, processed, then converted back to the
              integer class.

        :references:
            - Robotics, Vision & Control for Python, Sections 10.2.7 and
              10.3.6, P. Corke, Springer 2023.

        :seealso: :meth:`gamma_encode` :meth:`colorspace`
        """
        out = color.gamma_decode(self.image, gamma)

        return self.__class__(out, colororder=self.colororder)

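    # A sketch of the encode/decode round trip on a float image (assumes
    # pixel values in [0, 1]; with reciprocal exponents the two operations
    # are inverses of each other, per the notes above):
    #
    #   img = Image.Read('street.png', dtype='float')
    #   round_trip = img.gamma_decode(2.2).gamma_encode(1 / 2.2)
    #   assert np.allclose(img.image, round_trip.image)
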
# --------------------------------------------------------------------------- #

if __name__ == "__main__":

    import pathlib
    import os.path

    from machinevisiontoolbox import Image

    im1 = Image.Read('eiffel-1.png', mono=True)
    im2 = Image.Read('eiffel-2.png', mono=True)
    Image.Overlay(im1, im2, 'rc').disp(block=True)

    exec(open(pathlib.Path(__file__).parent.parent.absolute() / "tests" / "test_color.py").read())  # pylint: disable=exec-used