Source code for machinevisiontoolbox.ImageWholeFeatures

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.ticker import ScalarFormatter

import cv2 as cv
from spatialmath import base, SE3
from machinevisiontoolbox.base import findpeaks, findpeaks2d


class ImageWholeFeaturesMixin:
    def hist(self, nbins=256, opt=None):
        """
        Image histogram

        :param nbins: number of histogram bins, defaults to 256
        :type nbins: int, optional
        :param opt: histogram option
        :type opt: str
        :return: histogram of image
        :rtype: :class:`~machinevisiontoolbox.ImageWholeFeatures.Histogram`

        Returns an object that summarizes the distribution of
        pixel values in each color plane.

        Example::

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('street.png')
            >>> type(hist)
            >>> hist = img.hist()
            >>> hist
            >>> hist.plot()

        .. plot::

            from machinevisiontoolbox import Image
            img = Image.Read('street.png')
            img.hist().plot()

        Example::

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('flowers1.png')
            >>> hist = img.hist()
            >>> hist
            >>> hist.plot(style='stack')

        .. plot::

            from machinevisiontoolbox import Image
            img = Image.Read('flowers1.png')
            img.hist().plot(style='stack')

        :note:
            - For an integer  image the bins spans the greylevel range 0-255.
            - For a floating point image the histogram spans the greylevel range
              0.0 to 1.0 with 256 bins.
            - For floating point images all NaN and Inf values are first
              removed.
            - Computed using OpenCV CalcHist. Only works on floats up to 32 bit,
              float64 images are automatically converted to float32.

        :references:
            - Robotics, Vision & Control for Python, Section 14.4.3, P. Corke, Springer 2023.

        :seealso:
            :class:`~machinevisiontoolbox.ImageWholeFeatures.Histogram`
            `opencv.calcHist <https://docs.opencv.org/3.4/d6/dc7/group__imgproc__hist.html#ga4b2b5fd75503ff9e6844cc4dcdaed35d>`_
        """

        # check inputs
        optHist = ["sorted"]
        if opt is not None and opt not in optHist:
            raise ValueError(opt, "opt is not a valid option")

        if self.isint:
            xrange = [0, np.iinfo(self.dtype).max]
        else:
            # float image
            xrange = [0.0, 1.0]

        xc = []
        hc = []
        hcdf = []
        hnormcdf = []

        # ensure that float image is converted to float32
        if self.A.dtype == np.dtype("float64"):
            implanes = cv.split(self.A.astype("float32"))
        else:
            implanes = cv.split(self.A)

        for i in range(self.nplanes):
            # bin coordinates
            x = np.linspace(*xrange, nbins, endpoint=True).T
            # h = cv.calcHist(implanes, [i], None, [nbins], [0, maxrange + 1])
            h = cv.calcHist(implanes, [i], None, [nbins], xrange)
            if i == 0:
                xc.append(x)
            hc.append(h)

        # stack into arrays
        xs = np.vstack(xc).T
        hs = np.hstack(hc)

        # TODO this seems too complex, why do we stack stuff as well
        # as have an array of hist tuples??
        # xs, xc are the same, and same for all plots

        hhhx = Histogram(hs, xs, self.isfloat)
        hhhx.colordict = self.colororder

        return hhhx

    # def sum(self):
    #     """
    #     Sum of all pixels

    #     :return: sum of all pixel values
    #     :rtype: float or ndarray(P)

    #     Computes the sum of pixels in the image:

    #     .. math::

    #         \sum_{uv} I_{uv}

    #     For a P-channel image the result is a P-element array.

    #     Example:

    #     .. runblock:: pycon

    #         >>> from machinevisiontoolbox import Image
    #         >>> img = Image.Read('street.png')
    #         >>> img.sum()

    #     :seealso: :meth:`mpq` :meth:`npq` :meth:`upq`
    #     """
    #     out = []
    #     for im in self:
    #         out.append(np.sum(im.A))

    #     if len(out) == 1:
    #         return out[0]
    #     else:
    #         return out

    def mpq(self, p, q):
        r"""
        Image moments

        :param p: u exponent
        :type p: int
        :param q: v exponent
        :type q: int
        :return: moment
        :type: scalar

        Computes the pq'th moment of the image:

        .. math::

            m(I) = \sum_{uv} I_{uv} u^p v^q

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png')
            >>> img.mpq(1, 0)

        :note:
            - Supports single channel images only.
            - ``mpq(0, 0)`` is the same as ``sum()`` but less efficient.

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.3.4, P. Corke, Springer 2023.

        :seealso: :meth:`sum` :meth:`npq` :meth:`upq`
        """

        if not isinstance(p, int) or not isinstance(q, int):
            raise TypeError(p, "p, q must be an int")

        im = self.mono().A
        X, Y = self.meshgrid()
        return np.sum(im * (X**p) * (Y**q))

    def upq(self, p, q):
        r"""
        Central image moments

        :param p: u exponent
        :type p: int
        :param q: v exponent
        :type q: int
        :return: moment
        :type: scalar

        Computes the pq'th central moment of the image:

        .. math::

            \mu(I) = \sum_{uv} I_{uv} (u-u_0)^p (v-v_0)^q

        where :math:`u_0 = m_{10}(I) / m_{00}(I)` and :math:`v_0 = m_{01}(I) / m_{00}(I)`.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png')
            >>> img.upq(2, 2)

        :note:
            - The central moments are invariant to translation.
            - Supports single channel images only.

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.3.4, P. Corke, Springer 2023.

        :seealso: :meth:`sum` :meth:`mpq` :meth:`upq`
        """

        if not isinstance(p, int) or not isinstance(q, int):
            raise TypeError(p, "p, q must be an int")

        m00 = self.mpq(0, 0)
        xc = self.mpq(1, 0) / m00
        yc = self.mpq(0, 1) / m00

        im = self.mono().A
        X, Y = self.meshgrid()

        return np.sum(im * ((X - xc) ** p) * ((Y - yc) ** q))

    def npq(self, p, q):
        r"""
        Normalized central image moments

        :param p: u exponent
        :type p: int
        :param q: v exponent
        :type q: int
        :return: moment
        :type: scalar

        Computes the pq'th normalized central moment of the image:

        .. math::

            \nu(I) = \frac{\mu_{pq}(I)}{m_{00}(I)} = \frac{1}{m_{00}(I)} \sum_{uv} I_{uv} (u-u_0)^p (v-v_0)^q

        where :math:`u_0 = m_{10}(I) / m_{00}(I)` and :math:`v_0 = m_{01}(I) / m_{00}(I)`.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png')
            >>> img.npq(2, 2)

        :note:
            - The normalized central moments are invariant to translation and
              scale.
            - Supports single channel images only.

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.3.4, P. Corke, Springer 2023.

        :seealso: :meth:`sum` :meth:`mpq` :meth:`upq`
        """
        if not isinstance(p, int) or not isinstance(q, int):
            raise TypeError(p, "p, q must be an int")
        if (p + q) < 2:
            raise ValueError(p + q, "normalized moments only valid for p+q >= 2")

        g = (p + q) / 2 + 1

        return self.upq(p, q) / self.mpq(0, 0) ** g

    def moments(self, binary=False):
        """
        Image moments

        :param binary: if True, all non-zero pixels are treated as 1's
        :type binary: bool
        :return: image moments
        :type: dict

        Compute multiple moments of the image and return them as a dict

        ==========================  ===============================================================================
        Moment type                 dict keys
        ==========================  ===============================================================================
        moments                     ``m00`` ``m10`` ``m01`` ``m20`` ``m11`` ``m02`` ``m30`` ``m21`` ``m12`` ``m03``
        central moments             ``mu20`` ``mu11`` ``mu02`` ``mu30`` ``mu21`` ``mu12`` ``mu03`` |
        normalized central moments  ``nu20`` ``nu11`` ``nu02`` ``nu30`` ``nu21`` ``nu12`` ``nu03`` |
        ==========================  ===============================================================================

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png')
            >>> img.moments()

        :note:
            - Converts a color image to greyscale.

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.3.4, P. Corke, Springer 2023.

        :seealso: :meth:`mpq` :meth:`npq` :meth:`upq` `opencv.moments <https://docs.opencv.org/master/d8/d23/classcv_1_1Moments.html>`_
        """
        return cv.moments(self.mono().to_int(), binary)

    def humoments(self):
        """
        Hu image moment invariants

        :return: Hu image moments
        :rtype: ndarray(7)

        Computes the seven Hu image moment invariants of the image.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> img = Image.Read('shark1.png', dtype='float')
            >>> img.humoments()

        :note:
            - Image is assumed to be a binary image of a single connected
              region
            - These invariants are a function of object shape and are invariant
              to position, orientation and scale.

        :references:
            - M-K. Hu, Visual pattern recognition by moment invariants. IRE
              Trans. on Information Theory, IT-8:pp. 179-187, 1962.
            - Robotics, Vision & Control for Python, Section 12.1.3.6, P. Corke, Springer 2023.

        :seealso: :meth:`moments` `opencv.HuMoments <https://docs.opencv.org/master/d3/dc0/group__imgproc__shape.html#gab001db45c1f1af6cbdbe64df04c4e944>`_
        """

        # TODO check for binary image

        moments = cv.moments(self.A)
        hu = cv.HuMoments(moments)
        return hu.flatten()

    def nonzero(self):
        """
        Find non-zero pixel values as 2D coordinates

        :return: coordinates of non-zero pixels
        :rtype: ndarray(2,N)

        The (u,v) coordinates are given as columns of the returned array.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> import numpy as np
            >>> pix = np.zeros((10,10)); pix[2,3]=10; pix[4,5]=11; pix[6,7]=12
            >>> img = Image(pix)
            >>> img.nonzero()

        :references:
            - Robotics, Vision & Control for Python, Section 12.1.3.2, P. Corke, Springer 2023.

        :seealso: :meth:`flatnonzero`
        """
        v, u = np.nonzero(self.A)
        return np.vstack((u, v))

    def flatnonzero(self):
        """
        Find non-zero pixel values as 1D indices

        :return: index of non-zero pixels
        :rtype: ndarray(N)

        The coordinates are given as 1D indices into a flattened version of
        the image.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> import numpy as np
            >>> pix = np.zeros((10,10)); pix[2,3]=10; pix[4,5]=11; pix[6,7]=12
            >>> img = Image(pix)
            >>> img.flatnonzero()
            >>> img.view1d()[23]

        :seealso: :meth:`view1d` :meth:`nonzero`
        """
        return np.flatnonzero(self.A)

    def peak2d(self, npeaks=2, scale=1, interp=False, positive=True):
        r"""
        Find local maxima in image

        :param npeaks: number of peaks to return, defaults to 2
        :type npeaks: int, optional
        :param scale: scale of peaks to consider
        :type scale: int
        :param interp:  interpolate the peak positions, defaults to False
        :type interp: bool, optional
        :param positive:  only select peaks that are positive, defaults to False
        :type positive: bool, optional
        :return: peak magnitude and positions
        :rtype: ndarray(npeaks), ndarray(2,npeaks)

        Find the positions of the local maxima in the image.  A local maxima
        is the largest value within a sliding window of width
        :math:`2 \mathtt{scale}+1`.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Image
            >>> import numpy as np
            >>> peak = np.array([[10, 20, 30], [40, 50, 45], [15, 20, 30]])
            >>> img = Image(np.pad(peak, 3, constant_values=10))
            >>> img.A
            >>> img.peak2d(interp=True)

        :note:
            - Edges elements will never be returned as maxima.
            - To find minima, use ``peak2d(-image)``.
            - The ``interp`` option fits points in the neighbourhood about the
              peak with a paraboloid and its peak position is returned.

        :seealso: :meth:`~machinevisiontoolbox.base.findpeaks.findpeaks2d`
        """

        ret = findpeaks2d(self.A, npeaks=npeaks, scale=scale, interp=interp)
        return ret[:, -1], ret[:, :2].T


[docs]class Histogram: def __init__(self, h, x, isfloat=False): """ Create histogram instance :param h: image histogram :type h: ndarray(N), ndarray(N,P) :param x: image values :type x: ndarray(N) :param isfloat: pixel values are floats, defaults to False :type isfloat: bool, optional Create :class:`Histogram` instance from histogram data provided as Numpy arrays. :seealso: :meth:`~machinevisiontoolbox.ImageFeatures.ImageFeaturesMixin.hist` """ self.nplanes = h.shape[1] if self.nplanes == 1: h = h[:, 0] self._h = h # histogram self._x = x.flatten() # x value self.isfloat = isfloat # 'hist', 'h cdf normcdf x') def __str__(self): """ Histogram summary as a string :return: concise summary of histogram :rtype: str Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> img = Image.Random() >>> h = im.hist(100) >>> print(h) """ s = f"histogram with {len(self.x)} bins" if self.nplanes > 1: s += f" x {self.h.shape[1]} planes" s += f": xrange {self.x[0]} - {self.x[-1]}, yrange {np.min(self.h)} - {np.max(self.h)}" return s def __repr__(self): """ Print histogram summary :return: print concise summary of histogram :rtype: str Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> img = Image.Random() >>> h = im.hist(100) >>> h """ return str(self) @property def x(self): """ Histogram bin values :return: array of left-hand bin values :rtype: ndarray(N) Bin :math:`i` contains grey values in the range :math:`[x_{[i]}, x_{[i+1]})`. Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> import numpy as np >>> hist = Image.Read('flowers1.png').hist() >>> with np.printoptions(threshold=10): >>> hist.x """ return self._x @property def h(self): """ Histogram count values :return: array of histogram count values :rtype: ndarray(N) or ndarray(N,P) For a greyscale image this is a 1D array, for a multiplane (color) image this is a 2D array with the histograms of each plane as columns. Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> import numpy as np >>> hist = Image.Read('flowers1.png').hist() >>> with np.printoptions(threshold=10): >>> hist.h """ return self._h @property def cdf(self): """ Cumulative histogram values :return: array of cumulative histogram count values :rtype: ndarray(N) or ndarray(N,P) For a greyscale image this is a 1D array, for a multiplane (color) image this is a 2D array with the cumulative histograms of each plane as columns. Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> import numpy as np >>> hist = Image.Read('flowers1.png').hist() >>> with np.printoptions(threshold=10): >>> hist.cdf """ return np.cumsum(self._h, axis=0) @property def ncdf(self): """ Normalized cumulative histogram values :return: array of normalized cumulative histogram count values :rtype: ndarray(N) or ndarray(N,P) For a greyscale image this is a 1D array, for a multiplane (color) image this is a 2D array with the normalized cumulative histograms of each plane as columns. Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> import numpy as np >>> hist = Image.Read('flowers1.png').hist() >>> with np.printoptions(threshold=10): >>> hist.ncdf """ y = np.cumsum(self._h, axis=0) if self.nplanes == 1: y = y / y[-1] else: y = y / y[-1, :] return y
[docs] def plot( self, type="frequency", block=False, bar=None, style="stack", alpha=0.5, **kwargs, ): """ Plot histogram :param type: histogram type, one of: 'frequency' [default], 'cdf', 'ncdf' :type type: str, optional :param block: hold plot, defaults to False :type block: bool, optional :param bar: histogram bar plot, defaults to True for frequency plot, False for other plots :type bar: bool, optional :param style: Style for multiple plots, one of: 'stack' [default], 'overlay' :type style: str, optional :param alpha: transparency for overlay plot, defaults to 0.5 :type alpha: float, optional :raises ValueError: invalid histogram type :raises ValueError: cannot use overlay style for 1-channel histogram """ # if type == 'histogram': # plot_histogram(self.xs.flatten(), self.hs.flatten(), block=block, # xlabel='pixel value', ylabel='number of pixels', **kwargs) # elif type == 'cumulative': # plot_histogram(self.xs.flatten(), self.cs.flatten(), block=block, # xlabel='pixel value', ylabel='cumulative number of pixels', **kwargs) # elif type == 'normalized': # plot_histogram(self.xs.flatten(), self.ns.flatten(), block=block, # xlabel='pixel value', ylabel='normalized cumulative number of pixels', **kwargs) # fig = plt.figure() x = self._x[:] if type == "frequency": y = self.h maxy = np.max(y) ylabel1 = "frequency" ylabel2 = "frequency" if bar is not False: bar = True elif type in ("cdf", "cumulative"): y = self.cdf maxy = np.max(y[-1, :]) ylabel1 = "cumulative frequency" ylabel2 = "cumulative frequency" elif type in ("ncdf", "normalized"): y = self.ncdf ylabel1 = "norm. cumulative freq." ylabel2 = "normalized cumulative frequency" maxy = 1 else: raise ValueError("unknown type") if self.nplanes == 1: y = y[..., np.newaxis] if self.isfloat: xrange = (0.0, 1.0) else: xrange = (0, 255) if self.colordict is not None: colors = list(self.colordict.keys()) n = len(colors) # ylabel1 += ' (' + ','.join(colors) + ')' else: n = 1 if style == "overlay": raise ValueError("cannot use overlay style for monochrome image") if style == "stack": for i in range(n): ax = plt.subplot(n, 1, i + 1) if bar: ax.bar(x, y[:, i], width=x[1] - x[0], bottom=0, **kwargs) else: ax.plot(x, y[:, i], **kwargs) ax.grid() if n == 1: ax.set_ylabel(ylabel1) else: ax.set_ylabel(ylabel1 + " (" + colors[i] + ")") ax.set_xlim(*xrange) ax.set_ylim(0, maxy) ax.yaxis.set_major_formatter( ScalarFormatter(useOffset=False, useMathText=True) ) ax.set_xlabel("pixel value") elif style == "overlay": x = np.r_[0, x, 255] ax = plt.subplot(1, 1, 1) patchcolor = [] goodcolors = [c for c in "rgbykcm"] for i, color in colors: if color.lower() in "rgbykcm": patchcolor.append(color.lower()) else: patchcolor.append(goodcolors.pop(0)) for i in range(n): yi = np.r_[0, y[:, i], 0] p1 = np.array([x, yi]).T poly1 = Polygon( p1, closed=True, facecolor=patchcolor[i], alpha=alpha, **kwargs ) ax.add_patch(poly1) ax.set_xlim(*xrange) ax.set_ylim(0, maxy) ax.yaxis.set_major_formatter( ScalarFormatter(useOffset=False, useMathText=True) ) ax.set_xlabel("pixel value") ax.set_ylabel(ylabel2) ax.grid(True) plt.legend(colors) plt.show(block=block)
[docs] def peaks(self, **kwargs): """ Histogram peaks :param kwargs: parameters passed to :func:`~machinevisiontoolbox.base.findpeaks.findpeaks` :return: positions of histogram peaks :rtype: ndarray(M), list of ndarray For a greyscale image return an array of grey values corresponding to local maxima. For a color image return a list of arrays of grey values corresponding to local maxima in each plane. Example: .. runblock:: pycon >>> from machinevisiontoolbox import Image >>> import numpy as np >>> hist = Image.Read('street.png').hist() >>> hist.peaks(scale=20) :seealso: :func:`~machinevisiontoolbox.base.findpeaks.findpeaks` """ if self.nplanes == 1: # greyscale image x, _ = findpeaks(self.h, self.x, **kwargs) return x else: xp = [] for i in range(self.nplanes): x, _ = findpeaks(self.h[:, i], self.x, **kwargs) xp.append(x) return xp
# # helper function that was part of hist() in the Matlab toolbox # # TODO consider moving this to ImpageProcessingBase.py # def plothist(self, title=None, block=False, **kwargs): # """ # plot first image histogram as a line plot (TODO as poly) # NOTE convenient, but maybe not a great solution because we then need to # duplicate all the plotting options as for idisp? # """ # if title is None: # title = self[0].filename # hist = self[0].hist(**kwargs) # x = hist[0].x # h = hist[0].h # fig, ax = plt.subplots() # # line plot histogram style # if self.iscolor: # ax.plot(x[:, 0], h[:, 0], 'b', alpha=0.8) # ax.plot(x[:, 1], h[:, 1], 'g', alpha=0.8) # ax.plot(x[:, 2], h[:, 2], 'r', alpha=0.8) # else: # ax.plot(hist[0].x, hist[0].h, 'k', alpha=0.7) # # polygon histogram style # polygon_style = False # if polygon_style: # if self.iscolor: # from matplotlib.patches import Polygon # # TODO make sure pb goes to bottom of axes at the edges: # pb = np.stack((x[:, 0], h[:, 0]), axis=1) # polyb = Polygon(pb, # closed=True, # facecolor='b', # linestyle='-', # alpha=0.75) # ax.add_patch(polyb) # pg = np.stack((x[:, 1], h[:, 1]), axis=1) # polyg = Polygon(pg, # closed=True, # facecolor='g', # linestyle='-', # alpha=0.75) # ax.add_patch(polyg) # pr = np.stack((x[:, 2], h[:, 2]), axis=1) # polyr = Polygon(pr, # closed=True, # facecolor='r', # linestyle='-', # alpha=0.75) # ax.add_patch(polyr) # # addpatch seems to require a plot, so hack is to plot null and # # make alpha=0 # ax.plot(0, 0, alpha=0) # else: # from matplotlib.patches import Polygon # p = np.hstack((x, h)) # poly = Polygon(p, # closed=True, # facecolor='k', # linestyle='-', # alpha=0.5) # ax.add_patch(poly) # ax.plot(0, 0, alpha=0) # ax.set_ylabel('count') # ax.set_xlabel('bin') # ax.grid() # ax.set_title(title) # plt.show(block=block) # # him = im[2].hist() # # fig, ax = plt.subplots() # # ax.plot(him[i].x[:, 0], him[i].h[:, 0], 'b') # # ax.plot(him[i].x[:, 1], him[i].h[:, 1], 'g') # # ax.plot(him[i].x[:, 2], him[i].h[:, 2], 'r') # # plt.show() if __name__ == "__main__": from machinevisiontoolbox import Image from math import pi # img = Image.Read('flowers1.png', dtype='float32', grey=True) # print(img) # # img.disp() # h = img.hist() # print(h) # with np.printoptions(precision=2, threshold=5): # print(h.h) # print(h.cdf) # print(h.ncdf) # print(h.peaks(scale=0.2)) # # h.plot('frequency', style='overlay') # # plt.figure() # h.plot('frequency', block=True) # print(img.moments()) im = Image.Read("penguins.png") z = im.ocr(minconf=90) print(z)