#!/usr/bin/env python
import numpy as np
import spatialmath.base.argcheck as argcheck
import cv2 as cv
import scipy as sp
from scipy import signal
"""
Image processing kernel operations on the Image class
"""
[docs]
class Kernel:
    """
    Convolution kernel for image processing operations

    A thin wrapper around a 2D NumPy array of kernel weights (attribute ``K``)
    with an optional descriptive ``name``.  Instances are normally created by
    the class-method constructors such as :meth:`Gauss`, :meth:`Laplace` or
    :meth:`Box`.
    """

    def __init__(self, K, name=None):
        """
        Convolution kernel object

        :param K: kernel weighting matrix
        :type K: ndarray(N,M)
        :param name: name of the kernel, defaults to None
        :type name: str, optional
        :raises ValueError: ``K`` is not a 2D ndarray

        Kernel objects are used to represent convolution kernels for image
        processing operations.  They are created by a number of class
        methods that generate common kernels such as Gaussian, Laplacian, etc.

        :seealso: :meth:`Gauss` :meth:`Laplace` :meth:`Sobel` :meth:`DoG` :meth:`LoG` :meth:`DGauss` :meth:`Circle` :meth:`Box`
        """
        # Either failure alone must reject the argument.  The isinstance test
        # comes first so that ``.ndim`` is only read from a real ndarray.
        if not isinstance(K, np.ndarray) or K.ndim != 2:
            raise ValueError("kernel must be a 2D ndarray")
        self.K = K
        self.name = name

    def __str__(self) -> str:
        """Human readable kernel description

        :return: summary description of the kernel
        :rtype: str

        The summary includes the size of the kernel, and its minimum, maximum
        and mean values.  If the kernel is square and equal to its transpose
        this is noted as ``SYMMETRIC``.  The kernel name, if any, is appended
        in parentheses.

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> K = Kernel.Gauss(sigma=2)
            >>> print(K)
        """
        K = self.K
        s = f"Kernel: {K.shape[0]}x{K.shape[1]}"
        s += f", min={K.min():.2g}, max={K.max():.2g}, mean={K.mean():.2g}"
        # Only a square matrix can equal its transpose; comparing a non-square
        # kernel with its transpose would fail to broadcast.
        if K.shape[0] == K.shape[1] and np.allclose(K, K.T, rtol=1e-05, atol=1e-08):
            s += ", SYMMETRIC"
        if self.name is not None:
            s += f" ({self.name})"
        return s

    @property
    def T(self):
        """
        Transposed kernel

        :return: new kernel whose weights are the transpose of this one
        :rtype: :class:`Kernel`

        The name is not carried over to the transposed kernel.
        """
        return Kernel(self.K.T)

    @property
    def shape(self):
        """
        Shape of the kernel weight array

        :return: shape of ``K``
        :rtype: tuple
        """
        return self.K.shape

    def print(self, fmt=None, separator: str = " ", precision: int = 2) -> None:
        """
        Print kernel weights in compact format

        :param fmt: format string applied to every element, defaults to None
        :type fmt: str, optional
        :param separator: value separator, defaults to single space
        :type separator: str, optional
        :param precision: precision for floating point kernel values, defaults to 2
        :type precision: int, optional

        Very compact display of kernel numerical values in grid layout.  If
        ``fmt`` is not given, a fixed-point format is built whose field width
        fits the formatted minimum and maximum weights; ``separator`` and
        ``precision`` are only used in that case.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> K = Kernel.Gauss(sigma=2)
            >>> K.print()
        """
        if fmt is None:
            ff = f"{{:.{precision}f}}"
            # widest of the two extreme values sets the column width
            width = max(len(ff.format(self.K.max())), len(ff.format(self.K.min())))
            fmt = f"{separator}{{:{width}.{precision}f}}"
        for row in self.K:
            print("".join(fmt.format(value) for value in row))

    @classmethod
    def Gauss(cls, sigma, h=None):
        r"""
        Gaussian kernel

        :param sigma: standard deviation of Gaussian kernel
        :type sigma: float
        :param h: half width of the kernel
        :type h: integer, optional
        :return: 2h+1 x 2h+1 Gaussian kernel
        :rtype: :class:`Kernel`

        Return the 2-dimensional Gaussian kernel of standard deviation ``sigma``

        .. math::

            \mathbf{K} = \frac{1}{2\pi \sigma^2} e^{-(u^2 + v^2) / 2 \sigma^2}

        The kernel is centred within a square array with side length given by:

        - :math:`2 \mbox{ceil}(3 \sigma) + 1`, or
        - :math:`2 \mathtt{h} + 1`

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> K = Kernel.Gauss(sigma=1, h=2)
            >>> K.shape
            >>> print(K)
            >>> K.print()
            >>> K = Kernel.Gauss(sigma=2)
            >>> K.shape

        :note:
            - The sampled weights are divided by their sum, so the kernel
              always sums to one, even when the window strongly truncates
              the Gaussian.

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.1, P. Corke, Springer 2023.

        :seealso: :meth:`DGauss`
        """
        if h is None:
            h = np.ceil(3 * sigma)

        wi = np.arange(-h, h + 1)
        x, y = np.meshgrid(wi, wi)

        m = 1.0 / (2.0 * np.pi * sigma**2) * np.exp(-(x**2 + y**2) / 2.0 / sigma**2)
        # the discrete samples only approximate unit volume, so renormalise
        return cls(m / np.sum(m), name=f"Gaussian σ={sigma}")

    @classmethod
    def Laplace(cls):
        r"""
        Laplacian kernel

        :return: 3 x 3 Laplacian kernel
        :rtype: Kernel

        Return the Laplacian kernel

        .. math::

            \mathbf{K} = \begin{bmatrix}
                0 & 1 & 0 \\
                1 & -4 & 1 \\
                0 & 1 & 0
            \end{bmatrix}

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.Laplace()

        :note:
            - This kernel has an isotropic response to image gradient.

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

        :seealso: :meth:`LoG` :meth:`zerocross`
        """
        # fmt: off
        K = np.array([[ 0,  1,  0],
                      [ 1, -4,  1],
                      [ 0,  1,  0]])
        # fmt: on
        return cls(K, name="Laplacian")

    @classmethod
    def Sobel(cls):
        r"""
        Sobel edge detector

        :return: 3 x 3 Sobel kernel
        :rtype: Kernel

        Return the Sobel kernel for horizontal gradient

        .. math::

            \mathbf{K} = \frac{1}{8} \begin{bmatrix}
                1 & 0 & -1 \\
                2 & 0 & -2 \\
                1 & 0 & -1
            \end{bmatrix}

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.Sobel()

        :note:
            - This kernel is an effective vertical-edge detector
            - The y-derivative (horizontal-edge) kernel is ``K.T``

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

        :seealso: :meth:`DGauss`
        """
        # fmt: off
        K = np.array([[1, 0, -1],
                      [2, 0, -2],
                      [1, 0, -1]]) / 8.0
        # fmt: on
        return cls(K, name="Sobel")

    @classmethod
    def DoG(cls, sigma1, sigma2=None, h=None):
        r"""
        Difference of Gaussians kernel

        :param sigma1: standard deviation of first Gaussian kernel
        :type sigma1: float
        :param sigma2: standard deviation of second Gaussian kernel
        :type sigma2: float, optional
        :param h: half-width of Gaussian kernel
        :type h: int, optional
        :return: 2h+1 x 2h+1 kernel
        :rtype: Kernel

        Return the 2-dimensional difference of Gaussian kernel defined by two
        standard deviation values:

        .. math::

            \mathbf{K} = G(\sigma_{wide}) - G(\sigma_{narrow})

        The two standard deviations may be given in either order; the wider
        Gaussian always has the narrower one subtracted from it, so the centre
        weight is negative, like :meth:`LoG`.

        By default, :math:`\sigma_2 = 1.6 \sigma_1`.

        The kernel is centred within a square array with side length given by:

        - :math:`2 \mbox{ceil}(3 \sigma) + 1`, where :math:`\sigma` is the
          larger of the two standard deviations, or
        - :math:`2\mathtt{h} + 1`

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.DoG(1)

        :note:
            - This kernel is similar to the Laplacian of Gaussian and is often
              used as an efficient approximation.
            - This is a "Mexican hat" shaped kernel

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

        :seealso: :meth:`LoG` :meth:`Gauss`
        """
        if sigma2 is None:
            sigma2 = 1.6 * sigma1
        elif sigma2 < sigma1:
            # order the pair so the result does not depend on argument order
            sigma1, sigma2 = sigma2, sigma1
        # from here on sigma1 <= sigma2

        if h is None:
            # size the window for the wider Gaussian so both are covered
            h = np.ceil(3.0 * sigma2)

        narrow = cls.Gauss(sigma1, h)
        wide = cls.Gauss(sigma2, h)

        # Gauss() returns Kernel objects: subtract the underlying weight arrays
        return cls(wide.K - narrow.K, name=f"DoG σ1={sigma1}, σ2={sigma2}")

    @classmethod
    def LoG(cls, sigma, h=None):
        r"""
        Laplacian of Gaussian kernel

        :param sigma: standard deviation of first Gaussian kernel
        :type sigma: float
        :param h: half-width of kernel
        :type h: int, optional
        :return: 2h+1 x 2h+1 kernel
        :rtype: Kernel

        Return a 2-dimensional Laplacian of Gaussian kernel with
        standard deviation ``sigma``

        .. math::

            \mathbf{K} = \frac{1}{\pi \sigma^4} \left(\frac{u^2 + v^2}{2 \sigma^2} -1\right) e^{-(u^2 + v^2) / 2 \sigma^2}

        The kernel is centred within a square array with side length given by:

        - :math:`2 \mbox{ceil}(3 \sigma) + 1`, or
        - :math:`2\mathtt{h} + 1`

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.LoG(1)

        :note: This is the classic "Mexican hat" shaped kernel

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

        :seealso: :meth:`Laplace` :meth:`DoG` :meth:`Gauss` :meth:`zerocross`
        """
        if h is None:
            h = np.ceil(3.0 * sigma)
        wi = np.arange(-h, h + 1)
        x, y = np.meshgrid(wi, wi)

        log = (
            1.0
            / (np.pi * sigma**4.0)
            * ((x**2 + y**2) / (2.0 * sigma**2) - 1)
            * np.exp(-(x**2 + y**2) / (2.0 * sigma**2))
        )
        # ensure that the mean is zero, for a truncated kernel this may not
        # be the case
        log -= log.mean()
        return cls(log, name=f"LoG σ={sigma}")

    @classmethod
    def DGauss(cls, sigma, h=None):
        r"""
        Derivative of Gaussian kernel

        :param sigma: standard deviation of first Gaussian kernel
        :type sigma: float
        :param h: half-width of kernel
        :type h: int, optional
        :return: 2h+1 x 2h+1 kernel
        :rtype: Kernel

        Returns a 2-dimensional derivative of Gaussian
        kernel with standard deviation ``sigma``

        .. math::

            \mathbf{K} = \frac{-x}{2\pi \sigma^2} e^{-(x^2 + y^2) / 2 \sigma^2}

        The kernel is centred within a square array with side length given by:

        - :math:`2 \mbox{ceil}(3 \sigma) + 1`, or
        - :math:`2\mathtt{h} + 1`

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.DGauss(1)

        :note:
            - This kernel is the horizontal derivative of the Gaussian, :math:`dG/dx`.
            - The vertical derivative, :math:`dG/dy`, is the transpose of this kernel.
            - This kernel is an effective edge detector.

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

        :seealso: :meth:`Gauss` :meth:`Sobel`
        """
        if h is None:
            h = np.ceil(3.0 * sigma)

        wi = np.arange(-h, h + 1)
        x, y = np.meshgrid(wi, wi)

        K = -x / sigma**2 / (2.0 * np.pi) * np.exp(-(x**2 + y**2) / 2.0 / sigma**2)
        return cls(K, name=f"DGauss σ={sigma}")

    @classmethod
    def Circle(cls, radius, h=None, normalize=False, dtype="uint8"):
        r"""
        Circular structuring element

        :param radius: radius of circular structuring element
        :type radius: scalar, array_like(2)
        :param h: half-width of kernel
        :type h: int
        :param normalize: normalize volume of kernel to one, defaults to False
        :type normalize: bool, optional
        :param dtype: data type for image, defaults to ``uint8``
        :type dtype: str or NumPy dtype, optional
        :return: 2h+1 x 2h+1 circular kernel
        :rtype: Kernel

        Returns a circular kernel of radius ``radius`` pixels.  Sometimes referred
        to as a tophat kernel.  Values inside the circle are set to one,
        outside are set to zero.

        If ``radius`` is a 2-element vector the result is an annulus of ones,
        and the smaller and larger of the two numbers are interpreted as the
        inner and outer radii respectively.

        The kernel is centred within a square array with side length given
        by :math:`2\mathtt{h} + 1`.  If ``h`` is not given the side length is
        derived from the (outer) radius.

        If ``normalize`` is true the weights are divided by their sum and the
        result is a floating point array, whatever ``dtype`` was requested.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.Circle(2)
            >>> Kernel.Circle([2, 3])

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.1, P. Corke, Springer 2023.

        :seealso: :meth:`Box`
        """
        annulus = not argcheck.isscalar(radius)
        if annulus:
            radius = argcheck.getvector(radius)
            rmax = radius.max()
            rmin = radius.min()
        else:
            rmax = radius

        # one integer side length, used for both the array and the grid, so
        # that a fractional radius cannot make the two disagree in size
        w = int(h * 2 + 1 if h is not None else 2 * rmax + 1)

        s = np.zeros((w, w), dtype=dtype)
        c = w // 2

        # squared distance of every element from the centre element
        x = np.arange(w) - c
        X, Y = np.meshgrid(x, x)
        r2 = X**2 + Y**2

        if annulus:
            inside = (r2 >= rmin**2) & (r2 <= rmax**2)
        else:
            inside = r2 <= radius**2
        s[inside] = 1

        if normalize:
            # true division cannot be stored in place in an integer array,
            # so build a new floating point array
            s = s / np.sum(s)
        return cls(s, name=f"Circle r={radius}")

    @classmethod
    def Box(cls, h, normalize=True):
        r"""
        Square structuring element

        :param h: half-width of kernel
        :type h: int
        :param normalize: normalize volume of kernel to one, defaults to True
        :type normalize: bool, optional
        :return: 2h+1 x 2h+1 kernel
        :rtype: Kernel

        Returns a square kernel of ones, scaled to unit volume if
        ``normalize`` is true.

        The kernel is centred within a square array with side length given
        by :math:`2\mathtt{h} + 1`.

        Example:

        .. runblock:: pycon

            >>> from machinevisiontoolbox import Kernel
            >>> Kernel.Box(2)
            >>> Kernel.Box(2, normalize=False)

        :references:
            - Robotics, Vision & Control for Python, Section 11.5.1.1, P. Corke, Springer 2023.

        :seealso: :meth:`Circle`
        """
        wi = 2 * h + 1
        k = np.ones((wi, wi))
        if normalize:
            k /= np.sum(k)
        return cls(k, name=f"Box h={h}")
[docs]
class ImageSpatialMixin:
@staticmethod
def _bordertype_cv(border, exclude=None):
    """
    Border handling options for OpenCV

    :param border: border handling option, one of: 'constant', 'replicate',
        'reflect', 'mirror', 'reflect_101', 'wrap', 'pad', 'none'
    :type border: str
    :param exclude: list of excluded values, defaults to None
    :type exclude: list or tuple, optional
    :raises ValueError: ``border`` value is not valid
    :raises ValueError: ``border`` value is excluded
    :return: OpenCV key
    :rtype: int

    Map an MVTB border handling key to the OpenCV flag value.  The exclusion
    test is a plain ``in`` test against ``exclude`` and is made before the
    key is looked up.
    """
    if exclude is not None and border in exclude:
        raise ValueError("border option not supported")

    # MVTB name -> OpenCV flag; 'mirror'/'reflect_101' and 'constant'/'pad'
    # are aliases for the same OpenCV flag
    cv_flags = dict(
        constant=cv.BORDER_CONSTANT,
        replicate=cv.BORDER_REPLICATE,
        reflect=cv.BORDER_REFLECT,
        mirror=cv.BORDER_REFLECT_101,
        reflect_101=cv.BORDER_REFLECT_101,
        wrap=cv.BORDER_WRAP,
        pad=cv.BORDER_CONSTANT,
        none=cv.BORDER_ISOLATED,
    )

    if border in cv_flags:
        return cv_flags[border]
    raise ValueError(border, "border is not a valid option")
# border options: MVTB border name -> OpenCV border flag, used by
# _border_args_cv.  Kept in step with the table in _bordertype_cv:
# 'mirror' and 'reflect_101' are aliases, as are 'constant' and 'pad'.
_border_opt = {
    "constant": cv.BORDER_CONSTANT,
    "replicate": cv.BORDER_REPLICATE,
    "reflect": cv.BORDER_REFLECT,
    "mirror": cv.BORDER_REFLECT_101,
    "reflect_101": cv.BORDER_REFLECT_101,
    "wrap": cv.BORDER_WRAP,
    "pad": cv.BORDER_CONSTANT,
    "none": cv.BORDER_ISOLATED,
}
@staticmethod
def _border_args_cv(border, morpho=False, allow=None, disallow=None):
    """
    Border handling keyword arguments for OpenCV

    :param border: border handling option given by name, or a numeric value
        meaning "pad with this value"
    :type border: str, int or float
    :param morpho: currently unused
    :type morpho: bool, optional
    :param allow: if given, the only permitted values of ``border``
    :type allow: list or tuple, optional
    :param disallow: if given, values of ``border`` that are rejected
    :type disallow: list or tuple, optional
    :raises ValueError: ``border`` is disallowed, is not in ``allow``, is not
        a known name, or is neither a string nor a number
    :return: keyword arguments ``borderType`` and, for a numeric ``border``,
        ``borderValue``
    :rtype: dict

    The returned dictionary is intended to be unpacked into the call of an
    OpenCV function that accepts these keyword arguments.
    """
    if disallow is not None and border in disallow:
        raise ValueError(f"border option {border} not supported")
    if allow is not None and border not in allow:
        raise ValueError(f"border option {border} not supported")

    # a class-level table is not visible as a bare name inside a method, so
    # it has to be reached through the class
    table = ImageSpatialMixin._border_opt

    if isinstance(border, str):
        # given as string, convert to OpenCV flag value
        try:
            return dict(borderType=table[border])
        except KeyError:
            raise ValueError(border, "border is not a valid option") from None

    if isinstance(border, (int, float)):
        # given as a numeric value, assume 'pad' with that value
        return dict(borderType=table["pad"], borderValue=border)

    # anything else used to fall through and return None silently
    raise ValueError(border, "border is not a valid option")
@staticmethod
def _bordertype_sp(border, exclude=None):
    """
    Border handling options for SciPy

    :param border: border handling option, one of: 'constant', 'replicate',
        'reflect', 'mirror', 'wrap'
    :type border: str
    :param exclude: list of excluded values, defaults to None
    :type exclude: list or tuple, optional
    :raises ValueError: ``border`` value is not valid
    :raises ValueError: ``border`` value is excluded
    :return: SciPy key
    :rtype: str

    Map an MVTB border handling key to the SciPy ndimage ``mode`` string.
    Only 'replicate' is renamed (to 'nearest'); the other keys map to
    themselves.
    """
    if exclude is not None and border in exclude:
        raise ValueError("border option not supported")

    # MVTB name -> scipy.ndimage mode name
    scipy_modes = dict(
        constant="constant",
        replicate="nearest",
        reflect="reflect",
        mirror="mirror",
        wrap="wrap",
    )

    if border in scipy_modes:
        return scipy_modes[border]
    raise ValueError(border, "border is not a valid option")
[docs]
def smooth(self, sigma, h=None, mode="same", border="reflect", bordervalue=0):
    r"""
    Smooth image

    :param sigma: standard deviation of the Gaussian kernel
    :type sigma: float
    :param h: half-width of the kernel
    :type h: int
    :param mode: option for convolution, see :meth:`convolve`, defaults to 'same'
    :type mode: str, optional
    :param border: option for boundary handling, see :meth:`convolve`, defaults to 'reflect'
    :type border: str, optional
    :param bordervalue: padding value, see :meth:`convolve`, defaults to 0
    :type bordervalue: scalar, optional
    :raises ValueError: ``sigma`` is not a scalar
    :return: smoothed image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`

    Smooth the image by convolving with a Gaussian kernel of standard
    deviation ``sigma``.  If ``h`` is not given the kernel half width is
    chosen by :meth:`machinevisiontoolbox.Kernel.Gauss`, which uses
    :math:`\mbox{ceil}(3 \sigma)`, giving a side length of
    :math:`2 \mbox{ceil}(3 \sigma) + 1`.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Read('monalisa.png')
        >>> img.smooth(sigma=3).disp()

    :note:
        - The Gaussian kernel has a unit volume.
        - All the work is delegated to :meth:`convolve`; see its notes on
          colour images and on the datatype of the result.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.1, P. Corke, Springer 2023.

    :seealso: :meth:`machinevisiontoolbox.Kernel.Gauss` :meth:`convolve`
    """
    if argcheck.isscalar(sigma):
        # build the smoothing kernel and hand over to the general convolution
        gaussian = Kernel.Gauss(sigma, h)
        return self.convolve(
            gaussian, mode=mode, border=border, bordervalue=bordervalue
        )

    raise ValueError(sigma, "sigma must be a scalar")
[docs]
def convolve(self, K, mode="same", border="reflect", bordervalue=0):
    """
    Image convolution

    :param K: convolution kernel
    :type K: :class:`~.Kernel`, :class:`.Image` or ndarray
    :param mode: option for convolution, defaults to 'same'
    :type mode: str, optional
    :param border: option for boundary handling, defaults to 'reflect'
    :type border: str, optional
    :param bordervalue: padding value, defaults to 0
    :type bordervalue: scalar, optional
    :raises ValueError: ``mode`` or ``border`` is not a valid option
    :return: input image convolved with kernel
    :rtype: :class:`.Image`

    Computes the convolution of image with the kernel ``K``.

    There are two options that control what happens at the edge of the image
    where the convolution window lies outside the image border.  ``mode``
    controls the size of the resulting image, while ``border`` controls how
    pixel values are extrapolated outside the image border.

    =========== ===========================================================================
    ``mode``    description
    =========== ===========================================================================
    ``'same'``  output image is same size as input image (default)
    ``'full'``  output image is larger than the input image, add border to input image
    ``'valid'`` output image is smaller than the input image and contains only valid pixels
    =========== ===========================================================================

    ================= ====================================================
    ``border``        description
    ================= ====================================================
    ``'replicate'``   replicate border pixels outwards
    ``'pad'``         outside pixels are set to ``bordervalue``
    ``'constant'``    OpenCV constant border
    ``'wrap'``        borders are joined, left to right, top to bottom
    ``'reflect'``     outside pixels reflect inside pixels
    ``'mirror'``      outside pixels reflect inside pixels except for edge
    ``'reflect_101'`` same as ``'mirror'``
    ``'none'``        do not look outside of border
    ================= ====================================================

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> import numpy as np
        >>> img = Image.Read('monalisa.png')
        >>> img.convolve(K=np.ones((11,11))).disp()

    :note:
        - The kernel is typically square with an odd side length.
        - The result has the same datatype as the input image.  For a kernel
          where the results could be negative (eg. edge detection kernel)
          this will cause issues such as value wraparound.
        - If the image is color (has multiple planes) the kernel is
          applied to each plane, resulting in an output image with the same
          number of planes.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.1, P. Corke, Springer 2023.

    :seealso:
        :class:`~.Kernel`
        :meth:`~smooth`
        :func:`opencv.filter2D`
        :func:`opencv.copyMakeBorder`
    """
    if isinstance(K, self.__class__):
        # kernel is an Image instance
        K = K.A
    elif isinstance(K, Kernel):
        # kernel is a Kernel instance, use its weight array
        K = K.K

    K = argcheck.getmatrix(K, shape=[None, None], dtype="float32")

    # OpenCV does correlation, not convolution, so we flip the kernel
    # to compensate.  Flip horizontally and vertically.
    K = np.flip(K)

    # half height (rows) and half width (columns) of the kernel
    kh, kw = K.shape[0] // 2, K.shape[1] // 2

    if mode not in ("valid", "same", "full"):
        raise ValueError(mode, "opt is not a valid option")

    bordertype = self._bordertype_cv(border)
    img = self.A

    if border == "pad" and bordervalue != 0:
        # filter2D takes no border value, so a non-zero pad value has to be
        # materialised with copyMakeBorder.  Pad one kernel half-size more
        # than the wanted output, then trim that outer ring, whose results
        # looked beyond the explicit padding.
        n = {"valid": 0, "same": 1, "full": 2}[mode]
        if n > 0:
            img = cv.copyMakeBorder(
                img,
                n * kh,
                n * kh,
                n * kw,
                n * kw,
                cv.BORDER_CONSTANT,
                value=bordervalue,
            )
        trim = True
    elif mode == "full":
        # grow the image first; copyMakeBorder takes top, bottom, left, right
        img = cv.copyMakeBorder(img, kh, kh, kw, kw, bordertype, value=bordervalue)
        trim = False
    else:
        trim = mode == "valid"

    out = cv.filter2D(img, ddepth=-1, kernel=K, borderType=bordertype)

    if trim:
        # explicit end indices: a ``-0`` end index would give an empty slice
        # when a kernel dimension is 1.  Any colour plane axis is untouched.
        out = out[kh : out.shape[0] - kh, kw : out.shape[1] - kw]

    return self.__class__(out, colororder=self.colororder)
# def sobel(self, kernel=None):
# if kernel is None:
# kernel = Kernel.Sobel()
# Iu = self.convolve(kernel)
# Iv = self.convolve(kernel.T)
# return Iu, Iv
[docs]
def gradients(self, K=None, mode="same", border="reflect", bordervalue=0):
    """
    Compute horizontal and vertical gradients

    :param K: derivative kernel, defaults to Sobel
    :type K: :class:`~machinevisiontoolbox.ImageSpatial.Kernel` or ndarray, optional
    :param mode: option for convolution, see :meth:`convolve`, defaults to 'same'
    :type mode: str, optional
    :param border: option for boundary handling, see :meth:`convolve`, defaults to 'reflect'
    :type border: str, optional
    :param bordervalue: padding value, see :meth:`convolve`, defaults to 0
    :type bordervalue: scalar, optional
    :return: horizontal and vertical gradient images
    :rtype: 2-tuple of :class:`~machinevisiontoolbox.ImageCore.Image`

    Compute horizontal and vertical gradient images.  The image is convolved
    once with ``K`` and once with its transpose ``K.T``, using the same
    convolution options for both.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Read('monalisa.png', grey=True)
        >>> Iu, Iv = img.gradients()

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

    :seealso: :class:`~machinevisiontoolbox.ImageSpatial.Kernel`
    """
    kernel = Kernel.Sobel() if K is None else K

    # identical options for both directions
    options = dict(mode=mode, border=border, bordervalue=bordervalue)

    return self.convolve(kernel, **options), self.convolve(kernel.T, **options)
[docs]
def direction(horizontal, vertical):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    r"""
    Gradient direction

    :param horizontal: horizontal gradient image (the instance the method is
        called on)
    :type horizontal: :class:`~machinevisiontoolbox.ImageCore.Image`
    :param vertical: vertical gradient image
    :type vertical: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: the two images do not have the same shape
    :return: gradient direction in radians
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`

    Compute the per-pixel gradient direction from two images comprising the
    horizontal and vertical gradient components.

    .. math::

        \theta_{u,v} = \tan^{-1} \frac{\mat{I}_{v: u,v}}{\mat{I}_{u: u,v}}

    The angle is computed with the four-quadrant ``arctan2`` function.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Read('monalisa.png', grey=True)
        >>> Iu, Iv = img.gradients()
        >>> direction = Iu.direction(Iv)

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

    :seealso: :meth:`gradients`
    """
    if horizontal.shape == vertical.shape:
        theta = np.arctan2(vertical.A, horizontal.A)
        # result has the same class as the image the method was called on
        return horizontal.__class__(theta)

    raise ValueError("images must the same shape")
[docs]
def Harris_corner_strength(self, k=0.04, h=2):
    """
    Harris corner strength image

    :param k: Harris parameter, defaults to 0.04
    :type k: float, optional
    :param h: kernel half width, defaults to 2
    :type h: int, optional
    :return: Harris corner strength image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`

    Returns an image containing Harris corner strength values.  This is
    positive for high gradient in orthogonal directions, and negative
    for high gradient in a single direction.

    The image is converted to greyscale and passed to OpenCV
    ``cornerHarris`` with the positional arguments ``2``, ``2 * h + 1``
    and ``k``.

    :references:
        - Robotics, Vision & Control for Python, Section 12.3.1, P. Corke, Springer 2023.

    :seealso:
        :meth:`gradients`
        :meth:`Harris`
    """
    grey = self.mono().image
    # odd-sized window derived from the half width
    size = 2 * h + 1
    strength = cv.cornerHarris(grey, 2, size, k)
    return self.__class__(strength)
[docs]
def window(self, func, h=None, se=None, border="reflect", bordervalue=0, **kwargs):
    r"""
    Generalized spatial operator

    :param func: function applied to window
    :type func: callable
    :param h: half width of structuring element
    :type h: int, optional
    :param se: structuring element
    :type se: ndarray(N,M), optional
    :param border: option for boundary handling, one of 'constant',
        'replicate', 'reflect', 'mirror', 'wrap', defaults to 'reflect'
    :type border: str, optional
    :param bordervalue: padding value, defaults to 0
    :type bordervalue: scalar, optional
    :param kwargs: accepted but currently unused
    :raises ValueError: ``border`` is not a valid option
    :raises ValueError: neither ``h`` nor ``se`` is given
    :raises TypeError: ``func`` not callable
    :raises ValueError: single channel images only
    :return: transformed image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`

    Returns an image where each pixel is the result of applying the function
    ``func`` to a neighbourhood centred on the corresponding pixel in image.
    The return value of ``func`` becomes the corresponding pixel value.

    The neighbourhood is defined in two ways:

    - If ``se`` is given then it is the the size of the structuring element
      ``se`` which should have odd side lengths.  The elements in the
      neighbourhood corresponding to non-zero elements in ``se`` are packed
      into a vector and passed to the specified callable function ``func``.
      If both ``se`` and ``h`` are given, ``se`` is used.
    - If ``se`` is None then ``h`` is the half width of a :math:`w \times
      w` square structuring element of ones, where :math:`w =2h+1`.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> import numpy as np
        >>> img = Image.Read('monalisa.png', grey=True)
        >>> out = img.window(np.median, h=3)

    :note:
        - The structuring element should have an odd side length.
        - Is slow since the function ``func`` must be invoked once for
          every output pixel.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.3, P. Corke, Springer 2023.

    :seealso: :func:`scipy.ndimage.generic_filter`
    """
    # imported here so the submodule is certainly loaded; ``import scipy``
    # alone does not guarantee that ``scipy.ndimage`` is available
    from scipy import ndimage

    if self.iscolor:
        raise ValueError("single channel images only")

    if not callable(func):
        raise TypeError(func, "func not callable")

    if se is None:
        if h is None:
            # without this the call would fail inside SciPy with a
            # RuntimeError that does not mention either argument
            raise ValueError("either h or se must be given")
        w = 2 * h + 1
        se = np.ones((w, w))

    out = ndimage.generic_filter(
        self.A,
        func,
        footprint=se,
        mode=self._bordertype_sp(border),
        cval=bordervalue,
    )
    return self.__class__(out)
[docs]
def zerocross(self):
    """
    Compute zero crossing

    :return: boolean image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image` instance

    Compute a zero-crossing image, where pixels are true if they are adjacent to
    a change in sign.  A pixel is marked when it is positive and the minimum
    over its 3x3 neighbourhood is negative, or when it is negative and the
    maximum over its 3x3 neighbourhood is positive.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> U, V = Image.meshgrid(None, 6, 6)
        >>> img = Image(U - V - 2, dtype='float')
        >>> img.print()
        >>> img.zerocross().print()

    :note: Use morphological filtering with 3x3 structuring element, can
        lead to erroneous values in border pixels.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.

    :seealso:
        :meth:`Laplace`
        :meth:`LoG`
    """
    pixels = self.image

    # erosion / dilation give the neighbourhood minimum / maximum
    neighbourhood_min = cv.morphologyEx(pixels, cv.MORPH_ERODE, np.ones((3, 3)))
    neighbourhood_max = cv.morphologyEx(pixels, cv.MORPH_DILATE, np.ones((3, 3)))

    positive_near_negative = np.logical_and(neighbourhood_min < 0, pixels > 0)
    negative_near_positive = np.logical_and(neighbourhood_max > 0, pixels < 0)

    return self.__class__(
        np.logical_or(positive_near_negative, negative_near_positive)
    )
# modeopt = {
# 'full': 'full',
# 'valid': 'valid',
# 'same': 'same'
# }
# if optmode not in modeopt:
# raise ValueError(optmode, 'opt is not a valid option')
# boundaryopt = {
# 'fill': 'fill',
# 'wrap': 'wrap',
# 'reflect': 'symm'
# }
# if optboundary not in boundaryopt:
# raise ValueError(optboundary, 'opt is not a valid option')
# is_int = False
# if np.issubdtype(self.dtype, np.integer):
# is_int = True
# img = self.float()
# else:
# img = self
# # make the smoothing kernel
# K = Kernel.Gauss(sigma, h)
# if img.iscolor:
# # could replace this with a nested list comprehension
# ims = []
# for im in img:
# o = np.dstack([signal.convolve2d(np.squeeze(im.image[:, :, i]),
# K,
# mode=modeopt[optmode],
# boundary=boundaryopt[
# optboundary])
# for i in range(im.numchannels)])
# ims.append(o)
# elif not img.iscolor:
# ims = []
# for im in img:
# ims.append(signal.convolve2d(im.image,
# K,
# mode=modeopt[optmode],
# boundary=boundaryopt[
# optboundary]))
# else:
# raise ValueError(self.iscolor, 'bad value for iscolor')
# if is_int:
# return self.__class__(ims).int()
# else:
# return self.__class__(ims)
# def replicate(self, M=1):
# """
# Expand image
# :param M: number of times to replicate image
# :type M: integer
# :return out: Image expanded image
# :rtype out: :class:`~machinevisiontoolbox.ImageCore.Image`
# - ``IM.replicate(M)`` is an expanded version of the image (H,W) where
# each pixel is replicated into a (M,M) tile. If ``im`` is (H,W) the
# result is ((M*H),(M*W)) numpy array.
# Example:
# .. runblock:: pycon
# """
# out = []
# for im in self:
# if im.ndims > 2:
# # dealing with multiplane image
# # TODO replace with a list comprehension
# ir2 = []
# for i in range(im.numchannels):
# im1 = self.__class__(im.image[:, :, i])
# ir2 = np.append(im1.replicate(M))
# return ir2
# nr = im.shape[0]
# nc = im.shape[1]
# # replicate columns
# ir = np.zeros((M * nr, nc), dtype=im.dtype)
# for r in range(M):
# ir[r:-1:M, :] = im.image
# # replicate rows
# ir2 = np.zeros((M * nr, M * nc), dtype=im.dtype)
# for c in range(M):
# ir2[:, c:-1:M] = ir
# out.append(ir2)
# return self.__class__(out)
[docs]
def scalespace(self, n, sigma=1):
    """
    Compute image scalespace sequence

    :param n: number of steps
    :type n: int
    :param sigma: Gaussian filter width, defaults to 1
    :type sigma: scalar, optional
    :return: Gaussian and difference of Gaussian sequences, scale factors
    :rtype: list of :class:`~machinevisiontoolbox.ImageCore.Image`, list of :class:`~machinevisiontoolbox.ImageCore.Image`, list of float

    Compute a scalespace image sequence by consecutively smoothing the input
    image with a Gaussian of width ``sigma``.  The difference between
    consecutive smoothings is the difference of Gaussian which is an
    approximation to the Laplacian of Gaussian.  Each difference is
    multiplied by the square of the scale of the later image of the pair.

    Examples::

        >>> mona = Image.Read("monalisa.png", dtype="float");
        >>> G, L, scales = mona.scalespace(8, sigma=8);

    :note:
        - The Gaussian sequence starts with a copy of the original image,
          which is assigned a scale of 0.5, followed by ``n - 1``
          progressively smoothed images.
        - The scale list has the same length as the Gaussian sequence; the
          difference of Gaussian sequence is one shorter.

    :references:
        - Robotics, Vision & Control for Python, Section 12.3.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`pyramid`
        :meth:`smooth`
        :class:`~.Gauss`
        :class:`~.LoG`
    """
    current = self.copy()
    smoothed = [current]

    width = 0.5
    scales = [width]

    differences = []

    for _ in range(n - 1):
        previous, current = current, current.smooth(sigma)

        # repeated Gaussian smoothing: the widths add in quadrature
        width = np.sqrt(width**2 + sigma**2)

        scales.append(width)
        smoothed.append(current)
        differences.append((current - previous) * width**2)

    return smoothed, differences, scales
[docs]
def pyramid(self, sigma=1, N=None, border="replicate", bordervalue=0):
    """
    Pyramidal image decomposition

    :param sigma: standard deviation of Gaussian kernel
    :type sigma: float
    :param N: number of pyramid levels to be computed, defaults to all
    :type N: int, optional
    :param border: option for boundary handling, see :meth:`convolve`, defaults to 'replicate'
    :type border: str, optional
    :param bordervalue: padding value, defaults to 0
    :type bordervalue: scalar, optional
    :raises ValueError: ``sigma`` is not a scalar
    :raises ValueError: ``N`` is not a scalar in the range 0 to
        ``max(im.shape)``
    :raises ValueError: ``border`` is not valid, or is 'constant' or 'pad'
    :return: list of images at each pyramid level
    :rtype: list of :class:`~machinevisiontoolbox.ImageCore.Image`

    Returns a pyramid decomposition of the input image.  The first element
    of the list is the greyscale input image, and each following image is the
    result of OpenCV ``pyrDown`` applied to the previous one.  At most ``N``
    reductions are made, and the pyramid stops early once either image
    dimension has reached one pixel.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Read('monalisa.png')
        >>> pyramid = img.pyramid(N=4)
        >>> len(pyramid)
        >>> pyramid

    :note:
        - Works for greyscale images only.
        - Converts a color image to greyscale.
        - ``sigma`` is validated but not otherwise used, the smoothing is
          whatever ``pyrDown`` applies.  ``bordervalue`` is not used.

    :references:
        - Robotics, Vision & Control for Python, Section 12.3.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`smooth`
        :meth:`scalespace`
    """
    # check inputs, greyscale only
    im = self.mono()

    if not argcheck.isscalar(sigma):
        raise ValueError(sigma, "sigma must be a scalar")

    longest = max(im.shape)
    if N is None:
        N = longest
    elif not (argcheck.isscalar(N) and 0 <= N <= longest):
        # reject anything that is not a scalar within range
        raise ValueError(
            N,
            "N must be a scalar and 0 <= N <= max(im.shape)",
        )

    # pyrDown does not support a constant border; both of these names map to
    # the OpenCV constant border flag.  A tuple is used because ``in`` on a
    # plain string would be a substring test.
    bordertype = self._bordertype_cv(border, exclude=("constant", "pad"))

    level = im.image
    pyr = [level]
    for _ in range(int(N)):
        if level.shape[0] == 1 or level.shape[1] == 1:
            # cannot be halved any further
            break
        level = cv.pyrDown(level, borderType=bordertype)
        pyr.append(level)

    # output list of Image objects
    return [self.__class__(p) for p in pyr]
[docs]
def canny(self, sigma=1, th0=None, th1=None):
    """
    Canny edge detection

    :param sigma: standard deviation for Gaussian kernel smoothing, defaults
        to 1.  Accepted for interface compatibility, the OpenCV detector
        called here does not make use of it.
    :type sigma: float, optional
    :param th0: lower hysteresis threshold, in the pixel-value units of the
        image.  If only ``th1`` is given it defaults to 10% of full scale
        (0.1 for a floating point image, 10% of the maximum of the integer
        type otherwise).
    :type th0: float, optional
    :param th1: upper hysteresis threshold, in the pixel-value units of the
        image.  If only ``th0`` is given it defaults to ``1.5 * th0``.
    :type th1: float, optional
    :return: edge image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image` instance

    Computes an edge image obtained using the Canny edge detector algorithm
    as implemented by OpenCV.  Hysteresis filtering is applied to the
    gradient image: edge pixels > ``th1`` are connected to adjacent pixels
    > ``th0``, those below ``th0`` are set to zero.

    If neither ``th0`` nor ``th1`` is given the two thresholds are chosen
    automatically as a band of one third either side of the mean pixel
    value of the image.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Read('monalisa.png')
        >>> edges = img.canny()

    :note:
        - Produces a zero image with single pixel wide edges having
          non-zero values.
        - A color image is automatically converted to greyscale first.

    :references:
        - "A Computational Approach To Edge Detection", J. Canny,
          IEEE Trans. Pattern Analysis and Machine Intelligence,
          8(6):679–698, 1986.
        - Robotics, Vision & Control for Python, Section 11.5.1.3, P. Corke, Springer 2023.
    """
    # the detector runs on an integer version of the greyscale image
    img = self.mono()
    im_int = img.to_int()

    if th0 is None and th1 is None:
        # automatic thresholds: +/- one third around the mean pixel value
        # NOTE(review): the upper limit is clamped to 1, which only suits
        # pixel values in the range 0 to 1 -- confirm the intended range
        spread = 0.3333
        v = np.mean(img.A)
        lower = max(0, (1.0 - spread) * v)
        upper = min(1, (1.0 + spread) * v)
    else:
        # full scale of the image's own pixel type
        if np.issubdtype(img.dtype, np.integer):
            fullscale = float(np.iinfo(img.dtype).max)
        else:
            fullscale = 1.0

        # fill in whichever threshold was not supplied
        if th0 is None:
            th0 = 0.1 * fullscale
            if fullscale != 1.0:
                th0 = np.round(th0)
        if th1 is None:
            th1 = 1.5 * th0

        # express the thresholds in the units of the integer image given
        # to OpenCV; assumes to_int() maps full scale to full scale
        # -- TODO confirm
        scale = np.iinfo(im_int.dtype).max / fullscale
        lower = th0 * scale
        upper = th1 * scale

    out = cv.Canny(im_int, lower, upper, L2gradient=False)
    return self.__class__(out)
[docs]
def rank(self, footprint=None, h=None, rank=-1, border="replicate", bordervalue=0):
    r"""
    Rank filter

    :param footprint: filter footprint or structuring element
    :type footprint: ndarray(N,M), optional
    :param h: half width of structuring element, takes precedence over
        ``footprint`` if both are given
    :type h: int, optional
    :param rank: rank of filter, must be given since the default of -1 is
        rejected
    :type rank: int, str
    :param border: option for boundary handling, defaults to 'replicate'
    :type border: str, optional
    :param bordervalue: padding value, defaults to 0
    :type bordervalue: scalar, optional
    :raises ValueError: neither ``footprint`` nor ``h`` is given
    :raises ValueError: ``rank`` is an unknown string, is negative, or is
        not less than the number of non-zero footprint elements
    :raises TypeError: ``rank`` is neither an integer nor a string
    :return: rank filtered image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`

    Return a rank filtered version of image.  Only pixels corresponding to
    non-zero elements of the structuring element are ranked, and the value
    that is ``rank`` in rank becomes the corresponding output pixel value.
    The highest rank, the maximum, is rank 0.  The rank can also be given
    as a string: 'min|imum', 'max|imum', 'med|ian', long or short versions
    are supported.

    The structuring element is given as:

    - ``footprint`` a 2D Numpy array containing zero or one values, or
    - ``h`` which is the half width :math:`w=2h+1` of an array of ones

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> import numpy as np
        >>> img = Image(np.arange(25).reshape((5,5)))
        >>> img.print()
        >>> img.rank(h=1, rank=0).print()  # maximum filter
        >>> img.rank(h=1, rank=8).print()  # minimum filter
        >>> img.rank(h=1, rank=4).print()  # median filter
        >>> img.rank(h=1, rank='median').print()  # median filter

    :note:
        - The footprint should have an odd side length.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.3, P. Corke, Springer 2023.

    :seealso: :obj:`scipy.ndimage.rank_filter`
    """
    # an explicit half width takes precedence over a supplied footprint
    if h is not None:
        w = 2 * h + 1
        footprint = np.ones((w, w))
    if footprint is None:
        raise ValueError("either footprint or h must be specified")
    footprint = np.asarray(footprint)

    # number of neighbourhood pixels that take part in the ranking
    n = int(np.count_nonzero(footprint))

    if isinstance(rank, str):
        named_ranks = {
            "min": n - 1,
            "minimum": n - 1,
            "max": 0,
            "maximum": 0,
            "med": n // 2,
            "median": n // 2,
        }
        if rank not in named_ranks:
            raise ValueError(
                rank, "rank string must be min|minimum, max|maximum or med|median"
            )
        rank = named_ranks[rank]
    elif isinstance(rank, (int, np.integer)):
        rank = int(rank)
    else:
        raise TypeError(rank, "rank must be int or str")

    if rank < 0:
        raise ValueError("rank must be >= 0")
    if rank >= n:
        # SciPy would interpret the resulting negative index as counting
        # from the other end and silently apply a different filter
        raise ValueError(
            rank, "rank must be less than the number of footprint pixels"
        )

    # this method ranks from the maximum (rank 0) downwards, SciPy ranks
    # from the minimum upwards
    r = n - rank - 1
    out = sp.ndimage.rank_filter(
        self.A,
        r,
        footprint=footprint,
        mode=self._bordertype_sp(border),
        cval=bordervalue,
    )
    return self.__class__(out)
# ======================= labels ============================= #
[docs]
def labels_binary(self, connectivity=4, ltype="int32"):
    """
    Blob labelling

    :param connectivity: number of neighbours used for connectivity: 4 [default] or 8
    :type connectivity: int, optional
    :param ltype: output image type: 'int32' [default], 'uint16'
    :type ltype: string, optional
    :raises ValueError: ``connectivity`` is not 4 or 8
    :raises TypeError: ``ltype`` is not 'int32' or 'uint16'
    :return: label image, number of regions
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`, int

    Compute labels of connected components in the input greyscale or binary
    image.  Regions are sets of contiguous pixels with the same value.

    The method returns the label image and the number of labels N, so labels
    lie in the range [0, N-1].  The value in the label image is an integer
    indicating which region the corresponding input pixel belongs to.  The
    background has label 0.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Squares(2, 15)
        >>> img.print()
        >>> labels, N = img.labels_binary()
        >>> N
        >>> labels.print()

    :note:
        - This algorithm is variously known as region labelling,
          connectivity analysis, region coloring, connected component analysis,
          blob labelling.
        - The output image is the same size as the input image.
        - The input image can be binary or greyscale.
        - Connectivity is performed using 4 nearest neighbours by default.
        - 8-way connectivity introduces ambiguities, a chequerboard is
          two blobs.

    :references:
        - Robotics, Vision & Control for Python, Section 12.1.2.1, P. Corke, Springer 2023.

    :seealso:
        :meth:`blobs`
        `cv2.connectedComponents <https://docs.opencv.org/master/d3/dc0/group__imgproc__shape.html#gaedef8c7340499ca391d459122e51bef5>`_
        :meth:`labels_graphseg`
        :meth:`labels_MSER`
    """
    if connectivity not in (4, 8):
        raise ValueError(connectivity, "connectivity must be 4 or 8")

    # translate the label type name to the OpenCV type code
    if ltype == "int32":
        cv_ltype = cv.CV_32S
    elif ltype == "uint16":
        cv_ltype = cv.CV_16U
    else:
        raise TypeError(ltype, "ltype must be either int32 or uint16")

    retval, labels = cv.connectedComponents(
        image=self.to_int(), connectivity=connectivity, ltype=cv_ltype
    )
    return self.__class__(labels), retval
[docs]
def labels_MSER(self, **kwargs):
    """
    Blob labelling using MSER

    :param kwargs: arguments passed to ``MSER_create``
    :return: label image, number of labels
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`, int

    Compute labels of connected components in the input greyscale image.
    Regions are sets of contiguous pixels that form stable regions across a
    range of threshold values.

    The method returns the label image and the number of labels N, so labels
    lie in the range [0, N-1].  The value in the label image is an integer
    indicating which region the corresponding input pixel belongs to.  The
    background has label 0 and the detected regions have labels 1 to N-1.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img = Image.Squares(2, 15)
        >>> img.print()
        >>> labels, N = img.labels_MSER()
        >>> N
        >>> labels.print()

    :note:
        - Regions are written to the label image in the order they are
          detected, a pixel that belongs to several regions keeps the label
          of the last one.

    :references:
        - Linear time maximally stable extremal regions,
          David Nistér and Henrik Stewénius,
          In Computer Vision–ECCV 2008, pages 183–196. Springer, 2008.
        - Robotics, Vision & Control for Python, Section 12.1.2.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`labels_binary`
        :meth:`labels_graphseg`
        :meth:`blobs`
        `opencv.MSER_create <https://docs.opencv.org/3.4/d3/d28/classcv_1_1MSER.html>`_
    """
    mser = cv.MSER_create(**kwargs)
    regions, _ = mser.detectRegions(self.to_int())
    nregions = len(regions)

    # the largest label is nregions, use the smallest type that can hold it
    dtype = np.uint8 if nregions < 256 else np.uint32

    out = np.zeros(self.shape, dtype=dtype)
    # number the regions from 1 so that none of them shares label 0 with
    # the background
    for label, points in enumerate(regions, start=1):
        # each row of points is an (x, y) pair, the image is indexed [y, x]
        out[points[:, 1], points[:, 0]] = label

    # the count includes the background label
    return self.__class__(out, dtype=dtype), nregions + 1
[docs]
def labels_graphseg(self, sigma=0.5, k=2000, minsize=100):
    """
    Blob labelling using graph-based segmentation

    :param sigma: ``sigma`` argument passed to ``createGraphSegmentation``,
        defaults to 0.5
    :type sigma: float, optional
    :param k: ``k`` argument passed to ``createGraphSegmentation``,
        defaults to 2000
    :type k: float, optional
    :param minsize: ``min_size`` argument passed to
        ``createGraphSegmentation``, defaults to 100
    :type minsize: int, optional
    :return: label image, number of regions
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image`, int

    Compute labels of connected components in the input color image.  Regions
    are sets of contiguous pixels that are similar with respect to their
    surrounds.

    The method returns the label image and the number of labels N, so labels
    lie in the range [0, N-1].  The value in the label image is an integer
    indicating which region the corresponding input pixel belongs to.

    :references:
        - Efficient graph-based image segmentation,
          Pedro F Felzenszwalb and Daniel P Huttenlocher,
          volume 59, pages 167–181. Springer, 2004.
        - Robotics, Vision & Control for Python, Section 12.1.2.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`labels_binary`
        :meth:`labels_MSER`
        :meth:`blobs`
        `opencv.createGraphSegmentation <https://docs.opencv.org/3.4/d5/df0/group__ximgproc__segmentation.html#ga5e3e721c5f16e34d3ad52b9eeb6d2860>`_
    """
    # P. Felzenszwalb, D. Huttenlocher: "Graph-Based Image Segmentation"
    segmenter = cv.ximgproc.segmentation.createGraphSegmentation(
        sigma=sigma, k=k, min_size=minsize
    )
    out = segmenter.processImage(self.to_int())

    # the number of labels is taken as one more than the largest label
    return self.__class__(out), np.max(out) + 1
# -------------------- similarity operations -------------------------- #
[docs]
def sad(image1, image2):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    """
    Sum of absolute differences

    :param image2: second image
    :type image2: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: image2 shape is not equal to self
    :return: sum of absolute differences
    :rtype: scalar

    Returns a simple image dissimilarity measure which is the sum of absolute
    differences between the image and ``image2``.  The result is a scalar
    and a value of 0 indicates identical pixel patterns and is increasingly
    positive as image dissimilarity increases.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img1 = Image([[10, 11], [12, 13]])
        >>> img2 = Image([[10, 11], [10, 13]])
        >>> img1.sad(img2)
        >>> img1.sad(img2+10)
        >>> img1.sad(img2*2)

    :note: Not invariant to pixel value scale or offset.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`zsad`
        :meth:`ssd`
        :meth:`ncc`
    """
    if tuple(image1.shape) != tuple(image2.shape):
        raise ValueError("image2 shape is not equal to image1")

    def widen(pixels):
        # integer and boolean pixels are widened to int64 so that the
        # difference of narrow unsigned types such as uint8 does not wrap
        pixels = np.asarray(pixels)
        if pixels.dtype.kind in "iub":
            return pixels.astype(np.int64, copy=False)
        return pixels

    return np.sum(np.abs(widen(image1.image) - widen(image2.image)))
[docs]
def ssd(image1, image2):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    """
    Sum of squared differences

    :param image2: second image
    :type image2: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: image2 shape is not equal to self
    :return: sum of squared differences
    :rtype: scalar

    Returns a simple image dissimilarity measure which is the sum of the
    squared differences between the image and ``image2``.  The result is a
    scalar and a value of 0 indicates identical pixel patterns and is
    increasingly positive as image dissimilarity increases.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img1 = Image([[10, 11], [12, 13]])
        >>> img2 = Image([[10, 11], [10, 13]])
        >>> img1.ssd(img2)
        >>> img1.ssd(img2+10)
        >>> img1.ssd(img2*2)

    :note: Not invariant to pixel value scale or offset.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`zssd`
        :meth:`sad`
        :meth:`ncc`
    """
    if tuple(image1.shape) != tuple(image2.shape):
        raise ValueError("image2 shape is not equal to image1")

    def widen(pixels):
        # integer and boolean pixels are widened to int64 so that neither
        # the difference nor its square wraps in a narrow type such as uint8
        pixels = np.asarray(pixels)
        if pixels.dtype.kind in "iub":
            return pixels.astype(np.int64, copy=False)
        return pixels

    difference = widen(image1.image) - widen(image2.image)
    return np.sum(np.power(difference, 2))
[docs]
def ncc(image1, image2):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    """
    Normalised cross correlation

    :param image2: second image
    :type image2: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: image2 shape is not equal to self
    :return: normalised cross correlation
    :rtype: scalar

    Returns an image similarity measure which is the normalized
    cross-correlation between the image and ``image2``.  The result is a
    scalar in the interval -1 (non match) to 1 (perfect match) that
    indicates similarity.  If the normalising term is smaller than 1e-10,
    for example because one image is all zero, the result is 0.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img1 = Image([[10, 11], [12, 13]])
        >>> img2 = Image([[10, 11], [10, 13]])
        >>> img1.ncc(img2)
        >>> img1.ncc(img2+10)
        >>> img1.ncc(img2*2)

    :note:
        - The ``ncc`` similarity measure is invariant to scale changes in
          image intensity.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`zncc`
        :meth:`sad`
        :meth:`ssd`
    """
    if tuple(image1.shape) != tuple(image2.shape):
        raise ValueError("image2 shape is not equal to image1")

    def as_real(pixels):
        # integer and boolean pixels are converted to float64 so that the
        # squares and products below do not overflow a narrow integer type
        pixels = np.asarray(pixels)
        if pixels.dtype.kind in "iub":
            return pixels.astype(np.float64)
        return pixels

    a = as_real(image1.image)
    b = as_real(image2.image)

    denom = np.sqrt(np.sum(a**2) * np.sum(b**2))
    if denom < 1e-10:
        # at least one image has no energy, the ratio is undefined
        return 0
    return np.sum(a * b) / denom
[docs]
def zsad(image1, image2):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    """
    Zero-mean sum of absolute differences

    :param image2: second image
    :type image2: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: image2 shape is not equal to self
    :return: sum of absolute differences
    :rtype: scalar

    Returns a simple image dissimilarity measure which is the sum of
    absolute differences between the image and ``image2`` after the mean
    pixel value of each image has been subtracted from it.  The result is
    a scalar, a value of 0 indicates identical pixel patterns (relative to
    their mean values) and it is increasingly positive as image
    dissimilarity increases.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img1 = Image([[10, 11], [12, 13]])
        >>> img2 = Image([[10, 11], [10, 13]])
        >>> img1.zsad(img2)
        >>> img1.zsad(img2+10)
        >>> img1.zsad(img2*2)

    :note:
        - The ``zsad`` similarity measure is invariant to changes in image
          brightness offset.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`sad`
        :meth:`zssd`
        :meth:`zncc`
    """
    shapes_agree = np.all(image1.shape == image2.shape)
    if not shapes_agree:
        raise ValueError("image2 shape is not equal to image1")

    # remove the mean from each image, then compare what is left
    centred = [im.image - np.mean(im.image) for im in (image1, image2)]
    return np.sum(np.abs(centred[0] - centred[1]))
[docs]
def zssd(image1, image2):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    """
    Zero-mean sum of squared differences

    :param image2: second image
    :type image2: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: image2 shape is not equal to self
    :return: sum of squared differences
    :rtype: scalar

    Returns a simple image dissimilarity measure which is the sum of the
    squared differences between the image and ``image2`` after the mean
    pixel value of each image has been subtracted from it.  The result is
    a scalar, a value of 0 indicates identical pixel patterns (relative to
    their mean values) and it is increasingly positive as image
    dissimilarity increases.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img1 = Image([[10, 11], [12, 13]])
        >>> img2 = Image([[10, 11], [10, 13]])
        >>> img1.zssd(img2)
        >>> img1.zssd(img2+10)
        >>> img1.zssd(img2*2)

    :note:
        - The ``zssd`` similarity measure is invariant to changes in image
          brightness offset.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`ssd`
        :meth:`zsad`
        :meth:`zncc`
    """
    shapes_agree = np.all(image1.shape == image2.shape)
    if not shapes_agree:
        raise ValueError("image2 shape is not equal to image1")

    # remove the mean from each image, then compare what is left
    centred = [im.image - np.mean(im.image) for im in (image1, image2)]
    residual = centred[0] - centred[1]
    return np.sum(np.power(residual, 2))
[docs]
def zncc(image1, image2):  # lgtm[py/not-named-self] pylint: disable=no-self-argument
    """
    Zero-mean normalized cross correlation

    :param image2: second image
    :type image2: :class:`~machinevisiontoolbox.ImageCore.Image`
    :raises ValueError: image2 shape is not equal to self
    :return: normalised cross correlation
    :rtype: scalar

    Returns an image similarity measure which is the normalized
    cross-correlation between the image and ``image2`` after the mean pixel
    value of each image has been subtracted from it.  The result is a
    scalar in the interval -1 (non match) to 1 (perfect match) that
    indicates similarity.  If the normalising term is smaller than 1e-10,
    for example because one image is constant, the result is 0.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> img1 = Image([[10, 11], [12, 13]])
        >>> img2 = Image([[10, 11], [10, 13]])
        >>> img1.zncc(img2)
        >>> img1.zncc(img2+10)
        >>> img1.zncc(img2*2)

    :note:
        - The ``zncc`` similarity measure is invariant to affine changes
          (offset and scale factor) in image intensity.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso:
        :meth:`ncc`
        :meth:`zsad`
        :meth:`zssd`
    """
    if tuple(image1.shape) != tuple(image2.shape):
        raise ValueError("image2 shape is not equal to image1")

    # pixel values relative to the mean of their own image
    a = image1.image - np.mean(image1.image)
    b = image2.image - np.mean(image2.image)

    # product of the two energies, same form as used by ncc
    denom = np.sqrt(np.sum(np.power(a, 2)) * np.sum(np.power(b, 2)))
    if denom < 1e-10:
        # at least one image has no variation, the ratio is undefined
        return 0
    return np.sum(a * b) / denom
[docs]
def similarity(self, T, metric="zncc"):
    """
    Locate template in image

    :param T: template image
    :type T: :class:`~machinevisiontoolbox.ImageCore.Image`
    :param metric: similarity metric, one of: 'ssd', 'zssd', 'ncc', 'zncc' [default]
    :type metric: str
    :raises ValueError: template T must have odd dimensions
    :raises ValueError: bad metric specified
    :return: similarity image
    :rtype: :class:`~machinevisiontoolbox.ImageCore.Image` instance

    Compute a similarity image where each output pixel is the similarity of
    the template ``T`` to the same-sized neighbourhood of the image, as
    computed by ``cv2.matchTemplate``.

    For the zero-mean metrics 'zssd' and 'zncc' the mean pixel value of the
    image and of the template is subtracted from each before matching.  The
    template passed by the caller is not modified.

    Example:

    .. runblock:: pycon

        >>> from machinevisiontoolbox import Image
        >>> crowd = Image.Read("wheres-wally.png", mono=True, dtype="float")
        >>> T = Image.Read("wally.png", mono=True, dtype="float")
        >>> sim = crowd.similarity(T, "zncc")
        >>> sim.disp(colormap="signed", colorbar=True);

    :note:
        - For NCC and ZNCC the maximum similarity value corresponds to the most likely
          template location.  For SSD and ZSSD the minimum value
          corresponds to the most likely location.
        - The result is returned as computed by ``cv2.matchTemplate``.  Per
          the OpenCV documentation only positions where the template lies
          entirely within the image are evaluated, so the result is smaller
          than the image.

    :references:
        - Robotics, Vision & Control for Python, Section 11.5.2, P. Corke, Springer 2023.

    :seealso: `cv2.matchTemplate <https://docs.opencv.org/master/df/dfb/group__imgproc__object.html#ga586ebfb0a7fb604b35a23d85391329be>`_
    """
    # check inputs before doing any work
    if ((T.shape[0] % 2) == 0) or ((T.shape[1] % 2) == 0):
        raise ValueError("template T must have odd dimensions")
    if metric not in ("ssd", "zssd", "ncc", "zncc"):
        raise ValueError("bad metric specified")

    # OpenCV method for each metric; the plain normalised correlation is
    # TM_CCORR_NORMED, TM_CCOEFF_NORMED is its mean-removed counterpart
    methods = {
        "ssd": cv.TM_SQDIFF,
        "zssd": cv.TM_SQDIFF,
        "ncc": cv.TM_CCORR_NORMED,
        "zncc": cv.TM_CCOEFF_NORMED,
    }

    im = self.A
    T_im = T.A
    if metric.startswith("z"):
        # subtract out of place: the caller's template array is left
        # untouched and integer images can be handled.  The results are
        # stored as float32, the floating point type OpenCV documents for
        # matchTemplate.
        T_im = (T_im - np.mean(T_im)).astype(np.float32)  # remove offset from template
        im = (im - np.mean(im)).astype(np.float32)  # remove offset from image

    out = cv.matchTemplate(im, T_im, method=methods[metric])
    return self.__class__(out)
# --------------------------------------------------------------------------#
if __name__ == "__main__":
    from machinevisiontoolbox import *

    # Gaussian kernel: summary line, weights and size
    gauss = Kernel.Gauss(h=3, sigma=2)
    print(gauss)
    gauss.print()
    print(gauss.shape)

    # smooth a test image with the kernel and display the result, then
    # display the first of its gradient images and wait for the window
    mona = Image.Read("monalisa.png", grey=True)
    smoothed = mona.convolve(gauss)
    smoothed.disp()
    first_gradient = mona.gradients(gauss)[0]
    first_gradient.disp(block=True)

    # derivative of Gaussian kernel summary
    dgauss = Kernel.DGauss(sigma=2)
    print(dgauss)

    # earlier experiments, kept for reference
    # img = Image(np.array(np.tile(np.r_[-2, -1, 1, 2, 3], (4, 1))), dtype="float")
    # img.zerocross().A
    # print("ImageProcessingKernel.py")
    # from machinevisiontoolbox import *
    # print(Kernel.Circle([2, 3]))
    # image = Image.Read("monalisa.png", grey=True)
    # blur = image.convolve(Kernel.Gauss(5))
    # blur.disp(block=True)