Source code for machinevisiontoolbox.ImageMultiview

#!/usr/bin/env python
import numpy as np
from spatialmath.base import argcheck, getvector, e2h, h2e, transl2
import cv2 as cv

[docs]class ImageMultiviewMixin: # ======================= stereo ================================== #
[docs] def stereo_simple(self, right, hw, drange): """ Simple stereo matching :param right: right image :type right: :class:`Image` :param hw: window half width :type hw: int :param drange: disparity range :type drange: array_like(2) :return: disparity image, similarity image, disparity space image :rtype: :class:`Image`, :class:`Image`, ndarray(H,W,D) This is a simple stereo matching implementation for pedagogical purposes. It returns: - the disparity image, same size as input images, whose elements give the integer disparity (in pixels) of the corresponding point in the left image. - the similarity image, same size as input images, whose elements give the strength of the stereo match. This is a ZNCC measure where 1 is a perfect match, greater than 0.8 is a decent match. - the disparity space image, a 3D array, whose first two dimensions are the same size as the input imagesm and whose third dimension is the number of disparities. Each fibre DSI[v,u,:] is window similarity versus disparity. Example:: >>> rocks_l = Image.Read("rocks2-l.png", reduce=2) >>> rocks_r = Image.Read("rocks2-r.png", reduce=2) >>> disparity, similarity, DSI = rocks_l.stereo_simple(rocks_r, hw=3, drange=[40, 90]) .. note:: The images are assumed to be epipolar aligned. :references: - Robotics, Vision & Control for Python, Section 14.4, P. Corke, Springer 2023. .. warning:: Not fast. :seealso: :meth:`DSI_refine` :meth:`stereo_BM` :meth:`stereo_SGBM` """ def window_stack(image, hw): # convert an image to a stack where the planes represent shifted # version of image. For a pixel at (u, v) ... stack = [] w = 2 * hw + 1 w1 = w - 1 for upad in range(w): for vpad in range(w): stack.append( np.pad(image, ((vpad, w1 - vpad), (upad, w1- upad)), mode='constant', constant_values=np.nan) ) return np.dstack(stack) if isinstance(drange, int): drange = (0, drange) # left = self.mono().image.astype(np.float32) # right = right.mono().image.astype(np.float32) left = self.mono().image right = right.mono().image # convert to window stacks left = window_stack(left, hw) right = window_stack(right, hw) # offset the mean value of each template left = left - left.mean(axis=2)[..., np.newaxis] right = right - right.mean(axis=2)[..., np.newaxis] # idisp(np.sum(left ** 2, axis=2)) # idisp(np.sum(right ** 2, axis=2)) # shift right image to the right right = right[:, :-drange[0], :] right = np.pad(right, ((0, 0), (drange[0], 0), (0, 0)), mode='constant', constant_values=np.nan) similarities = [] # suppress divide by zero error messages # possible ZNCC values include: # - NaN 0 / 0 invalid value encountered in true_divide # - inf x / 0 divide by zero encountered in true_divide with np.errstate(divide='ignore', invalid='ignore'): for d in np.arange(drange[1] - drange[0]): # lgtm[py/unused-loop-variable] # compute the ZNCC sumLL = np.sum(left ** 2, axis=2) sumRR = np.sum(right ** 2, axis=2) sumLR = np.sum(left * right, axis=2) denom = np.sqrt(sumLL * sumRR) # if (denom == 0).sum() > 0: # print('divide by zero in ZNCC') similarity = sumLR / denom similarity = np.where(denom==0, np.nan, similarity) similarities.append(similarity) # shift right image 1 pixel to the right right = right[:, :-1, :] right = np.pad(right, ((0, 0), (1, 0), (0, 0)), mode='constant', constant_values=np.nan) # stack the similarity images at each disparity into the 3D DSI dsi = np.dstack(similarities) # disparity is the index of the maxima in the disparity direction disparity = np.argmax(dsi, axis=2).astype(np.float32) + drange[0] # maxima is the maximum similarity in the disparity direction maxima = np.max(dsi, axis=2) # whereever maxima is nan set disparity to nan, similarity will be # done for border regions disparity = np.where(np.isnan(maxima), np.nan, disparity) disparity[:, :drange[0]] = np.nan return self.__class__(disparity, dtype=np.float32), \ self.__class__(maxima), \ dsi
[docs] @classmethod def DSI_refine(cls, DSI, drange=None): """ Refine disparity from disparity space image :param DSI: disparity space image :type DSI: ndarray(H,W,D) :param drange: disparity range, defaults to span of DSI values :type drange: array_like(2), optional :return: refined disparity image :rtype: :class:`Image` Performs subpixel interpolation on the peaks in the DSI to provide disparity estimates to a fraction of a pixel. Example:: >>> rocks_l = Image.Read("rocks2-l.png", reduce=2) >>> rocks_r = Image.Read("rocks2-r.png", reduce=2) >>> disparity, similarity, DSI = rocks_l.stereo_simple(rocks_r, hw=3, drange=[40, 90]) >>> disparity = Image.DSI_refine(DSI) :references: - Robotics, Vision & Control for Python, Section 14.4.1, P. Corke, Springer 2023. :seealso: :meth:`stereo_simple` """ DSI_flat = DSI.reshape((-1,DSI.shape[2])) YP = [] Y = [] YN = [] if drange is None: disparity = np.argmax(DSI, axis=2) drange = [disparity.min(), disparity.max()] for i, d in enumerate(np.argmax(DSI, axis=2).ravel()): if drange[0] < d < drange[1]: YP.append(DSI_flat[i, d-1]) Y.append(DSI_flat[i, d]) YN.append(DSI_flat[i, d+1]) else: YP.append(np.nan) Y.append(np.nan) YN.append(np.nan) YP= np.array(YP).reshape(DSI.shape[:2]) Y = np.array(Y).reshape(DSI.shape[:2]) YN = np.array(YN).reshape(DSI.shape[:2]) A = YP + YN - 2 * Y B = YN - YP d_subpix = disparity - B / (2 * A) return cls(d_subpix), cls(A)
[docs] def stereo_BM(self, right, hw, drange, speckle=None): """ Stereo block matching :param right: right image :type right: :class:`Image` :param hw: window half width :type hw: int :param drange: disparity range :type drange: array_like(2) :param speckle: speckle filter parameters, defaults to None :type speckle: array_like(2), optional :raises ValueError: block size too small :return: disparity image :rtype: :class:`Image` This is an efficient block-matching stereo implementation. It returns the disparity image, same size as input images, whose elements give the subpixel-interpolated disparity (in pixels) of the corresponding point in the left image. Speckle are small regions of anomalous disparity. A speckle is defined as less than A pixels with disparity variation less than V, and the filter parameters are (A, V). The disparity values within a detected speckle are set to that of its enclosing region. Example:: >>> rocks_l = Image.Read("rocks2-l.png", reduce=2) >>> rocks_r = Image.Read("rocks2-r.png", reduce=2) >>> disparity = rocks_l.stereo_BM(rocks_r, hw=3, drange=[40, 90], speckle=(200, 2)) .. note:: The images are assumed to be epipolar aligned. :references: - Robotics, Vision & Control for Python, Section 14.4.2.7, P. Corke, Springer 2023. :seealso: :meth:`stereo_SGBM` :meth:`stereo_simple` `opencv.StereoBM <https://docs.opencv.org/3.4/d9/dba/classcv_1_1StereoBM.html>`_ """ # https://docs.opencv.org/master/d9/dba/classcv_1_1StereoBM.html if isinstance(drange, int): drange = (0, drange) if hw < 2: raise ValueError('block size too small') # number of disparities must be multiple of 16 ndisparities = drange[1] - drange[0] ndisparities = int(np.ceil(ndisparities // 16) * 16) # create the stereo matcher stereo = cv.StereoBM_create( numDisparities=ndisparities, blockSize=2*hw+1) stereo.setMinDisparity(drange[0]) left = self.mono().image.astype(np.uint8) right = right.mono().image.astype(np.uint8) # set speckle filter # it seems to make very little difference # it's not clear if range is in the int16 units or not if speckle is None: speckle = (0, 0) stereo.setSpeckleWindowSize(speckle[0]) stereo.setSpeckleRange(int(16 * speckle[1])) disparity = stereo.compute( left=left, right=right) return self.__class__(disparity / 16.0)
[docs] def stereo_SGBM(self, right, hw, drange, speckle=None): """ Stereo semi-global block matching :param right: right image :type right: :class:`Image` :param hw: window half width :type hw: int :param drange: disparity range :type drange: array_like(2) :param speckle: speckle filter parameters, defaults to None :type speckle: array_like(2), optional :raises ValueError: block size too small :return: disparity image :rtype: :class:`Image` This is an efficient semi-global block-matching stereo implementation. It returns the disparity image, same size as input images, whose elements give the subpixel-interpolated disparity (in pixels) of the corresponding point in the left image. Speckle are small regions of anomalous disparity. A speckle is defined as less than A pixels with disparity variation less than V, and the filter parameters are (A, V). The disparity values within a detected speckle are set to that of its enclosing region. Example:: >>> rocks_l = Image.Read("rocks2-l.png", reduce=2) >>> rocks_r = Image.Read("rocks2-r.png", reduce=2) >>> disparity = rocks_l.stereo_SGBM(rocks_r, hw=3, drange=[40, 90], speckle=(200, 2)) .. note:: The images are assumed to be epipolar aligned. :references: - Stereo processing by semiglobal matching and mutual information, Heiko Hirschmuller, IEEE Transactions on Pattern Analysis and Machine Intelligence, 30(2):328–341, 2008. - Robotics, Vision & Control for Python, Section 14.4.2.7, P. Corke, Springer 2023. :seealso: :meth:`stereo_SGBM` :meth:`stereo_simple` `opencv.StereoSGBM <https://docs.opencv.org/3.4/d2/d85/classcv_1_1StereoSGBM.html>`_ """ # https://docs.opencv.org/master/d2/d85/classcv_1_1StereoSGBM.html#details if isinstance(drange, int): drange = (0, drange) if hw < 2: raise ValueError('block size too small') # number of disparities must be multiple of 16 ndisparities = drange[1] - drange[0] ndisparities = int(np.ceil(ndisparities // 16) * 16) # create the stereo matcher stereo = cv.StereoSGBM_create( minDisparity=drange[0], numDisparities=ndisparities, blockSize=2*hw+1) left = self.mono().image.astype(np.uint8) right = right.mono().image.astype(np.uint8) # set speckle filter # it seems to make very little difference # it's not clear if range is in the int16 units or not if speckle is not None: stereo.setSpeckleWindowSize(speckle[0]) stereo.setSpeckleRange(speckle[1]) disparity = stereo.compute( left=left, right=right) return self.__class__(disparity / 16.0)
# def line(self, start, end, color): # should be draw_line # return self.__class__(cv.line(self.image, start, end, color))
[docs] def rectify_homographies(self, m, F): """ Create rectification homographies :param m: corresponding points :type m: :class:`~machinevisiontoolbox.ImagePointFeatures.FeatureMatch` :param F: fundamental matrix :type F: ndarray(3,3) :return: rectification homographies :rtype: ndarray(3,3), ndarray(3,3) Given the epipolar geometry between two images, defined by the fundamental matrix and corresponding points, compute a pair of homographies that can be used to rectify the images so that they are epipolar aligned. Examples:: >>> walls_l = Image.Read('walls-l.png', reduce=2) >>> walls_r = Image.Read('walls-r.png', reduce=2) >>> sf_l = walls_l.SIFT() >>> sf_r = walls_r.SIFT() >>> matches = sf_l.match(sf_r); >>> F, resid = matches.estimate(CentralCamera.points2F, method="ransac", confidence=0.95); >>> H_l, H_r = walls_l.rectify_homographies(matches, F) >>> walls_l_rect = walls_l.warp_perspective(H_l) >>> walls_r_rect = walls_r.warp_perspective(H_r) :references: - Robotics, Vision & Control for Python, Section 14.4.3, P. Corke, Springer 2023. :seealso: :meth:`warp_perspective` :class:`Match` `opencv.stereoRectifyUncalibrated <https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html#gaadc5b14471ddc004939471339294f052>`_ """ retval, H1, H2 = cv.stereoRectifyUncalibrated(m.inliers.p1, m.inliers.p2, F, self.size) return H1, H2