Source code for cars.applications.sparse_matching.sparse_matching_algo

#!/usr/bin/env python
# coding: utf8
#
# Copyright (c) 2020 Centre National d'Etudes Spatiales (CNES).
#
# This file is part of CARS
# (see https://github.com/CNES/cars).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=too-many-lines

"""
Sparse matching Sift module:
contains sift sparse matching method
"""

# Standard imports
from __future__ import absolute_import

# Third party imports
import numpy as np
from vlsift.sift.sift import sift

# CARS imports
from cars.applications.sparse_matching.sparse_matching_wrappers import (
    euclidean_matrix_distance,
)
from cars.core import constants as cst



[docs]
def compute_matches(  # pylint: disable=too-many-positional-arguments
    left: np.ndarray,
    right: np.ndarray,
    left_mask: np.ndarray = None,
    right_mask: np.ndarray = None,
    left_origin: [float, float] = None,
    right_origin: [float, float] = None,
    matching_threshold: float = 0.7,
    n_octave: int = 8,
    n_scale_per_octave: int = 3,
    peak_threshold: float = 4.0,
    edge_threshold: float = 10.0,
    magnification: float = 7.0,
    window_size: int = 2,
    backmatching: bool = True,
    disp_lower_bound=None,
    disp_upper_bound=None,
):
    """
    Compute matches between left and right
    Convention for masks: True is a valid pixel

    :param left: left image as numpy array
    :type left: np.ndarray
    :param right: right image as numpy array
    :type right: np.ndarray
    :param left_mask: left mask as numpy array
    :type left_mask: np.ndarray
    :param right_mask: right mask as numpy array
    :type right_mask: np.ndarray
    :param left_origin: left image origin in the full image
    :type left_origin: [float, float]
    :param right_origin: right image origin in the full image
    :type right_origin: [float, float]
    :param matching_threshold: threshold for the ratio to nearest second match
    :type matching_threshold: float
    :param n_octave: the number of octaves of the DoG scale space
    :type n_octave: int
    :param n_scale_per_octave: the nb of levels / octave of the DoG scale space
    :type n_scale_per_octave: int
    :param peak_threshold: the peak selection threshold
    :type peak_threshold: float
    :param edge_threshold: the edge selection threshold
    :type edge_threshold: float
    :param magnification: set the descriptor magnification factor
    :type magnification: float
    :param window_size: size of the window
    :type window_size: int
    :param backmatching: also check that right vs. left gives same match
    :type backmatching: bool

    :return: matches
    :rtype: numpy buffer of shape (nb_matches,4)

    """
    left_origin = [0, 0] if left_origin is None else left_origin
    right_origin = [0, 0] if right_origin is None else right_origin

    # compute keypoints + descriptors
    left_frames, left_descr = sift(
        left,
        n_octaves=n_octave,
        n_levels=n_scale_per_octave,
        first_octave=-1,
        peak_thresh=peak_threshold,
        edge_thresh=edge_threshold,
        magnification=magnification,
        window_size=window_size,
        float_descriptors=True,
        compute_descriptor=True,
        verbose=False,
    )

    right_frames, right_descr = sift(
        right,
        n_octaves=n_octave,
        n_levels=n_scale_per_octave,
        first_octave=-1,
        peak_thresh=peak_threshold,
        edge_thresh=edge_threshold,
        magnification=magnification,
        window_size=window_size,
        float_descriptors=True,
        compute_descriptor=True,
        verbose=False,
    )

    # Filter keypoints that falls out of the validity mask (0=valid)
    if left_mask is not None:
        pixel_indices = np.floor(left_frames[:, 0:2]).astype(int)
        valid_left_frames_mask = left_mask[
            pixel_indices[:, 0], pixel_indices[:, 1]
        ]
        left_frames = left_frames[valid_left_frames_mask]
        left_descr = left_descr[valid_left_frames_mask]

    if right_mask is not None:
        pixel_indices = np.floor(right_frames[:, 0:2]).astype(int)
        valid_right_frames_mask = right_mask[
            pixel_indices[:, 0], pixel_indices[:, 1]
        ]
        right_frames = right_frames[valid_right_frames_mask]
        right_descr = right_descr[valid_right_frames_mask]

    # Early return for empty frames
    # also if there are points to match
    # need minimum two right points to find the second nearest neighbor
    # (and two left points for backmatching)
    if left_frames.shape[0] < 2 or right_frames.shape[0] < 2:
        return np.empty((0, 4))

    # translate matches according image origin
    # revert origin due to frame convention: [Y, X, S, TH] X: 1, Y: 0)
    left_frames[:, 0:2] += left_origin[::-1]
    right_frames[:, 0:2] += right_origin[::-1]

    # sort frames (and descriptors) along X axis
    order = np.argsort(left_frames[:, 1])
    left_frames = left_frames[order]
    left_descr = left_descr[order]

    order = np.argsort(right_frames[:, 1])
    right_frames = right_frames[order]
    right_descr = right_descr[order]

    # compute best matches by blocks
    splits = np.arange(500, len(left_frames), 500)
    left_frames_splitted = np.split(left_frames, splits)
    left_descr_splitted = np.split(left_descr, splits)
    splits = np.insert(splits, 0, 0)

    matches_id = []

    for (
        left_id_offset,
        left_frames_block,
        left_descr_block,
    ) in zip(  # noqa: B905
        splits, left_frames_splitted, left_descr_splitted
    ):
        if disp_lower_bound is not None and disp_upper_bound is not None:
            # Find right block extremas
            right_x_min = np.min(left_frames_block[:, 1]) + disp_lower_bound
            right_x_max = np.max(left_frames_block[:, 1]) + disp_upper_bound
            if (
                np.max(right_frames[:, 1]) > right_x_min
                and np.min(right_frames[:, 1]) < right_x_max
            ):
                left_id = np.min(np.where(right_frames[:, 1] > right_x_min))
                right_id = np.max(np.where(right_frames[:, 1] < right_x_max))
                right_descr_block = right_descr[left_id:right_id]
                right_id_offset = left_id
            else:
                right_descr_block = []
                right_id_offset = 0
        else:
            right_descr_block = right_descr
            right_id_offset = 0

        if len(left_descr_block) >= 2 and len(right_descr_block) >= 2:
            # compute euclidean matrix distance
            emd = euclidean_matrix_distance(left_descr_block, right_descr_block)

            # Check is columns/rows are all nan
            valid_rows = np.sum(~np.isnan(emd), axis=1) >= 2
            valid_cols = np.sum(~np.isnan(emd), axis=0) >= 2
            if not np.any(valid_rows) or not np.any(valid_cols):
                continue

            # filter invalid rows/cols
            emd_filtered = emd[np.ix_(valid_rows, valid_cols)]
            row_indices = np.where(valid_rows)[0]
            col_indices = np.where(valid_cols)[0]
            # Robustify for second element
            if len(row_indices) < 2 or len(col_indices) < 2:
                continue

            nearest_cols = col_indices[np.nanargmin(emd_filtered, axis=1)]
            nearest_rows = row_indices[np.nanargmin(emd_filtered, axis=0)]

            # get nearest sift (regarding descriptors)
            id_nearest_dlr = (row_indices, nearest_cols)
            id_nearest_drl = (nearest_rows, col_indices)

            # get absolute distances
            dist_dlr = emd[id_nearest_dlr]
            dist_drl = emd[id_nearest_drl]

            # get relative distance (ratio to second nearest distance)
            second_dist_dlr = np.partition(emd_filtered, 1, axis=1)[:, 1]
            second_dist_drl = np.partition(emd_filtered, 1, axis=0)[1, :]

            # Get valid second distances
            valid_second_dlr = ~np.isnan(second_dist_dlr)
            valid_second_drl = ~np.isnan(second_dist_drl)

            # Compute ratios only for valid ones
            dist_dlr_ratio = np.full_like(dist_dlr, np.nan)
            dist_drl_ratio = np.full_like(dist_drl, np.nan)
            dist_dlr_ratio[valid_second_dlr] = (
                dist_dlr[valid_second_dlr] / second_dist_dlr[valid_second_dlr]
            )
            dist_drl_ratio[valid_second_drl] = (
                dist_drl[valid_second_drl] / second_dist_drl[valid_second_drl]
            )

            # stack matches which its distance
            id_matches_dlr = np.column_stack((*id_nearest_dlr, dist_dlr_ratio))
            id_matches_drl = np.column_stack((*id_nearest_drl, dist_drl_ratio))

            # filter matches with nan ratio
            id_matches_dlr = id_matches_dlr[~np.isnan(id_matches_dlr[:, 2])]
            id_matches_drl = id_matches_drl[~np.isnan(id_matches_drl[:, 2])]

            # check backmatching
            if backmatching is True:
                back = (
                    id_matches_dlr[:, 0]
                    == id_matches_drl[id_matches_dlr[:, 1].astype(int)][:, 0]
                )
                id_matches_dlr = id_matches_dlr[back]

            # threshold matches
            id_matches_dlr = id_matches_dlr[
                id_matches_dlr[:, -1] < matching_threshold, :
            ][:, :-1]

            id_matches_dlr += (left_id_offset, right_id_offset)

            matches_id.append(id_matches_dlr)

    if matches_id:
        matches_id = np.concatenate(matches_id)
    else:
        matches_id = np.empty((0, 4))

    # retrieve points: [Y, X, S, TH] X: 1, Y: 0
    # fyi: ``S`` is the scale and ``TH`` is the orientation (in radians)
    left_points = left_frames[matches_id[:, 0].astype(int), 1::-1]
    right_points = right_frames[matches_id[:, 1].astype(int), 1::-1]
    matches = np.concatenate((left_points, right_points), axis=1)
    return matches




[docs]
def dataset_matching(  # pylint: disable=too-many-positional-arguments
    ds1,
    ds2,
    used_band,
    matching_threshold=0.7,
    n_octave=8,
    n_scale_per_octave=3,
    peak_threshold=4.0,
    edge_threshold=10.0,
    magnification=7.0,
    window_size=2,
    backmatching=True,
    disp_lower_bound=None,
    disp_upper_bound=None,
    classif_bands_to_mask=None,
):
    """
    Compute sift matches between two datasets
    produced by stereo.epipolar_rectify_images

    :param ds1: Left image dataset
    :type ds1: xarray.Dataset as produced by stereo.epipolar_rectify_images
    :param ds2: Right image dataset
    :type ds2: xarray.Dataset as produced by stereo.epipolar_rectify_images
    :param matching_threshold: threshold for the ratio to nearest second match
    :type matching_threshold: float
    :param n_octave: the number of octaves of the DoG scale space
    :type n_octave: int
    :param n_scale_per_octave: the nb of levels / octave of the DoG scale space
    :type n_scale_per_octave: int
    :param peak_threshold: the peak selection threshold
    :type peak_threshold: int
    :param edge_threshold: the edge selection threshold.
    :param magnification: set the descriptor magnification factor
    :type magnification: float
    :param window_size: size of the window
    :type window_size: int
    :param backmatching: also check that right vs. left gives same match
    :type backmatching: bool
    :param classif_bands_to_mask: bands from classif to mask
    :type classif_bands_to_mask: list of str / int

    :return: matches
    :rtype: numpy buffer of shape (nb_matches,4)
    """
    # get input data from dataset
    origin1 = [float(ds1.attrs["region"][0]), float(ds1.attrs["region"][1])]
    origin2 = [float(ds2.attrs["region"][0]), float(ds2.attrs["region"][1])]

    left = ds1.im.loc[used_band].values
    right = ds2.im.loc[used_band].values
    # Generate validity masks
    left_mask = ds1.msk.loc[used_band].values == 0
    right_mask = ds2.msk.loc[used_band].values == 0

    # Update validity masks: all classes (used in filling) in
    # classification should be 0
    if cst.EPI_CLASSIFICATION in ds1 and classif_bands_to_mask not in (
        None,
        [],
    ):
        classif_values = (
            ds1[cst.EPI_CLASSIFICATION]
            .sel(band_classif=classif_bands_to_mask)
            .values
        )
        left_mask = np.logical_and(
            left_mask, ~np.any(classif_values > 0, axis=0)
        )

    if cst.EPI_CLASSIFICATION in ds2 and classif_bands_to_mask not in (
        None,
        [],
    ):
        classif_values = (
            ds2[cst.EPI_CLASSIFICATION]
            .sel(band_classif=classif_bands_to_mask)
            .values
        )
        right_mask = np.logical_and(
            right_mask, ~np.any(classif_values > 0, axis=0)
        )

    matches = compute_matches(
        left,
        right,
        left_mask=left_mask,
        right_mask=right_mask,
        left_origin=origin1,
        right_origin=origin2,
        matching_threshold=matching_threshold,
        n_octave=n_octave,
        n_scale_per_octave=n_scale_per_octave,
        peak_threshold=peak_threshold,
        edge_threshold=edge_threshold,
        magnification=magnification,
        window_size=window_size,
        backmatching=backmatching,
        disp_lower_bound=disp_lower_bound,
        disp_upper_bound=disp_upper_bound,
    )

    return matches