# Source code for podpac.core.interpolation.nearest_neighbor_interpolator

"""
Interpolator implementations
"""

from __future__ import division, unicode_literals, print_function, absolute_import
from six import string_types

import numpy as np
import xarray as xr
import traitlets as tl
from scipy.spatial import cKDTree

# Optional dependencies


# podpac imports
from podpac.core.interpolation.interpolator import COMMON_INTERPOLATOR_DOCS, Interpolator, InterpolatorException
from podpac.core.coordinates import Coordinates, UniformCoordinates1d, StackedCoordinates
from podpac.core.coordinates.utils import make_coord_delta, make_coord_value
from podpac.core.utils import common_doc
from podpac.core.coordinates.utils import get_timedelta
from podpac.core.interpolation.selector import Selector, _higher_precision_time_coords1d, _higher_precision_time_stack


@common_doc(COMMON_INTERPOLATOR_DOCS)
class NearestNeighbor(Interpolator):
    """Nearest Neighbor Interpolation

    {nearest_neighbor_attributes}
    """

    dims_supported = ["lat", "lon", "alt", "time"]
    methods_supported = ["nearest"]

    # defined at instantiation
    method = tl.Unicode(default_value="nearest")
    spatial_tolerance = tl.Float(default_value=np.inf, allow_none=True)
    time_tolerance = tl.Union([tl.Unicode(), tl.Instance(np.timedelta64, allow_none=True)])
    alt_tolerance = tl.Float(default_value=np.inf, allow_none=True)

    # spatial_scale only applies when the source is stacked with time or alt.
    # The supplied value will be assigned a distance of "1'"
    spatial_scale = tl.Float(default_value=1, allow_none=True)
    # time_scale only applies when the source is stacked with lat, lon, or alt.
    # The supplied value will be assigned a distance of "1'"
    time_scale = tl.Union([tl.Unicode(), tl.Instance(np.timedelta64, allow_none=True)])
    # alt_scale only applies when the source is stacked with lat, lon, or time.
    # The supplied value will be assigned a distance of "1'"
    alt_scale = tl.Float(default_value=1, allow_none=True)

    # When True, requested points that fall outside the source bounds get index -1 (→ NaN output)
    respect_bounds = tl.Bool(True)
    # When True, NaN entries are stripped from the source before neighbor lookup (see _remove_nans)
    remove_nan = tl.Bool(False)
    # When False, can_select returns () so the full source is passed to interpolate
    use_selector = tl.Bool(True)

    def __repr__(self):
        rep = super(NearestNeighbor, self).__repr__()
        # rep += '\n\tspatial_tolerance: {}\n\ttime_tolerance: {}'.format(self.spatial_tolerance, self.time_tolerance)
        return rep

    @common_doc(COMMON_INTERPOLATOR_DOCS)
    def can_interpolate(self, udims, source_coordinates, eval_coordinates):
        """
        {interpolator_interpolate}
        """
        udims_subset = self._filter_udims_supported(udims)
        return udims_subset

    def can_select(self, udims, source_coordinates, eval_coordinates):
        """Defer to the base-class selector unless `use_selector` is disabled."""
        selector = super().can_select(udims, source_coordinates, eval_coordinates)
        if self.use_selector:
            return selector
        return ()

    @common_doc(COMMON_INTERPOLATOR_DOCS)
    def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, output_data):
        """
        {interpolator_interpolate}
        """

        # Note, some of the following code duplicates code in the Selector class.
        # This duplication is for the sake of optimization
        def is_stacked(d):
            return "_" in d

        # Pre-convert any time bounds carried on the data attrs to floats so they can
        # be compared numerically alongside the spatial dimensions below.
        if hasattr(source_data, "attrs") and "bounds" in source_data.attrs:
            bounds = source_data.attrs["bounds"]
            if "time" in bounds and bounds["time"]:
                if "time" in eval_coordinates.udims:
                    bounds["time"] = [
                        self._atime_to_float(b, source_coordinates["time"], eval_coordinates["time"])
                        for b in bounds["time"]
                    ]
                else:
                    bounds["time"] = [
                        self._atime_to_float(b, source_coordinates["time"], source_coordinates["time"])
                        for b in bounds["time"]
                    ]
        else:
            bounds = None

        if self.remove_nan:
            # Eliminate nans from the source data. Note, this could turn a uniform gridded dataset into a stacked one
            source_data, source_coordinates = self._remove_nans(source_data, source_coordinates)

        data_index = []
        for d in source_coordinates.dims:
            # Make sure we're supposed to do nearest neighbor interpolation for this
            # UDIM, otherwise pass the dimension through untouched (identity index)
            if len([dd for dd in d.split("_") if dd in udims]) == 0:
                index = self._resize_unstacked_index(np.arange(source_coordinates[d].size), d, eval_coordinates)
                data_index.append(index)
                continue
            source = source_coordinates[d]
            if is_stacked(d):
                if bounds is not None:
                    bound = np.stack([bounds[dd] for dd in d.split("_")], axis=1)
                else:
                    bound = None
                index = self._get_stacked_index(d, source, eval_coordinates, bound)
                if len(source.shape) == 2:
                    # Handle case of 2D-stacked coordinates: the flat kd-tree index is
                    # decomposed into (row, col) = (index // ncols, index % ncols)
                    ncols = source.shape[1]
                    index1 = index // ncols
                    index1 = self._resize_stacked_index(index1, d, eval_coordinates)
                    # With nD stacked coordinates, there are 'n' indices in the tuple
                    # All of these need to get into the data_index, and in the right order
                    data_index.append(index1)  # This is a hack
                    index = index % ncols  # The second half can go through the usual machinery
                elif len(source.shape) > 2:
                    # Handle case of nD-stacked coordinates
                    raise NotImplementedError
                index = self._resize_stacked_index(index, d, eval_coordinates)
            elif source_coordinates[d].is_uniform:
                # Uniform 1-d coordinates: index computed arithmetically, no kd-tree needed
                request = eval_coordinates[d]
                if bounds is not None:
                    bound = bounds[d]
                else:
                    bound = None
                index = self._get_uniform_index(d, source, request, bound)
                index = self._resize_unstacked_index(index, d, eval_coordinates)
            else:
                # non-uniform coordinates... probably an optimization here
                request = eval_coordinates[d]
                if bounds is not None:
                    bound = bounds[d]
                else:
                    bound = None
                index = self._get_nonuniform_index(d, source, request, bound)
                index = self._resize_unstacked_index(index, d, eval_coordinates)

            data_index.append(index)

        index = tuple(data_index)

        output_data.data[:] = np.array(source_data)[index]

        # Any dimension that flagged a point with -1 (out of bounds / beyond tolerance)
        # poisons that output point with NaN
        bool_inds = sum([i == -1 for i in index]).astype(bool)
        output_data.data[bool_inds] = np.nan

        return output_data

    def _remove_nans(self, source_data, source_coordinates):
        """Strip NaN entries from `source_data`, flattening the grid into stacked
        coordinates of only the valid points. Returns (data, Coordinates)."""
        index = np.array(np.isnan(source_data), bool)
        if not np.any(index):
            return source_data, source_coordinates
        data = source_data.data[~index]
        # Build the full coordinate grid (first udim of each dim), then keep valid points only
        coords = np.meshgrid(
            *[source_coordinates[d.split("_")[0]].coordinates for d in source_coordinates.dims], indexing="ij"
        )
        repeat_shape = coords[0].shape
        coords = [c[~index] for c in coords]
        final_dims = [d.split("_")[0] for d in source_coordinates.dims]

        # Add back in any stacked coordinates (the udims beyond the first in each dim),
        # broadcasting each one to the full grid shape before masking
        for i, d in enumerate(source_coordinates.dims):
            dims = d.split("_")
            if len(dims) == 1:
                continue
            reshape = np.ones(len(coords), int)
            reshape[i] = -1
            repeats = list(repeat_shape)
            repeats[i] = 1
            for dd in dims[1:]:
                crds = source_coordinates[dd].coordinates.reshape(*reshape)
                for j, r in enumerate(repeats):
                    crds = crds.repeat(r, axis=j)
                coords.append(crds[~index])
                final_dims.append(dd)

        return data, Coordinates([coords], dims=[final_dims])

    def _get_tol(self, dim, source, request):
        """Return the user tolerance for `dim` as a float (time tolerances are converted)."""
        if dim in ["lat", "lon"]:
            return self.spatial_tolerance
        if dim == "alt":
            return self.alt_tolerance
        if dim == "time":
            if self.time_tolerance == "":
                return np.inf
            return self._time_to_float(self.time_tolerance, source, request)
        raise NotImplementedError()

    def _get_scale(self, dim, source_1d, request_1d):
        """Return the multiplicative factor that maps `dim` distances to unit scale."""
        if dim in ["lat", "lon"]:
            return 1 / self.spatial_scale
        if dim == "alt":
            return 1 / self.alt_scale
        if dim == "time":
            if self.time_scale == "":
                return 1.0
            return 1 / self._time_to_float(self.time_scale, source_1d, request_1d)
        raise NotImplementedError()

    def _time_to_float(self, time, time_source, time_request):
        """Convert a time *delta* to a float in the higher-precision dtype of the
        source/request time coordinates."""
        dtype0 = time_source.coordinates[0].dtype
        dtype1 = time_request.coordinates[0].dtype
        dtype = dtype0 if dtype0 > dtype1 else dtype1
        time = make_coord_delta(time)
        # NOTE(review): if make_coord_delta does not return a np.timedelta64, `time1`
        # is never assigned and the return below raises NameError — confirm whether a
        # non-timedelta input is possible here.
        if isinstance(time, np.timedelta64):
            time1 = (time + np.datetime64("2000")).astype(dtype).astype(float) - (
                np.datetime64("2000").astype(dtype).astype(float)
            )
        return time1

    def _atime_to_float(self, time, time_source, time_request):
        """Convert an *absolute* time value to a float in the higher-precision dtype
        of the source/request time coordinates."""
        dtype0 = time_source.coordinates[0].dtype
        dtype1 = time_request.coordinates[0].dtype
        dtype = dtype0 if dtype0 > dtype1 else dtype1
        time = make_coord_value(time)
        if isinstance(time, np.datetime64):
            time = time.astype(dtype).astype(float)
        return time

    def _get_stacked_index(self, dim, source, request, bounds=None):
        """Nearest-neighbor lookup for a stacked source dim via a scaled cKDTree.

        Returns a flat integer index array into the (raveled) stacked source
        coordinates, with -1 marking out-of-bounds / out-of-tolerance points.
        """
        # The udims are in the order of the request so that the meshgrid calls will be in the right order
        udims = [ud for ud in request.udims if ud in source.udims]
        time_source = time_request = None
        if "time" in udims:
            time_source = source["time"]
            time_request = request["time"]

        tols = np.array([self._get_tol(d, time_source, time_request) for d in udims])[None, :]
        scales = np.array([self._get_scale(d, time_source, time_request) for d in udims])[None, :]
        # combined tolerance in the scaled space used by the kd-tree query
        tol = np.linalg.norm((tols * scales).squeeze())
        src_coords, req_coords_diag = _higher_precision_time_stack(source, request, udims)
        # We need to unravel the nD stacked coordinates
        ckdtree_source = cKDTree(src_coords.reshape(src_coords.shape[0], -1).T * scales)

        # if the udims are all stacked in the same stack as part of the request coordinates, then we're done.
        # Otherwise we have to evaluate each unstacked set of dimensions independently
        # Note, part of this code is duplicated in the selector
        indep_evals = [ud for ud in udims if not request.is_stacked(ud)]
        # two udims could be stacked, but in different dim groups, e.g. source (lat, lon), request (lat, time), (lon, alt)
        stacked = {d for d in request.dims for ud in udims if ud in d and request.is_stacked(ud)}

        if (len(indep_evals) + len(stacked)) <= 1:  # output is stacked in the same way
            # The ckdtree call below needs the lat/lon pairs in the last axis position
            req_coords = np.moveaxis(req_coords_diag, 0, -1)
        elif (len(stacked) == 0) | (len(indep_evals) == 0 and len(stacked) == len(udims)):
            # fully unstacked request (or fully stacked, one udim per stack): outer product
            req_coords = np.stack([i.ravel() for i in np.meshgrid(*req_coords_diag, indexing="ij")], axis=1)
        else:
            # Rare cases? E.g. lat_lon_time_alt source to lon, time_alt, lat destination
            sizes = [request[d].size for d in request.dims]
            reshape = np.ones(len(request.dims), int)
            coords = [None] * len(udims)
            for i in range(len(udims)):
                ii = [ii for ii in range(len(request.dims)) if udims[i] in request.dims[ii]][0]
                reshape[:] = 1
                reshape[ii] = -1
                coords[i] = req_coords_diag[i].reshape(*reshape)
                for j, d in enumerate(request.dims):
                    if udims[i] in d:  # Then we don't need to repeat
                        continue
                    coords[i] = coords[i].repeat(sizes[j], axis=j)
            req_coords = np.stack([i.ravel() for i in coords], axis=1)

        dist, index = ckdtree_source.query(req_coords * scales, k=1)

        if self.respect_bounds:
            if bounds is None:
                # NOTE(review): caller-supplied bounds are stacked as shape (2, ndims),
                # but this fallback stacks along axis=1 giving (ndims, 2) — the shapes
                # only agree when ndims == 2. Verify against upstream before relying on
                # bounds with >2 stacked udims.
                bounds = np.stack(
                    [
                        src_coords.reshape(src_coords.shape[0], -1).T.min(0),
                        src_coords.reshape(src_coords.shape[0], -1).T.max(0),
                    ],
                    axis=1,
                )
            # Fix order of bounds
            bounds = bounds[:, [source.udims.index(dim) for dim in udims]]
            index[np.any((req_coords > bounds[1]), axis=-1) | np.any((req_coords < bounds[0]), axis=-1)] = -1

        if tol and tol != np.inf:
            index[dist > tol] = -1

        return index

    def _get_uniform_index(self, dim, source, request, bounds=None):
        """Nearest index into uniformly spaced 1-d source coordinates, computed
        arithmetically; -1 marks out-of-bounds / out-of-tolerance points."""
        tol = self._get_tol(dim, source, request)

        index = (request.coordinates - source.start) / source.step
        rindex = np.around(index).astype(int)
        stop_ind = int(source.size)
        if self.respect_bounds:
            rindex[(rindex < 0) | (rindex >= stop_ind)] = -1
        else:
            rindex = np.clip(rindex, 0, stop_ind - 1)
        if tol and tol != np.inf:
            if dim == "time":
                step = self._time_to_float(source.step, source, request)
            else:
                step = source.step
            # |fractional index error| * |step| is the physical distance to the chosen point
            rindex[np.abs(index - rindex) * np.abs(step) > tol] = -1

        return rindex

    def _get_nonuniform_index(self, dim, source, request, bounds=None):
        """Nearest index into non-uniform 1-d source coordinates via a 1-d cKDTree;
        -1 marks out-of-bounds / out-of-tolerance points."""
        tol = self._get_tol(dim, source, request)

        src, req = _higher_precision_time_coords1d(source, request)
        ckdtree_source = cKDTree(src.reshape(-1, 1))
        dist, index = ckdtree_source.query(req[:].reshape(-1, 1), k=1)
        # cKDTree returns n (== size) for missing neighbors; normalize to the -1 sentinel
        index[index == source.coordinates.size] = -1

        if self.respect_bounds:
            if bounds is None:
                bounds = [src.min(), src.max()]
            index[(req.ravel() > bounds[1]) | (req.ravel() < bounds[0])] = -1
        if tol and tol != np.inf:
            index[dist > tol] = -1

        return index

    def _resize_unstacked_index(self, index, source_dim, request):
        """Reshape a 1-d index so it broadcasts along only the request axes that
        belong to `source_dim` (all other axes are size 1)."""
        # When the request is stacked, and the stacked dimensions are n-dimensions where n > 1,
        # Then len(request.shape) != len(request.dims), so it takes a little bit of footwork
        # to get the correct shape for the index
        reshape = np.array(request.shape)
        i = 0
        for dim in request.dims:
            addnext = len(request[dim].shape)
            if source_dim not in dim:
                reshape[i : i + addnext] = 1
            i += addnext
        return index.reshape(*reshape)

    def _resize_stacked_index(self, index, source_dim, request):
        """Reshape a stacked-lookup index so it broadcasts along only the request
        axes that share a udim with `source_dim`."""
        reshape = np.array(request.shape)
        i = 0
        for dim in request.dims:
            addnext = len(request[dim].shape)
            d = dim.split("_")
            if not any([dd in source_dim for dd in d]):
                reshape[i : i + addnext] = 1
            i += addnext
        return index.reshape(*reshape)
@common_doc(COMMON_INTERPOLATOR_DOCS)
class NearestPreview(NearestNeighbor):
    """Nearest Neighbor (Preview) Interpolation

    {nearest_neighbor_attributes}
    """

    methods_supported = ["nearest_preview"]
    method = tl.Unicode(default_value="nearest_preview")
    # Preview mode disables the spatial tolerance entirely (read-only None)
    spatial_tolerance = tl.Float(read_only=True, allow_none=True, default_value=None)

    @common_doc(COMMON_INTERPOLATOR_DOCS)
    def can_select(self, udims, source_coordinates, eval_coordinates):
        """
        {interpolator_can_select}
        """
        udims_subset = self._filter_udims_supported(udims)

        # confirm that udims are in source and eval coordinates
        # TODO: handle stacked coordinates
        if self._dim_in(udims_subset, source_coordinates):
            return udims_subset
        else:
            return tuple()

    @common_doc(COMMON_INTERPOLATOR_DOCS)
    def select_coordinates(self, udims, source_coordinates, eval_coordinates, index_type="numpy"):
        """
        {interpolator_select}
        """
        new_coords = []
        new_coords_idx = []

        source_coords, source_coords_index = source_coordinates.intersect(
            eval_coordinates, outer=True, return_index=True
        )

        if source_coords.size == 0:
            return source_coords, source_coords_index

        # iterate over the source coordinate dims in case they are stacked
        for src_dim, idx in zip(source_coords, source_coords_index):
            # TODO: handle stacked coordinates
            if isinstance(source_coords[src_dim], StackedCoordinates):
                raise InterpolatorException("NearestPreview select does not yet support stacked dimensions")

            if src_dim in eval_coordinates.dims:
                # NOTE(review): `src_coords` here shadows the outer `source_coords`-derived
                # loop iterable name pattern — it is intentionally re-bound per dim.
                src_coords = source_coords[src_dim]
                dst_coords = eval_coordinates[src_dim]

                # Effective step of the destination coordinates (fall back to the mean
                # spacing for non-uniform coordinates; size-1 gives invalid → ignored)
                if isinstance(dst_coords, UniformCoordinates1d):
                    dst_start = dst_coords.start
                    dst_stop = dst_coords.stop
                    dst_delta = dst_coords.step
                else:
                    dst_start = dst_coords.coordinates[0]
                    dst_stop = dst_coords.coordinates[-1]
                    with np.errstate(invalid="ignore"):
                        dst_delta = (dst_stop - dst_start) / (dst_coords.size - 1)

                # Effective step of the source coordinates, same convention
                if isinstance(src_coords, UniformCoordinates1d):
                    src_start = src_coords.start
                    src_stop = src_coords.stop
                    src_delta = src_coords.step
                else:
                    src_start = src_coords.coordinates[0]
                    src_stop = src_coords.coordinates[-1]
                    with np.errstate(invalid="ignore"):
                        src_delta = (src_stop - src_start) / (src_coords.size - 1)

                # Decimation factor: keep roughly one source point per destination step
                ndelta = max(1, np.round(np.abs(dst_delta / src_delta)))
                idx_offset = 0
                if src_coords.size == 1:
                    c = src_coords.copy()
                else:
                    c_test = UniformCoordinates1d(src_start, src_stop, ndelta * src_delta, **src_coords.properties)
                    bounds = source_coordinates[src_dim].bounds
                    # The delta/2 ensures the endpoint is included when there is a floating point rounding error
                    # the delta/2 is more than needed, but does guarantee.
                    src_stop = np.clip(src_stop + ndelta * src_delta / 2, bounds[0], bounds[1])
                    c = UniformCoordinates1d(src_start, src_stop, ndelta * src_delta, **src_coords.properties)
                    if c.size > c_test.size:  # need to adjust the index as well
                        idx_offset = int(ndelta)

                # Convert the intersect index to a strided slice matching the decimation
                idx_start = idx.start if isinstance(idx, slice) else idx[0]
                idx_stop = idx.stop if isinstance(idx, slice) else idx[-1]
                if idx_stop is not None:
                    idx_stop += idx_offset
                idx = slice(idx_start, idx_stop, int(ndelta))
            else:
                # Dimension not requested: pass the intersected coordinates through unchanged
                c = source_coords[src_dim]

            new_coords.append(c)
            new_coords_idx.append(idx)

        return Coordinates(new_coords, validate_crs=False), tuple(new_coords_idx)