Source code for podpac.datalib.modis_pds

"""
MODIS on AWS OpenData

MODIS Coordinates Grids: https://modis-land.gsfc.nasa.gov/MODLAND_grid.html
"""

import logging
import datetime

import numpy as np
import traitlets as tl

import podpac
from podpac.utils import cached_property
from podpac.compositor import TileCompositorRaw
from podpac.core.data.rasterio_source import RasterioRaw
from podpac.authentication import S3Mixin
from podpac.interpolators import InterpolationMixin

_logger = logging.getLogger(__name__)

BUCKET = "modis-pds"
PRODUCTS = ["MCD43A4.006", "MOD09GA.006", "MYD09GA.006", "MOD09GQ.006", "MYD09GQ.006"]
CRS = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs +type=crs"

SINUSOIDAL_HORIZONTAL = {
    "00": (-20014877.697641734, -18903390.490691263),
    "01": (-18902927.177974734, -17791439.971025266),
    "02": (-17790976.658308737, -16679489.451358264),
    "03": (-16679026.138641736, -15567538.931691263),
    "04": (-15567075.618974736, -14455588.412025262),
    "05": (-14455125.099308735, -13343637.892358262),
    "06": (-13343174.579641735, -12231687.372691263),
    "07": (-12231224.059974736, -11119736.853025263),
    "08": (-11119273.540308736, -10007786.333358264),
    "09": (-10007323.020641735, -8895835.813691262),
    "10": (-8895372.500974735, -7783885.294025263),
    "11": (-7783421.981308736, -6671934.774358263),
    "12": (-6671471.461641735, -5559984.254691264),
    "13": (-5559520.941974737, -4448033.735025264),
    "14": (-4447570.422308736, -3336083.215358263),
    "15": (-3335619.902641736, -2224132.695691264),
    "16": (-2223669.382974736, -1112182.176025264),
    "17": (-1111718.863308736, -231.656358264),
    "18": (231.656358264, 1111718.863308736),
    "19": (1112182.176025264, 2223669.382974736),
    "20": (2224132.695691264, 3335619.902641736),
    "21": (3336083.215358264, 4447570.422308736),
    "22": (4448033.735025263, 5559520.941974737),
    "23": (5559984.254691265, 6671471.461641736),
    "24": (6671934.774358264, 7783421.981308737),
    "25": (7783885.294025264, 8895372.500974735),
    "26": (8895835.813691264, 10007323.020641737),
    "27": (10007786.333358264, 11119273.540308736),
    "28": (11119736.853025265, 12231224.059974737),
    "29": (12231687.372691264, 13343174.579641737),
    "30": (13343637.892358264, 14455125.099308737),
    "31": (14455588.412025264, 15567075.618974738),
    "32": (15567538.931691265, 16679026.138641737),
    "33": (16679489.451358264, 17790976.658308737),
    "34": (17791439.971025266, 18902927.177974734),
    "35": (18903390.490691263, 20014877.697641734),
}

SINUSOIDAL_VERTICAL = {
    "00": (10007323.020641735, 8895835.813691262),
    "01": (8895372.500974735, 7783885.294025263),
    "02": (7783421.981308736, 6671934.774358263),
    "03": (6671471.461641735, 5559984.254691264),
    "04": (5559520.941974737, 4448033.735025264),
    "05": (4447570.422308736, 3336083.215358263),
    "06": (3335619.902641736, 2224132.695691264),
    "07": (2223669.382974736, 1112182.176025264),
    "08": (1111718.863308736, 231.656358264),
    "09": (-231.656358264, -1111718.863308736),
    "10": (-1112182.176025264, -2223669.382974736),
    "11": (-2224132.695691264, -3335619.902641736),
    "12": (-3336083.215358264, -4447570.422308736),
    "13": (-4448033.735025263, -5559520.941974737),
    "14": (-5559984.254691265, -6671471.461641736),
    "15": (-6671934.774358264, -7783421.981308737),
    "16": (-7783885.294025264, -8895372.500974735),
    "17": (-8895835.813691264, -10007323.020641737),
}


def _parse_modis_date(date):
    return datetime.datetime.strptime(date, "%Y%j").strftime("%Y-%m-%d")


def _available(s3, *l):
    prefix = "/".join([BUCKET] + list(l))
    return [obj.replace(prefix + "/", "") for obj in s3.ls(prefix) if "_scenes.txt" not in obj]


def get_tile_coordinates(h, v):
    """use pre-fetched lat and lon bounds to get coordinates for a single tile"""
    lat_start, lat_stop = SINUSOIDAL_VERTICAL[v]
    lon_start, lon_stop = SINUSOIDAL_HORIZONTAL[h]
    lat = podpac.clinspace(lat_start, lat_stop, 2400, name="lat")
    lon = podpac.clinspace(lon_start, lon_stop, 2400, name="lon")
    return podpac.Coordinates([lat, lon], crs=CRS)


class MODISSource(RasterioRaw):
    """
    Individual MODIS data tile using AWS OpenData, with caching.

    Attributes
    ----------
    product : str
        MODIS product ('MCD43A4.006', 'MOD09GA.006', 'MYD09GA.006', 'MOD09GQ.006', or 'MYD09GQ.006')
    horizontal : str
        column in the MODIS Sinusoidal Tiling System, e.g. '21'
    vertical : str
        row in the MODIS Sinusoidal Tiling System, e.g. '07'
    date : str
        year and three-digit day of year, e.g. '2011260'
    data_key : str
        individual object (varies by product)
    """

    product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True)
    horizontal = tl.Unicode(help="column in the MODIS Sinusoidal Tiling System, e.g. '21'").tag(attr=True)
    vertical = tl.Unicode(help="row in the MODIS Sinusoidal Tiling System, e.g. '07'").tag(attr=True)
    date = tl.Unicode(help="year and three-digit day of year, e.g. '2011460'").tag(attr=True)
    data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True)
    anon = tl.Bool(True)
    check_exists = tl.Bool(True)

    _repr_keys = ["prefix", "data_key"]

    def init(self):
        """validation"""
        for key in ["horizontal", "vertical", "date", "data_key"]:
            if not getattr(self, key):
                raise ValueError("MODISSource '%s' required" % key)
        if self.horizontal not in ["%02d" % h for h in range(36)]:
            raise ValueError("MODISSource horizontal invalid ('%s' should be between '00' and '35')" % self.horizontal)
        if self.vertical not in ["%02d" % v for v in range(36)]:
            raise ValueError("MODISSource vertical invalid ('%s' should be between '00' and '17'" % self.vertical)
        try:
            _parse_modis_date(self.date)
        except ValueError:
            raise ValueError("MODISSource date invalid ('%s' should be year and doy, e.g. '2009260'" % self.date)
        if self.check_exists and not self.exists:
            raise ValueError("No S3 object found at '%s'" % self.source)

    @cached_property(use_cache_ctrl=True)
    def filename(self):
        _logger.info(
            "Looking up source filename (product=%s, h=%s, v=%s, date=%s, data_key=%s)..."
            % (self.product, self.horizontal, self.vertical, self.date, self.data_key)
        )
        prefix = "/".join([BUCKET, self.product, self.horizontal, self.vertical, self.date])
        objs = [obj.replace(prefix + "/", "") for obj in self.s3.ls(prefix) if obj.endswith("%s.TIF" % self.data_key)]
        if len(objs) == 0:
            raise RuntimeError("No matches found for data_key='%s' at '%s'" % (self.data_key, prefix))
        if len(objs) > 1:
            raise RuntimeError("Too many matches for data_key='%s' at '%s' (%s)" % (self.data_key, prefix, objs))
        return objs[0]

    @property
    def prefix(self):
        return "%s/%s/%s/%s" % (self.product, self.horizontal, self.vertical, self.date)

    @cached_property
    def source(self):
        return "s3://%s/%s/%s" % (BUCKET, self.prefix, self.filename)

    @cached_property
    def exists(self):
        return self.s3.exists(self.source)

    def get_coordinates(self):
        # use pre-fetched coordinate bounds (instead of loading from the dataset)
        spatial_coords = get_tile_coordinates(self.horizontal, self.vertical)
        time_coords = podpac.Coordinates([_parse_modis_date(self.date)], ["time"], crs=spatial_coords.crs)
        return podpac.coordinates.merge_dims([spatial_coords, time_coords])


class MODISComposite(S3Mixin, TileCompositorRaw):
    """MODIS whole-world compositor.
    For documentation about the data, start here: https://ladsweb.modaps.eosdis.nasa.gov/search/order/1
    For information about the bands, see here: https://modis.gsfc.nasa.gov/about/specifications.php

    Attributes
    ----------
    product : str
        MODIS product ('MCD43A4.006', 'MOD09GA.006', 'MYD09GA.006', 'MOD09GQ.006', or 'MYD09GQ.006')
    data_key : str
        individual object (varies by product)
    """

    product = tl.Enum(values=PRODUCTS, help="MODIS product ID").tag(attr=True, required=True)
    data_key = tl.Unicode(help="data to retrieve (varies by product)").tag(attr=True, required=True)

    tile_width = (1, 2400, 2400)
    start_date = "2013-01-01"
    end_date = datetime.date.today().strftime("%Y-%m-%d")
    anon = tl.Bool(True)

    dims = ["time", "lat", "lon"]

    _repr_keys = ["product", "data_key"]

    @cached_property(use_cache_ctrl=True)
    def tile_coordinates(self):
        return [get_tile_coordinates(*hv) for hv in self.available_tiles]

    @cached_property(use_cache_ctrl=True)
    def available_tiles(self):
        _logger.info("Looking up available tiles...")
        return [(h, v) for h in _available(self.s3, self.product) for v in _available(self.s3, self.product, h)]

    def select_sources(self, coordinates, _selector=None):
        """2d select sources filtering"""

        # filter tiles spatially
        ct = coordinates.transform(CRS)
        tiles = [at for at, atc in zip(self.available_tiles, self.tile_coordinates) if ct.select(atc.bounds).size > 0]
        sources = []
        for tile in tiles:
            h, v = tile
            available_dates = _available(self.s3, self.product, h, v)
            dates = [_parse_modis_date(date) for date in available_dates]
            date_coords = podpac.Coordinates([dates], dims=["time"])
            # Filter individual tiles temporally
            if _selector is not None:
                _, I = _selector(date_coords, ct, index_type="numpy")
            else:
                _, I = date_coords.intersect(ct, outer=True, return_index=True)
            valid_dates = np.array(available_dates)[I]
            valid_sources = [
                MODISSource(
                    product=self.product,
                    horizontal=h,
                    vertical=v,
                    date=date,
                    data_key=self.data_key,
                    check_exists=False,
                    cache_ctrl=self.cache_ctrl,
                    force_eval=self.force_eval,
                    cache_output=self.cache_output,
                    cache_dataset=True,
                    s3=self.s3,
                )
                for date in valid_dates
            ]
            sources.extend(valid_sources)
        self.set_trait("sources", sources)
        return sources


[docs]class MODIS(InterpolationMixin, MODISComposite): pass
if __name__ == "__main__": from matplotlib import pyplot # ------------------------------------------------------------------------- # basic modis source # ------------------------------------------------------------------------- source = MODISSource( product=PRODUCTS[0], data_key="B01", horizontal="01", vertical="11", date="2020009", cache_ctrl=["disk"], cache_dataset=True, cache_output=False, ) print("source: %s" % repr(source)) print("path: %s" % source.source) print("coordinates: %s", source.coordinates) # native coordinates o1 = source.eval(source.coordinates) # cropped and resampled using EPSG:4326 coordinates c = podpac.Coordinates([podpac.clinspace(-22, -20, 200), podpac.clinspace(-176, -174, 200)], dims=["lat", "lon"]) o2 = source.eval(c) # ------------------------------------------------------------------------- # modis tile with time # ------------------------------------------------------------------------- tile = MODISTile( product=PRODUCTS[0], data_key="B01", horizontal="01", vertical="11", cache_ctrl=["disk"], cache_output=False ) print("tile: %s" % repr(tile)) print( "available dates: %s-%s (n=%d)" % (tile.available_dates[0], tile.available_dates[-1], len(tile.available_dates)) ) print("coordinates: %s" % tile.coordinates) # existing date assert "2020009" in tile.available_dates ct1 = podpac.Coordinates(["2020-01-09", c["lat"], c["lon"]], dims=["time", "lat", "lon"]) o2 = tile.eval(ct1) # nearest date assert "2020087" not in tile.available_dates ct2 = podpac.Coordinates(["2020-03-27", c["lat"], c["lon"]], dims=["time", "lat", "lon"]) o3 = tile.eval(ct2) # time-series ct3 = podpac.Coordinates([["2019-01-01", "2019-02-01", "2019-03-01"], -21.45, -174.92], dims=["time", "lat", "lon"]) o4 = tile.eval(ct3) # ------------------------------------------------------------------------- # modis compositor # ------------------------------------------------------------------------- node = MODIS(product=PRODUCTS[0], data_key="B01", cache_ctrl=["disk"], cache_output=False) print("node: %s" % repr(node)) print("sources: n=%d" % len(node.sources)) print(" .e.g: %s" % repr(node.sources[0])) # single tile assert len(node.select_sources(ct2)) == 1 o5 = node.eval(ct2) # time-series in a single tile assert len(node.select_sources(ct3)) == 1 o6 = node.eval(ct3) # multiple tiles ct3 = podpac.Coordinates( ["2020-01-09", podpac.clinspace(45, 55, 200), podpac.clinspace(-80, -40, 200)], dims=["time", "lat", "lon"] ) assert len(node.select_sources(ct3)) == 7 o7 = node.eval(ct3) # o7.plot() # pyplot.show()