"""
PODPAC Nodes to access SMAP data via EGI Interface
"""
from __future__ import division, unicode_literals, print_function, absolute_import
import os
import copy
import logging
from datetime import datetime
import requests
from six import string_types
import numpy as np
import xarray as xr
import traitlets as tl
from podpac.datalib import nasaCMR
# Set up logging
_log = logging.getLogger(__name__)
# Helper utility for optional imports
from lazy_import import lazy_module, lazy_class
h5py = lazy_module("h5py")
lazy_class("h5py.File")
# fixing problem with older versions of numpy
if not hasattr(np, "isnat"):
def isnat(a):
return a.astype(str) == "None"
np.isnat = isnat
# Internal dependencies
from podpac import Coordinates, UnitsDataArray, cached_property
from podpac.datalib import EGI
BASE_URL = "https://n5eil01u.ecs.nsidc.org/egi/request"
SMAP_PRODUCT_DICT = {
#'shortname': ['lat_key', 'lon_key', '_data_key', 'quality_flag', 'default_verison']
"SPL4SMAU": ["/x", "/y", "/Analysis_Data/sm_surface_analysis", None, None],
"SPL4SMGP": ["/x", "/y", "/Geophysical_Data/sm_surface", None, 4],
"SPL4SMLM": ["/x", "/y", "/Land_Model_Constants_Data", None, 4],
"SPL3SMAP": [
"/Soil_Moisture_Retrieval_Data/latitude",
"/Soil_Moisture_Retrieval_Data/longitude",
"/Soil_Moisture_Retrieval_Data/soil_moisture",
"/Soil_Moisture_Retrieval_Data/retrieval_qual_flag",
"003",
],
"SPL3SMA": [
"/Soil_Moisture_Retrieval_Data/latitude",
"/Soil_Moisture_Retrieval_Data/longitude",
"/Soil_Moisture_Retrieval_Data/soil_moisture",
"/Soil_Moisture_Retrieval_Data/retrieval_qual_flag",
"003",
],
"SPL3SMP_AM": [
"/Soil_Moisture_Retrieval_Data_AM/latitude",
"/Soil_Moisture_Retrieval_Data_AM/longitude",
"/Soil_Moisture_Retrieval_Data_AM/soil_moisture",
"/Soil_Moisture_Retrieval_Data_AM/retrieval_qual_flag",
"005",
],
"SPL3SMP_PM": [
"/Soil_Moisture_Retrieval_Data_PM/latitude",
"/Soil_Moisture_Retrieval_Data_PM/longitude",
"/Soil_Moisture_Retrieval_Data_PM/soil_moisture_pm",
"/Soil_Moisture_Retrieval_Data_PM/retrieval_qual_flag_pm",
"005",
],
"SPL3SMP_E_AM": [
"/Soil_Moisture_Retrieval_Data_AM/latitude",
"/Soil_Moisture_Retrieval_Data_AM/longitude",
"/Soil_Moisture_Retrieval_Data_AM/soil_moisture",
"/Soil_Moisture_Retrieval_Data_AM/retrieval_qual_flag",
"004",
],
"SPL3SMP_E_PM": [
"/Soil_Moisture_Retrieval_Data_PM/latitude_pm",
"/Soil_Moisture_Retrieval_Data_PM/longitude_pm",
"/Soil_Moisture_Retrieval_Data_PM/soil_moisture_pm",
"/Soil_Moisture_Retrieval_Data_PM/retrieval_qual_flag_pm",
"004",
],
}
SMAP_PRODUCTS = list(SMAP_PRODUCT_DICT.keys())
[docs]class SMAP(EGI):
"""
SMAP Node. For more information about SMAP, see https://nsidc.org/data/smap
SMAP interface using the EGI Data Portal
https://developer.earthdata.nasa.gov/sdps/programmatic-access-docs
with the base URL: https://n5eil01u.ecs.nsidc.org/egi/request
To access data from this node, an Earthdata login is required. This can either be specified when
creating the node:
```python
smap = SMAP(username="your_user_name", password="your_password")
```
OR you can set the following PODPAC settings:
```python
podpac.settings["username@urs.earthdata.nasa.gov"] = "your_user_name"
podpac.settings["password@urs.earthdata.nasa.gov"] = "your_password"
podpac.settings.save() # To have this information persist
smap = SMAP()
```
Parameters
----------
product : str
One of the :list:`SMAP_PRODUCTS` strings
check_quality_flags : bool, optional
Default is True. If True, data will be filtered based on the SMAP data quality flag, and only
high quality data is returned.
data_key : str, optional
Default will return soil moisture and is set automatically based on the product selected. Other
possible data keys can be found
Attributes
----------
nan_vals : list
Nan values in SMAP data
username : str, optional
Earthdata username (https://urs.earthdata.nasa.gov/)
If undefined, node will look for a username under setting key "username@urs.earthdata.nasa.gov"
password : str, optional
Earthdata password (https://urs.earthdata.nasa.gov/)
If undefined, node will look for a password under setting key "password@urs.earthdata.nasa.gov"
"""
product = tl.Enum(SMAP_PRODUCTS, default_value="SPL4SMAU").tag(attr=True)
nan_vals = [-9999.0]
min_bounds_span = tl.Dict(default_value={"lon": 0.3, "lat": 0.3, "time": "3,h"}).tag(attr=True)
check_quality_flags = tl.Bool(True).tag(attr=True, default=True)
quality_flag_key = tl.Unicode(allow_none=True).tag(attr=True)
data_key = tl.Unicode(allow_none=True, default_value=None).tag(attr=True)
base_url = tl.Unicode(default_value=BASE_URL).tag(attr=True)
@property
def short_name(self):
if "SPL3SMP" in self.product:
return self.product.replace("_AM", "").replace("_PM", "")
else:
return self.product
# pull _data_key, lat_key, lon_key, and version from product dict
@cached_property
def _product_data(self):
return SMAP_PRODUCT_DICT[self.product]
@property
def udims(self):
return ["lat", "lon", "time"]
@property
def lat_key(self):
return self._product_data[0]
@property
def lon_key(self):
return self._product_data[1]
@property
def _data_key(self):
if self.data_key is None:
return self._product_data[2]
else:
return self.data_key
@property
def quality_flag_key(self):
return self._product_data[3]
@property
def version(self):
try:
return nasaCMR.get_collection_entries(short_name=self.product)[-1]["version_id"]
except:
_log.warning("Could not automatically retrieve newest product version id from NASA CMR.")
return self._product_data[4]
@property
def coverage(self):
if self.quality_flag_key:
return (self._data_key, self.quality_flag_key, self.lat_key, self.lon_key)
else:
return (self._data_key, self.lat_key, self.lon_key)
[docs] def read_file(self, filelike):
"""Interpret individual SMAP file from EGI zip archive.
Parameters
----------
filelike : filelike
Reference to file inside EGI zip archive
Returns
-------
podpac.UnitsDataArray
Raises
------
ValueError
"""
ds = h5py.File(filelike, "r")
# handle data
data = ds[self._data_key][()]
if self.check_quality_flags and self.quality_flag_key:
flag = ds[self.quality_flag_key][()]
flag = flag > 0
[flag] == np.nan
data = np.array([data]) # add extra dimension for time slice
# handle time
if "SPL3" in self.product:
# TODO: make this py2.7 compatible
# take the midpoint between the range identified in the file
t_start = np.datetime64(ds["Metadata/Extent"].attrs["rangeBeginningDateTime"].replace("Z", ""))
t_end = np.datetime64(ds["Metadata/Extent"].attrs["rangeEndingDateTime"].replace("Z", ""))
time = np.array([t_start + (t_end - t_start) / 2])
time = time.astype("datetime64[D]")
elif "SPL4" in self.product:
time_unit = ds["time"].attrs["units"].decode()
time = xr.coding.times.decode_cf_datetime(ds["time"][()][0], units=time_unit)
time = time.astype("datetime64[h]")
# handle spatial coordinates
if "SPL3" in self.product:
# take nan mean along each axis
lons = ds[self.lon_key][()]
lats = ds[self.lat_key][()]
lons[lons == self.nan_vals[0]] = np.nan
lats[lats == self.nan_vals[0]] = np.nan
# short-circuit if all lat/lon are non
if np.all(np.isnan(lats)) and np.all(np.isnan(lons)):
return None
# make podpac coordinates
lon = np.nanmean(lons, axis=0)
lat = np.nanmean(lats, axis=1)
c = Coordinates([time, lat, lon], dims=["time", "lat", "lon"])
elif "SPL4" in self.product:
# lat/lon coordinates in EPSG:6933 (https://epsg.io/6933)
lon = ds["x"][()]
lat = ds["y"][()]
# short-circuit if all lat/lon are nan
if np.all(np.isnan(lat)) and np.all(np.isnan(lon)):
return None
c = Coordinates([time, lat, lon], dims=["time", "lat", "lon"], crs="epsg:6933")
# make units data array with coordinates and data
return UnitsDataArray.create(c, data=data)
[docs] def append_file(self, all_data, data):
"""Append data
Parameters
----------
all_data : podpac.UnitsDataArray
aggregated data
data : podpac.UnitsDataArray
new data to append
Raises
------
NotImplementedError
"""
if all_data.shape[1:] == data.shape[1:]:
data.lat.data[:] = all_data.lat.data
data.lon.data[:] = all_data.lon.data
else:
# select only data with finite coordinates
data = data.isel(lon=np.isfinite(data.lon), lat=np.isfinite(data.lat))
# select lat based on the old data
lat = all_data.lat.sel(lat=data.lat, method="nearest")
# When the difference between old and new coordintaes are large, it means there are new coordinates
Ilat = (np.abs(lat.data - data.lat) > 1e-3).data
# Use the new data's coordinates for the new coordinates
lat.data[Ilat] = data.lat.data[Ilat]
# Repeat for lon
lon = all_data.lon.sel(lon=data.lon, method="nearest")
Ilon = (np.abs(lon.data - data.lon) > 1e-3).data
lon.data[Ilon] = data.lon.data[Ilon]
# Assign to data
data.lon.data[:] = lon.data
data.lat.data[:] = lat.data
return all_data.combine_first(data)