Skip to content

Commit e150df1

Browse files
authored
Merge pull request #3126 from snbianco/ASB-29312-missions-params
Parameter validation on MastMissions queries
2 parents 524c2f4 + ed3bdff commit e150df1

10 files changed

+3280
-111
lines changed

CHANGES.rst

+7
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,13 @@ mast
223223
- Deprecated ``enable_cloud_dataset`` and ``disable_cloud_dataset`` in classes where they
224224
are non-operational. They will be removed in a future release. [#3113]
225225

226+
- Present users with an error when nonexistent query criteria are used in ``mast.MastMissions`` query functions. [#3126]
227+
228+
- Present users with an error when nonexistent query criteria are used in ``mast.Catalogs.query_region`` and
229+
``mast.Catalogs.query_object``. [#3126]
230+
231+
- Handle HLSP data products in ``Observations.get_cloud_uris``. [#3126]
232+
226233
mpc
227234
^^^
228235

astroquery/mast/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ class Conf(_config.ConfigNamespace):
2020
ssoserver = _config.ConfigItem(
2121
'https://ssoportal.stsci.edu',
2222
'MAST SSO Portal server.')
23+
catalogs_server = _config.ConfigItem(
24+
'https://catalogs.mast.stsci.edu',
25+
'Catalogs.MAST server.')
2326
timeout = _config.ConfigItem(
2427
600,
2528
'Time limit for requests from the STScI server.')

astroquery/mast/collections.py

+159-25
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
This module contains various methods for querying MAST collections such as catalogs.
77
"""
88

9+
import difflib
10+
from json import JSONDecodeError
911
import warnings
1012
import os
1113
import time
1214

13-
from requests import HTTPError
15+
from requests import HTTPError, RequestException
1416

1517
import astropy.units as u
1618
import astropy.coordinates as coord
@@ -21,7 +23,7 @@
2123
from ..utils.class_or_instance import class_or_instance
2224
from ..exceptions import InvalidQueryError, MaxResultsWarning, InputWarning
2325

24-
from . import utils
26+
from . import utils, conf
2527
from .core import MastQueryWithLogin
2628

2729

@@ -42,11 +44,13 @@ def __init__(self):
4244

4345
services = {"panstarrs": {"path": "panstarrs/{data_release}/{table}.json",
4446
"args": {"data_release": "dr2", "table": "mean"}}}
47+
self._catalogs_mast_search_options = ['columns', 'sort_by', 'table', 'data_release']
4548

4649
self._service_api_connection.set_service_params(services, "catalogs", True)
4750

4851
self.catalog_limit = None
4952
self._current_connection = None
53+
self._service_columns = dict() # Info about columns for Catalogs.MAST services
5054

5155
def _parse_result(self, response, *, verbose=False):
5256

@@ -58,9 +62,102 @@ def _parse_result(self, response, *, verbose=False):
5862

5963
return results_table
6064

65+
def _get_service_col_config(self, catalog, release='dr2', table='mean'):
    """
    For a given Catalogs.MAST catalog, return a table of all searchable columns and their descriptions.
    As of now, this function is exclusive to the Pan-STARRS catalog.

    The column table is requested from the service's ``metadata.json`` endpoint the first time a
    ``(catalog, release, table)`` combination is seen, and is cached in ``self._service_columns``
    for subsequent calls.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    release : str, optional
        Catalog data release to query from.
    table : str, optional
        Catalog table to query from.

    Returns
    -------
    response : `~astropy.table.Table` or None
        Table with ``name``, ``data_type``, and ``description`` columns. None is returned when
        ``catalog`` is anything other than ``'panstarrs'``.

    Raises
    ------
    JSONDecodeError
        If the metadata response cannot be decoded as JSON.
    ConnectionError
        If the request to the server fails.
    KeyError
        If an entry in the response is missing an expected key.
    RuntimeError
        If any other error occurs while fetching or building the column table.
    """
    # Only supported for PanSTARRS currently
    if catalog != 'panstarrs':
        return None

    service_key = (catalog, release, table)
    if service_key not in self._service_columns:
        # Shared suffix identifying the service in every error message raised below
        target = f' for {catalog} catalog {table}, {release}'
        try:
            # Send server request to get column list for given parameters
            request_url = f'{conf.catalogs_server}/api/v0.1/{catalog}/{release}/{table}/metadata.json'
            resp = utils._simple_request(request_url)

            # Parse JSON and extract necessary info
            results = resp.json()
            rows = [
                (result['column_name'], result['db_type'], result['description'])
                for result in results
            ]

            # Create Table with parsed data
            col_table = Table(rows=rows, names=('name', 'data_type', 'description'))
            self._service_columns[service_key] = col_table

        except JSONDecodeError as ex:
            # JSONDecodeError requires (msg, doc, pos); building it from a message alone
            # raises TypeError and hides the real failure, so forward doc and pos.
            raise JSONDecodeError(
                f'Failed to decode JSON response while attempting to get column list{target}: {ex}',
                ex.doc, ex.pos) from ex
        except RequestException as ex:
            raise ConnectionError(
                f'Failed to connect to the server while attempting to get column list{target}: {ex}') from ex
        except KeyError as ex:
            raise KeyError(
                f'Expected key not found in response data while attempting to get column list{target}: {ex}') from ex
        except Exception as ex:
            raise RuntimeError(
                f'An unexpected error occurred while attempting to get column list{target}: {ex}') from ex

    return self._service_columns[service_key]
119+
120+
def _validate_service_criteria(self, catalog, **criteria):
    """
    Verify that every criteria keyword names a valid column or search option for the service.

    Keywords are compared case-insensitively against the column names returned by
    ``self._get_service_col_config`` plus the entries of ``self._catalogs_mast_search_options``.
    When no column information is available, no validation is performed.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    **criteria
        Keyword arguments representing criteria filters to apply.

    Raises
    ------
    InvalidQueryError
        If a keyword does not match any valid column name. The message suggests the closest
        matching column name, if one is available.
    """
    # Look up (and cache) the column table for the requested release/table
    data_release = criteria.get('data_release', 'dr2')
    table_name = criteria.get('table', 'mean')
    column_info = self._get_service_col_config(catalog, data_release, table_name)

    # Nothing to validate against
    if not column_info:
        return

    allowed = list(column_info['name']) + self._catalogs_mast_search_options
    for keyword in criteria:
        # Case-insensitive search for the first allowed name equal to this keyword
        wanted = keyword.lower()
        match = None
        for candidate in allowed:
            if candidate.lower() == wanted:
                match = candidate
                break
        if match:
            continue

        # Unknown keyword: build the error, suggesting the nearest valid name when there is one
        suggestions = difflib.get_close_matches(keyword, allowed, n=1)
        if suggestions:
            message = (
                f"Filter '{keyword}' does not exist for {catalog} catalog {table_name}, {data_release}. "
                f"Did you mean '{suggestions[0]}'?"
            )
        else:
            message = f"Filter '{keyword}' does not exist for {catalog} catalog {table_name}, {data_release}."
        raise InvalidQueryError(message)
157+
61158
@class_or_instance
62159
def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
63-
version=None, pagesize=None, page=None, **kwargs):
160+
version=None, pagesize=None, page=None, **criteria):
64161
"""
65162
Given a sky position and radius, returns a list of catalog entries.
66163
See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
@@ -88,10 +185,18 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
88185
Default None.
89186
Can be used to override the default behavior of all results being returned to obtain a
90187
specific page of results.
91-
**kwargs
188+
**criteria
92189
Other catalog-specific keyword args.
93190
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
94-
for specific catalogs. For example one can specify the magtype for an HSC search.
191+
for specific catalogs. For example, one can specify the magtype for an HSC search.
192+
For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
193+
should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
194+
decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
195+
consisting of a list of column names. Results may also be sorted through the query with the parameter
196+
sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
197+
tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
198+
Detailed information of Catalogs.MAST criteria usage can
199+
be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
95200
96201
Returns
97202
-------
@@ -113,9 +218,20 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
113218
if catalog.lower() in self._service_api_connection.SERVICES:
114219
self._current_connection = self._service_api_connection
115220
service = catalog
221+
222+
# validate user criteria
223+
self._validate_service_criteria(catalog.lower(), **criteria)
224+
225+
# adding additional user specified parameters
226+
for prop, value in criteria.items():
227+
params[prop] = value
228+
116229
else:
117230
self._current_connection = self._portal_api_connection
118231

232+
# valid criteria keywords
233+
valid_criteria = []
234+
119235
# Sorting out the non-standard portal service names
120236
if catalog.lower() == "hsc":
121237
if version == 2:
@@ -125,19 +241,20 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
125241
warnings.warn("Invalid HSC version number, defaulting to v3.", InputWarning)
126242
service = "Mast.Hsc.Db.v3"
127243

128-
self.catalog_limit = kwargs.get('nr', 50000)
129-
130244
# Hsc specific parameters (can be overridden by user)
131-
params['nr'] = 50000
132-
params['ni'] = 1
133-
params['magtype'] = 1
245+
self.catalog_limit = criteria.pop('nr', 50000)
246+
valid_criteria = ['nr', 'ni', 'magtype']
247+
params['nr'] = self.catalog_limit
248+
params['ni'] = criteria.pop('ni', 1)
249+
params['magtype'] = criteria.pop('magtype', 1)
134250

135251
elif catalog.lower() == "galex":
136252
service = "Mast.Galex.Catalog"
137-
self.catalog_limit = kwargs.get('maxrecords', 50000)
253+
self.catalog_limit = criteria.get('maxrecords', 50000)
138254

139255
# galex specific parameters (can be overridden by user)
140-
params['maxrecords'] = 50000
256+
valid_criteria = ['maxrecords']
257+
params['maxrecords'] = criteria.pop('maxrecords', 50000)
141258

142259
elif catalog.lower() == "gaia":
143260
if version == 1:
@@ -158,9 +275,16 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
158275
service = "Mast.Catalogs." + catalog + ".Cone"
159276
self.catalog_limit = None
160277

161-
# adding additional user specified parameters
162-
for prop, value in kwargs.items():
163-
params[prop] = value
278+
# additional user-specified parameters are not valid
279+
if criteria:
280+
key = next(iter(criteria))
281+
closest_match = difflib.get_close_matches(key, valid_criteria, n=1)
282+
error_msg = (
283+
f"Filter '{key}' does not exist for catalog {catalog}. Did you mean '{closest_match[0]}'?"
284+
if closest_match
285+
else f"Filter '{key}' does not exist for catalog {catalog}."
286+
)
287+
raise InvalidQueryError(error_msg)
164288

165289
# Parameters will be passed as JSON objects only when accessing the PANSTARRS API
166290
use_json = catalog.lower() == 'panstarrs'
@@ -170,7 +294,7 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
170294

171295
@class_or_instance
172296
def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
173-
pagesize=None, page=None, version=None, **kwargs):
297+
pagesize=None, page=None, version=None, **criteria):
174298
"""
175299
Given an object name, returns a list of catalog entries.
176300
See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
@@ -197,10 +321,18 @@ def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
197321
to obtain a specific page of results.
198322
version : int, optional
199323
Version number for catalogs that have versions. Default is highest version.
200-
**kwargs
324+
**criteria
201325
Catalog-specific keyword args.
202326
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
203-
for specific catalogs. For example one can specify the magtype for an HSC search.
327+
for specific catalogs. For example, one can specify the magtype for an HSC search.
328+
For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
329+
should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
330+
decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
331+
consisting of a list of column names. Results may also be sorted through the query with the parameter
332+
sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
333+
tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
334+
Detailed information of Catalogs.MAST criteria usage can
335+
be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
204336
205337
Returns
206338
-------
@@ -215,7 +347,7 @@ def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
215347
version=version,
216348
pagesize=pagesize,
217349
page=page,
218-
**kwargs)
350+
**criteria)
219351

220352
@class_or_instance
221353
def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria):
@@ -281,6 +413,9 @@ def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria)
281413
self._current_connection = self._service_api_connection
282414
service = catalog
283415

416+
# validate user criteria
417+
self._validate_service_criteria(catalog.lower(), **criteria)
418+
284419
if not self._current_connection.check_catalogs_criteria_params(criteria):
285420
raise InvalidQueryError("At least one non-positional criterion must be supplied.")
286421

@@ -295,26 +430,25 @@ def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria)
295430
if coordinates or objectname:
296431
service += ".Position"
297432
service += ".Rows" # Using the rowstore version of the query for speed
298-
filters = self._current_connection.build_filter_set("Mast.Catalogs.Tess.Cone",
299-
service, **criteria)
433+
column_config_name = "Mast.Catalogs.Tess.Cone"
300434
params["columns"] = "*"
301435
elif catalog.lower() == "ctl":
302436
service = "Mast.Catalogs.Filtered.Ctl"
303437
if coordinates or objectname:
304438
service += ".Position"
305439
service += ".Rows" # Using the rowstore version of the query for speed
306-
filters = self._current_connection.build_filter_set("Mast.Catalogs.Tess.Cone",
307-
service, **criteria)
440+
column_config_name = "Mast.Catalogs.Tess.Cone"
308441
params["columns"] = "*"
309442
elif catalog.lower() == "diskdetective":
310443
service = "Mast.Catalogs.Filtered.DiskDetective"
311444
if coordinates or objectname:
312445
service += ".Position"
313-
filters = self._current_connection.build_filter_set("Mast.Catalogs.Dd.Cone",
314-
service, **criteria)
446+
column_config_name = "Mast.Catalogs.Dd.Cone"
315447
else:
316448
raise InvalidQueryError("Criteria query not available for {}".format(catalog))
317449

450+
filters = self._current_connection.build_filter_set(column_config_name, service, **criteria)
451+
318452
if not filters:
319453
raise InvalidQueryError("At least one non-positional criterion must be supplied.")
320454
params["filters"] = filters

0 commit comments

Comments
 (0)