6
6
This module contains various methods for querying MAST collections such as catalogs.
7
7
"""
8
8
9
+ import difflib
10
+ from json import JSONDecodeError
9
11
import warnings
10
12
import os
11
13
import time
12
14
13
- from requests import HTTPError
15
+ from requests import HTTPError , RequestException
14
16
15
17
import astropy .units as u
16
18
import astropy .coordinates as coord
21
23
from ..utils .class_or_instance import class_or_instance
22
24
from ..exceptions import InvalidQueryError , MaxResultsWarning , InputWarning
23
25
24
- from . import utils
26
+ from . import utils , conf
25
27
from .core import MastQueryWithLogin
26
28
27
29
@@ -42,11 +44,13 @@ def __init__(self):
42
44
43
45
services = {"panstarrs" : {"path" : "panstarrs/{data_release}/{table}.json" ,
44
46
"args" : {"data_release" : "dr2" , "table" : "mean" }}}
47
+ self ._catalogs_mast_search_options = ['columns' , 'sort_by' , 'table' , 'data_release' ]
45
48
46
49
self ._service_api_connection .set_service_params (services , "catalogs" , True )
47
50
48
51
self .catalog_limit = None
49
52
self ._current_connection = None
53
+ self ._service_columns = dict () # Info about columns for Catalogs.MAST services
50
54
51
55
def _parse_result (self , response , * , verbose = False ):
52
56
@@ -58,9 +62,102 @@ def _parse_result(self, response, *, verbose=False):
58
62
59
63
return results_table
60
64
65
def _get_service_col_config(self, catalog, release='dr2', table='mean'):
    """
    For a given Catalogs.MAST catalog, return a table of all searchable columns and their descriptions.
    As of now, this function is exclusive to the Pan-STARRS catalog.

    The column table is requested from the server once per (catalog, release, table)
    combination and then cached in ``self._service_columns``.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    release : str, optional
        Catalog data release to query from.
    table : str, optional
        Catalog table to query from.

    Returns
    -------
    response : `~astropy.table.Table` or None
        Table with the columns ``name``, ``data_type`` and ``description``.
        None is returned when ``catalog`` is not ``'panstarrs'``.

    Raises
    ------
    JSONDecodeError
        If the server response cannot be decoded as JSON.
    ConnectionError
        If the request to the server fails.
    KeyError
        If an entry of the response lacks one of the expected keys.
    RuntimeError
        If any other error occurs while building the column table.
    """
    # Only supported for PanSTARRS currently
    if catalog != 'panstarrs':
        return None

    service_key = (catalog, release, table)

    # Cache hit: no server request needed
    if service_key in self._service_columns:
        return self._service_columns[service_key]

    # Shared tail of every error message raised below
    context = f' for {catalog} catalog {table}, {release}: '

    try:
        # Send server request to get column list for given parameters
        request_url = f'{conf.catalogs_server}/api/v0.1/{catalog}/{release}/{table}/metadata.json'
        resp = utils._simple_request(request_url)

        # Parse JSON and extract necessary info
        results = resp.json()
        rows = [
            (result['column_name'], result['db_type'], result['description'])
            for result in results
        ]

        # Create Table with parsed data
        col_table = Table(rows=rows, names=('name', 'data_type', 'description'))

    except JSONDecodeError as ex:
        # JSONDecodeError requires (msg, doc, pos); constructing it with only a message
        # raises TypeError and hides the real problem from the caller.
        raise JSONDecodeError('Failed to decode JSON response while attempting to get column list'
                              f'{context}{ex}',
                              getattr(ex, 'doc', ''),
                              getattr(ex, 'pos', 0)) from ex
    except RequestException as ex:
        raise ConnectionError('Failed to connect to the server while attempting to get column list'
                              f'{context}{ex}') from ex
    except KeyError as ex:
        raise KeyError('Expected key not found in response data while attempting to get column list'
                       f'{context}{ex}') from ex
    except Exception as ex:
        raise RuntimeError('An unexpected error occurred while attempting to get column list'
                           f'{context}{ex}') from ex

    self._service_columns[service_key] = col_table
    return col_table
119
+
120
def _validate_service_criteria(self, catalog, **criteria):
    """
    Check that criteria keyword arguments are valid column names for the service.
    Raises InvalidQueryError if a criteria argument is invalid.

    Keywords are compared case-insensitively against the column names returned by
    ``self._get_service_col_config`` plus the names in ``self._catalogs_mast_search_options``.
    When no column information is available for the catalog, nothing is validated.

    Parameters
    ----------
    catalog : str
        The catalog to be queried.
    **criteria
        Keyword arguments representing criteria filters to apply. The optional
        ``data_release`` (default ``'dr2'``) and ``table`` (default ``'mean'``)
        entries select which column list is used.

    Raises
    ------
    InvalidQueryError
        If a keyword does not match any valid column names, an error is raised that suggests the closest
        matching column name, if available.
    """
    # Ensure that self._service_columns is populated
    release = criteria.get('data_release', 'dr2')
    table = criteria.get('table', 'mean')
    col_config = self._get_service_col_config(catalog, release, table)

    # No column information for this catalog: there is nothing to validate against
    if not col_config:
        return

    valid_cols = list(col_config['name']) + self._catalogs_mast_search_options

    # Build the case-insensitive lookup once rather than re-scanning every column per keyword.
    # On a case-only collision the first spelling is kept.
    canonical = {}
    for name in valid_cols:
        canonical.setdefault(name.lower(), name)

    # Check each criteria argument for validity
    for kwd in criteria:
        if kwd.lower() in canonical:
            continue

        closest_match = difflib.get_close_matches(kwd, valid_cols, n=1)
        if not closest_match:
            # Validity is case-insensitive, so the suggestion falls back to a
            # case-insensitive comparison when the exact-case one finds nothing.
            folded = difflib.get_close_matches(kwd.lower(), list(canonical), n=1)
            closest_match = [canonical[match] for match in folded]

        error_msg = f"Filter '{kwd}' does not exist for {catalog} catalog {table}, {release}."
        if closest_match:
            error_msg += f" Did you mean '{closest_match[0]}'?"
        raise InvalidQueryError(error_msg)
157
+
61
158
@class_or_instance
62
159
def query_region_async (self , coordinates , * , radius = 0.2 * u .deg , catalog = "Hsc" ,
63
- version = None , pagesize = None , page = None , ** kwargs ):
160
+ version = None , pagesize = None , page = None , ** criteria ):
64
161
"""
65
162
Given a sky position and radius, returns a list of catalog entries.
66
163
See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
@@ -88,10 +185,18 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
88
185
Default None.
89
186
Can be used to override the default behavior of all results being returned to obtain a
90
187
specific page of results.
91
- **kwargs
188
+ **criteria
92
189
Other catalog-specific keyword args.
93
190
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
94
- for specific catalogs. For example one can specify the magtype for an HSC search.
191
+ for specific catalogs. For example, one can specify the magtype for an HSC search.
192
+ For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
193
+ should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
194
+ decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
195
+ consisting of a list of column names. Results may also be sorted through the query with the parameter
196
+ sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
197
+ tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
198
+ Detailed information of Catalogs.MAST criteria usage can
199
+ be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
95
200
96
201
Returns
97
202
-------
@@ -113,9 +218,20 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
113
218
if catalog .lower () in self ._service_api_connection .SERVICES :
114
219
self ._current_connection = self ._service_api_connection
115
220
service = catalog
221
+
222
+ # validate user criteria
223
+ self ._validate_service_criteria (catalog .lower (), ** criteria )
224
+
225
+ # adding additional user specified parameters
226
+ for prop , value in criteria .items ():
227
+ params [prop ] = value
228
+
116
229
else :
117
230
self ._current_connection = self ._portal_api_connection
118
231
232
+ # valid criteria keywords
233
+ valid_criteria = []
234
+
119
235
# Sorting out the non-standard portal service names
120
236
if catalog .lower () == "hsc" :
121
237
if version == 2 :
@@ -125,19 +241,20 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
125
241
warnings .warn ("Invalid HSC version number, defaulting to v3." , InputWarning )
126
242
service = "Mast.Hsc.Db.v3"
127
243
128
- self .catalog_limit = kwargs .get ('nr' , 50000 )
129
-
130
244
# Hsc specific parameters (can be overridden by user)
131
- params ['nr' ] = 50000
132
- params ['ni' ] = 1
133
- params ['magtype' ] = 1
245
+ self .catalog_limit = criteria .pop ('nr' , 50000 )
246
+ valid_criteria = ['nr' , 'ni' , 'magtype' ]
247
+ params ['nr' ] = self .catalog_limit
248
+ params ['ni' ] = criteria .pop ('ni' , 1 )
249
+ params ['magtype' ] = criteria .pop ('magtype' , 1 )
134
250
135
251
elif catalog .lower () == "galex" :
136
252
service = "Mast.Galex.Catalog"
137
- self .catalog_limit = kwargs .get ('maxrecords' , 50000 )
253
+ self .catalog_limit = criteria .get ('maxrecords' , 50000 )
138
254
139
255
# galex specific parameters (can be overridden by user)
140
- params ['maxrecords' ] = 50000
256
+ valid_criteria = ['maxrecords' ]
257
+ params ['maxrecords' ] = criteria .pop ('maxrecords' , 50000 )
141
258
142
259
elif catalog .lower () == "gaia" :
143
260
if version == 1 :
@@ -158,9 +275,16 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
158
275
service = "Mast.Catalogs." + catalog + ".Cone"
159
276
self .catalog_limit = None
160
277
161
- # adding additional user specified parameters
162
- for prop , value in kwargs .items ():
163
- params [prop ] = value
278
+ # additional user-specified parameters are not valid
279
+ if criteria :
280
+ key = next (iter (criteria ))
281
+ closest_match = difflib .get_close_matches (key , valid_criteria , n = 1 )
282
+ error_msg = (
283
+ f"Filter '{ key } ' does not exist for catalog { catalog } . Did you mean '{ closest_match [0 ]} '?"
284
+ if closest_match
285
+ else f"Filter '{ key } ' does not exist for catalog { catalog } ."
286
+ )
287
+ raise InvalidQueryError (error_msg )
164
288
165
289
# Parameters will be passed as JSON objects only when accessing the PANSTARRS API
166
290
use_json = catalog .lower () == 'panstarrs'
@@ -170,7 +294,7 @@ def query_region_async(self, coordinates, *, radius=0.2*u.deg, catalog="Hsc",
170
294
171
295
@class_or_instance
172
296
def query_object_async (self , objectname , * , radius = 0.2 * u .deg , catalog = "Hsc" ,
173
- pagesize = None , page = None , version = None , ** kwargs ):
297
+ pagesize = None , page = None , version = None , ** criteria ):
174
298
"""
175
299
Given an object name, returns a list of catalog entries.
176
300
See column documentation for specific catalogs `here <https://mast.stsci.edu/api/v0/pages.html>`__.
@@ -197,10 +321,18 @@ def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
197
321
to obtain a specific page of results.
198
322
version : int, optional
199
323
Version number for catalogs that have versions. Default is highest version.
200
- **kwargs
324
+ **criteria
201
325
Catalog-specific keyword args.
202
326
These can be found in the `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__
203
- for specific catalogs. For example one can specify the magtype for an HSC search.
327
+ for specific catalogs. For example, one can specify the magtype for an HSC search.
328
+ For catalogs available through Catalogs.MAST (PanSTARRS), the Column Name is the keyword, and the argument
329
+ should be either an acceptable value for that parameter, or a list consisting of values, or tuples of
330
+ decorator, value pairs (decorator, value). In addition, columns may be used to select the return columns,
331
+ consisting of a list of column names. Results may also be sorted through the query with the parameter
332
+ sort_by composed of either a single Column Name to sort ASC, or a list of Column Names to sort ASC or
333
+ tuples of Column Name and Direction (ASC, DESC) to indicate sort order (Column Name, DESC).
334
+ Detailed information of Catalogs.MAST criteria usage can
335
+ be found `here <https://catalogs.mast.stsci.edu/docs/index.html>`__.
204
336
205
337
Returns
206
338
-------
@@ -215,7 +347,7 @@ def query_object_async(self, objectname, *, radius=0.2*u.deg, catalog="Hsc",
215
347
version = version ,
216
348
pagesize = pagesize ,
217
349
page = page ,
218
- ** kwargs )
350
+ ** criteria )
219
351
220
352
@class_or_instance
221
353
def query_criteria_async (self , catalog , * , pagesize = None , page = None , ** criteria ):
@@ -281,6 +413,9 @@ def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria)
281
413
self ._current_connection = self ._service_api_connection
282
414
service = catalog
283
415
416
+ # validate user criteria
417
+ self ._validate_service_criteria (catalog .lower (), ** criteria )
418
+
284
419
if not self ._current_connection .check_catalogs_criteria_params (criteria ):
285
420
raise InvalidQueryError ("At least one non-positional criterion must be supplied." )
286
421
@@ -295,26 +430,25 @@ def query_criteria_async(self, catalog, *, pagesize=None, page=None, **criteria)
295
430
if coordinates or objectname :
296
431
service += ".Position"
297
432
service += ".Rows" # Using the rowstore version of the query for speed
298
- filters = self ._current_connection .build_filter_set ("Mast.Catalogs.Tess.Cone" ,
299
- service , ** criteria )
433
+ column_config_name = "Mast.Catalogs.Tess.Cone"
300
434
params ["columns" ] = "*"
301
435
elif catalog .lower () == "ctl" :
302
436
service = "Mast.Catalogs.Filtered.Ctl"
303
437
if coordinates or objectname :
304
438
service += ".Position"
305
439
service += ".Rows" # Using the rowstore version of the query for speed
306
- filters = self ._current_connection .build_filter_set ("Mast.Catalogs.Tess.Cone" ,
307
- service , ** criteria )
440
+ column_config_name = "Mast.Catalogs.Tess.Cone"
308
441
params ["columns" ] = "*"
309
442
elif catalog .lower () == "diskdetective" :
310
443
service = "Mast.Catalogs.Filtered.DiskDetective"
311
444
if coordinates or objectname :
312
445
service += ".Position"
313
- filters = self ._current_connection .build_filter_set ("Mast.Catalogs.Dd.Cone" ,
314
- service , ** criteria )
446
+ column_config_name = "Mast.Catalogs.Dd.Cone"
315
447
else :
316
448
raise InvalidQueryError ("Criteria query not available for {}" .format (catalog ))
317
449
450
+ filters = self ._current_connection .build_filter_set (column_config_name , service , ** criteria )
451
+
318
452
if not filters :
319
453
raise InvalidQueryError ("At least one non-positional criterion must be supplied." )
320
454
params ["filters" ] = filters
0 commit comments