
Commit aa35035

Merge pull request #3014 from esdc-esac-esa-int/ESA_gaia_GAIAMNGT-1700_load_data
Gaia: change the signature of the method load_data
2 parents 22db7f2 + 58f8692 commit aa35035

10 files changed (+538 -54 lines)

CHANGES.rst (+7)
@@ -181,8 +181,13 @@ gaia

 - Fix method search_async_jobs in the class TapPlus. [#2967]

+- Change the signature of the function load_data: the parameter output_file, which defined the file where the results
+  were saved, is replaced by the boolean parameter dump_to_file. When it is True, a compressed directory named
+  "datalink_output.zip" containing all the DataLink files is created, so users can no longer specify the output file. [#3014]
+
 - New retrieval types for datalink (Gaia DR4 release). [#3110]

+
 jplhorizons
 ^^^^^^^^^^^


@@ -423,6 +428,8 @@ gaia
   epoch photometry service to return all data associated to a given source.
   [#2376]

+- New retrieval types for datalink (Gaia DR4 release). [#3110]
+
 - Default Gaia catalog updated to DR3. [#2596]

 heasarc
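
A minimal usage sketch of the new calling convention described in the changelog entry above (not part of the commit; the source IDs are placeholders and a network call to the Gaia archive is implied):

from astroquery.gaia import Gaia

# Hypothetical example IDs; any valid Gaia source_id values would do.
ids = [4295806720, 4295806721]

# Old style (removed by this PR): Gaia.load_data(ids=ids, output_file="my_datalink.zip")
# New style: ask for a dump and let load_data name the zip itself.
results = Gaia.load_data(ids=ids, retrieval_type="ALL", dump_to_file=True)

# Per the docstring below, the return value maps file names to lists of astropy Tables.
for filename, tables in results.items():
    print(filename, len(tables))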

astroquery/gaia/core.py (+48 -37)
@@ -13,13 +13,12 @@
 Created on 30 jun. 2016
 Modified on 18 Ene. 2022 by mhsarmiento
 """
+import datetime
 import json
 import os
 import shutil
 import zipfile
 from collections.abc import Iterable
-from datetime import datetime, timezone
-from pathlib import Path

 from astropy import units
 from astropy import units as u
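
The import switch above trades the two imported names for the module itself; an illustrative comparison of the equivalent calls (not part of the diff):

import datetime

# After this change the UTC timestamp is obtained through the module path...
now = datetime.datetime.now(datetime.timezone.utc)
# ...instead of the previously imported names:
# from datetime import datetime, timezone
# now = datetime.now(timezone.utc)
print(now.strftime("%Y%m%dT%H%M%S"))  # e.g. "20240131T120000"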
@@ -28,6 +27,7 @@
 from astropy.io import votable
 from astropy.table import Table
 from astropy.units import Quantity
+from astropy.utils.decorators import deprecated_renamed_argument
 from requests import HTTPError

 from astroquery import log
@@ -168,9 +168,11 @@ def logout(self, *, verbose=False):
         except HTTPError:
             log.error("Error logging out data server")

+    @deprecated_renamed_argument("output_file", None, since="0.4.8")
     def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL",
                   linking_parameter='SOURCE_ID', valid_data=False, band=None, avoid_datatype_check=False,
-                  format="votable", output_file=None, overwrite_output_file=False, verbose=False):
+                  format="votable", dump_to_file=False, overwrite_output_file=False, verbose=False,
+                  output_file=None):
         """Loads the specified table
         TAP+ only

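
A small standalone sketch (not astroquery code) of what the decorator added above does when the new name is None: the old keyword is still accepted, but passing it emits an AstropyDeprecationWarning, so existing callers that pass output_file keep working during the transition:

import warnings

from astropy.utils.decorators import deprecated_renamed_argument


@deprecated_renamed_argument("output_file", None, since="0.4.8")
def load_data_stub(*, dump_to_file=False, output_file=None):
    # Toy stand-in for Gaia.load_data, only to show the warning behaviour.
    return dump_to_file, output_file


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    load_data_stub(output_file="my_results.zip")
    print(caught[0].category.__name__)  # expected: AstropyDeprecationWarning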
@@ -218,44 +220,53 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
             By default, this value will be set to False. If it is set to 'true'
             the Datalink items tags will not be checked.
         format : str, optional, default 'votable'
-            loading format. Other available formats are 'csv', 'ecsv','votable_plain' and 'fits'
-        output_file : string or pathlib.PosixPath, optional, default None
-            file where the results are saved.
-            If it is not provided, the http response contents are returned.
+            loading format. Other available formats are 'csv', 'ecsv','votable_plain', 'json' and 'fits'
+        dump_to_file: boolean, optional, default False.
+            If it is true, a compressed directory named "datalink_output_<time_stamp>.zip" with all the DataLink
+            files is made in the current working directory. The <time_stamp> format follows the ISO 8601 standard:
+            "yyyymmddThhmmss".
         overwrite_output_file : boolean, optional, default False
-            To overwrite the output_file if it already exists.
+            To overwrite the output file ("datalink_output.zip") if it already exists.
         verbose : bool, optional, default 'False'
             flag to display information about the process

         Returns
         -------
         A dictionary where the keys are the file names and its value is a list of astropy.table.table.Table objects
         """
-        now = datetime.now(timezone.utc)
-        now_formatted = now.strftime("%Y%m%d_%H%M%S")
-        temp_dirname = "temp_" + now_formatted
-        downloadname_formated = "download_" + now_formatted

         output_file_specified = False
-        if output_file is None:
+
+        now = datetime.datetime.now(datetime.timezone.utc)
+        if not dump_to_file:
+            now_formatted = now.strftime("%Y%m%d_%H%M%S")
+            temp_dirname = "temp_" + now_formatted
+            downloadname_formated = "download_" + now_formatted
             output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated)
+
         else:
+            output_file = 'datalink_output_' + now.strftime("%Y%m%dT%H%M%S") + '.zip'
             output_file_specified = True
-
-            if isinstance(output_file, str):
-                if not output_file.lower().endswith('.zip'):
-                    output_file = output_file + '.zip'
-            elif isinstance(output_file, Path):
-                if not output_file.suffix.endswith('.zip'):
-                    output_file.with_suffix('.zip')
-
         output_file = os.path.abspath(output_file)
+        log.info(f"DataLink products will be stored in the {output_file} file")
+
         if not overwrite_output_file and os.path.exists(output_file):
             raise ValueError(f"{output_file} file already exists. Please use overwrite_output_file='True' to "
                              f"overwrite output file.")

         path = os.path.dirname(output_file)

+        log.debug(f"Directory where the data will be saved: {path}")
+
+        if path != '':
+            if not os.path.isdir(path):
+                try:
+                    os.mkdir(path)
+                except FileExistsError:
+                    log.warn("Path %s already exist" % path)
+                except OSError:
+                    log.error("Creation of the directory %s failed" % path)
+
         if avoid_datatype_check is False:
            # we need to check params
            rt = str(retrieval_type).upper()
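
To make the naming scheme above concrete, a short sketch of the two output paths load_data now builds, mirroring the diff (illustrative only; the directory and file names are generated, never user supplied):

import datetime
import os

now = datetime.datetime.now(datetime.timezone.utc)

# dump_to_file=False: results go to a throw-away temp location that is removed later.
temp_output = os.path.join(os.getcwd(),
                           "temp_" + now.strftime("%Y%m%d_%H%M%S"),
                           "download_" + now.strftime("%Y%m%d_%H%M%S"))

# dump_to_file=True: a fixed-pattern zip in the current working directory,
# timestamped in ISO 8601 basic format ("yyyymmddThhmmss").
zip_output = "datalink_output_" + now.strftime("%Y%m%dT%H%M%S") + ".zip"

print(os.path.abspath(temp_output))
print(os.path.abspath(zip_output))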
@@ -298,14 +309,7 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
         if linking_parameter != 'SOURCE_ID':
             params_dict['LINKING_PARAMETER'] = linking_parameter

-        if path != '':
-            try:
-                os.mkdir(path)
-            except FileExistsError:
-                log.error("Path %s already exist" % path)
-            except OSError:
-                log.error("Creation of the directory %s failed" % path)
-
+        files = dict()
         try:
             self.__gaiadata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose)
             files = Gaia.__get_data_files(output_file=output_file, path=path)
@@ -314,6 +318,9 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
         finally:
             if not output_file_specified:
                 shutil.rmtree(path)
+            else:
+                for file in files.keys():
+                    os.remove(os.path.join(os.getcwd(), path, file))

         if verbose:
             if output_file_specified:
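
In other words, the finally block now cleans up differently per mode; a hedged standalone sketch of that logic (the helper name is hypothetical, the bodies mirror the hunk above):

import os
import shutil

def cleanup_after_download(path, files, output_file_specified):
    # Temp mode: the whole temporary directory is discarded.
    # dump_to_file mode: the extracted members are removed so only the
    # datalink_output_<time_stamp>.zip archive is left behind.
    if not output_file_specified:
        shutil.rmtree(path)
    else:
        for file in files.keys():
            os.remove(os.path.join(os.getcwd(), path, file))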
@@ -329,18 +336,21 @@ def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retr
     @staticmethod
     def __get_data_files(output_file, path):
         files = {}
-        if zipfile.is_zipfile(output_file):
-            with zipfile.ZipFile(output_file, 'r') as zip_ref:
-                zip_ref.extractall(os.path.dirname(output_file))
+        extracted_files = []
+
+        with zipfile.ZipFile(output_file, "r") as zip_ref:
+            extracted_files.extend(zip_ref.namelist())
+            zip_ref.extractall(os.path.dirname(output_file))

         # r=root, d=directories, f = files
         for r, d, f in os.walk(path):
             for file in f:
-                if file.lower().endswith(('.fits', '.xml', '.csv', '.ecsv')):
+                if file in extracted_files:
                     files[file] = os.path.join(r, file)

         for key, value in files.items():
-            if '.fits' in key:
+
+            if key.endswith('.fits'):
                 tables = []
                 with fits.open(value) as hduList:
                     num_hdus = len(hduList)
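
The change above switches file discovery from extension filtering to the archive's own member list; a standalone sketch of that pattern on a generic zip path (not tied to astroquery):

import os
import zipfile

def extract_and_index(zip_path):
    # Record the member names first, then extract next to the archive, so
    # later processing only touches files that really came from the zip.
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        members = zip_ref.namelist()
        zip_ref.extractall(os.path.dirname(zip_path))
    return {name: os.path.join(os.path.dirname(zip_path), name) for name in members}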
@@ -349,19 +359,20 @@ def __get_data_files(output_file, path):
                         Gaia.correct_table_units(table)
                         tables.append(table)
                 files[key] = tables
-            elif '.xml' in key:
+
+            elif key.endswith('.xml'):
                 tables = []
                 for table in votable.parse(value).iter_tables():
                     tables.append(table)
                 files[key] = tables

-            elif '.csv' in key:
+            elif key.endswith('.csv'):
                 tables = []
                 table = Table.read(value, format='ascii.csv', fast_reader=False)
                 tables.append(table)
                 files[key] = tables

-            elif '.json' in key:
+            elif key.endswith('.json'):
                 tables = []
                 with open(value) as f:
                     data = json.load(f)
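
For reference, the per-extension parsing done by __get_data_files can be condensed into a hedged standalone sketch (simplified: no unit correction, and the JSON branch only loads the raw payload, as in the hunk; anything built from it downstream is out of scope here):

import json

from astropy.io import fits, votable
from astropy.table import Table

def read_datalink_product(path):
    # Simplified stand-in for the dispatch in __get_data_files:
    # return one list of tables per product file, keyed by extension.
    tables = []
    if path.endswith('.fits'):
        with fits.open(path) as hdu_list:
            for hdu in hdu_list[1:]:
                tables.append(Table.read(hdu))
    elif path.endswith('.xml'):
        tables.extend(votable.parse(path).iter_tables())
    elif path.endswith('.csv'):
        tables.append(Table.read(path, format='ascii.csv', fast_reader=False))
    elif path.endswith('.json'):
        with open(path) as f:
            tables.append(json.load(f))  # raw JSON payload, not yet a Table
    return tables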

astroquery/gaia/tests/setup_package.py (+2 -1)

@@ -10,7 +10,8 @@ def get_package_data():
     paths = [os.path.join('data', '*.vot'),
              os.path.join('data', '*.vot.gz'),
              os.path.join('data', '*.json'),
-             os.path.join('data', '*.ecsv')
+             os.path.join('data', '*.ecsv'),
+             os.path.join('data', '*.zip')
              ] # etc, add other extensions
     # you can also enlist files individually by names
     # finally construct and return a dict for the sub module
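
For context, astroquery's setup_package helpers usually return a mapping from the test sub-package to these glob patterns; a hedged sketch of what the full function plausibly looks like after this change (the returned key is an assumption, not shown in the hunk):

import os

def get_package_data():
    # Glob patterns for test data bundled with astroquery.gaia.tests; the
    # '*.zip' entry is what this commit adds so zipped DataLink fixtures ship too.
    paths = [os.path.join('data', '*.vot'),
             os.path.join('data', '*.vot.gz'),
             os.path.join('data', '*.json'),
             os.path.join('data', '*.ecsv'),
             os.path.join('data', '*.zip'),
             ]
    return {'astroquery.gaia.tests': paths}  # assumed sub-package key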
