DestinE-Climate-DT
diff --git a/‎CHANGELOG.md
+4 b/‎CHANGELOG.md
+4
diff --git a/‎aqua/cli/main.py
+11-9 b/‎aqua/cli/main.py
+11-9
diff --git a/‎aqua/lra_generator/lra_generator.py
+132-43 b/‎aqua/lra_generator/lra_generator.py
+132-43
@@ -15,10 +15,14 @@ Attention: this version is not compatible with catalog entries with ecCodes >= 2
 2. ecCodes temporarily restricted to < 2.34
 
 AQUA core complete list:
+- Refactor the fdb-catalog-generator tool to work with data-portfolio repository (#1275)
+- Introduce a function to convert NetCDF to Zarr and to create a Zarr catalog entry for the LRA (#1068)
+- Suppress the warning of missing catalogs in the AQUA console `add` command (#1288)
 - Lumi installation is completely updated to LUMI/23.09 modules (#1290)
 - gsv_intake switches eccodes also for shortname definitions (#1279)
 - Increase compatibility between LRA generator and multi-catalog (#1278)
 - Allow for intake string replacement within LRA-generated catalogs (#1278)
+- Avoid warning for missing intake variable default when calling the `Reader()` (#1287)
 
 AQUA diagnostic complete list:
 - Teleconnections: catalog feature bugfix (#1276)
 
@@ -62,12 +62,12 @@ def execute(self):
 
         # Set the log level
         if args.very_verbose or (args.verbose and args.very_verbose):
-            loglevel = 'DEBUG'
+            self.loglevel = 'DEBUG'
         elif args.verbose:
-            loglevel = 'INFO'
+            self.loglevel = 'INFO'
         else:
-            loglevel = 'WARNING'
-        self.logger = log_configure(loglevel, 'AQUA')
+            self.loglevel = 'WARNING'
+        self.logger = log_configure(self.loglevel, 'AQUA')
 
         command = args.command
         method = self.command_map.get(command, parser_dict['main'].print_help)
@@ -229,7 +229,7 @@ def _install_default_diagnostics(self, diagnostic_type):
                 sys.exit(1)
 
             # Ensure the target directory exists using create_folder
-            create_folder(target_directory, loglevel="WARNING")
+            create_folder(target_directory, loglevel=self.loglevel)
 
             if not os.path.exists(target_file):
                 self.logger.debug('Copying from %s to %s', source_file, target_file)
@@ -267,7 +267,7 @@ def _install_editable_diagnostics(self, diagnostic_type, editable):
                 sys.exit(1)
 
             # Ensure the target directory exists using create_folder
-            create_folder(target_directory, loglevel="WARNING")
+            create_folder(target_directory, loglevel=self.loglevel)
 
             if not os.path.exists(target_file):
                 self.logger.debug('Linking from %s to %s', source_file, target_file)
@@ -389,7 +389,7 @@ def add(self, args):
             args (argparse.Namespace): arguments from the command line
         """
         print('Adding the AQUA catalog', args.catalog)
-        self._check()
+        self._check(silent=True)
 
         if args.editable is not None:
             self._add_catalog_editable(args.catalog, args.editable)
@@ -564,10 +564,12 @@ def remove_file(self, args):
                               kind, file)
             sys.exit(1)
 
-    def _check(self):
+    def _check(self, silent=False):
         """check installation"""
+
+        checklevel = 'ERROR' if silent else self.loglevel
         try:
-            self.configpath = ConfigPath().configdir
+            self.configpath = ConfigPath(loglevel=checklevel).configdir
             self.configfile = os.path.join(self.configpath, 'config-aqua.yaml')
             self.logger.debug('AQUA found in %s', self.configpath)
         except FileNotFoundError:
 
@@ -19,10 +19,8 @@
 from aqua.util import create_folder, generate_random_string
 from aqua.util import dump_yaml, load_yaml
 from aqua.util import ConfigPath, file_is_complete
-from aqua.lra_generator.lra_util import move_tmp_files, replace_intake_vars
-
-
-#from aqua.lra_generator.lra_util import check_correct_ifs_fluxes
+from aqua.util import create_zarr_reference
+from aqua.lra_generator.lra_util import move_tmp_files, list_lra_files_complete, replace_intake_vars
 
 
 class LRAgenerator():
@@ -94,47 +92,51 @@ def __init__(self,
             self.logger.warning('IMPORTANT: no file will be created, this is a dry run')
 
         self.nproc = int(nproc)
-        self.tmpdir = tmpdir
+        if tmpdir is None:
+            self.logger.warning('No tmpdir specifield, will use outdir')
+            self.tmpdir = os.path.join(outdir, 'tmp')
+        else:
+            self.tmpdir = tmpdir
+
         if self.dask:
             self.logger.info('Running dask.distributed with %s workers', self.nproc)
-            if not self.tmpdir:
-                raise KeyError('Please specify tmpdir for dask.distributed.')
 
         self.tmpdir = os.path.join(self.tmpdir, 'LRA_' +
-                                    generate_random_string(10))
+                                   generate_random_string(10))
 
-        if model:
+        # safechecks
+        if model is not None:
             self.model = model
         else:
             raise KeyError('Please specify model.')
 
-        if exp:
+        if exp is not None:
             self.exp = exp
         else:
             raise KeyError('Please specify experiment.')
 
-        if source:
+        if source is not None:
             self.source = source
         else:
             raise KeyError('Please specify source.')
 
+        if var is not None:
+            self.var = var
+        else:
+            raise KeyError('Please specify variable string or list.')
+
+        if resolution is not None:
+            self.resolution = resolution
+        else:
+            raise KeyError('Please specify resolution.')
+        self.logger.info('Variable(s) to be processed: %s', self.var)
+
         self.kwargs = kwargs
 
         Configurer = ConfigPath(configdir=configdir)
         self.configdir = Configurer.configdir
         self.catalog = catalog
 
-        # Initialize variable(s)
-        self.var = var
-
-        if not self.var:
-            raise KeyError('Please specify variable string or list.')
-        self.logger.info('Variable(s) to be processed: %s', self.var)
-
-        self.resolution = resolution
-        if not self.resolution:
-            raise KeyError('Please specify resolution.')
-
         self.frequency = frequency
         if not self.frequency:
             self.logger.info('Frequency not specified, no time averagin will be performed.')
@@ -144,7 +146,7 @@ def __init__(self,
             'units': 'days since 1850-01-01 00:00:00',
             'calendar': 'standard',
             'dtype': 'float64'}
-        
+
         self.var_encoding = {
             'dtype': 'float64',
             'zlib': True,
@@ -162,7 +164,10 @@ def __init__(self,
         self.last_record = None
         self.check = False
 
-        # Create LRA folder
+        # Create LRA folders
+        if outdir is None:
+            raise KeyError('Please specify outdir.')
+
         self.outdir = os.path.join(outdir, self.model, self.exp, self.resolution)
 
         if self.frequency:
@@ -205,7 +210,7 @@ def retrieve(self):
 
         self.logger.info('Retrieving data...')
         self.data = self.reader.retrieve(var=self.var)
-        
+
         self.logger.debug(self.data)
 
     def generate_lra(self):
@@ -223,10 +228,10 @@ def generate_lra(self):
 
         else:  # Only one variable
             self._write_var(self.var)
-                
+
         self.logger.info('Move tmp files to output directory')
         move_tmp_files(self.tmpdir, self.outdir)
-            
+
         # Cleaning
         self.data.close()
         self._close_dask()
@@ -282,6 +287,97 @@ def create_catalog_entry(self):
         # dump the update file
         dump_yaml(outfile=catalogfile, cfg=cat_file)
 
+    def create_zarr_entry(self, verify=True):
+        """
+        Create a Zarr entry in the catalog for the LRA.
+
+        JSON references built by create_zarr_reference for the LRA files become
+        the urlpath of the catalog source 'lra-<resolution>-<frequency>-zarr'.
+
+        Args:
+            verify: open the new entry with the Reader and retrieve it;
+                    on KeyError/ValueError the entry is removed again
+
+        Raises:
+            FileNotFoundError: if no reference could be created
+        """
+
+        entry_name = f'lra-{self.resolution}-{self.frequency}-zarr'
+        full_dict, partial_dict = list_lra_files_complete(self.outdir)
+        self.logger.info('Creating zarr files for %s %s %s', self.model, self.exp, entry_name)
+
+        # extra zarr only directory
+        zarrdir = os.path.join(self.outdir, 'zarr')
+        create_folder(zarrdir, loglevel=self.loglevel)
+
+        # full files give yearly references, partial files monthly ones (full listed first)
+        urlpath = []
+        groups = (('yearly', 'full', full_dict), ('monthly', 'partial', partial_dict))
+        for chunk, kind, files in groups:
+            for key, value in files.items():
+                self.logger.debug('Creating zarr files for %s files %s', kind, key)
+                if not value:
+                    continue
+                jsonfile = os.path.join(zarrdir, f'lra-{chunk}-{key}.json')
+                jsonfile = create_zarr_reference(value, jsonfile, loglevel=self.loglevel)
+                # a None return is taken as "no reference produced" and skipped
+                if jsonfile is not None:
+                    urlpath.append(f'reference::{jsonfile}')
+
+        if not urlpath:
+            raise FileNotFoundError('No files found to create zarr reference')
+
+        # apply intake replacement: works on strings, so loop over the list
+        urlpath = [replace_intake_vars(catalog=self.catalog, path=path) for path in urlpath]
+
+        # load, add the block and close
+        catalogfile = os.path.join(self.configdir, 'catalogs', self.catalog,
+                                   'catalog', self.model, self.exp + '.yaml')
+        cat_file = load_yaml(catalogfile)
+
+        # if entry exists
+        if entry_name in cat_file['sources']:
+            self.logger.info('Catalog entry for %s %s %s exists, updating the urlpath only...',
+                             self.model, self.exp, entry_name)
+            cat_file['sources'][entry_name]['args']['urlpath'] = urlpath
+
+        else:
+            self.logger.info('Creating zarr catalog entry %s %s %s', self.model, self.exp, entry_name)
+            # define the block to be uploaded into the catalog
+            block_cat = {
+                'driver': 'zarr',
+                'description': f'LRA data {self.frequency} at {self.resolution} reference on zarr',
+                'args': {
+                    'consolidated': False,
+                    'combine': 'by_coords',
+                    'urlpath': urlpath
+                },
+                'metadata': {
+                    'source_grid_name': 'lon-lat',
+                },
+                'fixer_name': False
+            }
+            cat_file['sources'][entry_name] = block_cat
+
+        dump_yaml(outfile=catalogfile, cfg=cat_file)
+
+        # verify the zarr entry makes sense
+        if verify:
+            self.logger.info('Verifying that zarr entry can be loaded...')
+            try:
+                # read back the entry written above rather than a fixed source name
+                reader = Reader(model=self.model, exp=self.exp, source=entry_name)
+                reader.retrieve()
+                self.logger.info('Zarr entry successfully created!!!')
+            except (KeyError, ValueError) as e:
+                self.logger.error('Cannot load zarr LRA with error --> %s', e)
+                self.logger.error('Zarr source is not accessible by the Reader likely due to irregular amount of NetCDF file')
+                self.logger.error('To avoid issues in the catalog, the entry will be removed')
+                self.logger.error('In case you want to keep it, please run with verify=False')
+                cat_file = load_yaml(catalogfile)
+                del cat_file['sources'][entry_name]
+                dump_yaml(outfile=catalogfile, cfg=cat_file)
+
     def _set_dask(self):
         """
         Set up dask cluster
@@ -319,15 +415,15 @@ def _concat_var_year(self, var, year):
         from the same year
         """
 
-        #infiles = os.path.join(self.outdir,
+        # infiles = os.path.join(self.outdir,
         #                       f'{var}_{self.exp}_{self.resolution}_{self.frequency}_{year}??.nc')
         infiles = self.get_filename(var, year, month = '??')
         if len(glob.glob(infiles)) == 12:
             xfield = xr.open_mfdataset(infiles)
             self.logger.info('Creating a single file for %s, year %s...', var, str(year))
             outfile = self.get_filename(var, year)
-            #outfile = os.path.join(self.tmpdir,
-            #                       f'{var}_{self.exp}_{self.resolution}_{self.frequency}_{year}.nc')
+            # outfile = os.path.join(self.tmpdir,
+            #                        f'{var}_{self.exp}_{self.resolution}_{self.frequency}_{year}.nc')
             # clean older file
             if os.path.exists(outfile):
                 os.remove(outfile)
@@ -338,7 +434,6 @@ def _concat_var_year(self, var, year):
                 self.logger.info('Cleaning %s...', infile)
                 os.remove(infile)
 
-
     def get_filename(self, var, year=None, month=None, tmp=False):
         """Create output filenames"""
 
@@ -437,7 +532,7 @@ def _remove_regridded(self, data):
     #                 continue
     #             else:
     #                 self.logger.warning('Monthly file %s already exists, overwriting as requested...', outfile)
-            
+
     #         # real writing
     #         if self.definitive:
     #             self.write_chunk(temp_data, outfile)
@@ -480,7 +575,7 @@ def _write_var_catalog(self, var):
         for year in years:
 
             self.logger.info('Processing year %s...', str(year))
-            yearfile = self.get_filename(var, year = year)
+            yearfile = self.get_filename(var, year=year)
 
             # checking if file is there and is complete
             filecheck = file_is_complete(yearfile, loglevel=self.loglevel)
@@ -498,7 +593,7 @@ def _write_var_catalog(self, var):
                 months = [months[0]]
             for month in months:
                 self.logger.info('Processing month %s...', str(month))
-                outfile = self.get_filename(var, year = year, month = month)
+                outfile = self.get_filename(var, year=year, month=month)
 
                 # checking if file is there and is complete
                 filecheck = file_is_complete(outfile, loglevel=self.loglevel)
@@ -511,12 +606,9 @@ def _write_var_catalog(self, var):
 
                 month_data = year_data.sel(time=year_data.time.dt.month == month)
 
-                #self.logger.debug(month_data.mean().values)
-                #self.logger.debug(month_data)
-
                 # real writing
                 if self.definitive:
-                    tmpfile = self.get_filename(var, year = year, month = month, tmp = True)
+                    tmpfile = self.get_filename(var, year=year, month=month, tmp=True)
                     schunk = time()
                     self.write_chunk(month_data, tmpfile)
                     tchunk = time() - schunk
@@ -537,7 +629,7 @@ def _write_var_catalog(self, var):
     def write_chunk(self, data, outfile):
         """Write a single chunk of data - Xarray Dataset - to a specific file
         using dask if required and monitoring the progress"""
-        
+
         # update data attributes for history
         if self.frequency:
             log_history(data, f'regridded from {self.reader.src_grid_name} to {self.resolution} and from frequency {self.reader.orig_freq} to {self.frequency} through LRA generator')                
@@ -576,13 +668,10 @@ def write_chunk(self, data, outfile):
                 avg_mem = np.mean(array_data[:, 1])/1e9
                 max_mem = np.max(array_data[:, 1])/1e9
                 self.logger.info('Avg memory used: %.2f GiB, Peak memory used: %.2f GiB', avg_mem, max_mem)
-                
+
         else:
             with ProgressBar():
                 write_job.compute()
 
         del write_job
         self.logger.info('Writing file %s successfull!', outfile)
-
-
-