diff --git a/doc/config.rst b/doc/config.rst index c000590..4c29c29 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -226,4 +226,17 @@ SBE 37 MicroCAT SBE 26plus Seagauge ------------------- +For Seagauge tides: +- ``file_type``: set to ``.tid`` or ``.wb`` to specify raw data file type +- ``calculated_tide_interval``: enter the desired tide interval when using the .wb file +- ``calculated_tide_interval_units``: tide interval units +- ``calculated_tide_duration``: enter the desired tide duration when using the .wb file +- ``calculated_tide_duration_units``: tide duration units - All the _min, _max, _bad_ens, etc. options available to the EXO. + +For Seagauge waves: +- ``calculated_wave_interval``: enter the desired wave interval +- ``calculated_wave_interval_units``: wave interval units +- ``wp_min``, ``wp_max``: min/max allowable wave period, in seconds +- ``wh_min``, ``wh_max``: min/max allowable wave height, in meters +- ``wp_ratio``: maximum allowable ratio between peak period (``wp_peak``) and mean period (``wp_4060``) \ No newline at end of file diff --git a/doc/sg.rst b/doc/sg.rst deleted file mode 100644 index cc9c6cd..0000000 --- a/doc/sg.rst +++ /dev/null @@ -1,4 +0,0 @@ -Seabird SBE 26plus Seagauge -*************************** - -Starting from exported .tid file, use :doc:`runots.py ` to process using the two :doc:`configuration files `. diff --git a/doc/sgtid.rst b/doc/sgtid.rst new file mode 100644 index 0000000..fb0eddd --- /dev/null +++ b/doc/sgtid.rst @@ -0,0 +1,4 @@ +SBE 26plus Seagauge (tides) +*************************** + +Starting from exported .tid or .wb file, use :doc:`runots.py ` to process using the two :doc:`configuration files `. \ No newline at end of file diff --git a/doc/sgwvs.rst b/doc/sgwvs.rst new file mode 100644 index 0000000..742b477 --- /dev/null +++ b/doc/sgwvs.rst @@ -0,0 +1,4 @@ +SBE 26plus Seagauge (waves) +*************************** + +Starting from exported .wb file, use :doc:`runots.py ` to process using the two :doc:`configuration files `. \ No newline at end of file diff --git a/setup.py b/setup.py index 49cbef0..06af06c 100644 --- a/setup.py +++ b/setup.py @@ -80,9 +80,6 @@ "runtcmcdf2nc.py=stglib.core.runcmd:runtcmcdf2nc", "runmcasc2cdf.py=stglib.core.runcmd:runmcasc2cdf", "runmccdf2nc.py=stglib.core.runcmd:runmccdf2nc", - "runsgtid2cdf.py=stglib.core.runcmd:runsgtid2cdf", - "runsgcdf2nc.py=stglib.core.runcmd:runsgcdf2nc", - "runsgnc2waves.py=stglib.core.runcmd:runsgnc2waves", "runots.py=stglib.core.runcmd:runots", ], }, diff --git a/stglib/__init__.py b/stglib/__init__.py index b5124c1..8098cd2 100644 --- a/stglib/__init__.py +++ b/stglib/__init__.py @@ -23,5 +23,6 @@ from .aqd import aqdutils from .core import cmd, filter, qaqc, utils, waves from .core.utils import read_globalatts +from .sg import sgutils __version__ = _version.get_versions()["version"] diff --git a/stglib/core/cmd.py b/stglib/core/cmd.py index 5b3427a..8d635b7 100644 --- a/stglib/core/cmd.py +++ b/stglib/core/cmd.py @@ -120,9 +120,13 @@ def runots_parser(): addinst2cdf(instsp, "asc2cdf") addcdf2nc(instsp) - instsp = add_instrument(subparsers, "sg", "Seabird Seagauge") + instsp = add_instrument(subparsers, "sgtid", "Seabird Seagauge Tides") addinst2cdf(instsp, "tid2cdf") addcdf2nc(instsp) + + instsp = add_instrument(subparsers, "sgwvs", "Seabird Seagauge Waves") + addinst2cdf(instsp, "wb2cdf") + addcdf2nc(instsp) addnc2waves(instsp) instsp = add_instrument(subparsers, "tcm", "Lowell Tilt Current Meter") @@ -543,29 +547,3 @@ def mccdf2nc_parser(): cdfarg(parser) return parser - - -def sgtid2cdf_parser(): - description = "Convert SBE 26plus Seagauge .tid files to raw .cdf format. Run this script from the directory containing Seagauge files." - parser = argparse.ArgumentParser(description=description) - gattsarg(parser) - yamlarg(parser) - - return parser - - -def sgcdf2nc_parser(): - description = "Convert raw SBE 26plus Seagauge .cdf format to processed .nc files, optionally compensating for atmospheric pressure" - parser = argparse.ArgumentParser(description=description) - cdfarg(parser) - atmarg(parser) - - return parser - - -def sgnc2waves_parser(): - description = "Generate SBE 26plus Seagauge waves statistics file" - parser = argparse.ArgumentParser(description=description) - ncarg(parser) - - return parser diff --git a/stglib/core/runcmd.py b/stglib/core/runcmd.py index 4abdc12..29804e0 100644 --- a/stglib/core/runcmd.py +++ b/stglib/core/runcmd.py @@ -349,32 +349,12 @@ def runmcasc2cdf(args=None): stglib.mc.asc_to_cdf(metadata) -def runsgcdf2nc(args=None): - if not args: - args = stglib.cmd.sgcdf2nc_parser().parse_args() - - run_cdf_to_nc(stglib.sg.cdf_to_nc, args) - - -def runsgtid2cdf(args=None): - if not args: - args = stglib.cmd.sgtid2cdf_parser().parse_args() - - metadata = get_metadata(args) - - stglib.sg.tid_to_cdf(metadata) - - -def runsgnc2waves(args=None): - if not args: - args = stglib.cmd.sgnc2waves_parser().parse_args() - - stglib.sg.nc_to_waves(args.ncname) - - def runots(): args = stglib.cmd.runots_parser().parse_args() + if "2cdf" in args.step: + metadata = get_metadata(args) + if args.instrument == "aqd": if args.step == "hdr2cdf": runaqdhdr2cdf(args) @@ -446,13 +426,18 @@ def runots(): runmcasc2cdf(args) elif args.step == "cdf2nc": runmccdf2nc(args) - elif args.instrument == "sg": + elif args.instrument == "sgtid": if args.step == "tid2cdf": - runsgtid2cdf(args) + stglib.sg.tid2cdf.tid_to_cdf(metadata) + elif args.step == "cdf2nc": + run_cdf_to_nc(stglib.sg.cdf2nc.cdf_to_nc, args) + elif args.instrument == "sgwvs": + if args.step == "wb2cdf": + stglib.sg.wvswb2cdf.wb_to_cdf(metadata) elif args.step == "cdf2nc": - runsgcdf2nc(args) + run_cdf_to_nc(stglib.sg.wvscdf2nc.cdf_to_nc, args) elif args.step == "nc2waves": - runsgnc2waves(args) + stglib.sg.wvsnc2waves.nc_to_waves(args.ncname) elif args.instrument == "tcm": if args.step == "csv2cdf": runtcmcsv2cdf(args) diff --git a/stglib/core/utils.py b/stglib/core/utils.py index dca54fc..0fa0d61 100644 --- a/stglib/core/utils.py +++ b/stglib/core/utils.py @@ -1398,3 +1398,30 @@ def _todict(matobj): else: dic[strg] = elem return dic + + +def create_water_level_var(ds): + """ + Create water level variable from NAVD88 sensor height + """ + + if ds.z.attrs["geopotential_datum_name"] == "NAVD88": + + if "sample" in ds.dims: + ds["water_level"] = xr.DataArray( + ds["P_1ac"].squeeze().mean(dim="sample") + ds["z"].values + ) + else: + ds["water_level"] = ds["P_1ac"] + ds["z"].values + + ds["water_level"].attrs["long_name"] = "Water level NAVD88" + ds["water_level"].attrs["units"] = "m" + ds["water_level"].attrs[ + "standard_name" + ] = "sea_surface_height_above_geopotential_datum" + ds["water_level"].attrs["geopotential_datum_name"] = "NAVD88" + else: + print( + "Cannot create water_level variable without height_above_geopotential_datum relative to NAVD88 in global attributes file." + ) + return ds diff --git a/stglib/sg/__init__.py b/stglib/sg/__init__.py new file mode 100644 index 0000000..0970263 --- /dev/null +++ b/stglib/sg/__init__.py @@ -0,0 +1 @@ +from . import cdf2nc, sgutils, tid2cdf, wvscdf2nc, wvsnc2waves, wvswb2cdf diff --git a/stglib/sg/cdf2nc.py b/stglib/sg/cdf2nc.py new file mode 100644 index 0000000..3163ceb --- /dev/null +++ b/stglib/sg/cdf2nc.py @@ -0,0 +1,208 @@ +import math + +import numpy as np +import pandas as pd +import xarray as xr + +from ..core import utils +from . import sgutils + + +def cdf_to_nc(cdf_filename, atmpres=None): + """ + Load a raw .cdf file and generate a processed .nc file + """ + + # Check for atmpres correction file + # Atmpres file is required for seagauge because pressure is measured as absolute pressure + if atmpres is None: + raise FileNotFoundError( + "The atmpres file does not exist. Atmpres file is required for Seagauge because pressure is measured as absolute pressure." + ) + + # Load raw .cdf data + ds = xr.open_dataset(cdf_filename) + + # Atmospheric pressure correction + if ds.attrs["file_type"] == ".tid": + ds = utils.atmos_correct(ds, atmpres) + elif ds.attrs["file_type"] == ".wb": + ds = sgutils.atmos_correct_burst(ds, atmpres) + + # If .wb file, split into tide bursts + if ds.attrs["file_type"] == ".wb": + ds = avg_tide_bursts(ds) + + # Rename variables to CF compliant names + ds = ds_rename_vars(ds) + + # Add attributes + ds = sgutils.ds_add_attrs(ds) + + # Edit metadata depending on file type + if ds.attrs["file_type"] == ".tid": + ds = ds_drop_tid(ds) + ds = ds.drop("sample") # Drop sample variable + elif ds.attrs["file_type"] == ".wb": + ds = ds_drop_wb(ds) + + # Call QAQC + ds = sgutils.sg_qaqc(ds) + + # Run utilities + ds = utils.clip_ds(ds) + ds = utils.create_nominal_instrument_depth(ds) + ds = utils.create_z(ds) + ds = utils.create_water_level_var(ds) + ds = utils.create_water_depth_var(ds) + ds = utils.ds_add_lat_lon(ds) + ds = utils.add_start_stop_time(ds) + ds = utils.add_delta_t(ds) + ds = utils.add_min_max(ds) + + # Write to .nc file + print("Writing cleaned/trimmed data to .nc file") + nc_filename = ds.attrs["filename"] + "s-tide-a.nc" + + ds.to_netcdf( + nc_filename, unlimited_dims=["time"], encoding={"time": {"dtype": "i4"}} + ) + utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"]) + + print(f"Done writing netCDF file {nc_filename}") + + +def ds_rename_vars(ds): + """ + Rename variables to be CF compliant + """ + varnames = {"Temp": "T_28"} + + # Check to make sure they exist before trying to rename + newvars = {} + for k in varnames: + if k in ds: + newvars[k] = varnames[k] + return ds.rename(newvars) + + +def ds_drop_tid(ds): + """ + Drop global attribute metadata not needed for .tid file + """ + gatts = [ + "WaveInterval", + "WaveIntervalUnits", + "WaveSamples", + "sample_rate", + "sample_rate_units", + "BurstDuration", + "BurstDurationUnits", + "WaveBurstsPerDay", + "NumberOfWaveBursts", + ] + + # Check to make sure they exist + for k in gatts: + if k in ds.attrs: + del ds.attrs[k] + return ds + + +def ds_drop_wb(ds): + """ + Drop global attribute metadata not needed for .wb file + """ + gatts = [ + "WaveInterval", + "WaveIntervalUnits", + "WaveSamples", + "sample_rate", + "sample_rate_units", + "BurstDuration", + "BurstDurationUnits", + "WaveBurstsPerDay", + "NumberOfWaveBursts", + "TideInterval", + "TideIntervalUnits", + "TideDuration", + "TideDurationUnits", + "TideSamplesPerDay", + "NumberOfTideMeasurements", + ] + + # Check to make sure they exist + for k in gatts: + if k in ds.attrs: + del ds.attrs[k] + return ds + + +def avg_tide_bursts(ds): + + # Calculate how many rows to subdivide each wave burst (round up) + rows = math.ceil( + float(ds.attrs["BurstDuration"]) / float(ds.attrs["calculated_tide_interval"]) + ) + + # Calculate how many columns to subdivide each wave burst + cols = int( + float(ds.attrs["calculated_tide_interval"]) * float(ds.attrs["sample_rate"]) + ) + + # Calculate number of values to average based on tide duration + values_avg = int( + float(ds.attrs["calculated_tide_duration"]) * float(ds.attrs["sample_rate"]) + ) + + # Define time interval + delta_t = int(ds.attrs["calculated_tide_interval"]) + delta_t = f"{delta_t}s" + + # Reshape P_1 and P_1ac pressure bursts and average + P1_avg = [] + P1ac_avg = [] + timestamp = [] + for t in range(0, ds["time"].size): + + # Make timestamp + new_time = np.array( + pd.date_range(start=ds.time[t].values, periods=rows, freq=delta_t) + ) + timestamp.append(new_time) + + # Reshape adding nans to end of array + no_pads = rows * cols - int(ds.attrs["WaveSamples"]) + new_P1_burst = np.pad( + ds.P_1[t].values, (0, no_pads), mode="constant", constant_values=np.nan + ).reshape(rows, cols) + new_P1ac_burst = np.pad( + ds.P_1ac[t].values, (0, no_pads), mode="constant", constant_values=np.nan + ).reshape(rows, cols) + + # Average burst for tide duration + for j in range(0, new_P1_burst.shape[0]): + avg_pres_P1 = np.mean(new_P1_burst[j, slice(0, values_avg)]) + avg_pres_P1ac = np.mean(new_P1ac_burst[j, slice(0, values_avg)]) + P1_avg.append(avg_pres_P1) + P1ac_avg.append(avg_pres_P1ac) + + # Append all timestamps to array + date = np.concatenate(timestamp) + + # Copy global attributes + attr_copy = ds.attrs.copy() + + # Delete old xarray because no longer need burst data + del ds + + # Make new xarray with averaged data + ds = xr.Dataset( + data_vars=dict(P_1=(["time"], P1_avg), P_1ac=(["time"], P1ac_avg)), + coords=dict(time=date), + ) + + # Add back global attributes + ds.attrs = attr_copy + + return ds diff --git a/stglib/sg.py b/stglib/sg/sgutils.py similarity index 65% rename from stglib/sg.py rename to stglib/sg/sgutils.py index 1e31d8d..d86176a 100644 --- a/stglib/sg.py +++ b/stglib/sg/sgutils.py @@ -1,222 +1,10 @@ +import math + +import numpy as np import pandas as pd import xarray as xr -from .core import filter, qaqc, utils - - -def read_tid(filnam, encoding="utf-8"): - """Read data from an SBE 26plus Seagauge .tid file into an xarray dataset.""" - df = pd.read_csv( - filnam, - header=None, - sep=r"\s+", - names=["Sample", "Date", "Time", "P_1", "Temp"], - parse_dates={"time": ["Date", "Time"]}, - encoding=encoding, - index_col=False, - ) - df.set_index("time", inplace=True) - sg = df.to_xarray() - return sg - - -def tid_to_cdf(metadata): - """ - Load a raw .tid and .hex file and generate a .cdf file - """ - basefile = metadata["basefile"] - - # Get metadata from .hex file - hexmeta = read_hex(basefile + ".hex") - - # Append to metadata variable - metadata.update(hexmeta) - - # Read in data - ds = read_tid(basefile + ".tid") - - # Convert pressure from psia to dbar - ds["P_1"] = ds.P_1 / 14.503773800722 * 10 - - ds = utils.write_metadata(ds, metadata) - - ds = utils.ensure_cf(ds) - - cdf_filename = ds.attrs["filename"] + "-raw.cdf" - - ds.to_netcdf(cdf_filename, unlimited_dims=["time"]) - - print(f"Finished writing data to {cdf_filename}") - - return ds - - -def cdf_to_nc(cdf_filename, atmpres=None): - """ - Load a raw .cdf file and generate a processed .nc file - """ - - # Load raw .cdf data - ds = xr.open_dataset(cdf_filename) - - # remove units in case we change and we can use larger time steps - ds.time.encoding.pop("units") - - # Drop sample variable - ds = ds.drop_vars("Sample") - - # Rename variables to CF compliant names - ds = ds_rename_vars(ds) - - # Atmospheric pressure correction - if atmpres is not None: - ds = atmos_correct(ds, atmpres) - - # Add attributes - ds = ds_add_attrs(ds) - - # Call QAQC - ds = sg_qaqc(ds) - - # Run utilities - ds = utils.clip_ds(ds) - ds = utils.ds_add_lat_lon(ds) - ds = utils.create_nominal_instrument_depth(ds) - ds = utils.create_z(ds) - ds = utils.add_start_stop_time(ds) - ds = utils.add_min_max(ds) - ds = utils.add_delta_t(ds) - - # Write to .nc file - print("Writing cleaned/trimmed data to .nc file") - nc_filename = ds.attrs["filename"] + "-a.nc" - - ds.to_netcdf( - nc_filename, unlimited_dims=["time"], encoding={"time": {"dtype": "i4"}} - ) - utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"]) - - print(f"Done writing netCDF file {nc_filename}") - - -def ds_rename_vars(ds): - """ - Rename variables to be CF compliant - """ - varnames = {"Temp": "T_28"} - - # Check to make sure they exist before trying to rename - newvars = {} - for k in varnames: - if k in ds: - newvars[k] = varnames[k] - return ds.rename(newvars) - - -def ds_add_attrs(ds): - """ - Add attributes: units, standard name from CF website, long names - """ - ds = utils.ds_coord_no_fillvalue(ds) - - ds["time"].attrs.update( - {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"} - ) - - if "T_28" in ds: - ds["T_28"].attrs.update( - { - "units": "degree_C", - "standard_name": "sea_water_temperature", - "long_name": "Temperature", - } - ) - if "P_1" in ds: - ds["P_1"].attrs.update( - { - "units": "dbar", - "long_name": "Uncorrected pressure", - "standard_name": "sea_water_pressure", - } - ) - if "P_1ac" in ds: - ds["P_1ac"].attrs.update( - { - "units": "dbar", - "long_name": "Corrected pressure", - "standard_name": "sea_water_pressure_due_to_sea_water", - } - ) - return ds - - -def sg_qaqc(ds): - """ - QA/QC - Trim Seagauge data based on metadata - """ - - varlist = ["T_28", "P_1", "P_1ac"] - - [varlist.append(k) for k in ds.data_vars if k not in varlist] - - for var in varlist: - # check if any filtering before other qaqc - ds = filter.apply_butter_filt(ds, var) - ds = filter.apply_med_filt(ds, var) - - ds = qaqc.trim_min(ds, var) - - ds = qaqc.trim_max(ds, var) - - ds = qaqc.trim_min_diff(ds, var) - - ds = qaqc.trim_min_diff_pct(ds, var) - - ds = qaqc.trim_max_diff(ds, var) - - ds = qaqc.trim_max_diff_pct(ds, var) - - ds = qaqc.trim_med_diff(ds, var) - - ds = qaqc.trim_med_diff_pct(ds, var) - - ds = qaqc.trim_bad_ens(ds, var) - - for var in varlist: - ds = qaqc.trim_by_any( - ds, var - ) # re-run and trim by other variables as necessary - - return ds - - -def atmos_correct(ds, atmpres): - met = xr.load_dataset(atmpres) - # need to save attrs before the subtraction, otherwise they are lost - attrs = ds["P_1"].attrs - # need to set a tolerance since we can be off by a couple seconds somewhere - # TODO is this still needed? - ds["P_1ac"] = xr.DataArray( - ds["P_1"] - - met["atmpres"].reindex_like(ds["P_1"], method="nearest", tolerance="5s") - - met["atmpres"].attrs["offset"] - ) - ds["P_1ac"].attrs = attrs - - ds.attrs["atmospheric_pressure_correction_file"] = atmpres - ds.attrs["atmospheric_pressure_correction_offset_applied"] = met["atmpres"].attrs[ - "offset" - ] - - histtext = f"Atmospherically corrected using time-series from {atmpres} and offset of {met['atmpres'].offset}" - - ds = utils.insert_history(ds, histtext) - - # Also add it as a note - ds["P_1ac"].attrs["note"] = histtext - - return ds +from ..core import filter, qaqc, utils def read_hex(filnam): @@ -235,7 +23,7 @@ def read_hex(filnam): col = row.split() hexmeta["InstrumentType"] = col[0][1:] + " " + col[1] hexmeta["FirmwareVersion"] = col[3] - hexmeta["InstrumentSerialNumber"] = col[5] + hexmeta["serial_number"] = col[5] elif "quartz pressure sensor" in row: col = row.split() hexmeta["PressureSensorSerial"] = col[6][:-1] @@ -252,8 +40,8 @@ def read_hex(filnam): elif "wave samples/burst" in row: col = row.split() hexmeta["WaveSamples"] = col[0][1:] - hexmeta["WaveSampleRate"] = col[4] - hexmeta["WaveSampleRateUnits"] = col[5][:-1] + hexmeta["sample_rate"] = col[4] + hexmeta["sample_rate_units"] = col[5][:-1] hexmeta["BurstDuration"] = col[8] hexmeta["BurstDurationUnits"] = col[9] elif "tide samples/day" in row: @@ -336,3 +124,203 @@ def read_hex(filnam): hexmeta["TemperatureCalibrationTA3"] = float(col[3]) f.close() return hexmeta + + +def read_wb(filnam, encoding="utf-8"): + """Read data from an SBE 26plus Seagauge .wb file into an xarray dataset.""" + + burst_no = [] + start_time = [] + values = [] + pressure = [] + + with open(filnam) as file: + for line in file: + if "SBE" in line: + continue + elif "*" in line: + col = line.split() + burst_no.append(int(col[1])) + start_time.append(int(col[2])) + sample_no = int(col[4]) + sample_rows = math.floor( + sample_no / 4 + ) # Take total number of samples in each burst and divide by 4 columns to get number of rows to iterate + + # Loop through rows for each burst number + for j in range(sample_rows): + + row = file.readline().rstrip() + col = row.split() + values.append(col) + + # Convert to numpy float and reshape to 1 row + burst = np.float64(values) + burst = np.reshape(burst, (1, -1)) + + # Convert back to list to append bursts + burst = burst.tolist() + pressure.append(burst) + + # Clear values for next burst + values = [] + file.close() + + # Convert pressure to numpy array + pressure = np.float64(pressure) + pressure = pressure.squeeze() + + # Convert time + start_time = int_to_date(np.array(start_time)) + + # Create sample variable + sample = list(range(1, sample_no + 1)) + + # Make xarray + sg = xr.Dataset( + data_vars=dict( + burst_number=(["time"], burst_no), + sample=(["sample"], sample), + P_1=(["time", "sample"], pressure), + ), + coords=dict(time=start_time), + ) + + return sg + + +def int_to_date(int_time): + """Read integer time in seconds since January 1, 2000 and convert to datetime""" + + dt = pd.Timestamp("2000-01-01T00:00:00") + pd.to_timedelta(int_time, unit="s") + + # This gave me a nanosecond precision error + # t0 = np.datetime64("2000-01-01T00:00:00") + # dt = t0 + int_time.astype("timedelta64[s]") + + return dt + + +def atmos_correct_burst(ds, atmpres): + met = xr.load_dataset(atmpres) + pressure = [] + + # Apply the correction for each burst in turn + ds["P_1ac"] = xr.full_like(ds["P_1"], np.nan) + for burst in ds.burst_number: + burst_pres = ( + ds["P_1"][burst].values + - met["atmpres"][burst].values + - met["atmpres"].offset + ) + burst_pres = np.reshape(burst_pres, (1, -1)) + + # Convert back to list to append bursts + burst_pres = burst_pres.tolist() + pressure.append(burst_pres) + + # Convert to xarray + pressure = xr.DataArray.squeeze( + xr.DataArray(pressure, dims=["time", "one", "sample"], name="P_1ac") + ) + ds = xr.merge([ds, pressure]) + + ds = utils.insert_history( + ds, + f"Atmospherically correcting using time-series from {atmpres} and offset of {met['atmpres'].offset}", + ) + ds.attrs["atmospheric_pressure_correction_file"] = atmpres + ds.attrs["atmospheric_pressure_correction_offset_applied"] = met["atmpres"].attrs[ + "offset" + ] + if "comment" in met["atmpres"].attrs: + ds.attrs["atmospheric_pressure_correction_comment"] = met["atmpres"].attrs[ + "comment" + ] + + return ds + + +def ds_add_attrs(ds): + """ + Add attributes: units, standard name from CF website, long names + """ + ds = utils.ds_coord_no_fillvalue(ds) + + ds["time"].attrs.update( + {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"} + ) + + if "T_28" in ds: + ds["T_28"].attrs.update( + { + "units": "degree_C", + "standard_name": "sea_water_temperature", + "long_name": "Temperature", + } + ) + if "P_1" in ds: + ds["P_1"].attrs.update( + { + "units": "dbar", + "long_name": "Uncorrected pressure", + "standard_name": "sea_water_pressure", + } + ) + if "P_1ac" in ds: + ds["P_1ac"].attrs.update( + { + "units": "dbar", + "long_name": "Corrected pressure", + "standard_name": "sea_water_pressure_due_to_sea_water", + } + ) + if "sample" in ds: + ds["sample"].attrs.update( + { + "units": "1", + "long_name": "sample number", + } + ) + return ds + + +def sg_qaqc(ds): + """ + QA/QC + Trim Seagauge data based on metadata + """ + + varlist = ["T_28", "P_1", "P_1ac"] + + [varlist.append(k) for k in ds.data_vars if k not in varlist] + + for var in varlist: + # check if any filtering before other qaqc + ds = filter.apply_butter_filt(ds, var) + ds = filter.apply_med_filt(ds, var) + + ds = qaqc.trim_min(ds, var) + + ds = qaqc.trim_max(ds, var) + + ds = qaqc.trim_min_diff(ds, var) + + ds = qaqc.trim_min_diff_pct(ds, var) + + ds = qaqc.trim_max_diff(ds, var) + + ds = qaqc.trim_max_diff_pct(ds, var) + + ds = qaqc.trim_med_diff(ds, var) + + ds = qaqc.trim_med_diff_pct(ds, var) + + ds = qaqc.trim_bad_ens(ds, var) + + for var in varlist: + ds = qaqc.trim_by_any( + ds, var + ) # re-run and trim by other variables as necessary + + return ds diff --git a/stglib/sg/tid2cdf.py b/stglib/sg/tid2cdf.py new file mode 100644 index 0000000..74b7c90 --- /dev/null +++ b/stglib/sg/tid2cdf.py @@ -0,0 +1,55 @@ +import pandas as pd + +from ..core import utils +from . import sgutils + + +def read_tid(filnam, encoding="utf-8"): + """Read data from an SBE 26plus Seagauge .tid file into an xarray dataset.""" + df = pd.read_csv( + filnam, + header=None, + sep=r"\s+", + names=["sample", "Date", "Time", "P_1", "Temp"], + parse_dates={"time": ["Date", "Time"]}, + encoding=encoding, + index_col=False, + ) + df.set_index("time", inplace=True) + sg = df.to_xarray() + return sg + + +def tid_to_cdf(metadata): + """ + Load a raw .tid (or .wb with config.yaml arguments) and .hex file and generate a .cdf file + """ + basefile = metadata["basefile"] + file_type = metadata["file_type"] + + # Get metadata from .hex file + hexmeta = sgutils.read_hex(basefile + ".hex") + + # Append to metadata variable + metadata.update(hexmeta) + + # Read in data + if file_type == ".tid": + ds = read_tid(basefile + file_type) + elif file_type == ".wb": + ds = sgutils.read_wb(basefile + file_type) + + # Convert pressure from psia to dbar + ds["P_1"] = ds.P_1 / 14.503773800722 * 10 + + ds = utils.write_metadata(ds, metadata) + + ds = utils.ensure_cf(ds) + + cdf_filename = ds.attrs["filename"] + "-tide-raw.cdf" + + ds.to_netcdf(cdf_filename, unlimited_dims=["time"]) + + print(f"Finished writing data to {cdf_filename}") + + return ds diff --git a/stglib/sg/wvscdf2nc.py b/stglib/sg/wvscdf2nc.py new file mode 100644 index 0000000..e1c6175 --- /dev/null +++ b/stglib/sg/wvscdf2nc.py @@ -0,0 +1,80 @@ +import xarray as xr + +from ..core import utils +from . import sgutils + + +def cdf_to_nc(cdf_filename, atmpres=None): + """ + Load a raw .cdf file and generate a processed .nc file + """ + + # Check for atmpres correction file + # Atmpres file is required for seagauge because pressure is measured as absolute pressure + if atmpres is None: + raise FileNotFoundError( + "The atmpres file does not exist. Atmpres file is required for Seagauge because pressure is measured as absolute pressure." + ) + + # Load raw .cdf data + ds = xr.open_dataset(cdf_filename) + + # remove units in case we change and we can use larger time steps + ds.time.encoding.pop("units") + + # Add sample_interval to metadata (Convert Hertz to sample interval in seconds) + ds.attrs["sample_interval"] = 1 / float(ds.attrs["sample_rate"]) + + # Atmospheric pressure correction + ds = sgutils.atmos_correct_burst(ds, atmpres) + + # Drop variables + ds = ds.drop("burst_number") + + # Edit metadata + ds = ds_drop_meta(ds) + + # Add attributes + ds = sgutils.ds_add_attrs(ds) + + # Call QAQC + ds = sgutils.sg_qaqc(ds) + + # Run utilities + ds = utils.clip_ds(ds) + ds = utils.create_nominal_instrument_depth(ds) + ds = utils.create_z(ds) + ds = utils.ds_add_lat_lon(ds) + ds = utils.add_start_stop_time(ds) + ds = utils.add_min_max(ds) + + # Write to .nc file + print("Writing cleaned/trimmed data to .nc file") + nc_filename = ds.attrs["filename"] + "b-cal.nc" + + ds.to_netcdf( + nc_filename, unlimited_dims=["time"], encoding={"time": {"dtype": "i4"}} + ) + utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"]) + + print(f"Done writing netCDF file {nc_filename}") + + +def ds_drop_meta(ds): + """ + Drop global attribute metadata not needed for .wb file + """ + gatts = [ + "TideInterval", + "TideIntervalUnits", + "TideDuration", + "TideDurationUnits", + "TideSamplesPerDay", + "NumberOfTideMeasurements", + ] + + # Check to make sure they exist + for k in gatts: + if k in ds.attrs: + del ds.attrs[k] + return ds diff --git a/stglib/sg/wvsnc2waves.py b/stglib/sg/wvsnc2waves.py new file mode 100644 index 0000000..cc53c70 --- /dev/null +++ b/stglib/sg/wvsnc2waves.py @@ -0,0 +1,110 @@ +import numpy as np +import pandas as pd +import xarray as xr + +from ..core import qaqc, utils, waves + + +def nc_to_waves(nc_filename): + """ + Process burst data to wave statistics + """ + + ds = xr.open_dataset(nc_filename) + + # Check to see if need to make smaller wave bursts from really long wave bursts + if "calculated_wave_interval" in ds.attrs: + # Divide large burst into smaller bursts at specified calculated_wave_interval + ds = make_wave_bursts(ds) + + spec = waves.make_waves_ds(ds) + + for k in ["wp_peak", "wh_4061", "wp_4060", "pspec"]: + ds[k] = spec[k] + + ds = utils.create_water_level_var(ds) + ds = utils.create_water_depth_var(ds) + + # Drop unneeded variables/attributes + for k in ["P_1", "P_1ac", "sample", "T_28"]: + if k in ds: + ds = ds.drop_vars(k) + + ds = qaqc.drop_vars(ds) + + ds = utils.trim_max_wp(ds) + + ds = utils.trim_min_wh(ds) + + ds = utils.trim_max_wh(ds) + + ds = utils.trim_wp_ratio(ds) + + # Add attrs + ds = utils.ds_add_wave_attrs(ds) + + # assign min/max (need to do this after trimming): + ds = utils.add_min_max(ds) + + nc_filename = ds.attrs["filename"] + "w-a.nc" + + ds.to_netcdf(nc_filename, unlimited_dims=["time"]) + utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"]) + print("Done writing netCDF file", nc_filename) + + return ds + + +def make_wave_bursts(ds): + ds.attrs["samples_per_burst"] = int( + ds.attrs["calculated_wave_interval"] / ds.attrs["sample_interval"] + ) + + # Calculate how many rows to subdivide each burst + rows = float(ds.attrs["BurstDuration"]) / float( + ds.attrs["calculated_wave_interval"] + ) + + # Define time interval + delta_t = int(ds.attrs["calculated_wave_interval"]) + delta_t = f"{delta_t}s" + + timestamp = [] + for t in range(0, ds["time"].size): + + # Make timestamp + new_time = np.array( + pd.date_range(start=ds.time[t].values, periods=rows, freq=delta_t) + ) + timestamp.append(new_time) + + # Append all timestamps to array + ds["date"] = np.concatenate(timestamp) + + # new_date_rng = pd.date_range(start=ds.time[0].values, end=last_time, freq=delta_t) + + # ds["new_time"] = xr.DataArray(new_date_rng, dims="new_time") + + ds["new_sample"] = xr.DataArray( + range(ds.attrs["samples_per_burst"]), dims="new_sample" + ) + + for v in ["P_1", "P_1ac"]: + if v in ds: + attrsbak = ds[v].attrs + ds[v] = xr.DataArray( + np.reshape(ds[v].values, (-1, int(ds.attrs["samples_per_burst"]))), + dims=["date", "new_sample"], + ) + ds[v].attrs = attrsbak + + ds = ds.rename({"time": "timeold"}) + ds = ds.drop("timeold") + + ds = ds.rename({"sample": "sampleold"}) + ds = ds.drop("sampleold") + + ds = ds.rename({"new_sample": "sample"}) + ds = ds.rename({"date": "time"}) + + return ds diff --git a/stglib/sg/wvswb2cdf.py b/stglib/sg/wvswb2cdf.py new file mode 100644 index 0000000..122fbd4 --- /dev/null +++ b/stglib/sg/wvswb2cdf.py @@ -0,0 +1,33 @@ +from ..core import utils +from . import sgutils + + +def wb_to_cdf(metadata): + """ + Load a raw .wb and .hex file and generate a .cdf file + """ + basefile = metadata["basefile"] + + # Get metadata from .hex file + hexmeta = sgutils.read_hex(basefile + ".hex") + + # Append to metadata variable + metadata.update(hexmeta) + + # Read in data + ds = sgutils.read_wb(basefile + ".wb") + + # Convert pressure from psia to dbar + ds["P_1"] = ds.P_1 / 14.503773800722 * 10 + + ds = utils.write_metadata(ds, metadata) + + ds = utils.ensure_cf(ds) + + cdf_filename = ds.attrs["filename"] + "-waves-raw.cdf" + + ds.to_netcdf(cdf_filename, unlimited_dims=["time"]) + + print(f"Finished writing data to {cdf_filename}") + + return ds diff --git a/stglib/tests/data/.gitattributes b/stglib/tests/data/.gitattributes index dec9581..70d8f1d 100644 --- a/stglib/tests/data/.gitattributes +++ b/stglib/tests/data/.gitattributes @@ -113,3 +113,5 @@ glob_att1126_msl.txt filter=lfs diff=lfs merge=lfs -text mc3575.asc filter=lfs diff=lfs merge=lfs -text 11264sg_raw2.tid filter=lfs diff=lfs merge=lfs -text 11264sg_raw2.hex filter=lfs diff=lfs merge=lfs -text +11264sg_raw2.wb filter=lfs diff=lfs merge=lfs -text +11264sg-atmpres.cdf filter=lfs diff=lfs merge=lfs -text diff --git a/stglib/tests/data/11264sg-atmpres.cdf b/stglib/tests/data/11264sg-atmpres.cdf new file mode 100644 index 0000000..6d5d3cb --- /dev/null +++ b/stglib/tests/data/11264sg-atmpres.cdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148f5cc4908f98a0255c44ac3e1ac720a49500956b93607fe98b6f5ba525cadc +size 12005 diff --git a/stglib/tests/data/11264sg_config.yaml b/stglib/tests/data/11264sg_config.yaml index 9647235..cb14253 100644 --- a/stglib/tests/data/11264sg_config.yaml +++ b/stglib/tests/data/11264sg_config.yaml @@ -1,5 +1,13 @@ basefile: 11264sg_raw2 +file_type: .wb filename: 11264sg LatLonDatum: NAD83 -initial_instrument_height: 0.123 -initial_instrument_height_note: height above seabed +initial_instrument_height: 1.44 +initial_instrument_height_note: height above seabed in meters +calculated_tide_interval: 720 +calculated_tide_interval_units: seconds +calculated_tide_duration: 272 +calculated_tide_duration_units: seconds +calculated_wave_interval: 1024 +calculated_wave_interval_units: seconds +wh_min: 0.02 diff --git a/stglib/tests/data/11264sg_raw2.wb b/stglib/tests/data/11264sg_raw2.wb new file mode 100644 index 0000000..faa9aae --- /dev/null +++ b/stglib/tests/data/11264sg_raw2.wb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a2c58c0820631961ff9a483af10150dcbfb07eeb2212efe643f8460e76093f +size 156953671 diff --git a/stglib/tests/test_scripts.py b/stglib/tests/test_scripts.py index 889890d..e62a5f8 100644 --- a/stglib/tests/test_scripts.py +++ b/stglib/tests/test_scripts.py @@ -493,18 +493,15 @@ def test_mc(): def sg_raw(glob_att, config_yaml): result = subprocess.run( - [scripts / "runsgtid2cdf.py", glob_att, config_yaml], + [scripts / "runots.py", "sgtid", "tid2cdf", glob_att, config_yaml], capture_output=True, cwd=cwd, ) assert "Finished writing data" in result.stdout.decode("utf8") -def sg_nc(nc_file, atmpres=None): - if atmpres is not None: - runlist = [scripts / "runsgcdf2nc.py", nc_file, "--atmpres", atmpres] - else: - runlist = [scripts / "runsgcdf2nc.py", nc_file] +def sg_nc(nc_file, atmpres): + runlist = [scripts / "runots.py", "sgtid", "cdf2nc", nc_file, "--atmpres", atmpres] result = subprocess.run( runlist, capture_output=True, @@ -515,4 +512,38 @@ def sg_nc(nc_file, atmpres=None): def test_sg(): sg_raw("sg_glob_att1126.txt", "11264sg_config.yaml") - sg_nc("11264sg-raw.cdf") + sg_nc("11264sg-tide-raw.cdf", "11264sg-atmpres.cdf") + + +def sg_wv_raw(glob_att, config_yaml): + result = subprocess.run( + [scripts / "runots.py", "sgwvs", "wb2cdf", glob_att, config_yaml], + capture_output=True, + cwd=cwd, + ) + assert "Finished writing data" in result.stdout.decode("utf8") + + +def sg_wv_nc(nc_file, atmpres): + runlist = [scripts / "runots.py", "sgwvs", "cdf2nc", nc_file, "--atmpres", atmpres] + result = subprocess.run( + runlist, + capture_output=True, + cwd=cwd, + ) + assert "Done writing netCDF file" in result.stdout.decode("utf8") + + +def sg_wv_wvs(nc_file): + result = subprocess.run( + [scripts / "runots.py", "sgwvs", "nc2waves", nc_file], + capture_output=True, + cwd=cwd, + ) + assert "Done writing netCDF file" in result.stdout.decode("utf8") + + +def test_sg_wvs(): + sg_wv_raw("sg_glob_att1126.txt", "11264sg_config.yaml") + sg_wv_nc("11264sg-waves-raw.cdf", "11264sg-atmpres.cdf") + sg_wv_wvs("11264sgb-cal.nc")