From a56ff8bffe506fb4cd3981920dead22fb2ce5824 Mon Sep 17 00:00:00 2001 From: cjbas22 <35705452+cjbas22@users.noreply.github.com> Date: Mon, 2 May 2022 15:52:58 -0600 Subject: [PATCH] New get_pmcodes function This upload fixes issue #17. --- dataretrieval/nwis.py | 69 ++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index c871289..4209489 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -12,14 +12,16 @@ import re from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md -from .utils import query +from dataretrieval.utils import query WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/' WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/' WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/' +PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' +ALLPARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?' WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat', 'gwlevels'] -WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings'] +WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'water_use', 'ratings'] def format_response(df, service=None): @@ -436,32 +438,57 @@ def _iv(**kwargs): return _read_json(response.json()), _set_metadata(response, **kwargs) -def get_pmcodes(parameterCd='All', **kwargs): +def get_pmcodes(parameterCd = 'all', partial = True): """ - Return a DataFrame containing all NWIS parameter codes. + Returns a DataFrame containing all NWIS parameter code information. - Note: NWIS may return incorrect column names. Rename them with - - >>> df.rename(columns={key:value}) - - Parameters (Additional parameters, if supplied, will be used as query parameters). + Parameters ---------- - parameterCd: string or listlike + parameterCd: string or list + Accepts parameter codes or names + + partial: boolean + Default is True (partial querying). If False, the funciton will query only exact matches Returns: DataFrame containing the USGS parameter codes and Metadata as tuple """ - payload = {'radio_pm_search' : 'pm_search', - 'pm_group' : 'All+--+include+all+parameter+groups', - 'pm_search' : parameterCd, - 'casrn_search' : None, - 'srsname_search' : None, - 'show' : ['parameter_group_nm', 'casrn', 'srsname','parameter_units', 'parameter_nm'], - 'format' : 'rdb'} + url = PARAMCODES_URL + payload = {'fmt':'rdb'} + + if parameterCd is None: + raise TypeError('The query must include a parameter name or code') + + else: + if isinstance(parameterCd, str): # when a single code or name is given + if parameterCd.lower() == "all": # Querying all parameters (this is also the default) + payload.update({'group_cd':'%'}) + url = ALLPARAMCODES_URL + else: # this is for querying with a single parameter code or name + if partial: + parameterCd ='%{0}%'.format(parameterCd) + payload.update({'parm_nm_cd':parameterCd}) + elif isinstance(parameterCd, list): # Querying with a list of parameters names, codes, or mixed + l = [] + for param in parameterCd: + if isinstance(param, str): + if partial: + param ='%{0}%'.format(param) + payload.update({'parm_nm_cd':param}) + response = query(url, payload) + if len(response.text.splitlines()) < 10: # empty query + raise TypeError('One of the parameter codes or names entered does not return any information,'\ + ' please try a different value. Consider using partial = True to extend query results') + l.append(_read_rdb(response.text)) + else: + raise TypeError('Parameter information (code or name) must be type string or list') + return pd.concat(l), _set_metadata(response) + else: + raise TypeError('Parameter information (code or name) must be type string or list') - payload.update(kwargs) - url = WATERDATA_URL + 'pmcodes/pmcodes' response = query(url, payload) - return _read_rdb(response.text), _set_metadata(response, **kwargs) + if len(response.text.splitlines()) < 10: # empty query + raise TypeError('The query does not return any information, please try a different code or name. Consider using partial = True to expand query results') + return _read_rdb(response.text), _set_metadata(response) def get_water_use(years="ALL", state=None, counties="ALL", categories="ALL"): @@ -714,7 +741,7 @@ def _read_rdb(rdb): fields = re.split("[\t]", rdb.splitlines()[count]) fields = [field.replace(",", "") for field in fields] - dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float} + dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float, 'parm_cd': str, 'parameter_cd':str} df = pd.read_csv(StringIO(rdb), delimiter='\t', skiprows=count + 2, names=fields, na_values='NaN', dtype=dtypes)