diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 3f75f22..266dfda 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -11,12 +11,13 @@
 from io import StringIO
 import re
 
-from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md
-from .utils import query
+from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md, query
 
 WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/'
 WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/'
 WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/'
+PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?'
+ALLPARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?'
 
 WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat', 'gwlevels']
 WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings']
@@ -436,32 +437,63 @@ def _iv(**kwargs):
     return _read_json(response.json()), _set_metadata(response, **kwargs)
 
 
-def get_pmcodes(parameterCd='All', **kwargs):
+def get_pmcodes(parameterInfo='all', partial=False):
     """
-    Return a DataFrame containing all NWIS parameter codes.
+    Return a DataFrame containing NWIS parameter code information.
 
-    Note: NWIS may return incorrect column names. Rename them with
-
-    >>> df.rename(columns={key:value})
-
-    Parameters (Additional parameters, if supplied, will be used as query parameters).
+    Parameters
     ----------
-    parameterCd: string or listlike
+    parameterInfo: string or list of strings
+        Parameter code(s) and/or name(s) to look up. The string 'all'
+        (case-insensitive, the default) returns every parameter code.
+    partial: bool
+        If truthy, each code or name is wrapped in '%' wildcards so that
+        partial matches are returned too. Defaults to False.
+
     Returns:
         DataFrame containing the USGS parameter codes and Metadata as tuple
+
+    Raises:
+        TypeError if parameterInfo is None, an empty list, or anything other
+        than a string or a list of strings, or if a query returns no rows.
     """
-    payload = {'radio_pm_search' : 'pm_search',
-               'pm_group' : 'All+--+include+all+parameter+groups',
-               'pm_search' : parameterCd,
-               'casrn_search' : None,
-               'srsname_search' : None,
-               'show' : ['parameter_group_nm', 'casrn', 'srsname','parameter_units', 'parameter_nm'],
-               'format' : 'rdb'}
+    if parameterInfo is None:
+        raise TypeError('The query must include a parameter name or code')
+
+    url = PARAMCODES_URL
+    payload = {'fmt':'rdb'}
+
+    if isinstance(parameterInfo, list):  # names, codes, or a mix of both
+        if not parameterInfo:  # pd.concat([]) would fail with an opaque error
+            raise TypeError('The query must include a parameter name or code')
+        frames = []
+        for param in parameterInfo:
+            if not isinstance(param, str):
+                raise TypeError('Parameter information (code or name) must be type string or list')
+            if partial:
+                param = '%{0}%'.format(param)
+            payload.update({'parm_nm_cd':param})
+            response = query(url, payload)
+            if len(response.text.splitlines()) < 10:  # empty query
+                raise TypeError('One of the parameter code or name entered does not return any information, please try a different value')
+            frames.append(_read_rdb(response.text))
+        return pd.concat(frames), _set_metadata(response)
+
+    if not isinstance(parameterInfo, str):
+        raise TypeError('Parameter information (code or name) must be type string or list')
+
+    if parameterInfo.lower() == "all":  # querying all parameters (the default)
+        payload.update({'group_cd':'%'})
+        url = ALLPARAMCODES_URL
+    else:  # a single parameter code or name
+        if partial:
+            parameterInfo = '%{0}%'.format(parameterInfo)
+        payload.update({'parm_nm_cd':parameterInfo})
 
-    payload.update(kwargs)
-    url = WATERDATA_URL + 'pmcodes/pmcodes'
     response = query(url, payload)
-    return _read_rdb(response.text), _set_metadata(response, **kwargs)
+    if len(response.text.splitlines()) < 10:  # empty query
+        raise TypeError('The query does not return any information, please try a different code or name. Consider using partial = True to enlarge query results')
+    return _read_rdb(response.text), _set_metadata(response)
 
 
 def get_water_use(years="ALL", state=None, counties="ALL", categories="ALL"):
@@ -713,7 +745,7 @@ def _read_rdb(rdb):
             break
 
     fields = re.split("[,\t]", rdb.splitlines()[count])
-    dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float}
+    dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float, 'parm_cd': str, 'parameter_cd':str}
 
     df = pd.read_csv(StringIO(rdb), delimiter='\t', skiprows=count + 2,
                      names=fields, na_values='NaN', dtype=dtypes)
diff --git a/dataretrieval/waterwatch.py b/dataretrieval/waterwatch.py
index a3410b4..d7c9485 100644
--- a/dataretrieval/waterwatch.py
+++ b/dataretrieval/waterwatch.py
@@ -41,7 +41,7 @@ def get_flood_stage(sites: List[str] = None, fmt: str= "DF") -> Union[pd.DataFra
     07144101 None None None None
     50057000 16 20 24 30
     """
-    res = requests.get(url + 'floodstage', params={"format": ResponseFormat})
+    res = requests.get(waterwatch_url + 'floodstage', params={"format": ResponseFormat})
 
     if res.ok:
         json_res = res.json()