Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing the get_pmcode error #18

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 44 additions & 21 deletions dataretrieval/nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
from io import StringIO
import re

from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md
from .utils import query
from dataretrieval.utils import to_str, format_datetime, update_merge, set_metadata as set_md, query

WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/'
WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/'
WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/'
PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?'
ALLPARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?'

WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat', 'gwlevels']
WATERDATA_SERVICES = ['qwdata', 'measurements', 'peaks', 'pmcodes', 'water_use', 'ratings']
Expand Down Expand Up @@ -436,32 +437,54 @@ def _iv(**kwargs):
return _read_json(response.json()), _set_metadata(response, **kwargs)


def get_pmcodes(parameterCd='All', **kwargs):
def get_pmcodes(parameterInfo = 'all', partial = False):
"""
Return a DataFrame containing all NWIS parameter codes.
Return a DataFrame containing NWIS parameter code information.

Note: NWIS may return incorrect column names. Rename them with

>>> df.rename(columns={key:value})

Parameters (Additional parameters, if supplied, will be used as query parameters).
parameterInfo accepts parameter codes or names.
----------
parameterCd: string or listlike
parameterInfo: string or listlike
partial: default is False, can take any value. If a value is provided the function will query partial matches

Returns:
DataFrame containing the USGS parameter codes and Metadata as tuple
"""
payload = {'radio_pm_search' : 'pm_search',
'pm_group' : 'All+--+include+all+parameter+groups',
'pm_search' : parameterCd,
'casrn_search' : None,
'srsname_search' : None,
'show' : ['parameter_group_nm', 'casrn', 'srsname','parameter_units', 'parameter_nm'],
'format' : 'rdb'}
url = PARAMCODES_URL
payload = {'fmt':'rdb'}

if parameterInfo is None:
raise TypeError('The query must include a parameter name or code')

else:
if isinstance(parameterInfo, str): # when a single code or name is given
if parameterInfo.lower() == "all": # Querying all parameters (this is also the default)
payload.update({'group_cd':'%'})
url = ALLPARAMCODES_URL
else: # this is for querying with a single parameter code or name
if partial:
parameterInfo ='%{0}%'.format(parameterInfo)
payload.update({'parm_nm_cd':parameterInfo})
elif isinstance(parameterInfo, list): # Querying with a list of parameters names, codes, or mixed
l = []
for param in parameterInfo:
if isinstance(param, str):
if partial:
param ='%{0}%'.format(param)
payload.update({'parm_nm_cd':param})
response = query(url, payload)
if len(response.text.splitlines()) < 10: # empty query
raise TypeError('One of the parameter code or name entered does not return any information, please try a different value')
l.append(_read_rdb(response.text))
else:
raise TypeError('Parameter information (code or name) must be type string or list')
return pd.concat(l), _set_metadata(response)
else:
raise TypeError('Parameter information (code or name) must be type string or list')

payload.update(kwargs)
url = WATERDATA_URL + 'pmcodes/pmcodes'
response = query(url, payload)
return _read_rdb(response.text), _set_metadata(response, **kwargs)
if len(response.text.splitlines()) < 10: # empty query
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you check a response code instead?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we can; the URL just returns the same header with no information when there is nothing to return.

raise TypeError('The query does not return any information, please try a different code or name. Consider using partial = True to enlarge query results')
return _read_rdb(response.text), _set_metadata(response)


def get_water_use(years="ALL", state=None, counties="ALL", categories="ALL"):
Expand Down Expand Up @@ -713,7 +736,7 @@ def _read_rdb(rdb):
break

fields = re.split("[,\t]", rdb.splitlines()[count])
dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float}
dtypes = {'site_no': str, 'dec_long_va': float, 'dec_lat_va': float, 'parm_cd': str, 'parameter_cd':str}

df = pd.read_csv(StringIO(rdb), delimiter='\t', skiprows=count + 2,
names=fields, na_values='NaN', dtype=dtypes)
Expand Down
2 changes: 1 addition & 1 deletion dataretrieval/waterwatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_flood_stage(sites: List[str] = None, fmt: str= "DF") -> Union[pd.DataFra
07144101 None None None None
50057000 16 20 24 30
"""
res = requests.get(url + 'floodstage', params={"format": ResponseFormat})
res = requests.get(waterwatch_url + 'floodstage', params={"format": ResponseFormat})

if res.ok:
json_res = res.json()
Expand Down