Skip to content

Commit

Permalink
DOC: Clarify dependency on file name extension
Browse files Browse the repository at this point in the history
Closes issue #68
  • Loading branch information
MartinThoma committed Jun 23, 2020
1 parent fee57e1 commit 2bf4151
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,6 @@ prints `1375.31 Euro`

### IO

* Download files with `mpu.io.download(source, sink)`
* Read CSV, JSON and pickle with `mpu.io.read(filepath)`
* Write CSV, JSON and pickle with `mpu.io.write(filepath, data)`
* Download files with [`mpu.io.download(source, sink)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.download).
* Read CSV, JSON and pickle with [`mpu.io.read(filepath)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.read).
* Write CSV, JSON and pickle with [`mpu.io.write(filepath, data)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.write).
25 changes: 18 additions & 7 deletions mpu/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pickle
import platform
from datetime import datetime
from typing import Dict, List

# First party
from mpu.datastructures import EList
Expand All @@ -31,14 +32,15 @@ def read(filepath: str, **kwargs):
filepath : str
Path to the file that should be read. This method's action depends
mainly on the file extension.
kwargs : dict
kwargs : Dict
Any keywords for the specific file format. For CSV, this is
'delimiter', 'quotechar', 'skiprows', 'format'
Returns
-------
data : Union[str, bytes] or other (e.g. format=dicts)
"""
supported_formats = [".csv", ".json", ".jsonl", ".pickle"]
if filepath.lower().endswith(".csv"):
return _read_csv(filepath, kwargs)
elif filepath.lower().endswith(".json"):
Expand Down Expand Up @@ -66,10 +68,14 @@ def read(filepath: str, **kwargs):
" as a guide how to use it."
)
else:
raise NotImplementedError("File '{}' is not known.".format(filepath))
raise NotImplementedError(
f"File '{filepath}' does not end with one "
f"of the supported file name extensions. "
f"Supported are: {supported_formats}"
)


def _read_csv(filepath: str, kwargs):
def _read_csv(filepath: str, kwargs: Dict):
"""See documentation of mpu.io.read."""
if "delimiter" not in kwargs:
kwargs["delimiter"] = ","
Expand Down Expand Up @@ -100,7 +106,7 @@ def _read_csv(filepath: str, kwargs):
return data


def _read_jsonl(filepath: str, kwargs):
def _read_jsonl(filepath: str, kwargs) -> List:
"""See documentation of mpu.io.read."""
with open(filepath) as data_file:
data = [json.loads(line, **kwargs) for line in data_file if len(line) > 0]
Expand All @@ -121,7 +127,8 @@ def write(filepath: str, data, **kwargs):
----------
filepath : str
Path to the file that should be written. This method's action depends
mainly on the file extension.
mainly on the file extension. Make sure that it ends in .csv, .json,
.jsonl, or .pickle.
data : dict or list
Content that should be written
kwargs : dict
Expand All @@ -131,6 +138,7 @@ def write(filepath: str, data, **kwargs):
-------
data : str or bytes
"""
supported_formats = [".csv", ".json", ".jsonl", ".pickle"]
if filepath.lower().endswith(".csv"):
return _write_csv(filepath, data, kwargs)
elif filepath.lower().endswith(".json"):
Expand All @@ -149,12 +157,15 @@ def write(filepath: str, data, **kwargs):
)
elif filepath.lower().endswith(".h5") or filepath.lower().endswith(".hdf5"):
raise NotImplementedError(
"YAML is not supported. See "
"HDF5 is not supported. See "
"https://stackoverflow.com/a/41586571/562769"
" as a guide how to use it."
)
else:
raise NotImplementedError("File '{}' is not known.".format(filepath))
raise NotImplementedError(
f"File '{filepath}' does not end in one of the "
f"supported formats. Supported are: {supported_formats}"
)


def _write_csv(filepath: str, data, kwargs):
Expand Down

0 comments on commit 2bf4151

Please sign in to comment.