DOC: Clarify dependency on file name extension

Closes issue #68
MartinThoma · Jun 23, 2020 · 2bf4151 · 2bf4151
1 parent fee57e1
commit 2bf4151
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -94,6 +94,6 @@ prints `1375.31 Euro`
 
 ### IO
 
-* Download files with `mpu.io.download(source, sink)`
-* Read CSV, JSON and pickle with `mpu.io.read(filepath)`
-* Write CSV, JSON and pickle with `mpu.io.write(filepath, data)`
+* Download files with [`mpu.io.download(source, sink)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.download).
+* Read CSV, JSON and pickle with [`mpu.io.read(filepath)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.read).
+* Write CSV, JSON and pickle with [`mpu.io.write(filepath, data)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.write).
diff --git a/mpu/io.py b/mpu/io.py
@@ -11,6 +11,7 @@
 import pickle
 import platform
 from datetime import datetime
+from typing import Dict, List
 
 # First party
 from mpu.datastructures import EList
@@ -31,14 +32,15 @@ def read(filepath: str, **kwargs):
     filepath : str
         Path to the file that should be read. This method's action depends
         mainly on the file extension.
-    kwargs : dict
+    kwargs : Dict
         Any keywords for the specific file format. For CSV, this is
         'delimiter', 'quotechar', 'skiprows', 'format'
 
     Returns
     -------
     data : Union[str, bytes] or other (e.g. format=dicts)
     """
+    supported_formats = [".csv", ".json", ".jsonl", ".pickle"]
     if filepath.lower().endswith(".csv"):
         return _read_csv(filepath, kwargs)
     elif filepath.lower().endswith(".json"):
@@ -66,10 +68,14 @@ def read(filepath: str, **kwargs):
             " as a guide how to use it."
         )
     else:
-        raise NotImplementedError("File '{}' is not known.".format(filepath))
+        raise NotImplementedError(
+            f"File '{filepath}' does not end with one "
+            f"of the supported file name extensions. "
+            f"Supported are: {supported_formats}"
+        )
 
 
-def _read_csv(filepath: str, kwargs):
+def _read_csv(filepath: str, kwargs: Dict):
     """See documentation of mpu.io.read."""
     if "delimiter" not in kwargs:
         kwargs["delimiter"] = ","
@@ -100,7 +106,7 @@ def _read_csv(filepath: str, kwargs):
     return data
 
 
-def _read_jsonl(filepath: str, kwargs):
+def _read_jsonl(filepath: str, kwargs) -> List:
     """See documentation of mpu.io.read."""
     with open(filepath) as data_file:
         data = [json.loads(line, **kwargs) for line in data_file if len(line) > 0]
@@ -121,7 +127,8 @@ def write(filepath: str, data, **kwargs):
     ----------
     filepath : str
         Path to the file that should be written. This method's action depends
-        mainly on the file extension.
+        mainly on the file extension. Make sure that it ends in .csv, .json,
+        .jsonl, or .pickle.
     data : dict or list
         Content that should be written
     kwargs : dict
@@ -131,6 +138,7 @@ def write(filepath: str, data, **kwargs):
     -------
     data : str or bytes
     """
+    supported_formats = [".csv", ".json", ".jsonl", ".pickle"]
     if filepath.lower().endswith(".csv"):
         return _write_csv(filepath, data, kwargs)
     elif filepath.lower().endswith(".json"):
@@ -149,12 +157,15 @@ def write(filepath: str, data, **kwargs):
         )
     elif filepath.lower().endswith(".h5") or filepath.lower().endswith(".hdf5"):
         raise NotImplementedError(
-            "YAML is not supported. See "
+            "HDF5 is not supported. See "
             "https://stackoverflow.com/a/41586571/562769"
             " as a guide how to use it."
         )
     else:
-        raise NotImplementedError("File '{}' is not known.".format(filepath))
+        raise NotImplementedError(
+            f"File '{filepath}' does not end in one of the "
+            f"supported formats. Supported are: {supported_formats}"
+        )
 
 
 def _write_csv(filepath: str, data, kwargs):