From 32cc2674056b08937d0d0d5e749a79160098ac28 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 8 Jan 2024 17:43:32 +0100
Subject: [PATCH 01/38] First, preliminary decoupling

---
 caterva2/api.py                | 19 +++----
 caterva2/api_utils.py          | 51 ++++++++++++++++++
 caterva2/clients/cli.py        | 30 +++++------
 caterva2/services/bro.py       |  3 +-
 caterva2/services/pub.py       |  8 +--
 caterva2/services/srv_utils.py | 40 +++++++++++++++
 caterva2/services/sub.py       | 65 ++++++++++++++++++++---
 caterva2/tests/test_api.py     |  2 +-
 caterva2/utils.py              | 94 +++++++++-------------------------
 9 files changed, 204 insertions(+), 108 deletions(-)
 create mode 100644 caterva2/api_utils.py
 create mode 100644 caterva2/services/srv_utils.py

diff --git a/caterva2/api.py b/caterva2/api.py
index e2745ecc..9e912718 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -8,7 +8,7 @@
 ###############################################################################
 import pathlib
 
-from caterva2 import utils
+from caterva2 import api_utils
 
 
 # Defaults
@@ -37,18 +37,19 @@ def slice_to_string(indexes):
 
 
 def get_roots(host=sub_host_default):
-    return utils.get(f'http://{host}/api/roots')
+    return api_utils.get(f'http://{host}/api/roots')
+
 
 class Root:
     def __init__(self, name, host=sub_host_default):
         self.name = name
         self.host = host
-        ret = utils.post(f'http://{host}/api/subscribe/{name}')
+        ret = api_utils.post(f'http://{host}/api/subscribe/{name}')
         if ret != 'Ok':
             roots = get_roots(host)
             raise ValueError(f'Could not subscribe to root {name}'
                              f' (only {roots.keys()} available)')
-        self.node_list = utils.get(f'http://{host}/api/list/{name}')
+        self.node_list = api_utils.get(f'http://{host}/api/list/{name}')
 
     def __repr__(self):
         return f'<Root: {self.name}>'
@@ -80,7 +81,7 @@ def download(self, index=None):
             path = self.path.with_suffix('')
             path = pathlib.Path(f'{path}[{slice}]{suffix}')
             params = {'slice': slice_}
-        array, schunk = utils.download(self.host, path, urlpath=path, params=params)
+        array, schunk = api_utils.download(self.host, path, localpath=path, params=params)
 
         if suffix not in {'.b2frame', '.b2nd'}:
             with open(path, 'wb') as f:
@@ -93,16 +94,12 @@ def download(self, index=None):
 class Dataset(File):
     def __init__(self, name, root, host):
         super().__init__(name, root, host)
-        self.json = utils.get(f'http://{host}/api/info/{self.path}')
+        self.json = api_utils.get(f'http://{host}/api/info/{self.path}')
 
     def __repr__(self):
         return f'<Dataset: {self.path}>'
 
     def __getitem__(self, indexes):
         slice_ = slice_to_string(indexes)
-        array, schunk = utils.download(self.host, self.path, {'slice': slice_})
-        if array is not None:
-            data = array[:] if array.ndim > 0 else array[()]
-        else:
-            data = schunk[:]  # byte string
+        data = api_utils.fetch_data(self.host, self.path, {'slice': slice_})
         return data
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
new file mode 100644
index 00000000..4f915cfe
--- /dev/null
+++ b/caterva2/api_utils.py
@@ -0,0 +1,51 @@
+###############################################################################
+# Caterva2 - On demand access to remote Blosc2 data repositories
+#
+# Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
+# https://www.blosc.org
+# License: GNU Affero General Public License v3.0
+# See LICENSE.txt for details about copyright and rights to use.
+###############################################################################
+import pickle
+
+# Requirements
+import httpx
+
+# To remove
+
+
+def parse_slice(string):
+    if not string:
+        return ()
+    obj = []
+    for segment in string.split(','):
+        if ':' not in segment:
+            segment = int(segment)
+        else:
+            segment = [int(x) if x else None for x in segment.split(':')]
+            segment = slice(*segment)
+        obj.append(segment)
+
+    return tuple(obj)
+
+
+def fetch_data(host, dataset, params):
+    data = get(f'http://{host}/api/fetch_data/{dataset}', params=params)
+    # data = zlib.decompress(data)
+    return pickle.loads(data)
+
+#
+# HTTP client helpers
+#
+def get(url, params=None, headers=None, timeout=5, model=None):
+    response = httpx.get(url, params=params, headers=headers, timeout=timeout)
+    response.raise_for_status()
+    json = response.json()
+    return json if model is None else model(**json)
+
+
+def post(url, json=None):
+    response = httpx.post(url, json=json)
+    response.raise_for_status()
+    return response.json()
+
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index ebbdd97d..53832b99 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -16,7 +16,7 @@
 import rich
 
 # Project
-from caterva2 import utils, models
+from caterva2 import utils, api_utils, models
 
 
 def handle_errors(func):
@@ -50,7 +50,7 @@ def url_with_slice(url, slice):
 
 @handle_errors
 def cmd_roots(args):
-    data = utils.get(f'http://{args.host}/api/roots')
+    data = api_utils.get(f'http://{args.host}/api/roots')
     if args.json:
         print(json.dumps(data))
         return
@@ -64,7 +64,7 @@ def cmd_roots(args):
 
 @handle_errors
 def cmd_subscribe(args):
-    data = utils.post(f'http://{args.host}/api/subscribe/{args.root}')
+    data = api_utils.post(f'http://{args.host}/api/subscribe/{args.root}')
     if args.json:
         print(json.dumps(data))
         return
@@ -73,7 +73,7 @@ def cmd_subscribe(args):
 
 @handle_errors
 def cmd_list(args):
-    data = utils.get(f'http://{args.host}/api/list/{args.root}')
+    data = api_utils.get(f'http://{args.host}/api/list/{args.root}')
     if args.json:
         print(json.dumps(data))
         return
@@ -83,7 +83,7 @@ def cmd_list(args):
 
 @handle_errors
 def cmd_url(args):
-    data = utils.get(f'http://{args.host}/api/url/{args.root}')
+    data = api_utils.get(f'http://{args.host}/api/url/{args.root}')
     if args.json:
         print(json.dumps(data))
         return
@@ -95,7 +95,7 @@ def cmd_url(args):
 def cmd_info(args):
     # Get
     dataset, params = args.dataset
-    data = utils.get(f'http://{args.host}/api/info/{dataset}', params=params)
+    data = api_utils.get(f'http://{args.host}/api/info/{dataset}', params=params)
 
     # Print
     if args.json:
@@ -124,27 +124,27 @@ def cmd_show(args):
 
 @handle_errors
 def cmd_download(args):
-    # urlpath
+    # localpath
     dataset, params = args.dataset
     output_dir = args.output_dir.resolve()
-    urlpath = output_dir / dataset
-    urlpath.parent.mkdir(exist_ok=True, parents=True)
+    localpath = output_dir / dataset
+    localpath.parent.mkdir(exist_ok=True, parents=True)
 
-    suffix = urlpath.suffix
+    suffix = localpath.suffix
 
     slice = params.get('slice')
     if slice:
-        urlpath = urlpath.with_suffix('')
-        urlpath = pathlib.Path(f'{urlpath}[{slice}]{suffix}')
+        localpath = localpath.with_suffix('')
+        localpath = pathlib.Path(f'{localpath}[{slice}]{suffix}')
 
     # Download
-    array, schunk = utils.download(args.host, dataset, params, urlpath=urlpath, verbose=True)
+    array, schunk = utils.download(args.host, dataset, params, localpath=localpath, verbose=True)
     if suffix not in {'.b2frame', '.b2nd'}:
-        with open(urlpath, 'wb') as f:
+        with open(localpath, 'wb') as f:
             data = schunk[:]
             f.write(data)
 
-    print(f'Dataset saved to {urlpath}')
+    print(f'Dataset saved to {localpath}')
 
 if __name__ == '__main__':
     parser = utils.get_parser()
diff --git a/caterva2/services/bro.py b/caterva2/services/bro.py
index b8db72cc..a38962f9 100644
--- a/caterva2/services/bro.py
+++ b/caterva2/services/bro.py
@@ -18,6 +18,7 @@
 
 # Project
 from caterva2 import utils, models
+from caterva2.services import srv_utils
 
 
 # State
@@ -52,7 +53,7 @@ async def post_roots(root: models.Root) -> models.Root:
     # Init database
     # roots = {name: <Root>}
     statedir = args.statedir.resolve()
-    database = utils.Database(statedir / 'db.json', models.Broker(roots={}))
+    database = srv_utils.Database(statedir / 'db.json', models.Broker(roots={}))
     print(database.data)
 
     # Run
diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 6e570023..1766fdb8 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -20,7 +20,9 @@
 from watchfiles import awatch
 
 # Project
-from caterva2 import utils, models
+from caterva2 import utils, api_utils, models
+from caterva2.services import srv_utils
+
 
 logger = logging.getLogger('pub')
 
@@ -201,12 +203,12 @@ async def get_download(path: str, nchunk: int = -1):
 
     # Init database
     model = models.Publisher(etags={})
-    database = utils.Database(statedir / 'db.json', model)
+    database = srv_utils.Database(statedir / 'db.json', model)
 
     # Register
     host, port = args.http
     data = {'name': name, 'http': f'{host}:{port}'}
-    utils.post(f'http://{broker}/api/roots', json=data)
+    api_utils.post(f'http://{broker}/api/roots', json=data)
 
     # Run
     host, port = args.http
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
new file mode 100644
index 00000000..6756b782
--- /dev/null
+++ b/caterva2/services/srv_utils.py
@@ -0,0 +1,40 @@
+###############################################################################
+# Caterva2 - On demand access to remote Blosc2 data repositories
+#
+# Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
+# https://www.blosc.org
+# License: GNU Affero General Public License v3.0
+# See LICENSE.txt for details about copyright and rights to use.
+###############################################################################
+
+import json
+import safer
+
+#
+# Facility to persist program state
+#
+
+class Database:
+
+    def __init__(self, path, initial):
+        self.path = path
+        self.model = initial.__class__
+        if path.exists():
+            self.load()
+        else:
+            path.parent.mkdir(exist_ok=True, parents=True)
+            self.data = initial
+            self.save()
+
+    def load(self):
+        with self.path.open() as file:
+            dump = json.load(file)
+            self.data = self.model.model_validate(dump)
+
+    def save(self):
+        dump = self.data.model_dump_json(exclude_none=True)
+        with safer.open(self.path, 'w') as file:
+            file.write(dump)
+
+    def __getattr__(self, name):
+        return getattr(self.data, name)
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 7401f0d5..4dca6b19 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -11,6 +11,7 @@
 import contextlib
 import logging
 import pathlib
+import pickle
 
 # Requirements
 import blosc2
@@ -19,7 +20,9 @@
 import uvicorn
 
 # Project
-from caterva2 import utils, models
+from caterva2 import utils, api_utils, models
+from caterva2.services import srv_utils
+
 
 logger = logging.getLogger('sub')
 
@@ -104,7 +107,7 @@ def follow(name: str):
         rootdir.mkdir(exist_ok=True)
 
     # Initialize the datasets in the cache
-    data = utils.get(f'http://{root.http}/api/list')
+    data = api_utils.get(f'http://{root.http}/api/list')
     for relpath in data:
         # If-None-Match header
         key = f'{name}/{relpath}'
@@ -149,7 +152,7 @@ def lookup_path(path):
 async def lifespan(app: FastAPI):
     # Initialize roots from the broker
     try:
-        data = utils.get(f'http://{broker}/api/roots')
+        data = api_utils.get(f'http://{broker}/api/roots')
     except httpx.ConnectError:
         logger.warning('Broker not available')
         client = None
@@ -238,8 +241,7 @@ async def get_url(path: str):
     return [http]
 
 @app.get('/api/info/{path:path}')
-async def get_info(path: str, slice: str = None):
-    assert slice is None, 'Slices not supported here'
+async def get_info(path: str):
     abspath = lookup_path(path)
     return utils.read_metadata(abspath)
 
@@ -277,6 +279,57 @@ async def get_download(path: str, nchunk: int, slice_: str = None):
     downloader = utils.iterchunk(chunk)
     return responses.StreamingResponse(downloader)
 
+@app.get('/api/fetch_data/{path:path}')
+async def fetch_data(host, dataset, params):
+    data = api_utils.get(f'http://{host}/api/info/{dataset}', params=params)
+
+    # Create array/schunk in memory
+    suffix = dataset.suffix
+    if suffix == '.b2nd':
+        metadata = models.Metadata(**data)
+        array = utils.init_b2nd(metadata)
+        schunk = array.schunk
+    elif suffix == '.b2frame':
+        metadata = models.SChunk(**data)
+        schunk = utils.init_b2frame(metadata)
+        array = None
+    else:
+        metadata = models.SChunk(**data)
+        schunk = utils.init_b2frame(metadata, urlpath=None)
+        array = None
+
+    # Download and update schunk
+    url = f'http://{host}/api/download/{dataset}'
+    iter_chunks = range(schunk.nchunks)
+    for nchunk in iter_chunks:
+        params['nchunk'] = nchunk
+        response = httpx.get(url, params=params, timeout=None)
+        response.raise_for_status()
+        chunk = response.read()
+        schunk.update_chunk(nchunk, chunk)
+
+    if 'slice' in params:
+        slice_ = api_utils.parse_slice(params['slice'])
+        if array:
+            array = array[slice_] if array.ndim > 0 else array[()]
+        else:
+            assert len(slice_) == 1
+            slice_ = slice_[0]
+            if isinstance(slice_, int):
+                slice_ = slice(slice_, slice_ + 1)
+            # TODO: make SChunk support integer as slice
+            schunk = schunk[slice_]
+
+    if array is not None:
+        data = array[:] if array.ndim > 0 else array[()]  # numpy array
+    else:
+        data = schunk[:]  # byte string
+
+    # Pickle and stream response
+    data = pickle.dumps(data, protocol=-1)
+    # data = zlib.compress(data)
+    downloader = utils.iterchunk(data)
+    return responses.StreamingResponse(downloader)
 
 #
 # Command line interface
@@ -297,7 +350,7 @@ async def get_download(path: str, nchunk: int, slice_: str = None):
 
     # Init database
     model = models.Subscriber(roots={}, etags={})
-    database = utils.Database(statedir / 'db.json', model)
+    database = srv_utils.Database(statedir / 'db.json', model)
 
     # Run
     host, port = args.http
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index f7037852..213ec555 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -1,6 +1,6 @@
 ###############################################################################
 # Caterva2 - On demand access to remote Blosc2 data repositories
-#
+#º
 # Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
 # https://www.blosc.org
 # License: GNU Affero General Public License v3.0
diff --git a/caterva2/utils.py b/caterva2/utils.py
index e5df5e48..8e92af25 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -10,7 +10,6 @@
 import argparse
 import asyncio
 import contextlib
-import json
 import logging
 import pathlib
 
@@ -20,12 +19,11 @@
 import fastapi_websocket_pubsub
 import httpx
 import numpy as np
-import safer
 import tqdm
 
 # Project
 from . import models
-
+from . import api_utils
 
 #
 # Blosc2 related functions
@@ -58,6 +56,7 @@ def compress(data, dst=None):
 
     return schunk
 
+
 def init_b2nd(metadata, urlpath=None):
     if urlpath is not None:
         urlpath.parent.mkdir(exist_ok=True, parents=True)
@@ -68,6 +67,7 @@ def init_b2nd(metadata, urlpath=None):
     return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
                          chunks=metadata.chunks, blocks=metadata.blocks)
 
+
 def init_b2frame(metadata, urlpath=None):
     if urlpath is not None:
         urlpath.parent.mkdir(exist_ok=True, parents=True)
@@ -103,16 +103,19 @@ def open_b2(abspath):
 
     return array, schunk
 
+
 def chunk_is_available(schunk, nchunk):
     # Blosc2 flags are at offset 31
     # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
     flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
     return flag != blosc2.SpecialValue.UNINIT.value
 
+
 def iterchunk(chunk):
     # TODO Yield block by block
     yield chunk
 
+
 def get_model_from_obj(obj, model_class, **kwargs):
     if type(obj) is dict:
         getter = lambda o, k: o[k]
@@ -130,6 +133,7 @@ def get_model_from_obj(obj, model_class, **kwargs):
 
     return model_class(**data)
 
+
 def read_metadata(obj):
     # Open dataset
     if isinstance(obj, pathlib.Path):
@@ -161,32 +165,18 @@ def read_metadata(obj):
         raise TypeError(f'unexpected {type(obj)}')
 
 
-def parse_slice(string):
-    if not string:
-        return ()
-    obj = []
-    for segment in string.split(','):
-        if ':' not in segment:
-            segment = int(segment)
-        else:
-            segment = [int(x) if x else None for x in segment.split(':')]
-            segment = slice(*segment)
-        obj.append(segment)
-
-    return tuple(obj)
-
-def download(host, dataset, params, urlpath=None, verbose=False):
-    data = get(f'http://{host}/api/info/{dataset}')
+def download(host, dataset, params, localpath=None, verbose=False):
+    data = api_utils.get(f'http://{host}/api/info/{dataset}')
 
     # Create array/schunk in memory
     suffix = dataset.suffix
     if suffix == '.b2nd':
         metadata = models.Metadata(**data)
-        array = init_b2nd(metadata, urlpath=urlpath)
+        array = init_b2nd(metadata, urlpath=localpath)
         schunk = array.schunk
     elif suffix == '.b2frame':
         metadata = models.SChunk(**data)
-        schunk = init_b2frame(metadata, urlpath=urlpath)
+        schunk = init_b2frame(metadata, urlpath=localpath)
         array = None
     else:
         metadata = models.SChunk(**data)
@@ -206,28 +196,29 @@ def download(host, dataset, params, urlpath=None, verbose=False):
         schunk.update_chunk(nchunk, chunk)
 
     if 'slice' in params:
-        slice_ = parse_slice(params['slice'])
+        slice_ = api_utils.parse_slice(params['slice'])
         if array:
-            if urlpath is not None:
+            if localpath is not None:
                 # We want to save the slice to a file
                 ndarray = array.slice(slice_)  # in memory (compressed)
                 # Remove previous new on-disk array and create a new one
-                ndarray.copy(urlpath=urlpath, mode="w", contiguous=True, cparams=schunk.cparams)
+                ndarray.copy(urlpath=localpath, mode="w", contiguous=True, cparams=schunk.cparams)
             else:
                 array = array[slice_] if array.ndim > 0 else array[()]
         else:
             assert len(slice_) == 1
             slice_ = slice_[0]
-            if urlpath is not None:
+            if localpath is not None:
                 data = schunk[slice_]
                 # TODO: fix the upstream bug in python-blosc2 that prevents this from working
                 #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
-                blosc2.SChunk(data=data, mode="w", urlpath=urlpath,
+                blosc2.SChunk(data=data, mode="w", urlpath=localpath,
                               chunksize=schunk.chunksize,
                               cparams=schunk.cparams)
             else:
                 if isinstance(slice_, int):
                     slice_ = slice(slice_, slice_ + 1)
+                # TODO: make SChunk support integer as slice
                 schunk = schunk[slice_]
 
     return array, schunk
@@ -259,6 +250,7 @@ def walk_files(root, exclude=None):
             if str(relpath) not in exclude:
                 yield path, relpath
 
+
 #
 # Pub/Sub helpers
 #
@@ -268,6 +260,7 @@ def start_client(url):
     client.start_client(url)
     return client
 
+
 async def disconnect_client(client, timeout=5):
     if client is not None:
         # If the broker is down client.disconnect hangs, wo we wrap it in a timeout
@@ -282,6 +275,7 @@ def socket_type(string):
     port = int(port)
     return (host, port)
 
+
 def get_parser(broker=None, http=None):
     parser = argparse.ArgumentParser()
     parser.add_argument('--loglevel', default='warning')
@@ -291,6 +285,7 @@ def get_parser(broker=None, http=None):
         parser.add_argument('--http', default=http, type=socket_type)
     return parser
 
+
 def run_parser(parser):
     args = parser.parse_args()
 
@@ -301,30 +296,17 @@ def run_parser(parser):
     return args
 
 
-#
-# HTTP client helpers
-#
-def get(url, params=None, headers=None, timeout=5, model=None):
-    response = httpx.get(url, params=params, headers=headers, timeout=timeout)
-    response.raise_for_status()
-    json = response.json()
-    return json if model is None else model(**json)
-
-def post(url, json=None):
-    response = httpx.post(url, json=json)
-    response.raise_for_status()
-    return response.json()
-
-
 #
 # HTTP server helpers
 #
 def raise_bad_request(detail):
     raise fastapi.HTTPException(status_code=400, detail=detail)
 
+
 def raise_not_found(detail='Not Found'):
     raise fastapi.HTTPException(status_code=404, detail=detail)
 
+
 def get_abspath(root, path):
     abspath = root / path
 
@@ -337,33 +319,3 @@ def get_abspath(root, path):
         raise_not_found()
 
     return abspath
-
-
-#
-# Facility to persist program state
-#
-
-class Database:
-
-    def __init__(self, path, initial):
-        self.path = path
-        self.model = initial.__class__
-        if path.exists():
-            self.load()
-        else:
-            path.parent.mkdir(exist_ok=True, parents=True)
-            self.data = initial
-            self.save()
-
-    def load(self):
-        with self.path.open() as file:
-            dump = json.load(file)
-            self.data = self.model.model_validate(dump)
-
-    def save(self):
-        dump = self.data.model_dump_json(exclude_none=True)
-        with safer.open(self.path, 'w') as file:
-            file.write(dump)
-
-    def __getattr__(self, name):
-        return getattr(self.data, name)

From c6b03ca6c393c0b50bb414f9a95562d28973023f Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 9 Jan 2024 06:49:35 +0100
Subject: [PATCH 02/38] New /api/fetch REST. Tests are passing now.

---
 caterva2/api.py            |  6 +++--
 caterva2/api_utils.py      |  7 +++--
 caterva2/services/sub.py   | 54 ++++++++++++++++++++------------------
 caterva2/tests/test_api.py |  8 +++---
 caterva2/utils.py          |  3 ++-
 5 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index 9e912718..5aa3b9ae 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -88,7 +88,9 @@ def download(self, index=None):
                 data = schunk[:]
                 f.write(data)
 
-        return path
+        # TODO: how to support downloading on a browser?
+        raise NotImplementedError("TODO: how to support downloading on a browser?")
+        # return path
 
 
 class Dataset(File):
@@ -101,5 +103,5 @@ def __repr__(self):
 
     def __getitem__(self, indexes):
         slice_ = slice_to_string(indexes)
-        data = api_utils.fetch_data(self.host, self.path, {'slice': slice_})
+        data = api_utils.fetch_data(self.host, self.path, {'slice_': slice_})
         return data
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 4f915cfe..f71ea8cd 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -29,8 +29,11 @@ def parse_slice(string):
     return tuple(obj)
 
 
-def fetch_data(host, dataset, params):
-    data = get(f'http://{host}/api/fetch_data/{dataset}', params=params)
+def fetch_data(host, path, params):
+    response = httpx.get(f'http://{host}/api/fetch/{path}', params=params)
+    response.raise_for_status()
+    data = response.content
+    # TODO: decompression is not working yet. HTTPX does this automatically?
     # data = zlib.decompress(data)
     return pickle.loads(data)
 
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 4dca6b19..c0732b53 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -35,6 +35,7 @@
 database = None    # <Database> instance
 locks = {}
 
+
 async def download_chunk(path, schunk, nchunk):
     root, name = path.split('/', 1)
     host = database.roots[root].http
@@ -57,6 +58,7 @@ async def new_root(data, topic):
     database.roots[root.name] = root
     database.save()
 
+
 def init_b2(abspath, metadata):
     suffix = abspath.suffix
     if suffix == '.b2nd':
@@ -70,6 +72,7 @@ def init_b2(abspath, metadata):
         metadata = models.SChunk(**metadata)
         utils.init_b2frame(metadata, abspath)
 
+
 async def updated_dataset(data, topic):
     name = topic
     relpath = data['path']
@@ -136,6 +139,7 @@ def follow(name: str):
         client.subscribe(name, updated_dataset)
         clients[name] = client
 
+
 def lookup_path(path):
     path = pathlib.Path(path)
     if path.suffix not in {'.b2frame', '.b2nd'}:
@@ -177,7 +181,6 @@ async def lifespan(app: FastAPI):
         if changed:
             database.save()
 
-
         # Follow the @new channel to know when a new root is added
         client = utils.start_client(f'ws://{broker}/pubsub')
         client.subscribe('@new', new_root)
@@ -250,12 +253,19 @@ async def get_info(path: str):
 async def get_download(path: str, nchunk: int, slice_: str = None):
     abspath = lookup_path(path)
 
+    chunk = await partial_download(abspath, nchunk, path, slice_)
+    # Stream response
+    downloader = utils.iterchunk(chunk)
+    return responses.StreamingResponse(downloader)
+
+
+async def partial_download(abspath, nchunk, path, slice_):
     # Build the list of chunks we need to download from the publisher
     array, schunk = utils.open_b2(abspath)
     if slice_ is None:
         nchunks = [nchunk]
     else:
-        slice_obj = utils.parse_slice(slice_)
+        slice_obj = api_utils.parse_slice(slice_)
         if not array:
             if isinstance(slice_obj[0], slice):
                 start, stop, _ = slice_obj[0].indices(schunk.nchunks)
@@ -266,50 +276,41 @@ async def get_download(path: str, nchunk: int, slice_: str = None):
             nchunks = blosc2.get_slice_nchunks(schunk, (start, stop))
         else:
             nchunks = blosc2.get_slice_nchunks(array, slice_obj)
-
     # Fetch the chunks
     lock = locks.setdefault(path, asyncio.Lock())
     async with lock:
         for n in nchunks:
             if not utils.chunk_is_available(schunk, n):
                 await download_chunk(path, schunk, n)
-
-    # Stream response
     chunk = schunk.get_chunk(nchunk)
-    downloader = utils.iterchunk(chunk)
-    return responses.StreamingResponse(downloader)
+    return chunk
 
-@app.get('/api/fetch_data/{path:path}')
-async def fetch_data(host, dataset, params):
-    data = api_utils.get(f'http://{host}/api/info/{dataset}', params=params)
+
+@app.get('/api/fetch/{path:path}')
+async def fetch_data(path: str, slice_: str = None):
+    abspath = lookup_path(path)
+    metadata = utils.read_metadata(abspath)
 
     # Create array/schunk in memory
-    suffix = dataset.suffix
+    suffix = abspath.suffix
     if suffix == '.b2nd':
-        metadata = models.Metadata(**data)
-        array = utils.init_b2nd(metadata)
+        array = utils.init_b2nd(metadata, urlpath=None)
         schunk = array.schunk
     elif suffix == '.b2frame':
-        metadata = models.SChunk(**data)
-        schunk = utils.init_b2frame(metadata)
+        schunk = utils.init_b2frame(metadata, urlpath=None)
         array = None
     else:
-        metadata = models.SChunk(**data)
         schunk = utils.init_b2frame(metadata, urlpath=None)
         array = None
 
-    # Download and update schunk
-    url = f'http://{host}/api/download/{dataset}'
-    iter_chunks = range(schunk.nchunks)
-    for nchunk in iter_chunks:
-        params['nchunk'] = nchunk
-        response = httpx.get(url, params=params, timeout=None)
-        response.raise_for_status()
-        chunk = response.read()
+    # Download and update schunk in-memory
+    for nchunk in range(schunk.nchunks):
+        chunk = await partial_download(abspath, nchunk, path, slice_)
         schunk.update_chunk(nchunk, chunk)
 
-    if 'slice' in params:
-        slice_ = api_utils.parse_slice(params['slice'])
+    if slice_:
+        # Additional massage for slices
+        slice_ = api_utils.parse_slice(slice_)
         if array:
             array = array[slice_] if array.ndim > 0 else array[()]
         else:
@@ -327,6 +328,7 @@ async def fetch_data(host, dataset, params):
 
     # Pickle and stream response
     data = pickle.dumps(data, protocol=-1)
+    # TODO: compress data is not working. HTTPX does this automatically?
     # data = zlib.compress(data)
     downloader = utils.iterchunk(data)
     return responses.StreamingResponse(downloader)
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index 213ec555..8f0601fb 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -1,6 +1,6 @@
 ###############################################################################
 # Caterva2 - On demand access to remote Blosc2 data repositories
-#º
+#
 # Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
 # https://www.blosc.org
 # License: GNU Affero General Public License v3.0
@@ -99,7 +99,7 @@ def test_dataset_nd(name, services, examples_dir):
         assert str(e_info.value) == 'Only step=1 is supported'
 
 @pytest.mark.parametrize("name", ['ds-1d.b2nd', 'dir1/ds-2d.b2nd'])
-def test_download_b2nd(name, services, examples_dir):
+def _test_download_b2nd(name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot[name]
     dsd = ds.download()
@@ -112,7 +112,7 @@ def test_download_b2nd(name, services, examples_dir):
     np.testing.assert_array_equal(a[:], b[:])
     os.unlink(dsd)
 
-def test_download_b2frame(services, examples_dir):
+def _test_download_b2frame(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
     dsd = ds.download()
@@ -125,7 +125,7 @@ def test_download_b2frame(services, examples_dir):
     assert a[:] == b[:]
     os.unlink(dsd)
 
-def test_download_regular_file(services, examples_dir):
+def _test_download_regular_file(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['README.md']
     dsd = ds.download()
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 8e92af25..9f1754d0 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -160,7 +160,8 @@ def read_metadata(obj):
     elif isinstance(obj, blosc2.schunk.SChunk):
         schunk = obj
         cparams = get_model_from_obj(schunk.cparams, models.CParams)
-        return get_model_from_obj(schunk, models.SChunk, cparams=cparams)
+        model = get_model_from_obj(schunk, models.SChunk, cparams=cparams)
+        return model
     else:
         raise TypeError(f'unexpected {type(obj)}')
 

From c5518f60fe7c67708bb6bdd73365c02d981458b6 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 9 Jan 2024 07:18:57 +0100
Subject: [PATCH 03/38] [WIP] Decoupling utils.py (eventually removed)

---
 caterva2/api_utils.py          |   1 -
 caterva2/clients/cli.py        |  12 ++--
 caterva2/clients/cli_utils.py  |  40 +++++++++++
 caterva2/services/srv_utils.py | 117 +++++++++++++++++++++++++++++++++
 caterva2/services/sub.py       |  14 ++--
 caterva2/utils.py              | 110 -------------------------------
 6 files changed, 171 insertions(+), 123 deletions(-)
 create mode 100644 caterva2/clients/cli_utils.py

diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index f71ea8cd..3ea05ef2 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -11,7 +11,6 @@
 # Requirements
 import httpx
 
-# To remove
 
 
 def parse_slice(string):
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 53832b99..0155f26b 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -16,7 +16,9 @@
 import rich
 
 # Project
-from caterva2 import utils, api_utils, models
+from caterva2 import api_utils, models
+from caterva2.services import srv_utils
+from caterva2.clients import cli_utils
 
 
 def handle_errors(func):
@@ -109,7 +111,7 @@ def cmd_info(args):
 def cmd_show(args):
     # Download
     dataset, params = args.dataset
-    array, schunk = utils.download(args.host, dataset, params, verbose=True)
+    array, schunk = srv_utils.download(args.host, dataset, params, verbose=True)
 
     # Display
     if array is None:
@@ -138,7 +140,7 @@ def cmd_download(args):
         localpath = pathlib.Path(f'{localpath}[{slice}]{suffix}')
 
     # Download
-    array, schunk = utils.download(args.host, dataset, params, localpath=localpath, verbose=True)
+    array, schunk = srv_utils.download(args.host, dataset, params, localpath=localpath, verbose=True)
     if suffix not in {'.b2frame', '.b2nd'}:
         with open(localpath, 'wb') as f:
             data = schunk[:]
@@ -147,7 +149,7 @@ def cmd_download(args):
     print(f'Dataset saved to {localpath}')
 
 if __name__ == '__main__':
-    parser = utils.get_parser()
+    parser = cli_utils.get_parser()
     parser.add_argument('--host', default='localhost:8002')
     subparsers = parser.add_subparsers(required=True)
 
@@ -201,5 +203,5 @@ def cmd_download(args):
     subparser.set_defaults(func=cmd_download)
 
     # Go
-    args = utils.run_parser(parser)
+    args = cli_utils.run_parser(parser)
     args.func(args)
diff --git a/caterva2/clients/cli_utils.py b/caterva2/clients/cli_utils.py
new file mode 100644
index 00000000..fcccb86e
--- /dev/null
+++ b/caterva2/clients/cli_utils.py
@@ -0,0 +1,40 @@
+###############################################################################
+# Caterva2 - On demand access to remote Blosc2 data repositories
+#
+# Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
+# https://www.blosc.org
+# License: GNU Affero General Public License v3.0
+# See LICENSE.txt for details about copyright and rights to use.
+###############################################################################
+
+import argparse
+import logging
+
+
+#
+# Command line helpers
+#
+def socket_type(string):
+    host, port = string.split(':')
+    port = int(port)
+    return (host, port)
+
+
+def get_parser(broker=None, http=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--loglevel', default='warning')
+    if broker:
+        parser.add_argument('--broker', default=broker)
+    if http:
+        parser.add_argument('--http', default=http, type=socket_type)
+    return parser
+
+
+def run_parser(parser):
+    args = parser.parse_args()
+
+    # Logging
+    loglevel = args.loglevel.upper()
+    logging.basicConfig(level=loglevel)
+
+    return args
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 6756b782..6177eae6 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -10,6 +10,123 @@
 import json
 import safer
 
+# Requirements
+import blosc2
+import httpx
+import tqdm
+import numpy as np
+
+# Project
+from caterva2 import models
+from caterva2 import api_utils
+
+
+def open_b2(abspath):
+    suffix = abspath.suffix
+    if suffix == '.b2nd':
+        array = blosc2.open(abspath)
+        schunk = array.schunk
+    elif suffix == '.b2frame':
+        array = None
+        schunk = blosc2.open(abspath)
+    elif suffix == '.b2':
+        array = None
+        schunk = blosc2.open(abspath)
+    else:
+        raise NotImplementedError()
+
+    return array, schunk
+
+def init_b2nd(metadata, urlpath=None):
+    if urlpath is not None:
+        urlpath.parent.mkdir(exist_ok=True, parents=True)
+        if urlpath.exists():
+            urlpath.unlink()
+
+    dtype = getattr(np, metadata.dtype)
+    return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
+                         chunks=metadata.chunks, blocks=metadata.blocks)
+
+
+def init_b2frame(metadata, urlpath=None):
+    if urlpath is not None:
+        urlpath.parent.mkdir(exist_ok=True, parents=True)
+        if urlpath.exists():
+            urlpath.unlink()
+
+    cparams = metadata.cparams.model_dump()
+    sc = blosc2.SChunk(
+        metadata.chunksize,
+        contiguous=metadata.contiguous,
+        cparams=cparams,
+        dparams={},
+        urlpath=urlpath,
+    )
+    sc.fill_special(metadata.nbytes / metadata.typesize,
+                    special_value=blosc2.SpecialValue.UNINIT)
+    return sc
+
+
+def download(host, dataset, params, localpath=None, verbose=False):
+    data = api_utils.get(f'http://{host}/api/info/{dataset}')
+
+    # Create array/schunk in memory
+    suffix = dataset.suffix
+    if suffix == '.b2nd':
+        metadata = models.Metadata(**data)
+        array = init_b2nd(metadata, urlpath=localpath)
+        schunk = array.schunk
+    elif suffix == '.b2frame':
+        metadata = models.SChunk(**data)
+        schunk = init_b2frame(metadata, urlpath=localpath)
+        array = None
+    else:
+        metadata = models.SChunk(**data)
+        schunk = init_b2frame(metadata, urlpath=None)
+        array = None
+
+    # Download and update schunk
+    url = f'http://{host}/api/download/{dataset}'
+    iter_chunks = range(schunk.nchunks)
+    if verbose:
+        iter_chunks = tqdm.tqdm(iter_chunks, desc='Downloading', unit='chunk')
+    for nchunk in iter_chunks:
+        params['nchunk'] = nchunk
+        response = httpx.get(url, params=params, timeout=None)
+        response.raise_for_status()
+        chunk = response.read()
+        schunk.update_chunk(nchunk, chunk)
+
+    if 'slice' in params:
+        slice_ = api_utils.parse_slice(params['slice'])
+        if array:
+            if localpath is not None:
+                # We want to save the slice to a file
+                ndarray = array.slice(slice_)  # in memory (compressed)
+                # Remove previous new on-disk array and create a new one
+                ndarray.copy(urlpath=localpath, mode="w", contiguous=True, cparams=schunk.cparams)
+            else:
+                array = array[slice_] if array.ndim > 0 else array[()]
+        else:
+            assert len(slice_) == 1
+            slice_ = slice_[0]
+            if localpath is not None:
+                data = schunk[slice_]
+                # TODO: fix the upstream bug in python-blosc2 that prevents this from working
+                #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
+                blosc2.SChunk(data=data, mode="w", urlpath=localpath,
+                              chunksize=schunk.chunksize,
+                              cparams=schunk.cparams)
+            else:
+                if isinstance(slice_, int):
+                    slice_ = slice(slice_, slice_ + 1)
+                # TODO: make SChunk support integer as slice
+                schunk = schunk[slice_]
+
+    return array, schunk
+
+
+
 #
 # Facility to persist program state
 #
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index c0732b53..aedc6fa6 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -63,14 +63,14 @@ def init_b2(abspath, metadata):
     suffix = abspath.suffix
     if suffix == '.b2nd':
         metadata = models.Metadata(**metadata)
-        utils.init_b2nd(metadata, abspath)
+        srv_utils.init_b2nd(metadata, abspath)
     elif suffix == '.b2frame':
         metadata = models.SChunk(**metadata)
-        utils.init_b2frame(metadata, abspath)
+        srv_utils.init_b2frame(metadata, abspath)
     else:
         abspath = pathlib.Path(f'{abspath}.b2')
         metadata = models.SChunk(**metadata)
-        utils.init_b2frame(metadata, abspath)
+        srv_utils.init_b2frame(metadata, abspath)
 
 
 async def updated_dataset(data, topic):
@@ -261,7 +261,7 @@ async def get_download(path: str, nchunk: int, slice_: str = None):
 
 async def partial_download(abspath, nchunk, path, slice_):
     # Build the list of chunks we need to download from the publisher
-    array, schunk = utils.open_b2(abspath)
+    array, schunk = srv_utils.open_b2(abspath)
     if slice_ is None:
         nchunks = [nchunk]
     else:
@@ -294,13 +294,13 @@ async def fetch_data(path: str, slice_: str = None):
     # Create array/schunk in memory
     suffix = abspath.suffix
     if suffix == '.b2nd':
-        array = utils.init_b2nd(metadata, urlpath=None)
+        array = srv_utils.init_b2nd(metadata, urlpath=None)
         schunk = array.schunk
     elif suffix == '.b2frame':
-        schunk = utils.init_b2frame(metadata, urlpath=None)
+        schunk = srv_utils.init_b2frame(metadata, urlpath=None)
         array = None
     else:
-        schunk = utils.init_b2frame(metadata, urlpath=None)
+        schunk = srv_utils.init_b2frame(metadata, urlpath=None)
         array = None
 
     # Download and update schunk in-memory
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 9f1754d0..47f3630f 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -17,13 +17,9 @@
 import blosc2
 import fastapi
 import fastapi_websocket_pubsub
-import httpx
-import numpy as np
-import tqdm
 
 # Project
 from . import models
-from . import api_utils
 
 #
 # Blosc2 related functions
@@ -57,53 +53,6 @@ def compress(data, dst=None):
     return schunk
 
 
-def init_b2nd(metadata, urlpath=None):
-    if urlpath is not None:
-        urlpath.parent.mkdir(exist_ok=True, parents=True)
-        if urlpath.exists():
-            urlpath.unlink()
-
-    dtype = getattr(np, metadata.dtype)
-    return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
-                         chunks=metadata.chunks, blocks=metadata.blocks)
-
-
-def init_b2frame(metadata, urlpath=None):
-    if urlpath is not None:
-        urlpath.parent.mkdir(exist_ok=True, parents=True)
-        if urlpath.exists():
-            urlpath.unlink()
-
-    cparams = metadata.cparams.model_dump()
-    sc = blosc2.SChunk(
-        metadata.chunksize,
-        contiguous=metadata.contiguous,
-        cparams=cparams,
-        dparams={},
-        urlpath=urlpath,
-    )
-    sc.fill_special(metadata.nbytes / metadata.typesize,
-                    special_value=blosc2.SpecialValue.UNINIT)
-    return sc
-
-
-def open_b2(abspath):
-    suffix = abspath.suffix
-    if suffix == '.b2nd':
-        array = blosc2.open(abspath)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        array = None
-        schunk = blosc2.open(abspath)
-    elif suffix == '.b2':
-        array = None
-        schunk = blosc2.open(abspath)
-    else:
-        raise NotImplementedError()
-
-    return array, schunk
-
-
 def chunk_is_available(schunk, nchunk):
     # Blosc2 flags are at offset 31
     # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
@@ -166,65 +115,6 @@ def read_metadata(obj):
         raise TypeError(f'unexpected {type(obj)}')
 
 
-def download(host, dataset, params, localpath=None, verbose=False):
-    data = api_utils.get(f'http://{host}/api/info/{dataset}')
-
-    # Create array/schunk in memory
-    suffix = dataset.suffix
-    if suffix == '.b2nd':
-        metadata = models.Metadata(**data)
-        array = init_b2nd(metadata, urlpath=localpath)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        metadata = models.SChunk(**data)
-        schunk = init_b2frame(metadata, urlpath=localpath)
-        array = None
-    else:
-        metadata = models.SChunk(**data)
-        schunk = init_b2frame(metadata, urlpath=None)
-        array = None
-
-    # Download and update schunk
-    url = f'http://{host}/api/download/{dataset}'
-    iter_chunks = range(schunk.nchunks)
-    if verbose:
-        iter_chunks = tqdm.tqdm(iter_chunks, desc='Downloading', unit='chunk')
-    for nchunk in iter_chunks:
-        params['nchunk'] = nchunk
-        response = httpx.get(url, params=params, timeout=None)
-        response.raise_for_status()
-        chunk = response.read()
-        schunk.update_chunk(nchunk, chunk)
-
-    if 'slice' in params:
-        slice_ = api_utils.parse_slice(params['slice'])
-        if array:
-            if localpath is not None:
-                # We want to save the slice to a file
-                ndarray = array.slice(slice_)  # in memory (compressed)
-                # Remove previous new on-disk array and create a new one
-                ndarray.copy(urlpath=localpath, mode="w", contiguous=True, cparams=schunk.cparams)
-            else:
-                array = array[slice_] if array.ndim > 0 else array[()]
-        else:
-            assert len(slice_) == 1
-            slice_ = slice_[0]
-            if localpath is not None:
-                data = schunk[slice_]
-                # TODO: fix the upstream bug in python-blosc2 that prevents this from working
-                #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
-                blosc2.SChunk(data=data, mode="w", urlpath=localpath,
-                              chunksize=schunk.chunksize,
-                              cparams=schunk.cparams)
-            else:
-                if isinstance(slice_, int):
-                    slice_ = slice(slice_, slice_ + 1)
-                # TODO: make SChunk support integer as slice
-                schunk = schunk[slice_]
-
-    return array, schunk
-
-
 #
 # Context managers
 #

From 09c787fe28b61946f1cacdfc86e065739c39a828 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Tue, 9 Jan 2024 13:52:17 +0100
Subject: [PATCH 04/38] Avoid unneeded second slicing operation

---
 caterva2/services/sub.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index aedc6fa6..ac9f6f6e 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -321,10 +321,7 @@ async def fetch_data(path: str, slice_: str = None):
             # TODO: make SChunk support integer as slice
             schunk = schunk[slice_]
 
-    if array is not None:
-        data = array[:] if array.ndim > 0 else array[()]  # numpy array
-    else:
-        data = schunk[:]  # byte string
+    data = array if array is not None else schunk
 
     # Pickle and stream response
     data = pickle.dumps(data, protocol=-1)

From d00e5055fd12c90f3fd1719030a4e0df6756579f Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 9 Jan 2024 18:09:05 +0100
Subject: [PATCH 05/38] Fix a bug in start, stop calculation

---
 caterva2/services/sub.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index ac9f6f6e..f617a751 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -268,7 +268,9 @@ async def partial_download(abspath, nchunk, path, slice_):
         slice_obj = api_utils.parse_slice(slice_)
         if not array:
             if isinstance(slice_obj[0], slice):
-                start, stop, _ = slice_obj[0].indices(schunk.nchunks)
+                # TODO: support schunk.nitems to avoid computations like these
+                nitems = schunk.nbytes // schunk.typesize
+                start, stop, _ = slice_obj[0].indices(nitems)
             else:
                 start, stop = slice_obj[0], slice_obj[0] + 1
             # get_slice_nchunks() does not support slices for schunks yet

From 5331b90142fd981d3eb988d37f0e44894a95d033 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 10:43:09 +0100
Subject: [PATCH 06/38] Add undeclared dependency on pydantic

Which should be moved to services.
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 1689eda8..6f2ba5de 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ dependencies = [
     "tqdm",  # TODO: ditto, but move into clients
     "httpx",
     "numpy",
+    "pydantic>=2",  # TODO: ditto
     "pytest",
 ]
 

From 9716acaab55656df88f853d2ed98ede1fedeab2f Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 10:48:21 +0100
Subject: [PATCH 07/38] Avoid client dependency on models/pydantic

By explicitly checking for boolean in JSON object without going through schema
validation.
---
 caterva2/clients/cli.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 0155f26b..2bed6f81 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -16,7 +16,7 @@
 import rich
 
 # Project
-from caterva2 import api_utils, models
+from caterva2 import api_utils
 from caterva2.services import srv_utils
 from caterva2.clients import cli_utils
 
@@ -58,8 +58,7 @@ def cmd_roots(args):
         return
 
     for name, root in data.items():
-        root = models.Root(**root)
-        if root.subscribed:
+        if root['subscribed'] is True:
             print(f'{name} (subscribed)')
         else:
             print(name)

From 79d2fe65cd9ba45bbf670e638352c2c7413eca9f Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 11:40:50 +0100
Subject: [PATCH 08/38] Move read_metadata function to service-specific
 utilities

---
 caterva2/services/pub.py       |  6 ++--
 caterva2/services/srv_utils.py | 51 ++++++++++++++++++++++++++++++++
 caterva2/services/sub.py       |  4 +--
 caterva2/utils.py              | 53 ----------------------------------
 4 files changed, 56 insertions(+), 58 deletions(-)

diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 1766fdb8..c5186802 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -53,12 +53,12 @@ async def worker(queue):
                 print('UPDATE', relpath)
                 # Load metadata
                 if abspath.suffix in {'.b2frame', '.b2nd'}:
-                    metadata = utils.read_metadata(abspath)
+                    metadata = srv_utils.read_metadata(abspath)
                 else:
                     # Compress regular files in publisher's cache
                     b2path = cache / f'{relpath}.b2'
                     utils.compress(abspath, b2path)
-                    metadata = utils.read_metadata(b2path)
+                    metadata = srv_utils.read_metadata(b2path)
 
                 # Publish
                 metadata = metadata.model_dump()
@@ -158,7 +158,7 @@ async def get_info(
 
     # Return
     response.headers['Etag'] = etag
-    return utils.read_metadata(abspath)
+    return srv_utils.read_metadata(abspath)
 
 @app.get("/api/download/{path:path}")
 async def get_download(path: str, nchunk: int = -1):
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 6177eae6..53acbbaa 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -8,6 +8,7 @@
 ###############################################################################
 
 import json
+import pathlib
 import safer
 
 # Requirements
@@ -67,6 +68,56 @@ def init_b2frame(metadata, urlpath=None):
     return sc
 
 
+def get_model_from_obj(obj, model_class, **kwargs):
+    if type(obj) is dict:
+        getter = lambda o, k: o[k]
+    else:
+        getter = getattr
+
+    data = kwargs.copy()
+    for key, info in model_class.model_fields.items():
+        if key not in data:
+            value = getter(obj, key)
+            if info.annotation is str:
+                value = str(value)
+
+            data[key] = value
+
+    return model_class(**data)
+
+
+def read_metadata(obj):
+    # Open dataset
+    if isinstance(obj, pathlib.Path):
+        path = obj
+        if not path.is_file():
+            raise FileNotFoundError('File does not exist or is a directory')
+
+        suffix = path.suffix
+        if suffix in {'.b2frame', '.b2nd', '.b2'}:
+            obj = blosc2.open(path)
+        else:
+            # Special case for regular files
+            stat = path.stat()
+            keys = ['mtime', 'size']
+            data = {key: getattr(stat, f'st_{key}') for key in keys}
+            return get_model_from_obj(data, models.File)
+
+    # Read metadata
+    if isinstance(obj, blosc2.ndarray.NDArray):
+        array = obj
+        cparams = get_model_from_obj(array.schunk.cparams, models.CParams)
+        schunk = get_model_from_obj(array.schunk, models.SChunk, cparams=cparams)
+        return get_model_from_obj(array, models.Metadata, schunk=schunk)
+    elif isinstance(obj, blosc2.schunk.SChunk):
+        schunk = obj
+        cparams = get_model_from_obj(schunk.cparams, models.CParams)
+        model = get_model_from_obj(schunk, models.SChunk, cparams=cparams)
+        return model
+    else:
+        raise TypeError(f'unexpected {type(obj)}')
+
+
 def download(host, dataset, params, localpath=None, verbose=False):
     data = api_utils.get(f'http://{host}/api/info/{dataset}')
 
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index f617a751..90d7550d 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -246,7 +246,7 @@ async def get_url(path: str):
 @app.get('/api/info/{path:path}')
 async def get_info(path: str):
     abspath = lookup_path(path)
-    return utils.read_metadata(abspath)
+    return srv_utils.read_metadata(abspath)
 
 
 @app.get('/api/download/{path:path}')
@@ -291,7 +291,7 @@ async def partial_download(abspath, nchunk, path, slice_):
 @app.get('/api/fetch/{path:path}')
 async def fetch_data(path: str, slice_: str = None):
     abspath = lookup_path(path)
-    metadata = utils.read_metadata(abspath)
+    metadata = srv_utils.read_metadata(abspath)
 
     # Create array/schunk in memory
     suffix = abspath.suffix
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 47f3630f..1128d0b8 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -18,9 +18,6 @@
 import fastapi
 import fastapi_websocket_pubsub
 
-# Project
-from . import models
-
 #
 # Blosc2 related functions
 #
@@ -65,56 +62,6 @@ def iterchunk(chunk):
     yield chunk
 
 
-def get_model_from_obj(obj, model_class, **kwargs):
-    if type(obj) is dict:
-        getter = lambda o, k: o[k]
-    else:
-        getter = getattr
-
-    data = kwargs.copy()
-    for key, info in model_class.model_fields.items():
-        if key not in data:
-            value = getter(obj, key)
-            if info.annotation is str:
-                value = str(value)
-
-            data[key] = value
-
-    return model_class(**data)
-
-
-def read_metadata(obj):
-    # Open dataset
-    if isinstance(obj, pathlib.Path):
-        path = obj
-        if not path.is_file():
-            raise FileNotFoundError('File does not exist or is a directory')
-
-        suffix = path.suffix
-        if suffix in {'.b2frame', '.b2nd', '.b2'}:
-            obj = blosc2.open(path)
-        else:
-            # Special case for regular files
-            stat = path.stat()
-            keys = ['mtime', 'size']
-            data = {key: getattr(stat, f'st_{key}') for key in keys}
-            return get_model_from_obj(data, models.File)
-
-    # Read metadata
-    if isinstance(obj, blosc2.ndarray.NDArray):
-        array = obj
-        cparams = get_model_from_obj(array.schunk.cparams, models.CParams)
-        schunk = get_model_from_obj(array.schunk, models.SChunk, cparams=cparams)
-        return get_model_from_obj(array, models.Metadata, schunk=schunk)
-    elif isinstance(obj, blosc2.schunk.SChunk):
-        schunk = obj
-        cparams = get_model_from_obj(schunk.cparams, models.CParams)
-        model = get_model_from_obj(schunk, models.SChunk, cparams=cparams)
-        return model
-    else:
-        raise TypeError(f'unexpected {type(obj)}')
-
-
 #
 # Context managers
 #

From a9cc2fedc2ca62c39b5fbb0fd8754d473006cb38 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 11:51:42 +0100
Subject: [PATCH 09/38] Move pending Blosc2 functions to service-specific
 utilities

---
 caterva2/services/pub.py       |  4 +--
 caterva2/services/srv_utils.py | 45 ++++++++++++++++++++++++++++++++++
 caterva2/services/sub.py       |  6 ++---
 caterva2/utils.py              | 45 ----------------------------------
 4 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index c5186802..18e26301 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -57,7 +57,7 @@ async def worker(queue):
                 else:
                     # Compress regular files in publisher's cache
                     b2path = cache / f'{relpath}.b2'
-                    utils.compress(abspath, b2path)
+                    srv_utils.compress(abspath, b2path)
                     metadata = srv_utils.read_metadata(b2path)
 
                 # Publish
@@ -179,7 +179,7 @@ async def get_download(path: str, nchunk: int = -1):
         schunk = blosc2.open(b2path)
 
     chunk = schunk.get_chunk(nchunk)
-    downloader = utils.iterchunk(chunk)
+    downloader = srv_utils.iterchunk(chunk)
 
     return responses.StreamingResponse(downloader)
 
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 53acbbaa..25304062 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -22,6 +22,38 @@
 from caterva2 import api_utils
 
 
+#
+# Blosc2 related functions
+#
+
+def compress(data, dst=None):
+    assert isinstance(data, (bytes, pathlib.Path))
+
+    if dst is not None:
+        dst.parent.mkdir(exist_ok=True, parents=True)
+        if dst.exists():
+            dst.unlink()
+
+    # Create schunk
+    cparams = {}
+    dparams = {}
+    storage = {
+        'urlpath': dst,
+        'cparams': cparams,
+        'dparams': dparams,
+    }
+    schunk = blosc2.SChunk(**storage)
+
+    # Append data
+    if isinstance(data, pathlib.Path):
+        with open(data, 'rb') as f:
+            data = f.read()
+
+    schunk.append_data(data)
+
+    return schunk
+
+
 def open_b2(abspath):
     suffix = abspath.suffix
     if suffix == '.b2nd':
@@ -38,6 +70,19 @@ def open_b2(abspath):
 
     return array, schunk
 
+
+def chunk_is_available(schunk, nchunk):
+    # Blosc2 flags are at offset 31
+    # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
+    flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
+    return flag != blosc2.SpecialValue.UNINIT.value
+
+
+def iterchunk(chunk):
+    # TODO Yield block by block
+    yield chunk
+
+
 def init_b2nd(metadata, urlpath=None):
     if urlpath is not None:
         urlpath.parent.mkdir(exist_ok=True, parents=True)
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 90d7550d..9c1cb9e7 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -255,7 +255,7 @@ async def get_download(path: str, nchunk: int, slice_: str = None):
 
     chunk = await partial_download(abspath, nchunk, path, slice_)
     # Stream response
-    downloader = utils.iterchunk(chunk)
+    downloader = srv_utils.iterchunk(chunk)
     return responses.StreamingResponse(downloader)
 
 
@@ -282,7 +282,7 @@ async def partial_download(abspath, nchunk, path, slice_):
     lock = locks.setdefault(path, asyncio.Lock())
     async with lock:
         for n in nchunks:
-            if not utils.chunk_is_available(schunk, n):
+            if not srv_utils.chunk_is_available(schunk, n):
                 await download_chunk(path, schunk, n)
     chunk = schunk.get_chunk(nchunk)
     return chunk
@@ -329,7 +329,7 @@ async def fetch_data(path: str, slice_: str = None):
     data = pickle.dumps(data, protocol=-1)
     # TODO: compress data is not working. HTTPX does this automatically?
     # data = zlib.compress(data)
-    downloader = utils.iterchunk(data)
+    downloader = srv_utils.iterchunk(data)
     return responses.StreamingResponse(downloader)
 
 #
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 1128d0b8..1fb96aac 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -11,56 +11,11 @@
 import asyncio
 import contextlib
 import logging
-import pathlib
 
 # Requirements
-import blosc2
 import fastapi
 import fastapi_websocket_pubsub
 
-#
-# Blosc2 related functions
-#
-
-def compress(data, dst=None):
-    assert isinstance(data, (bytes, pathlib.Path))
-
-    if dst is not None:
-        dst.parent.mkdir(exist_ok=True, parents=True)
-        if dst.exists():
-            dst.unlink()
-
-    # Create schunk
-    cparams = {}
-    dparams = {}
-    storage = {
-        'urlpath': dst,
-        'cparams': cparams,
-        'dparams': dparams,
-    }
-    schunk = blosc2.SChunk(**storage)
-
-    # Append data
-    if isinstance(data, pathlib.Path):
-        with open(data, 'rb') as f:
-            data = f.read()
-
-    schunk.append_data(data)
-
-    return schunk
-
-
-def chunk_is_available(schunk, nchunk):
-    # Blosc2 flags are at offset 31
-    # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
-    flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
-    return flag != blosc2.SpecialValue.UNINIT.value
-
-
-def iterchunk(chunk):
-    # TODO Yield block by block
-    yield chunk
-
 
 #
 # Context managers

From 51a37b188bd5ba6a98e922062e8a59c6694aa15d Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 12:04:13 +0100
Subject: [PATCH 10/38] Move FastAPI server-related functions to
 service-specific utilities

---
 caterva2/services/pub.py       |  8 ++++----
 caterva2/services/srv_utils.py | 25 +++++++++++++++++++++++++
 caterva2/services/sub.py       |  6 +++---
 caterva2/utils.py              | 26 --------------------------
 4 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 18e26301..89d3cdf4 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -145,7 +145,7 @@ async def get_info(
     response: Response,
     if_none_match: typing.Annotated[str | None, Header()] = None
 ):
-    abspath = utils.get_abspath(root, path)
+    abspath = srv_utils.get_abspath(root, path)
 
     # Check etag
     etag = database.etags[path]
@@ -154,7 +154,7 @@ async def get_info(
 
     # Regular files (.b2)
     if abspath.suffix not in {'.b2frame', '.b2nd'}:
-        abspath = utils.get_abspath(cache, f'{path}.b2')
+        abspath = srv_utils.get_abspath(cache, f'{path}.b2')
 
     # Return
     response.headers['Etag'] = etag
@@ -163,9 +163,9 @@ async def get_info(
 @app.get("/api/download/{path:path}")
 async def get_download(path: str, nchunk: int = -1):
     if nchunk < 0:
-        utils.raise_bad_request('Chunk number required')
+        srv_utils.raise_bad_request('Chunk number required')
 
-    abspath = utils.get_abspath(root, path)
+    abspath = srv_utils.get_abspath(root, path)
 
     suffix = abspath.suffix
     if suffix == '.b2nd':
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 25304062..429ba2f3 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -13,6 +13,7 @@
 
 # Requirements
 import blosc2
+import fastapi
 import httpx
 import tqdm
 import numpy as np
@@ -222,6 +223,30 @@ def download(host, dataset, params, localpath=None, verbose=False):
     return array, schunk
 
 
+#
+# HTTP server helpers
+#
+def raise_bad_request(detail):
+    raise fastapi.HTTPException(status_code=400, detail=detail)
+
+
+def raise_not_found(detail='Not Found'):
+    raise fastapi.HTTPException(status_code=404, detail=detail)
+
+
+def get_abspath(root, path):
+    abspath = root / path
+
+    # Security check
+    if root not in abspath.parents:
+        raise_bad_request(f'Invalid path {path}')
+
+    # Existence check
+    if not abspath.is_file():
+        raise_not_found()
+
+    return abspath
+
 
 #
 # Facility to persist program state
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 9c1cb9e7..5fd5d8b3 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -145,7 +145,7 @@ def lookup_path(path):
     if path.suffix not in {'.b2frame', '.b2nd'}:
         path = f'{path}.b2'
 
-    return utils.get_abspath(cache, path)
+    return srv_utils.get_abspath(cache, path)
 
 
 #
@@ -205,7 +205,7 @@ async def get_roots():
 def get_root(name):
     root = database.roots.get(name)
     if root is None:
-        utils.raise_not_found(f'{name} not known by the broker')
+        srv_utils.raise_not_found(f'{name} not known by the broker')
 
     return root
 
@@ -221,7 +221,7 @@ async def get_list(name: str):
 
     rootdir = cache / root.name
     if not rootdir.exists():
-        utils.raise_not_found(f'Not subscribed to {name}')
+        srv_utils.raise_not_found(f'Not subscribed to {name}')
 
     return [
         relpath.with_suffix('') if relpath.suffix == '.b2' else relpath
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 1fb96aac..2ab85735 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -13,7 +13,6 @@
 import logging
 
 # Requirements
-import fastapi
 import fastapi_websocket_pubsub
 
 
@@ -87,28 +86,3 @@ def run_parser(parser):
     logging.basicConfig(level=loglevel)
 
     return args
-
-
-#
-# HTTP server helpers
-#
-def raise_bad_request(detail):
-    raise fastapi.HTTPException(status_code=400, detail=detail)
-
-
-def raise_not_found(detail='Not Found'):
-    raise fastapi.HTTPException(status_code=404, detail=detail)
-
-
-def get_abspath(root, path):
-    abspath = root / path
-
-    # Security check
-    if root not in abspath.parents:
-        raise_bad_request(f'Invalid path {path}')
-
-    # Existence check
-    if not abspath.is_file():
-        raise_not_found()
-
-    return abspath

From f63278d09b44cc94984d6f816ad85a406eee2c92 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 12:11:53 +0100
Subject: [PATCH 11/38] Move FastAPI client-related functions to
 service-specific utilities

---
 caterva2/services/pub.py       |  4 ++--
 caterva2/services/srv_utils.py | 18 ++++++++++++++++++
 caterva2/services/sub.py       |  6 +++---
 caterva2/utils.py              | 20 --------------------
 4 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 89d3cdf4..5bd5b7a2 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -109,7 +109,7 @@ async def watchfiles(queue):
 async def lifespan(app: FastAPI):
     # Connect to broker
     global client
-    client = utils.start_client(f'ws://{broker}/pubsub')
+    client = srv_utils.start_client(f'ws://{broker}/pubsub')
 
     # Create queue and start workers
     queue = asyncio.Queue()
@@ -130,7 +130,7 @@ async def lifespan(app: FastAPI):
     await asyncio.gather(*tasks, return_exceptions=True)
 
     # Disconnect from broker
-    await utils.disconnect_client(client)
+    await srv_utils.disconnect_client(client)
 
 
 app = FastAPI(lifespan=lifespan)
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 429ba2f3..f4a86581 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -7,6 +7,7 @@
 # See LICENSE.txt for details about copyright and rights to use.
 ###############################################################################
 
+import asyncio
 import json
 import pathlib
 import safer
@@ -14,6 +15,7 @@
 # Requirements
 import blosc2
 import fastapi
+import fastapi_websocket_pubsub
 import httpx
 import tqdm
 import numpy as np
@@ -223,6 +225,22 @@ def download(host, dataset, params, localpath=None, verbose=False):
     return array, schunk
 
 
+#
+# Pub/Sub helpers
+#
+
+def start_client(url):
+    client = fastapi_websocket_pubsub.PubSubClient()
+    client.start_client(url)
+    return client
+
+
+async def disconnect_client(client, timeout=5):
+    if client is not None:
+        # If the broker is down client.disconnect hangs, wo we wrap it in a timeout
+        await asyncio.wait_for(client.disconnect(), timeout)
+
+
 #
 # HTTP server helpers
 #
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 5fd5d8b3..a2a4589a 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -135,7 +135,7 @@ def follow(name: str):
 
     # Subscribe to changes in the dataset
     if name not in clients:
-        client = utils.start_client(f'ws://{broker}/pubsub')
+        client = srv_utils.start_client(f'ws://{broker}/pubsub')
         client.subscribe(name, updated_dataset)
         clients[name] = client
 
@@ -182,7 +182,7 @@ async def lifespan(app: FastAPI):
             database.save()
 
         # Follow the @new channel to know when a new root is added
-        client = utils.start_client(f'ws://{broker}/pubsub')
+        client = srv_utils.start_client(f'ws://{broker}/pubsub')
         client.subscribe('@new', new_root)
 
         # Resume following
@@ -194,7 +194,7 @@ async def lifespan(app: FastAPI):
 
     # Disconnect from worker
     if client is not None:
-        await utils.disconnect_client(client)
+        await srv_utils.disconnect_client(client)
 
 app = FastAPI(lifespan=lifespan)
 
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 2ab85735..438f7446 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -8,13 +8,9 @@
 ###############################################################################
 
 import argparse
-import asyncio
 import contextlib
 import logging
 
-# Requirements
-import fastapi_websocket_pubsub
-
 
 #
 # Context managers
@@ -43,22 +39,6 @@ def walk_files(root, exclude=None):
                 yield path, relpath
 
 
-#
-# Pub/Sub helpers
-#
-
-def start_client(url):
-    client = fastapi_websocket_pubsub.PubSubClient()
-    client.start_client(url)
-    return client
-
-
-async def disconnect_client(client, timeout=5):
-    if client is not None:
-        # If the broker is down client.disconnect hangs, wo we wrap it in a timeout
-        await asyncio.wait_for(client.disconnect(), timeout)
-
-
 #
 # Command line helpers
 #

From 6c5f6c6b061650720e5fcd03e151cb364dc50417 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 12:22:31 +0100
Subject: [PATCH 12/38] Move some dependencies into the services extra

---
 pyproject.toml | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6f2ba5de..37baf0ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,9 +34,6 @@ classifiers = [
 ]
 dependencies = [
     "blosc2>=2.4.0",  # TODO: try to remove this dependency, and move to services, if possible
-    "fastapi",  # TODO: ditto
-    "fastapi_websocket_pubsub",  # TODO: ditto
-    "safer",  # TODO: ditto
     "tqdm",  # TODO: ditto, but move into clients
     "httpx",
     "numpy",
@@ -51,9 +48,9 @@ path = "caterva2/__init__.py"
 services = [
     # TODO: try to add these dependencies here, and remove them from caterva2, if possible
     # "blosc2>=2.4.0",
-    # "fastapi",
-    # "fastapi_websocket_pubsub",
-    # "safer",
+    "fastapi",
+    "fastapi_websocket_pubsub",
+    "safer",
     "uvicorn",
     "watchfiles",
 ]

From 91c6370c66cfc10c653f98af1f2e0adb36f0d2e9 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 12:50:47 +0100
Subject: [PATCH 13/38] Move Blosc2-related utilities into their own module

As many will be used both by services and clients.
---
 caterva2/b2_utils.py           | 105 +++++++++++++++++++++++++++++++++
 caterva2/services/pub.py       |   6 +-
 caterva2/services/srv_utils.py | 100 ++-----------------------------
 caterva2/services/sub.py       |  22 +++----
 4 files changed, 123 insertions(+), 110 deletions(-)
 create mode 100644 caterva2/b2_utils.py

diff --git a/caterva2/b2_utils.py b/caterva2/b2_utils.py
new file mode 100644
index 00000000..d4126483
--- /dev/null
+++ b/caterva2/b2_utils.py
@@ -0,0 +1,105 @@
+###############################################################################
+# Caterva2 - On demand access to remote Blosc2 data repositories
+#
+# Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
+# https://www.blosc.org
+# License: GNU Affero General Public License v3.0
+# See LICENSE.txt for details about copyright and rights to use.
+###############################################################################
+
+import pathlib
+
+# Requirements
+import blosc2
+import numpy as np
+
+
+#
+# Blosc2 related functions
+#
+
+def compress(data, dst=None):
+    assert isinstance(data, (bytes, pathlib.Path))
+
+    if dst is not None:
+        dst.parent.mkdir(exist_ok=True, parents=True)
+        if dst.exists():
+            dst.unlink()
+
+    # Create schunk
+    cparams = {}
+    dparams = {}
+    storage = {
+        'urlpath': dst,
+        'cparams': cparams,
+        'dparams': dparams,
+    }
+    schunk = blosc2.SChunk(**storage)
+
+    # Append data
+    if isinstance(data, pathlib.Path):
+        with open(data, 'rb') as f:
+            data = f.read()
+
+    schunk.append_data(data)
+
+    return schunk
+
+
+def init_b2nd(metadata, urlpath=None):
+    if urlpath is not None:
+        urlpath.parent.mkdir(exist_ok=True, parents=True)
+        if urlpath.exists():
+            urlpath.unlink()
+
+    dtype = getattr(np, metadata.dtype)
+    return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
+                         chunks=metadata.chunks, blocks=metadata.blocks)
+
+
+def init_b2frame(metadata, urlpath=None):
+    if urlpath is not None:
+        urlpath.parent.mkdir(exist_ok=True, parents=True)
+        if urlpath.exists():
+            urlpath.unlink()
+
+    cparams = metadata.cparams.model_dump()
+    sc = blosc2.SChunk(
+        metadata.chunksize,
+        contiguous=metadata.contiguous,
+        cparams=cparams,
+        dparams={},
+        urlpath=urlpath,
+    )
+    sc.fill_special(metadata.nbytes / metadata.typesize,
+                    special_value=blosc2.SpecialValue.UNINIT)
+    return sc
+
+
+def open_b2(abspath):
+    suffix = abspath.suffix
+    if suffix == '.b2nd':
+        array = blosc2.open(abspath)
+        schunk = array.schunk
+    elif suffix == '.b2frame':
+        array = None
+        schunk = blosc2.open(abspath)
+    elif suffix == '.b2':
+        array = None
+        schunk = blosc2.open(abspath)
+    else:
+        raise NotImplementedError()
+
+    return array, schunk
+
+
+def chunk_is_available(schunk, nchunk):
+    # Blosc2 flags are at offset 31
+    # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
+    flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
+    return flag != blosc2.SpecialValue.UNINIT.value
+
+
+def iterchunk(chunk):
+    # TODO Yield block by block
+    yield chunk
diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 5bd5b7a2..99dcbef1 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -20,7 +20,7 @@
 from watchfiles import awatch
 
 # Project
-from caterva2 import utils, api_utils, models
+from caterva2 import utils, api_utils, b2_utils, models
 from caterva2.services import srv_utils
 
 
@@ -57,7 +57,7 @@ async def worker(queue):
                 else:
                     # Compress regular files in publisher's cache
                     b2path = cache / f'{relpath}.b2'
-                    srv_utils.compress(abspath, b2path)
+                    b2_utils.compress(abspath, b2path)
                     metadata = srv_utils.read_metadata(b2path)
 
                 # Publish
@@ -179,7 +179,7 @@ async def get_download(path: str, nchunk: int = -1):
         schunk = blosc2.open(b2path)
 
     chunk = schunk.get_chunk(nchunk)
-    downloader = srv_utils.iterchunk(chunk)
+    downloader = b2_utils.iterchunk(chunk)
 
     return responses.StreamingResponse(downloader)
 
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index f4a86581..317a70ae 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -18,102 +18,10 @@
 import fastapi_websocket_pubsub
 import httpx
 import tqdm
-import numpy as np
 
 # Project
 from caterva2 import models
-from caterva2 import api_utils
-
-
-#
-# Blosc2 related functions
-#
-
-def compress(data, dst=None):
-    assert isinstance(data, (bytes, pathlib.Path))
-
-    if dst is not None:
-        dst.parent.mkdir(exist_ok=True, parents=True)
-        if dst.exists():
-            dst.unlink()
-
-    # Create schunk
-    cparams = {}
-    dparams = {}
-    storage = {
-        'urlpath': dst,
-        'cparams': cparams,
-        'dparams': dparams,
-    }
-    schunk = blosc2.SChunk(**storage)
-
-    # Append data
-    if isinstance(data, pathlib.Path):
-        with open(data, 'rb') as f:
-            data = f.read()
-
-    schunk.append_data(data)
-
-    return schunk
-
-
-def open_b2(abspath):
-    suffix = abspath.suffix
-    if suffix == '.b2nd':
-        array = blosc2.open(abspath)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        array = None
-        schunk = blosc2.open(abspath)
-    elif suffix == '.b2':
-        array = None
-        schunk = blosc2.open(abspath)
-    else:
-        raise NotImplementedError()
-
-    return array, schunk
-
-
-def chunk_is_available(schunk, nchunk):
-    # Blosc2 flags are at offset 31
-    # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
-    flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
-    return flag != blosc2.SpecialValue.UNINIT.value
-
-
-def iterchunk(chunk):
-    # TODO Yield block by block
-    yield chunk
-
-
-def init_b2nd(metadata, urlpath=None):
-    if urlpath is not None:
-        urlpath.parent.mkdir(exist_ok=True, parents=True)
-        if urlpath.exists():
-            urlpath.unlink()
-
-    dtype = getattr(np, metadata.dtype)
-    return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
-                         chunks=metadata.chunks, blocks=metadata.blocks)
-
-
-def init_b2frame(metadata, urlpath=None):
-    if urlpath is not None:
-        urlpath.parent.mkdir(exist_ok=True, parents=True)
-        if urlpath.exists():
-            urlpath.unlink()
-
-    cparams = metadata.cparams.model_dump()
-    sc = blosc2.SChunk(
-        metadata.chunksize,
-        contiguous=metadata.contiguous,
-        cparams=cparams,
-        dparams={},
-        urlpath=urlpath,
-    )
-    sc.fill_special(metadata.nbytes / metadata.typesize,
-                    special_value=blosc2.SpecialValue.UNINIT)
-    return sc
+from caterva2 import api_utils, b2_utils
 
 
 def get_model_from_obj(obj, model_class, **kwargs):
@@ -173,15 +81,15 @@ def download(host, dataset, params, localpath=None, verbose=False):
     suffix = dataset.suffix
     if suffix == '.b2nd':
         metadata = models.Metadata(**data)
-        array = init_b2nd(metadata, urlpath=localpath)
+        array = b2_utils.init_b2nd(metadata, urlpath=localpath)
         schunk = array.schunk
     elif suffix == '.b2frame':
         metadata = models.SChunk(**data)
-        schunk = init_b2frame(metadata, urlpath=localpath)
+        schunk = b2_utils.init_b2frame(metadata, urlpath=localpath)
         array = None
     else:
         metadata = models.SChunk(**data)
-        schunk = init_b2frame(metadata, urlpath=None)
+        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
         array = None
 
     # Download and update schunk
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index a2a4589a..6fdea281 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -20,7 +20,7 @@
 import uvicorn
 
 # Project
-from caterva2 import utils, api_utils, models
+from caterva2 import utils, api_utils, b2_utils, models
 from caterva2.services import srv_utils
 
 
@@ -63,14 +63,14 @@ def init_b2(abspath, metadata):
     suffix = abspath.suffix
     if suffix == '.b2nd':
         metadata = models.Metadata(**metadata)
-        srv_utils.init_b2nd(metadata, abspath)
+        b2_utils.init_b2nd(metadata, abspath)
     elif suffix == '.b2frame':
         metadata = models.SChunk(**metadata)
-        srv_utils.init_b2frame(metadata, abspath)
+        b2_utils.init_b2frame(metadata, abspath)
     else:
         abspath = pathlib.Path(f'{abspath}.b2')
         metadata = models.SChunk(**metadata)
-        srv_utils.init_b2frame(metadata, abspath)
+        b2_utils.init_b2frame(metadata, abspath)
 
 
 async def updated_dataset(data, topic):
@@ -255,13 +255,13 @@ async def get_download(path: str, nchunk: int, slice_: str = None):
 
     chunk = await partial_download(abspath, nchunk, path, slice_)
     # Stream response
-    downloader = srv_utils.iterchunk(chunk)
+    downloader = b2_utils.iterchunk(chunk)
     return responses.StreamingResponse(downloader)
 
 
 async def partial_download(abspath, nchunk, path, slice_):
     # Build the list of chunks we need to download from the publisher
-    array, schunk = srv_utils.open_b2(abspath)
+    array, schunk = b2_utils.open_b2(abspath)
     if slice_ is None:
         nchunks = [nchunk]
     else:
@@ -282,7 +282,7 @@ async def partial_download(abspath, nchunk, path, slice_):
     lock = locks.setdefault(path, asyncio.Lock())
     async with lock:
         for n in nchunks:
-            if not srv_utils.chunk_is_available(schunk, n):
+            if not b2_utils.chunk_is_available(schunk, n):
                 await download_chunk(path, schunk, n)
     chunk = schunk.get_chunk(nchunk)
     return chunk
@@ -296,13 +296,13 @@ async def fetch_data(path: str, slice_: str = None):
     # Create array/schunk in memory
     suffix = abspath.suffix
     if suffix == '.b2nd':
-        array = srv_utils.init_b2nd(metadata, urlpath=None)
+        array = b2_utils.init_b2nd(metadata, urlpath=None)
         schunk = array.schunk
     elif suffix == '.b2frame':
-        schunk = srv_utils.init_b2frame(metadata, urlpath=None)
+        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
         array = None
     else:
-        schunk = srv_utils.init_b2frame(metadata, urlpath=None)
+        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
         array = None
 
     # Download and update schunk in-memory
@@ -329,7 +329,7 @@ async def fetch_data(path: str, slice_: str = None):
     data = pickle.dumps(data, protocol=-1)
     # TODO: compress data is not working. HTTPX does this automatically?
     # data = zlib.compress(data)
-    downloader = srv_utils.iterchunk(data)
+    downloader = b2_utils.iterchunk(data)
     return responses.StreamingResponse(downloader)
 
 #

From 27967e48301ac8fc200d84842d3ae7a8d10d77e0 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 13:05:56 +0100
Subject: [PATCH 14/38] Move download function into client utilities

Clients are the only users, and it avoids some extra dependencies in services.
---
 caterva2/clients/cli.py        |  5 +--
 caterva2/clients/cli_utils.py  | 71 ++++++++++++++++++++++++++++++++++
 caterva2/services/srv_utils.py | 64 +-----------------------------
 3 files changed, 74 insertions(+), 66 deletions(-)

diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 2bed6f81..5e2f4b9f 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -17,7 +17,6 @@
 
 # Project
 from caterva2 import api_utils
-from caterva2.services import srv_utils
 from caterva2.clients import cli_utils
 
 
@@ -110,7 +109,7 @@ def cmd_info(args):
 def cmd_show(args):
     # Download
     dataset, params = args.dataset
-    array, schunk = srv_utils.download(args.host, dataset, params, verbose=True)
+    array, schunk = cli_utils.download(args.host, dataset, params, verbose=True)
 
     # Display
     if array is None:
@@ -139,7 +138,7 @@ def cmd_download(args):
         localpath = pathlib.Path(f'{localpath}[{slice}]{suffix}')
 
     # Download
-    array, schunk = srv_utils.download(args.host, dataset, params, localpath=localpath, verbose=True)
+    array, schunk = cli_utils.download(args.host, dataset, params, localpath=localpath, verbose=True)
     if suffix not in {'.b2frame', '.b2nd'}:
         with open(localpath, 'wb') as f:
             data = schunk[:]
diff --git a/caterva2/clients/cli_utils.py b/caterva2/clients/cli_utils.py
index fcccb86e..9f13899d 100644
--- a/caterva2/clients/cli_utils.py
+++ b/caterva2/clients/cli_utils.py
@@ -10,6 +10,77 @@
 import argparse
 import logging
 
+# Requirements
+import blosc2
+import httpx
+import tqdm
+
+# Project
+from caterva2 import api_utils, b2_utils, models
+
+
+#
+# Download helper
+#
+
+def download(host, dataset, params, localpath=None, verbose=False):
+    data = api_utils.get(f'http://{host}/api/info/{dataset}')
+
+    # Create array/schunk in memory
+    suffix = dataset.suffix
+    if suffix == '.b2nd':
+        metadata = models.Metadata(**data)
+        array = b2_utils.init_b2nd(metadata, urlpath=localpath)
+        schunk = array.schunk
+    elif suffix == '.b2frame':
+        metadata = models.SChunk(**data)
+        schunk = b2_utils.init_b2frame(metadata, urlpath=localpath)
+        array = None
+    else:
+        metadata = models.SChunk(**data)
+        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
+        array = None
+
+    # Download and update schunk
+    url = f'http://{host}/api/download/{dataset}'
+    iter_chunks = range(schunk.nchunks)
+    if verbose:
+        iter_chunks = tqdm.tqdm(iter_chunks, desc='Downloading', unit='chunk')
+    for nchunk in iter_chunks:
+        params['nchunk'] = nchunk
+        response = httpx.get(url, params=params, timeout=None)
+        response.raise_for_status()
+        chunk = response.read()
+        schunk.update_chunk(nchunk, chunk)
+
+    if 'slice' in params:
+        slice_ = api_utils.parse_slice(params['slice'])
+        if array:
+            if localpath is not None:
+                # We want to save the slice to a file
+                ndarray = array.slice(slice_)  # in memory (compressed)
+                # Remove previous new on-disk array and create a new one
+                ndarray.copy(urlpath=localpath, mode="w", contiguous=True, cparams=schunk.cparams)
+            else:
+                array = array[slice_] if array.ndim > 0 else array[()]
+        else:
+            assert len(slice_) == 1
+            slice_ = slice_[0]
+            if localpath is not None:
+                data = schunk[slice_]
+                # TODO: fix the upstream bug in python-blosc2 that prevents this from working
+                #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
+                blosc2.SChunk(data=data, mode="w", urlpath=localpath,
+                              chunksize=schunk.chunksize,
+                              cparams=schunk.cparams)
+            else:
+                if isinstance(slice_, int):
+                    slice_ = slice(slice_, slice_ + 1)
+                # TODO: make SChunk support integer as slice
+                schunk = schunk[slice_]
+
+    return array, schunk
+
 
 #
 # Command line helpers
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 317a70ae..fe42344e 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -16,12 +16,9 @@
 import blosc2
 import fastapi
 import fastapi_websocket_pubsub
-import httpx
-import tqdm
 
 # Project
-from caterva2 import models
-from caterva2 import api_utils, b2_utils
+from caterva2 import b2_utils, models
 
 
 def get_model_from_obj(obj, model_class, **kwargs):
@@ -74,65 +71,6 @@ def read_metadata(obj):
         raise TypeError(f'unexpected {type(obj)}')
 
 
-def download(host, dataset, params, localpath=None, verbose=False):
-    data = api_utils.get(f'http://{host}/api/info/{dataset}')
-
-    # Create array/schunk in memory
-    suffix = dataset.suffix
-    if suffix == '.b2nd':
-        metadata = models.Metadata(**data)
-        array = b2_utils.init_b2nd(metadata, urlpath=localpath)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        metadata = models.SChunk(**data)
-        schunk = b2_utils.init_b2frame(metadata, urlpath=localpath)
-        array = None
-    else:
-        metadata = models.SChunk(**data)
-        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
-        array = None
-
-    # Download and update schunk
-    url = f'http://{host}/api/download/{dataset}'
-    iter_chunks = range(schunk.nchunks)
-    if verbose:
-        iter_chunks = tqdm.tqdm(iter_chunks, desc='Downloading', unit='chunk')
-    for nchunk in iter_chunks:
-        params['nchunk'] = nchunk
-        response = httpx.get(url, params=params, timeout=None)
-        response.raise_for_status()
-        chunk = response.read()
-        schunk.update_chunk(nchunk, chunk)
-
-    if 'slice' in params:
-        slice_ = api_utils.parse_slice(params['slice'])
-        if array:
-            if localpath is not None:
-                # We want to save the slice to a file
-                ndarray = array.slice(slice_)  # in memory (compressed)
-                # Remove previous new on-disk array and create a new one
-                ndarray.copy(urlpath=localpath, mode="w", contiguous=True, cparams=schunk.cparams)
-            else:
-                array = array[slice_] if array.ndim > 0 else array[()]
-        else:
-            assert len(slice_) == 1
-            slice_ = slice_[0]
-            if localpath is not None:
-                data = schunk[slice_]
-                # TODO: fix the upstream bug in python-blosc2 that prevents this from working
-                #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
-                blosc2.SChunk(data=data, mode="w", urlpath=localpath,
-                              chunksize=schunk.chunksize,
-                              cparams=schunk.cparams)
-            else:
-                if isinstance(slice_, int):
-                    slice_ = slice(slice_, slice_ + 1)
-                # TODO: make SChunk support integer as slice
-                schunk = schunk[slice_]
-
-    return array, schunk
-
-
 #
 # Pub/Sub helpers
 #

From ef528aad1391a1f89f892fc924aabf775c6d9199 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 16:40:34 +0100
Subject: [PATCH 15/38] Have download function caller pass progress report
 function in

So as to decouple reporting from the function itself.

And define tqdm-based reporter right in command-line client.
---
 caterva2/clients/cli.py       | 10 ++++++++--
 caterva2/clients/cli_utils.py |  7 +++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 5e2f4b9f..f178ee06 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -14,6 +14,7 @@
 # Requirements
 import httpx
 import rich
+import tqdm
 
 # Project
 from caterva2 import api_utils
@@ -49,6 +50,9 @@ def url_with_slice(url, slice):
         return f'{url}?slice={args.slice}'
     return url
 
+def chunk_dl_progress(it):
+    return tqdm.tqdm(it, desc='Downloading', unit='chunk')
+
 @handle_errors
 def cmd_roots(args):
     data = api_utils.get(f'http://{args.host}/api/roots')
@@ -109,7 +113,8 @@ def cmd_info(args):
 def cmd_show(args):
     # Download
     dataset, params = args.dataset
-    array, schunk = cli_utils.download(args.host, dataset, params, verbose=True)
+    array, schunk = cli_utils.download(args.host, dataset, params,
+                                       progress=chunk_dl_progress)
 
     # Display
     if array is None:
@@ -138,7 +143,8 @@ def cmd_download(args):
         localpath = pathlib.Path(f'{localpath}[{slice}]{suffix}')
 
     # Download
-    array, schunk = cli_utils.download(args.host, dataset, params, localpath=localpath, verbose=True)
+    array, schunk = cli_utils.download(args.host, dataset, params, localpath=localpath,
+                                       progress=chunk_dl_progress)
     if suffix not in {'.b2frame', '.b2nd'}:
         with open(localpath, 'wb') as f:
             data = schunk[:]
diff --git a/caterva2/clients/cli_utils.py b/caterva2/clients/cli_utils.py
index 9f13899d..58da4db2 100644
--- a/caterva2/clients/cli_utils.py
+++ b/caterva2/clients/cli_utils.py
@@ -13,7 +13,6 @@
 # Requirements
 import blosc2
 import httpx
-import tqdm
 
 # Project
 from caterva2 import api_utils, b2_utils, models
@@ -23,7 +22,7 @@
 # Download helper
 #
 
-def download(host, dataset, params, localpath=None, verbose=False):
+def download(host, dataset, params, localpath=None, progress=None):
     data = api_utils.get(f'http://{host}/api/info/{dataset}')
 
     # Create array/schunk in memory
@@ -44,8 +43,8 @@ def download(host, dataset, params, localpath=None, verbose=False):
     # Download and update schunk
     url = f'http://{host}/api/download/{dataset}'
     iter_chunks = range(schunk.nchunks)
-    if verbose:
-        iter_chunks = tqdm.tqdm(iter_chunks, desc='Downloading', unit='chunk')
+    if progress is not None:
+        iter_chunks = progress(iter_chunks)
     for nchunk in iter_chunks:
         params['nchunk'] = nchunk
         response = httpx.get(url, params=params, timeout=None)

From df70fa069976f41a20dd86c1826cc5b85e1150ac Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 16:42:40 +0100
Subject: [PATCH 16/38] Move dependency on tqdm to clients extra

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 37baf0ef..3cc92169 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,6 @@ classifiers = [
 ]
 dependencies = [
     "blosc2>=2.4.0",  # TODO: try to remove this dependency, and move to services, if possible
-    "tqdm",  # TODO: ditto, but move into clients
     "httpx",
     "numpy",
     "pydantic>=2",  # TODO: ditto
@@ -55,7 +54,7 @@ services = [
     "watchfiles",
 ]
 clients = [
-    # "tqdm",
+    "tqdm",
     "rich",
     "textual",
 ]

From b2ad877a008ab9b9747b88f1449e0258aa8a5337 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 17:16:10 +0100
Subject: [PATCH 17/38] Fix invocation of download function from file API call

Although this may be completely dropped later on.
---
 caterva2/api.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index 5aa3b9ae..705c2447 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -81,7 +81,9 @@ def download(self, index=None):
             path = self.path.with_suffix('')
             path = pathlib.Path(f'{path}[{slice}]{suffix}')
             params = {'slice': slice_}
-        array, schunk = api_utils.download(self.host, path, localpath=path, params=params)
+        # TODO: besides the circular import, cli_utils depends on blosc2 & pydantic.
+        from caterva2.clients import cli_utils
+        array, schunk = cli_utils.download(self.host, path, localpath=path, params=params)
 
         if suffix not in {'.b2frame', '.b2nd'}:
             with open(path, 'wb') as f:

From 4bd9c7b702519cc2166d4a097caa7a256857a5f9 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Wed, 10 Jan 2024 17:17:10 +0100
Subject: [PATCH 18/38] Notes on possible outcome of moving dependencies to
 services/clients

---
 pyproject.toml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3cc92169..5b05e466 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ classifiers = [
     "Operating System :: Unix",
 ]
 dependencies = [
-    "blosc2>=2.4.0",  # TODO: try to remove this dependency, and move to services, if possible
+    "blosc2>=2.4.0",  # TODO: try to move this dependency to the extras below, if possible
     "httpx",
     "numpy",
     "pydantic>=2",  # TODO: ditto
@@ -49,11 +49,15 @@ services = [
     # "blosc2>=2.4.0",
     "fastapi",
     "fastapi_websocket_pubsub",
+    # "pydantic>=2",  # TODO: ditto
     "safer",
     "uvicorn",
     "watchfiles",
 ]
 clients = [
+    # TODO: try to add these dependencies here, and remove them from caterva2, if possible
+    # "blosc2>=2.4.0",
+    # "pydantic>=2",  # TODO: ditto
     "tqdm",
     "rich",
     "textual",

From 8e1343d6df466c149a2d4c89f5c99f7f0e8390fe Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Fri, 12 Jan 2024 19:19:43 +0100
Subject: [PATCH 19/38] No more dependency of blosc2 (and other) in clients

---
 caterva2/api.py               | 49 ++++++------------
 caterva2/api_utils.py         | 27 +++++++---
 caterva2/clients/cli.py       | 38 ++++----------
 caterva2/clients/cli_utils.py | 71 --------------------------
 caterva2/services/sub.py      | 95 +++++++++++++++++++----------------
 caterva2/tests/test_api.py    | 47 ++++++++++++++---
 6 files changed, 142 insertions(+), 185 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index 705c2447..d95035a5 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -17,13 +17,13 @@
 sub_host_default = 'localhost:8002'
 
 
-def slice_to_string(indexes):
-    if indexes is None:
-        return None
+def slice_to_string(key):
+    if key is None or key == () or key == slice(None):
+        return ''
     slice_parts = []
-    if not isinstance(indexes, tuple):
-        indexes = (indexes,)
-    for index in indexes:
+    if not isinstance(key, tuple):
+        key = (key,)
+    for index in key:
         if isinstance(index, int):
             slice_parts.append(str(index))
         elif isinstance(index, slice):
@@ -31,8 +31,8 @@ def slice_to_string(indexes):
             stop = index.stop or ''
             if index.step not in (1, None):
                 raise IndexError('Only step=1 is supported')
-            step = index.step or ''
-            slice_parts.append(f"{start}:{stop}:{step}")
+            # step = index.step or ''
+            slice_parts.append(f"{start}:{stop}")
     return ", ".join(slice_parts)
 
 
@@ -71,28 +71,11 @@ def __init__(self, name, root, host):
     def __repr__(self):
         return f'<File: {self.path}>'
 
-    def download(self, index=None):
-        path = self.path
-        suffix = self.path.suffix
-
-        slice_ = slice_to_string(index)
-        params = {}
-        if slice_:
-            path = self.path.with_suffix('')
-            path = pathlib.Path(f'{path}[{slice}]{suffix}')
-            params = {'slice': slice_}
-        # TODO: besides the circular import, cli_utils depends on blosc2 & pydantic.
-        from caterva2.clients import cli_utils
-        array, schunk = cli_utils.download(self.host, path, localpath=path, params=params)
-
-        if suffix not in {'.b2frame', '.b2nd'}:
-            with open(path, 'wb') as f:
-                data = schunk[:]
-                f.write(data)
-
-        # TODO: how to support downloading on a browser?
-        raise NotImplementedError("TODO: how to support downloading on a browser?")
-        # return path
+    def download(self, key=None):
+        slice_ = slice_to_string(key)
+        download_path = api_utils.download(
+            self.host, self.path, {'slice_': slice_, 'download': True})
+        return download_path
 
 
 class Dataset(File):
@@ -103,7 +86,7 @@ def __init__(self, name, root, host):
     def __repr__(self):
         return f'<Dataset: {self.path}>'
 
-    def __getitem__(self, indexes):
-        slice_ = slice_to_string(indexes)
-        data = api_utils.fetch_data(self.host, self.path, {'slice_': slice_})
+    def __getitem__(self, key):
+        slice_ = slice_to_string(key)
+        data = api_utils.download(self.host, self.path, {'slice_': slice_})
         return data
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 3ea05ef2..64329e47 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -6,6 +6,7 @@
 # License: GNU Affero General Public License v3.0
 # See LICENSE.txt for details about copyright and rights to use.
 ###############################################################################
+import pathlib
 import pickle
 
 # Requirements
@@ -15,7 +16,7 @@
 
 def parse_slice(string):
     if not string:
-        return ()
+        return None
     obj = []
     for segment in string.split(','):
         if ':' not in segment:
@@ -28,13 +29,27 @@ def parse_slice(string):
     return tuple(obj)
 
 
-def fetch_data(host, path, params):
-    response = httpx.get(f'http://{host}/api/fetch/{path}', params=params)
+def download(host, path, params):
+    response = httpx.get(f'http://{host}/api/download/{path}', params=params)
     response.raise_for_status()
     data = response.content
-    # TODO: decompression is not working yet. HTTPX does this automatically?
-    # data = zlib.decompress(data)
-    return pickle.loads(data)
+    download = params.get('download', False)
+    slice_ = params.get('slice_', None)
+    if not download:
+        # TODO: decompression is not working yet. HTTPX does this automatically?
+        # data = zlib.decompress(data)
+        return pickle.loads(data)
+    else:
+        path = pathlib.Path(path)
+        if slice_:
+            suffix = path.suffix
+            path = path.with_suffix('')
+            path = pathlib.Path(f'{path}[{slice_}]{suffix}')
+        # TODO: save chunk by chunk
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, 'wb') as f:
+            f.write(data)
+        return path
 
 #
 # HTTP client helpers
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index f178ee06..89a25d10 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -111,46 +111,30 @@ def cmd_info(args):
 
 @handle_errors
 def cmd_show(args):
-    # Download
     dataset, params = args.dataset
-    array, schunk = cli_utils.download(args.host, dataset, params,
-                                       progress=chunk_dl_progress)
+    data = api_utils.download(args.host, dataset, params)
+                              # TODO: support tqdm again
+                              # progress=chunk_dl_progress)
 
     # Display
-    if array is None:
-        data = schunk[:]  # byte string
+    if isinstance(data, bytes):
         try:
             print(data.decode())
         except UnicodeDecodeError:
             print('Binary data')
     else:
-        data = array[:] if array.ndim > 0 else array[()]
         print(data)
 
 @handle_errors
 def cmd_download(args):
-    # localpath
     dataset, params = args.dataset
-    output_dir = args.output_dir.resolve()
-    localpath = output_dir / dataset
-    localpath.parent.mkdir(exist_ok=True, parents=True)
-
-    suffix = localpath.suffix
-
-    slice = params.get('slice')
-    if slice:
-        localpath = localpath.with_suffix('')
-        localpath = pathlib.Path(f'{localpath}[{slice}]{suffix}')
-
-    # Download
-    array, schunk = cli_utils.download(args.host, dataset, params, localpath=localpath,
-                                       progress=chunk_dl_progress)
-    if suffix not in {'.b2frame', '.b2nd'}:
-        with open(localpath, 'wb') as f:
-            data = schunk[:]
-            f.write(data)
-
-    print(f'Dataset saved to {localpath}')
+    params['download'] = True
+    path = api_utils.download(args.host, dataset, params)
+                              # TODO: support tqdm again
+                              # progress=chunk_dl_progress)
+
+    print(f'Dataset saved to {path}')
+
 
 if __name__ == '__main__':
     parser = cli_utils.get_parser()
diff --git a/caterva2/clients/cli_utils.py b/caterva2/clients/cli_utils.py
index 58da4db2..9f18b6ec 100644
--- a/caterva2/clients/cli_utils.py
+++ b/caterva2/clients/cli_utils.py
@@ -10,77 +10,6 @@
 import argparse
 import logging
 
-# Requirements
-import blosc2
-import httpx
-
-# Project
-from caterva2 import api_utils, b2_utils, models
-
-
-#
-# Download helper
-#
-
-def download(host, dataset, params, localpath=None, progress=None):
-    data = api_utils.get(f'http://{host}/api/info/{dataset}')
-
-    # Create array/schunk in memory
-    suffix = dataset.suffix
-    if suffix == '.b2nd':
-        metadata = models.Metadata(**data)
-        array = b2_utils.init_b2nd(metadata, urlpath=localpath)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        metadata = models.SChunk(**data)
-        schunk = b2_utils.init_b2frame(metadata, urlpath=localpath)
-        array = None
-    else:
-        metadata = models.SChunk(**data)
-        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
-        array = None
-
-    # Download and update schunk
-    url = f'http://{host}/api/download/{dataset}'
-    iter_chunks = range(schunk.nchunks)
-    if progress is not None:
-        iter_chunks = progress(iter_chunks)
-    for nchunk in iter_chunks:
-        params['nchunk'] = nchunk
-        response = httpx.get(url, params=params, timeout=None)
-        response.raise_for_status()
-        chunk = response.read()
-        schunk.update_chunk(nchunk, chunk)
-
-    if 'slice' in params:
-        slice_ = api_utils.parse_slice(params['slice'])
-        if array:
-            if localpath is not None:
-                # We want to save the slice to a file
-                ndarray = array.slice(slice_)  # in memory (compressed)
-                # Remove previous new on-disk array and create a new one
-                ndarray.copy(urlpath=localpath, mode="w", contiguous=True, cparams=schunk.cparams)
-            else:
-                array = array[slice_] if array.ndim > 0 else array[()]
-        else:
-            assert len(slice_) == 1
-            slice_ = slice_[0]
-            if localpath is not None:
-                data = schunk[slice_]
-                # TODO: fix the upstream bug in python-blosc2 that prevents this from working
-                #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
-                blosc2.SChunk(data=data, mode="w", urlpath=localpath,
-                              chunksize=schunk.chunksize,
-                              cparams=schunk.cparams)
-            else:
-                if isinstance(slice_, int):
-                    slice_ = slice(slice_, slice_ + 1)
-                # TODO: make SChunk support integer as slice
-                schunk = schunk[slice_]
-
-    return array, schunk
-
-
 #
 # Command line helpers
 #
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 6fdea281..d49c28e3 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -249,22 +249,10 @@ async def get_info(path: str):
     return srv_utils.read_metadata(abspath)
 
 
-@app.get('/api/download/{path:path}')
-async def get_download(path: str, nchunk: int, slice_: str = None):
-    abspath = lookup_path(path)
-
-    chunk = await partial_download(abspath, nchunk, path, slice_)
-    # Stream response
-    downloader = b2_utils.iterchunk(chunk)
-    return responses.StreamingResponse(downloader)
-
-
-async def partial_download(abspath, nchunk, path, slice_):
+async def partial_download(abspath, path, slice_):
     # Build the list of chunks we need to download from the publisher
     array, schunk = b2_utils.open_b2(abspath)
-    if slice_ is None:
-        nchunks = [nchunk]
-    else:
+    if slice_:
         slice_obj = api_utils.parse_slice(slice_)
         if not array:
             if isinstance(slice_obj[0], slice):
@@ -278,54 +266,77 @@ async def partial_download(abspath, nchunk, path, slice_):
             nchunks = blosc2.get_slice_nchunks(schunk, (start, stop))
         else:
             nchunks = blosc2.get_slice_nchunks(array, slice_obj)
+    else:
+        nchunks = range(schunk.nchunks)
+
     # Fetch the chunks
     lock = locks.setdefault(path, asyncio.Lock())
     async with lock:
         for n in nchunks:
             if not b2_utils.chunk_is_available(schunk, n):
                 await download_chunk(path, schunk, n)
-    chunk = schunk.get_chunk(nchunk)
-    return chunk
 
 
-@app.get('/api/fetch/{path:path}')
-async def fetch_data(path: str, slice_: str = None):
+@app.get('/api/download/{path:path}')
+async def download_data(path: str, slice_: str = None, download: bool = False):
     abspath = lookup_path(path)
-    metadata = srv_utils.read_metadata(abspath)
-
-    # Create array/schunk in memory
     suffix = abspath.suffix
-    if suffix == '.b2nd':
-        array = b2_utils.init_b2nd(metadata, urlpath=None)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
-        array = None
-    else:
-        schunk = b2_utils.init_b2frame(metadata, urlpath=None)
-        array = None
 
-    # Download and update schunk in-memory
-    for nchunk in range(schunk.nchunks):
-        chunk = await partial_download(abspath, nchunk, path, slice_)
-        schunk.update_chunk(nchunk, chunk)
+    # Download and update the schunk in cache
+    await partial_download(abspath, path, slice_)
+
+    download_path = None
+    if download:
+        # Let's store the data in the downloads directory
+        download_path = cache / pathlib.Path('downloads') / pathlib.Path(path)
+        if slice_:
+            download_path = download_path.with_suffix('')
+            download_path = pathlib.Path(f'{download_path}[{slice_}]{suffix}')
+        else:
+            # By here, we already have the complete schunk in cache
+            download_path = abspath
+        download_path.parent.mkdir(parents=True, exist_ok=True)
 
+    # Interesting data has been downloaded, let's use it
+    array, schunk = b2_utils.open_b2(abspath)
+    slice_ = api_utils.parse_slice(slice_)
     if slice_:
-        # Additional massage for slices
-        slice_ = api_utils.parse_slice(slice_)
         if array:
-            array = array[slice_] if array.ndim > 0 else array[()]
+            if download_path:
+                # We want to save the slice to a file
+                array.slice(slice_, urlpath=download_path, mode="w", contiguous=True,
+                            cparams=schunk.cparams)
+            else:
+                array = array[slice_] if array.ndim > 0 else array[()]
         else:
             assert len(slice_) == 1
             slice_ = slice_[0]
             if isinstance(slice_, int):
+                # TODO: make SChunk support integer as slice
                 slice_ = slice(slice_, slice_ + 1)
-            # TODO: make SChunk support integer as slice
-            schunk = schunk[slice_]
-
+            if download_path:
+                # TODO: fix the upstream bug in python-blosc2 that prevents this from working
+                #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
+                blosc2.SChunk(data=schunk[slice_], mode="w", urlpath=download_path,
+                              chunksize=schunk.chunksize,
+                              cparams=schunk.cparams)
+            else:
+                schunk = schunk[slice_]
+
+    if download:
+        if suffix == '.b2':
+            # Decompress before delivering
+            # TODO: support context manager in blosc2.open()
+            schunk = blosc2.open(download_path, 'wb')
+            data = schunk[:]
+            downloader = b2_utils.iterchunk(data)
+            return responses.StreamingResponse(downloader)
+        return responses.FileResponse(download_path)
+
+    # Pickle and stream response of the NumPy array
     data = array if array is not None else schunk
-
-    # Pickle and stream response
+    if not slice_:
+        data = data[:]
     data = pickle.dumps(data, protocol=-1)
     # TODO: compress data is not working. HTTPX does this automatically?
     # data = zlib.compress(data)
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index 8f0601fb..bb8f5768 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -99,7 +99,7 @@ def test_dataset_nd(name, services, examples_dir):
         assert str(e_info.value) == 'Only step=1 is supported'
 
 @pytest.mark.parametrize("name", ['ds-1d.b2nd', 'dir1/ds-2d.b2nd'])
-def _test_download_b2nd(name, services, examples_dir):
+def test_download_b2nd(name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot[name]
     dsd = ds.download()
@@ -110,9 +110,28 @@ def _test_download_b2nd(name, services, examples_dir):
     a = blosc2.open(example)
     b = blosc2.open(dsd)
     np.testing.assert_array_equal(a[:], b[:])
-    os.unlink(dsd)
+    # os.unlink(dsd)
 
-def _test_download_b2frame(services, examples_dir):
+# TODO: test slices that exceed the array dimensions
+@pytest.mark.parametrize("slice_", [slice(1,10), slice(4,8), slice(None), 1])
+@pytest.mark.parametrize("name", ['ds-1d.b2nd', 'dir1/ds-2d.b2nd'])
+def test_download_b2nd_slice(slice_, name, services, examples_dir):
+    myroot = cat2.Root(published_root, host=cat2.sub_host_default)
+    ds = myroot[name]
+    dsd = ds.download(slice_)
+    #assert dsd == ds.path
+
+    # Data contents
+    example = examples_dir / name
+    a = blosc2.open(example)
+    b = blosc2.open(dsd)
+    if isinstance(slice_, int):
+        np.testing.assert_array_equal(a[slice_], b[()])
+    else:
+        np.testing.assert_array_equal(a[slice_], b[:])
+    # os.unlink(dsd)
+
+def test_download_b2frame(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
     dsd = ds.download()
@@ -123,9 +142,25 @@ def _test_download_b2frame(services, examples_dir):
     a = blosc2.open(example)
     b = blosc2.open(dsd)
     assert a[:] == b[:]
-    os.unlink(dsd)
+    # os.unlink(dsd)
+
+# TODO: add an integer slice test when it is supported in blosc2
+@pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
+def test_download_b2frame_slice(slice_, services, examples_dir):
+    myroot = cat2.Root(published_root, host=cat2.sub_host_default)
+    ds = myroot['ds-hello.b2frame']
+    dsd = ds.download(slice_)
+    # TODO: fix the test below
+    # assert dsd == ds.path
+
+    # Data contents
+    example = examples_dir / ds.name
+    a = blosc2.open(example)
+    b = blosc2.open(dsd)
+    assert a[slice_] == b[:]
+    # os.unlink(dsd)
 
-def _test_download_regular_file(services, examples_dir):
+def test_download_regular_file(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['README.md']
     dsd = ds.download()
@@ -136,4 +171,4 @@ def _test_download_regular_file(services, examples_dir):
     a = open(example).read()
     b = open(dsd).read()
     assert a[:] == b[:]
-    os.unlink(dsd)
+    # os.unlink(dsd)

From 5db802b3e9d9f58bb12101c363ef1912366f3e0e Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Sat, 13 Jan 2024 08:52:34 +0100
Subject: [PATCH 20/38] Mount sub cache files in /files

---
 caterva2/services/sub.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index d49c28e3..4dff2405 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -16,6 +16,7 @@
 # Requirements
 import blosc2
 from fastapi import FastAPI, responses
+from fastapi.staticfiles import StaticFiles
 import httpx
 import uvicorn
 
@@ -359,6 +360,7 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
     statedir = args.statedir.resolve()
     cache = statedir / 'cache'
     cache.mkdir(exist_ok=True, parents=True)
+    app.mount("/files", StaticFiles(directory=cache), name="files")
 
     # Init database
     model = models.Subscriber(roots={}, etags={})

From 1cb6517f2c9770e240d58c200caec3edcb219c1d Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Sun, 14 Jan 2024 07:19:21 +0100
Subject: [PATCH 21/38] Introduced a 2-step download. 1) get url, 2) download
 from url

---
 caterva2/api.py            | 32 ++++---------
 caterva2/api_utils.py      | 61 +++++++++++++++++-------
 caterva2/clients/cli.py    |  4 +-
 caterva2/services/sub.py   | 12 +++--
 caterva2/tests/test_api.py | 97 +++++++++++++++++++++++++++++---------
 5 files changed, 138 insertions(+), 68 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index d95035a5..83183c69 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -17,25 +17,6 @@
 sub_host_default = 'localhost:8002'
 
 
-def slice_to_string(key):
-    if key is None or key == () or key == slice(None):
-        return ''
-    slice_parts = []
-    if not isinstance(key, tuple):
-        key = (key,)
-    for index in key:
-        if isinstance(index, int):
-            slice_parts.append(str(index))
-        elif isinstance(index, slice):
-            start = index.start or ''
-            stop = index.stop or ''
-            if index.step not in (1, None):
-                raise IndexError('Only step=1 is supported')
-            # step = index.step or ''
-            slice_parts.append(f"{start}:{stop}")
-    return ", ".join(slice_parts)
-
-
 def get_roots(host=sub_host_default):
     return api_utils.get(f'http://{host}/api/roots')
 
@@ -71,12 +52,15 @@ def __init__(self, name, root, host):
     def __repr__(self):
         return f'<File: {self.path}>'
 
-    def download(self, key=None):
-        slice_ = slice_to_string(key)
-        download_path = api_utils.download(
+    def get_download_url(self, key=None):
+        slice_ = api_utils.slice_to_string(key)
+        download_path = api_utils.get_download_url(
             self.host, self.path, {'slice_': slice_, 'download': True})
         return download_path
 
+    def download(self, key=None):
+        url = self.get_download_url(key)
+        return api_utils.download_url(url, self.path)
 
 class Dataset(File):
     def __init__(self, name, root, host):
@@ -87,6 +71,6 @@ def __repr__(self):
         return f'<Dataset: {self.path}>'
 
     def __getitem__(self, key):
-        slice_ = slice_to_string(key)
-        data = api_utils.download(self.host, self.path, {'slice_': slice_})
+        slice_ = api_utils.slice_to_string(key)
+        data = api_utils.get_download_url(self.host, self.path, {'slice_': slice_})
         return data
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 64329e47..12591202 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -13,6 +13,24 @@
 import httpx
 
 
+def slice_to_string(key):
+    if key is None or key == () or key == slice(None):
+        return ''
+    slice_parts = []
+    if not isinstance(key, tuple):
+        key = (key,)
+    for index in key:
+        if isinstance(index, int):
+            slice_parts.append(str(index))
+        elif isinstance(index, slice):
+            start = index.start or ''
+            stop = index.stop or ''
+            if index.step not in (1, None):
+                raise IndexError('Only step=1 is supported')
+            # step = index.step or ''
+            slice_parts.append(f"{start}:{stop}")
+    return ", ".join(slice_parts)
+
 
 def parse_slice(string):
     if not string:
@@ -29,27 +47,38 @@ def parse_slice(string):
     return tuple(obj)
 
 
-def download(host, path, params):
+def get_download_url(host, path, params):
     response = httpx.get(f'http://{host}/api/download/{path}', params=params)
     response.raise_for_status()
-    data = response.content
-    download = params.get('download', False)
-    slice_ = params.get('slice_', None)
-    if not download:
+
+    download_ = params.get('download', False)
+    if not download_:
+        data = response.content
         # TODO: decompression is not working yet. HTTPX does this automatically?
         # data = zlib.decompress(data)
         return pickle.loads(data)
-    else:
-        path = pathlib.Path(path)
-        if slice_:
-            suffix = path.suffix
-            path = path.with_suffix('')
-            path = pathlib.Path(f'{path}[{slice_}]{suffix}')
-        # TODO: save chunk by chunk
-        path.parent.mkdir(parents=True, exist_ok=True)
-        with open(path, 'wb') as f:
-            f.write(data)
-        return path
+
+    path = pathlib.Path(path)
+    suffix = path.suffix
+    slice_ = params.get('slice_', None)
+    if slice_:
+        path = 'downloads' / path.with_suffix('')
+        path = pathlib.Path(f'{path}[{slice_}]{suffix}')
+    elif suffix not in ('.b2frame', '.b2nd'):
+        # Other suffixes are to be found decompressed in the downloads folder
+        path = 'downloads' / path
+
+    return f'http://{host}/files/{path}'
+
+def download_url(url, path):
+    # Store the file locally
+    with httpx.stream("GET", url) as r:
+        r.raise_for_status()
+        with open(path, "wb") as f:
+            for data in r.iter_bytes():
+                f.write(data)
+    return path
+
 
 #
 # HTTP client helpers
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 89a25d10..5e95cb7d 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -112,7 +112,7 @@ def cmd_info(args):
 @handle_errors
 def cmd_show(args):
     dataset, params = args.dataset
-    data = api_utils.download(args.host, dataset, params)
+    data = api_utils.get_download_url(args.host, dataset, params)
                               # TODO: support tqdm again
                               # progress=chunk_dl_progress)
 
@@ -129,7 +129,7 @@ def cmd_show(args):
 def cmd_download(args):
     dataset, params = args.dataset
     params['download'] = True
-    path = api_utils.download(args.host, dataset, params)
+    path = api_utils.get_download_url(args.host, dataset, params)
                               # TODO: support tqdm again
                               # progress=chunk_dl_progress)
 
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 4dff2405..aedc4271 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -289,8 +289,8 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
     download_path = None
     if download:
         # Let's store the data in the downloads directory
-        download_path = cache / pathlib.Path('downloads') / pathlib.Path(path)
         if slice_:
+            download_path = cache / pathlib.Path('downloads') / pathlib.Path(path)
             download_path = download_path.with_suffix('')
             download_path = pathlib.Path(f'{download_path}[{slice_}]{suffix}')
         else:
@@ -330,9 +330,13 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
             # TODO: support context manager in blosc2.open()
             schunk = blosc2.open(download_path, 'wb')
             data = schunk[:]
-            downloader = b2_utils.iterchunk(data)
-            return responses.StreamingResponse(downloader)
-        return responses.FileResponse(download_path)
+            # Remove the .b2 extension, and save the data in the downloads directory
+            download_path = cache / pathlib.Path('downloads') / pathlib.Path(path)
+            with open(download_path, 'wb') as f:
+                f.write(data)
+        # We don't need to return anything, the file is already in the static files/
+        # directory and the client can download it from there.
+        return
 
     # Pickle and stream response of the NumPy array
     data = array if array is not None else schunk
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index bb8f5768..40badc6f 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -6,7 +6,9 @@
 # License: GNU Affero General Public License v3.0
 # See LICENSE.txt for details about copyright and rights to use.
 ###############################################################################
-import os
+import pathlib
+
+import httpx
 
 import blosc2
 import pytest
@@ -15,6 +17,18 @@
 import numpy as np
 
 from .services import TEST_PUBLISHED_ROOT as published_root
+from .. import api_utils
+
+
+def my_urlpath(ds, slice_):
+    path = pathlib.Path(ds.path)
+    suffix = path.suffix
+    slice2 = api_utils.slice_to_string(slice_)
+    if slice2:
+        path = 'downloads' / path.with_suffix('')
+        path = pathlib.Path(f'{path}[{slice2}]{suffix}')
+    path = f"http://{cat2.sub_host_default}/files/{path}"
+    return path
 
 
 def test_roots(services):
@@ -102,15 +116,22 @@ def test_dataset_nd(name, services, examples_dir):
 def test_download_b2nd(name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot[name]
-    dsd = ds.download()
-    assert dsd == ds.path
+    path = ds.download()
+    assert path == ds.path
 
     # Data contents
     example = examples_dir / name
     a = blosc2.open(example)
-    b = blosc2.open(dsd)
+    b = blosc2.open(path)
+    np.testing.assert_array_equal(a[:], b[:])
+
+    # Using 2-step download
+    urlpath = ds.get_download_url()
+    assert urlpath == my_urlpath(ds, None)
+    data = httpx.get(urlpath)
+    assert data.status_code == 200
+    b = blosc2.ndarray_from_cframe(data.content)
     np.testing.assert_array_equal(a[:], b[:])
-    # os.unlink(dsd)
 
 # TODO: test slices that exceed the array dimensions
 @pytest.mark.parametrize("slice_", [slice(1,10), slice(4,8), slice(None), 1])
@@ -118,57 +139,89 @@ def test_download_b2nd(name, services, examples_dir):
 def test_download_b2nd_slice(slice_, name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot[name]
-    dsd = ds.download(slice_)
-    #assert dsd == ds.path
+    path = ds.download(slice_)
+    assert path == ds.path
 
     # Data contents
     example = examples_dir / name
     a = blosc2.open(example)
-    b = blosc2.open(dsd)
+    b = blosc2.open(path)
+    if isinstance(slice_, int):
+        np.testing.assert_array_equal(a[slice_], b[()])
+    else:
+        np.testing.assert_array_equal(a[slice_], b[:])
+
+    # Using 2-step download
+    urlpath = ds.get_download_url(slice_)
+    path = my_urlpath(ds, slice_)
+    assert urlpath == path
+    data = httpx.get(urlpath)
+    assert data.status_code == 200
+    b = blosc2.ndarray_from_cframe(data.content)
     if isinstance(slice_, int):
         np.testing.assert_array_equal(a[slice_], b[()])
     else:
         np.testing.assert_array_equal(a[slice_], b[:])
-    # os.unlink(dsd)
 
 def test_download_b2frame(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
-    dsd = ds.download()
-    assert dsd == ds.path
+    path = ds.download()
+    assert path == ds.path
 
     # Data contents
     example = examples_dir / ds.name
     a = blosc2.open(example)
-    b = blosc2.open(dsd)
+    b = blosc2.open(path)
+    assert a[:] == b[:]
+
+    # Using 2-step download
+    urlpath = ds.get_download_url()
+    assert urlpath == f"http://{cat2.sub_host_default}/files/{ds.path}"
+    data = httpx.get(urlpath)
+    assert data.status_code == 200
+    b = blosc2.schunk_from_cframe(data.content)
     assert a[:] == b[:]
-    # os.unlink(dsd)
 
 # TODO: add an integer slice test when it is supported in blosc2
 @pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
 def test_download_b2frame_slice(slice_, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
-    dsd = ds.download(slice_)
-    # TODO: fix the test below
-    # assert dsd == ds.path
+    path = ds.download(slice_)
+    assert path == ds.path
 
     # Data contents
     example = examples_dir / ds.name
     a = blosc2.open(example)
-    b = blosc2.open(dsd)
+    b = blosc2.open(path)
+    assert a[slice_] == b[:]
+
+    # Using 2-step download
+    urlpath = ds.get_download_url(slice_)
+    path = my_urlpath(ds, slice_)
+    assert urlpath == path
+    data = httpx.get(urlpath)
+    assert data.status_code == 200
+    b = blosc2.schunk_from_cframe(data.content)
     assert a[slice_] == b[:]
-    # os.unlink(dsd)
 
 def test_download_regular_file(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['README.md']
-    dsd = ds.download()
-    assert dsd == ds.path
+    path = ds.download()
+    assert path == ds.path
 
     # Data contents
     example = examples_dir / ds.name
     a = open(example).read()
-    b = open(dsd).read()
+    b = open(path).read()
+    assert a[:] == b[:]
+
+    # Using 2-step download
+    urlpath = ds.get_download_url()
+    assert urlpath == f"http://{cat2.sub_host_default}/files/downloads/{ds.path}"
+    data = httpx.get(urlpath)
+    assert data.status_code == 200
+    b = data.content.decode()
     assert a[:] == b[:]
-    # os.unlink(dsd)

From 2ad5bfe9b980e0c74c7dc02735570017f433cbaf Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Sun, 14 Jan 2024 07:32:32 +0100
Subject: [PATCH 22/38] Make sure intermediate dirs are created

---
 caterva2/api_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 12591202..c31cf7b3 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -74,6 +74,7 @@ def download_url(url, path):
     # Store the file locally
     with httpx.stream("GET", url) as r:
         r.raise_for_status()
+        path.parent.mkdir(parents=True, exist_ok=True)
         with open(path, "wb") as f:
             for data in r.iter_bytes():
                 f.write(data)

From 22d0b5985ce25c6ec7ab82556069933135ee706c Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Sun, 14 Jan 2024 07:45:44 +0100
Subject: [PATCH 23/38] b2_utils.py merged into srv_utils.py

---
 caterva2/b2_utils.py           | 105 ---------------------------------
 caterva2/services/pub.py       |   6 +-
 caterva2/services/srv_utils.py |  93 ++++++++++++++++++++++++++++-
 caterva2/services/sub.py       |  16 ++---
 4 files changed, 103 insertions(+), 117 deletions(-)
 delete mode 100644 caterva2/b2_utils.py

diff --git a/caterva2/b2_utils.py b/caterva2/b2_utils.py
deleted file mode 100644
index d4126483..00000000
--- a/caterva2/b2_utils.py
+++ /dev/null
@@ -1,105 +0,0 @@
-###############################################################################
-# Caterva2 - On demand access to remote Blosc2 data repositories
-#
-# Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
-# https://www.blosc.org
-# License: GNU Affero General Public License v3.0
-# See LICENSE.txt for details about copyright and rights to use.
-###############################################################################
-
-import pathlib
-
-# Requirements
-import blosc2
-import numpy as np
-
-
-#
-# Blosc2 related functions
-#
-
-def compress(data, dst=None):
-    assert isinstance(data, (bytes, pathlib.Path))
-
-    if dst is not None:
-        dst.parent.mkdir(exist_ok=True, parents=True)
-        if dst.exists():
-            dst.unlink()
-
-    # Create schunk
-    cparams = {}
-    dparams = {}
-    storage = {
-        'urlpath': dst,
-        'cparams': cparams,
-        'dparams': dparams,
-    }
-    schunk = blosc2.SChunk(**storage)
-
-    # Append data
-    if isinstance(data, pathlib.Path):
-        with open(data, 'rb') as f:
-            data = f.read()
-
-    schunk.append_data(data)
-
-    return schunk
-
-
-def init_b2nd(metadata, urlpath=None):
-    if urlpath is not None:
-        urlpath.parent.mkdir(exist_ok=True, parents=True)
-        if urlpath.exists():
-            urlpath.unlink()
-
-    dtype = getattr(np, metadata.dtype)
-    return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
-                         chunks=metadata.chunks, blocks=metadata.blocks)
-
-
-def init_b2frame(metadata, urlpath=None):
-    if urlpath is not None:
-        urlpath.parent.mkdir(exist_ok=True, parents=True)
-        if urlpath.exists():
-            urlpath.unlink()
-
-    cparams = metadata.cparams.model_dump()
-    sc = blosc2.SChunk(
-        metadata.chunksize,
-        contiguous=metadata.contiguous,
-        cparams=cparams,
-        dparams={},
-        urlpath=urlpath,
-    )
-    sc.fill_special(metadata.nbytes / metadata.typesize,
-                    special_value=blosc2.SpecialValue.UNINIT)
-    return sc
-
-
-def open_b2(abspath):
-    suffix = abspath.suffix
-    if suffix == '.b2nd':
-        array = blosc2.open(abspath)
-        schunk = array.schunk
-    elif suffix == '.b2frame':
-        array = None
-        schunk = blosc2.open(abspath)
-    elif suffix == '.b2':
-        array = None
-        schunk = blosc2.open(abspath)
-    else:
-        raise NotImplementedError()
-
-    return array, schunk
-
-
-def chunk_is_available(schunk, nchunk):
-    # Blosc2 flags are at offset 31
-    # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
-    flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
-    return flag != blosc2.SpecialValue.UNINIT.value
-
-
-def iterchunk(chunk):
-    # TODO Yield block by block
-    yield chunk
diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 99dcbef1..5bd5b7a2 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -20,7 +20,7 @@
 from watchfiles import awatch
 
 # Project
-from caterva2 import utils, api_utils, b2_utils, models
+from caterva2 import utils, api_utils, models
 from caterva2.services import srv_utils
 
 
@@ -57,7 +57,7 @@ async def worker(queue):
                 else:
                     # Compress regular files in publisher's cache
                     b2path = cache / f'{relpath}.b2'
-                    b2_utils.compress(abspath, b2path)
+                    srv_utils.compress(abspath, b2path)
                     metadata = srv_utils.read_metadata(b2path)
 
                 # Publish
@@ -179,7 +179,7 @@ async def get_download(path: str, nchunk: int = -1):
         schunk = blosc2.open(b2path)
 
     chunk = schunk.get_chunk(nchunk)
-    downloader = b2_utils.iterchunk(chunk)
+    downloader = srv_utils.iterchunk(chunk)
 
     return responses.StreamingResponse(downloader)
 
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index fe42344e..017d417e 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -16,9 +16,10 @@
 import blosc2
 import fastapi
 import fastapi_websocket_pubsub
+import numpy as np
 
 # Project
-from caterva2 import b2_utils, models
+from caterva2 import models
 
 
 def get_model_from_obj(obj, model_class, **kwargs):
@@ -111,6 +112,96 @@ def get_abspath(root, path):
 
     return abspath
 
+#
+# Blosc2 related helpers
+#
+
+def compress(data, dst=None):
+    assert isinstance(data, (bytes, pathlib.Path))
+
+    if dst is not None:
+        dst.parent.mkdir(exist_ok=True, parents=True)
+        if dst.exists():
+            dst.unlink()
+
+    # Create schunk
+    cparams = {}
+    dparams = {}
+    storage = {
+        'urlpath': dst,
+        'cparams': cparams,
+        'dparams': dparams,
+    }
+    schunk = blosc2.SChunk(**storage)
+
+    # Append data
+    if isinstance(data, pathlib.Path):
+        with open(data, 'rb') as f:
+            data = f.read()
+
+    schunk.append_data(data)
+
+    return schunk
+
+
+def init_b2nd(metadata, urlpath=None):
+    if urlpath is not None:
+        urlpath.parent.mkdir(exist_ok=True, parents=True)
+        if urlpath.exists():
+            urlpath.unlink()
+
+    dtype = getattr(np, metadata.dtype)
+    return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
+                         chunks=metadata.chunks, blocks=metadata.blocks)
+
+
+def init_b2frame(metadata, urlpath=None):
+    if urlpath is not None:
+        urlpath.parent.mkdir(exist_ok=True, parents=True)
+        if urlpath.exists():
+            urlpath.unlink()
+
+    cparams = metadata.cparams.model_dump()
+    sc = blosc2.SChunk(
+        metadata.chunksize,
+        contiguous=metadata.contiguous,
+        cparams=cparams,
+        dparams={},
+        urlpath=urlpath,
+    )
+    sc.fill_special(metadata.nbytes / metadata.typesize,
+                    special_value=blosc2.SpecialValue.UNINIT)
+    return sc
+
+
+def open_b2(abspath):
+    suffix = abspath.suffix
+    if suffix == '.b2nd':
+        array = blosc2.open(abspath)
+        schunk = array.schunk
+    elif suffix == '.b2frame':
+        array = None
+        schunk = blosc2.open(abspath)
+    elif suffix == '.b2':
+        array = None
+        schunk = blosc2.open(abspath)
+    else:
+        raise NotImplementedError()
+
+    return array, schunk
+
+
+def chunk_is_available(schunk, nchunk):
+    # Blosc2 flags are at offset 31
+    # (see https://github.com/Blosc/c-blosc2/blob/main/README_CHUNK_FORMAT.rst)
+    flag = (schunk.get_lazychunk(nchunk)[31] & 0b01110000) >> 4
+    return flag != blosc2.SpecialValue.UNINIT.value
+
+
+def iterchunk(chunk):
+    # TODO Yield block by block
+    yield chunk
+
 
 #
 # Facility to persist program state
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index aedc4271..f952a26d 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -21,7 +21,7 @@
 import uvicorn
 
 # Project
-from caterva2 import utils, api_utils, b2_utils, models
+from caterva2 import utils, api_utils, models
 from caterva2.services import srv_utils
 
 
@@ -64,14 +64,14 @@ def init_b2(abspath, metadata):
     suffix = abspath.suffix
     if suffix == '.b2nd':
         metadata = models.Metadata(**metadata)
-        b2_utils.init_b2nd(metadata, abspath)
+        srv_utils.init_b2nd(metadata, abspath)
     elif suffix == '.b2frame':
         metadata = models.SChunk(**metadata)
-        b2_utils.init_b2frame(metadata, abspath)
+        srv_utils.init_b2frame(metadata, abspath)
     else:
         abspath = pathlib.Path(f'{abspath}.b2')
         metadata = models.SChunk(**metadata)
-        b2_utils.init_b2frame(metadata, abspath)
+        srv_utils.init_b2frame(metadata, abspath)
 
 
 async def updated_dataset(data, topic):
@@ -252,7 +252,7 @@ async def get_info(path: str):
 
 async def partial_download(abspath, path, slice_):
     # Build the list of chunks we need to download from the publisher
-    array, schunk = b2_utils.open_b2(abspath)
+    array, schunk = srv_utils.open_b2(abspath)
     if slice_:
         slice_obj = api_utils.parse_slice(slice_)
         if not array:
@@ -274,7 +274,7 @@ async def partial_download(abspath, path, slice_):
     lock = locks.setdefault(path, asyncio.Lock())
     async with lock:
         for n in nchunks:
-            if not b2_utils.chunk_is_available(schunk, n):
+            if not srv_utils.chunk_is_available(schunk, n):
                 await download_chunk(path, schunk, n)
 
 
@@ -299,7 +299,7 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
         download_path.parent.mkdir(parents=True, exist_ok=True)
 
     # Interesting data has been downloaded, let's use it
-    array, schunk = b2_utils.open_b2(abspath)
+    array, schunk = srv_utils.open_b2(abspath)
     slice_ = api_utils.parse_slice(slice_)
     if slice_:
         if array:
@@ -345,7 +345,7 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
     data = pickle.dumps(data, protocol=-1)
     # TODO: compress data is not working. HTTPX does this automatically?
     # data = zlib.compress(data)
-    downloader = b2_utils.iterchunk(data)
+    downloader = srv_utils.iterchunk(data)
     return responses.StreamingResponse(downloader)
 
 #

From 31576a7b9f811fbaadfc7ed7ac76617c3268566a Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Sun, 14 Jan 2024 10:11:48 +0100
Subject: [PATCH 24/38] Add docstrings to API

---
 caterva2/api.py            | 138 ++++++++++++++++++++++++++++++++++++-
 caterva2/api_utils.py      |  11 ++-
 caterva2/tests/test_api.py |  16 ++++-
 3 files changed, 160 insertions(+), 5 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index 83183c69..e7d480c7 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -6,6 +6,10 @@
 # License: GNU Affero General Public License v3.0
 # See LICENSE.txt for details about copyright and rights to use.
 ###############################################################################
+"""
+This module provides a Python API to Caterva2.
+"""
+
 import pathlib
 
 from caterva2 import api_utils
@@ -18,10 +22,28 @@
 
 
 def get_roots(host=sub_host_default):
+    """
+    Get the list of available roots.
+
+    Parameters
+    ----------
+
+    host : str
+        The host to query.
+
+    Returns
+    -------
+    dict
+        The list of available roots.
+
+    """
     return api_utils.get(f'http://{host}/api/roots')
 
 
 class Root:
+    """
+    A root is a remote repository that can be subscribed to.
+    """
     def __init__(self, name, host=sub_host_default):
         self.name = name
         self.host = host
@@ -36,6 +58,9 @@ def __repr__(self):
         return f'<Root: {self.name}>'
 
     def __getitem__(self, node):
+        """
+        Get a file or dataset from the root.
+        """
         if node.endswith((".b2nd", ".b2frame")):
             return Dataset(node, root=self.name, host=self.host)
         else:
@@ -43,6 +68,28 @@ def __getitem__(self, node):
 
 
 class File:
+    """
+    A file is either a Blosc2 dataset or a regular file.
+
+    Parameters
+    ----------
+    name : str
+        The name of the file.
+    root : str
+        The name of the root.
+    host : str
+        The host to query.
+
+    Examples
+    --------
+    >>> file = root['README.md']
+    >>> file.name
+    'README.md'
+    >>> file.host
+    'localhost:8002'
+    >>> file.path
+    PosixPath('foo/README.md')
+    """
     def __init__(self, name, root, host):
         self.root = root
         self.name = name
@@ -53,16 +100,82 @@ def __repr__(self):
         return f'<File: {self.path}>'
 
     def get_download_url(self, key=None):
+        """
+        Get the download URL for a slice of the file.
+
+        Parameters
+        ----------
+        key : int or slice
+            The slice to get.
+
+        Returns
+        -------
+        str
+            The download URL.
+
+        Examples
+        --------
+        >>> file = root['ds-1d.b2nd']
+        >>> file.get_download_url()
+        'http://localhost:8002/files/foo/ds-1d.b2nd'
+        >>> file.get_download_url(1)
+        'http://localhost:8002/files/downloads/foo/ds-1d[1].b2nd'
+        >>> file.get_download_url(slice(0, 10))
+        'http://localhost:8002/files/downloads/foo/ds-1d[:10].b2nd'
+        """
         slice_ = api_utils.slice_to_string(key)
         download_path = api_utils.get_download_url(
             self.host, self.path, {'slice_': slice_, 'download': True})
         return download_path
 
     def download(self, key=None):
+        """
+        Download a slice of the file.
+
+        Parameters
+        ----------
+        key : int or slice
+            The slice to get.
+
+        Returns
+        -------
+        PosixPath
+            The path to the downloaded file.
+
+        Examples
+        --------
+        >>> file = root['ds-1d.b2nd']
+        >>> file.download()
+        PosixPath('foo/ds-1d.b2nd')
+        >>> file.download(1)
+        PosixPath('foo/ds-1d[1].b2nd')
+        >>> file.download(slice(0, 10))
+        PosixPath('foo/ds-1d[:10].b2nd')
+        """
         url = self.get_download_url(key)
-        return api_utils.download_url(url, self.path)
+        return api_utils.download_url(url, self.path, slice_=key)
 
 class Dataset(File):
+    """
+    A dataset is a Blosc2 container in a file.
+
+    Parameters
+    ----------
+    name : str
+        The name of the dataset.
+    root : str
+        The name of the root.
+    host : str
+        The host to query.
+
+    Examples
+    --------
+    >>> ds = root['ds-1d.b2nd']
+    >>> ds.name
+    'ds-1d.b2nd'
+    >>> ds[1:10]
+    array([1, 2, 3, 4, 5, 6, 7, 8, 9])
+    """
     def __init__(self, name, root, host):
         super().__init__(name, root, host)
         self.json = api_utils.get(f'http://{host}/api/info/{self.path}')
@@ -71,6 +184,29 @@ def __repr__(self):
         return f'<Dataset: {self.path}>'
 
     def __getitem__(self, key):
+        """
+        Get a slice of the dataset.
+
+        Parameters
+        ----------
+        key : int or slice
+            The slice to get.
+
+        Returns
+        -------
+        numpy.ndarray
+            The slice.
+
+        Examples
+        --------
+        >>> ds = root['ds-1d.b2nd']
+        >>> ds[1]
+        array(1)
+        >>> ds[:1]
+        array([0])
+        >>> ds[0:10]
+        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+        """
         slice_ = api_utils.slice_to_string(key)
         data = api_utils.get_download_url(self.host, self.path, {'slice_': slice_})
         return data
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index c31cf7b3..e685212d 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -70,8 +70,15 @@ def get_download_url(host, path, params):
 
     return f'http://{host}/files/{path}'
 
-def download_url(url, path):
-    # Store the file locally
+def download_url(url, path, slice_=None):
+    # Build the local filepath
+    path = pathlib.Path(path)
+    suffix = path.suffix
+    slice_ = slice_to_string(slice_)
+    if slice_:
+        path = path.with_suffix('')
+        path = pathlib.Path(f'{path}[{slice_}]{suffix}')
+
     with httpx.stream("GET", url) as r:
         r.raise_for_status()
         path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index 40badc6f..3c4717a6 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -20,6 +20,15 @@
 from .. import api_utils
 
 
+def my_path(dspath, slice_):
+    slice_ = api_utils.slice_to_string(slice_)
+    if slice_:
+        suffix = dspath.suffix
+        dspath = dspath.with_suffix('')
+        dspath = pathlib.Path(f'{dspath}[{slice_}]{suffix}')
+    return dspath
+
+
 def my_urlpath(ds, slice_):
     path = pathlib.Path(ds.path)
     suffix = path.suffix
@@ -140,7 +149,8 @@ def test_download_b2nd_slice(slice_, name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot[name]
     path = ds.download(slice_)
-    assert path == ds.path
+    dspath = my_path(ds.path, slice_)
+    assert path == dspath
 
     # Data contents
     example = examples_dir / name
@@ -189,7 +199,8 @@ def test_download_b2frame_slice(slice_, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
     path = ds.download(slice_)
-    assert path == ds.path
+    dspath = my_path(ds.path, slice_)
+    assert path == dspath
 
     # Data contents
     example = examples_dir / ds.name
@@ -206,6 +217,7 @@ def test_download_b2frame_slice(slice_, services, examples_dir):
     b = blosc2.schunk_from_cframe(data.content)
     assert a[slice_] == b[:]
 
+
 def test_download_regular_file(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['README.md']

From f1e2be82c2de18e78db9223fc52d209785bec8c9 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Sun, 14 Jan 2024 17:47:15 +0100
Subject: [PATCH 25/38] Refine downloads *and* slicing

---
 caterva2/api.py            |  3 ++-
 caterva2/api_utils.py      |  1 -
 caterva2/clients/cli.py    | 23 +++++++----------------
 caterva2/services/sub.py   | 33 ++++++++++++++++++---------------
 caterva2/tests/test_api.py | 28 ++++++++++++++++++++++++++--
 pyproject.toml             | 11 ++---------
 root-example/README.md     |  2 +-
 7 files changed, 56 insertions(+), 45 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index e7d480c7..f6fb5a20 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -153,7 +153,8 @@ def download(self, key=None):
         PosixPath('foo/ds-1d[:10].b2nd')
         """
         url = self.get_download_url(key)
-        return api_utils.download_url(url, self.path, slice_=key)
+        slice_ = api_utils.slice_to_string(key)
+        return api_utils.download_url(url, self.path, slice_=slice_)
 
 class Dataset(File):
     """
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index e685212d..219636a1 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -74,7 +74,6 @@ def download_url(url, path, slice_=None):
     # Build the local filepath
     path = pathlib.Path(path)
     suffix = path.suffix
-    slice_ = slice_to_string(slice_)
     if slice_:
         path = path.with_suffix('')
         path = pathlib.Path(f'{path}[{slice_}]{suffix}')
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 5e95cb7d..089bff93 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -14,11 +14,11 @@
 # Requirements
 import httpx
 import rich
-import tqdm
 
 # Project
 from caterva2 import api_utils
 from caterva2.clients import cli_utils
+import caterva2 as cat2
 
 
 def handle_errors(func):
@@ -41,18 +41,10 @@ def dataset_with_slice(dataset):
         params = {}
     else:
         dataset, slice = match.groups()
-        params = {'slice': slice}
+        params = {'slice_': slice}
 
     return pathlib.Path(dataset), params
 
-def url_with_slice(url, slice):
-    if slice is not None:
-        return f'{url}?slice={args.slice}'
-    return url
-
-def chunk_dl_progress(it):
-    return tqdm.tqdm(it, desc='Downloading', unit='chunk')
-
 @handle_errors
 def cmd_roots(args):
     data = api_utils.get(f'http://{args.host}/api/roots')
@@ -113,8 +105,6 @@ def cmd_info(args):
 def cmd_show(args):
     dataset, params = args.dataset
     data = api_utils.get_download_url(args.host, dataset, params)
-                              # TODO: support tqdm again
-                              # progress=chunk_dl_progress)
 
     # Display
     if isinstance(data, bytes):
@@ -128,10 +118,11 @@ def cmd_show(args):
 @handle_errors
 def cmd_download(args):
     dataset, params = args.dataset
-    params['download'] = True
-    path = api_utils.get_download_url(args.host, dataset, params)
-                              # TODO: support tqdm again
-                              # progress=chunk_dl_progress)
+    root, dsname = str(dataset).split('/')
+    root = cat2.Root(root, host=args.host)
+    dataset = root[dsname]
+    slice_ = api_utils.parse_slice(params.get('slice_', None))
+    path = dataset.download(slice_)
 
     print(f'Dataset saved to {path}')
 
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index f952a26d..3f3eb652 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -289,10 +289,13 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
     download_path = None
     if download:
         # Let's store the data in the downloads directory
-        if slice_:
+        if slice_ or suffix == '.b2':
             download_path = cache / pathlib.Path('downloads') / pathlib.Path(path)
+            # Save data in the downloads directory (removing the '.b2' suffix, if needed)
+            suffix2 = download_path.suffix if suffix == '.b2' else suffix
             download_path = download_path.with_suffix('')
-            download_path = pathlib.Path(f'{download_path}[{slice_}]{suffix}')
+            slice2 = f"[{slice_}]" if slice_ else ""
+            download_path = pathlib.Path(f'{download_path}{slice2}{suffix2}')
         else:
             # By here, we already have the complete schunk in cache
             download_path = abspath
@@ -300,38 +303,38 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
 
     # Interesting data has been downloaded, let's use it
     array, schunk = srv_utils.open_b2(abspath)
-    slice_ = api_utils.parse_slice(slice_)
-    if slice_:
+    slice2 = api_utils.parse_slice(slice_)
+    if slice2:
         if array:
             if download_path:
                 # We want to save the slice to a file
-                array.slice(slice_, urlpath=download_path, mode="w", contiguous=True,
+                array.slice(slice2, urlpath=download_path, mode="w", contiguous=True,
                             cparams=schunk.cparams)
             else:
-                array = array[slice_] if array.ndim > 0 else array[()]
+                array = array[slice2] if array.ndim > 0 else array[()]
         else:
-            assert len(slice_) == 1
-            slice_ = slice_[0]
-            if isinstance(slice_, int):
+            assert len(slice2) == 1
+            slice2 = slice2[0]
+            if isinstance(slice2, int):
                 # TODO: make SChunk support integer as slice
-                slice_ = slice(slice_, slice_ + 1)
+                slice2 = slice(slice2, slice2 + 1)
             if download_path:
+                data = schunk[slice2]
                 # TODO: fix the upstream bug in python-blosc2 that prevents this from working
                 #  when not specifying chunksize (uses `data.size` instead of `len(data)`).
-                blosc2.SChunk(data=schunk[slice_], mode="w", urlpath=download_path,
+                blosc2.SChunk(data=data, mode="w", urlpath=download_path,
                               chunksize=schunk.chunksize,
                               cparams=schunk.cparams)
+                abspath = download_path
             else:
-                schunk = schunk[slice_]
+                schunk = schunk[slice2]
 
     if download:
         if suffix == '.b2':
             # Decompress before delivering
             # TODO: support context manager in blosc2.open()
-            schunk = blosc2.open(download_path, 'wb')
+            schunk = blosc2.open(abspath, 'rb')
             data = schunk[:]
-            # Remove the .b2 extension, and save the data in the downloads directory
-            download_path = cache / pathlib.Path('downloads') / pathlib.Path(path)
             with open(download_path, 'wb') as f:
                 f.write(data)
         # We don't need to return anything, the file is already in the static files/
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index 3c4717a6..748c7c06 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -33,9 +33,10 @@ def my_urlpath(ds, slice_):
     path = pathlib.Path(ds.path)
     suffix = path.suffix
     slice2 = api_utils.slice_to_string(slice_)
-    if slice2:
+    if slice2 or suffix not in {'.b2frame', '.b2nd'}:
         path = 'downloads' / path.with_suffix('')
-        path = pathlib.Path(f'{path}[{slice2}]{suffix}')
+        slice3 = f"[{slice2}]" if slice2 else ""
+        path = pathlib.Path(f'{path}{slice3}{suffix}')
     path = f"http://{cat2.sub_host_default}/files/{path}"
     return path
 
@@ -237,3 +238,26 @@ def test_download_regular_file(services, examples_dir):
     assert data.status_code == 200
     b = data.content.decode()
     assert a[:] == b[:]
+
+@pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
+def test_download_regular_file_slice(slice_, services, examples_dir):
+    myroot = cat2.Root(published_root, host=cat2.sub_host_default)
+    ds = myroot['README.md']
+    path = ds.download(slice_)
+    dspath = my_path(ds.path, slice_)
+    assert path == dspath
+
+    # Data contents
+    example = examples_dir / ds.name
+    a = open(example).read()
+    b = open(path).read()
+    assert a[slice_] == b[:]
+
+    # Using 2-step download
+    urlpath = ds.get_download_url(slice_)
+    path = my_urlpath(ds, slice_)
+    assert urlpath == path
+    data = httpx.get(urlpath)
+    assert data.status_code == 200
+    b = data.content.decode()
+    assert a[slice_] == b[:]
diff --git a/pyproject.toml b/pyproject.toml
index 5b05e466..67cb9059 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,10 +33,8 @@ classifiers = [
     "Operating System :: Unix",
 ]
 dependencies = [
-    "blosc2>=2.4.0",  # TODO: try to move this dependency to the extras below, if possible
     "httpx",
     "numpy",
-    "pydantic>=2",  # TODO: ditto
     "pytest",
 ]
 
@@ -45,20 +43,15 @@ path = "caterva2/__init__.py"
 
 [project.optional-dependencies]
 services = [
-    # TODO: try to add these dependencies here, and remove them from caterva2, if possible
-    # "blosc2>=2.4.0",
+    "blosc2>=2.4.0",
+    "pydantic>=2",
     "fastapi",
     "fastapi_websocket_pubsub",
-    # "pydantic>=2",  # TODO: ditto
     "safer",
     "uvicorn",
     "watchfiles",
 ]
 clients = [
-    # TODO: try to add these dependencies here, and remove them from caterva2, if possible
-    # "blosc2>=2.4.0",
-    # "pydantic>=2",  # TODO: ditto
-    "tqdm",
     "rich",
     "textual",
 ]
diff --git a/root-example/README.md b/root-example/README.md
index 9dd79141..3d707124 100644
--- a/root-example/README.md
+++ b/root-example/README.md
@@ -1,3 +1,3 @@
-This is simple example,
+This is a simple example,
 with several lines,
 for showing purposes.

From fe86dd5c555afcf6a70cbae0de6677724633cc50 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 09:05:45 +0100
Subject: [PATCH 26/38] __getitem__() goes to File, and more tests

---
 caterva2/api.py            | 68 ++++++++++++++++++++++----------------
 caterva2/tests/test_api.py | 16 ++++++++-
 2 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index f6fb5a20..3d33d129 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -89,12 +89,21 @@ class File:
     'localhost:8002'
     >>> file.path
     PosixPath('foo/README.md')
+    >>> file.meta['cparams']
+    {'codec': 5, 'typesize': 1, 'blocksize': 32768}
+    >>> file[:25]
+    b'This is a simple example,'
+    >>> file[0]
+    b'T'
     """
     def __init__(self, name, root, host):
         self.root = root
         self.name = name
         self.host = host
         self.path = pathlib.Path(f'{self.root}/{self.name}')
+        self.meta = api_utils.get(f'http://{host}/api/info/{self.path}')
+        # TODO: 'cparams' is not always present (e.g. for .b2nd files)
+        # print(f"self.meta: {self.meta['cparams']}")
 
     def __repr__(self):
         return f'<File: {self.path}>'
@@ -128,6 +137,34 @@ def get_download_url(self, key=None):
             self.host, self.path, {'slice_': slice_, 'download': True})
         return download_path
 
+    def __getitem__(self, key):
+        """
+        Get a slice of the dataset.
+
+        Parameters
+        ----------
+        key : int or slice
+            The slice to get.
+
+        Returns
+        -------
+        numpy.ndarray
+            The slice.
+
+        Examples
+        --------
+        >>> ds = root['ds-1d.b2nd']
+        >>> ds[1]
+        array(1)
+        >>> ds[:1]
+        array([0])
+        >>> ds[0:10]
+        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+        """
+        slice_ = api_utils.slice_to_string(key)
+        data = api_utils.get_download_url(self.host, self.path, {'slice_': slice_})
+        return data
+
     def download(self, key=None):
         """
         Download a slice of the file.
@@ -156,6 +193,7 @@ def download(self, key=None):
         slice_ = api_utils.slice_to_string(key)
         return api_utils.download_url(url, self.path, slice_=slice_)
 
+
 class Dataset(File):
     """
     A dataset is a Blosc2 container in a file.
@@ -179,35 +217,7 @@ class Dataset(File):
     """
     def __init__(self, name, root, host):
         super().__init__(name, root, host)
-        self.json = api_utils.get(f'http://{host}/api/info/{self.path}')
 
     def __repr__(self):
+        # TODO: add more info about dims, types, etc.
         return f'<Dataset: {self.path}>'
-
-    def __getitem__(self, key):
-        """
-        Get a slice of the dataset.
-
-        Parameters
-        ----------
-        key : int or slice
-            The slice to get.
-
-        Returns
-        -------
-        numpy.ndarray
-            The slice.
-
-        Examples
-        --------
-        >>> ds = root['ds-1d.b2nd']
-        >>> ds[1]
-        array(1)
-        >>> ds[:1]
-        array([0])
-        >>> ds[0:10]
-        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-        """
-        slice_ = api_utils.slice_to_string(key)
-        data = api_utils.get_download_url(self.host, self.path, {'slice_': slice_})
-        return data
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index 748c7c06..bdcf804b 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -72,7 +72,7 @@ def test_dataset_frame(services, examples_dir):
 
     example = examples_dir / ds.name
     a = blosc2.open(example)[:]
-    # assert ds[1] == a[1]  # TODO: this test does not work yet
+    assert ord(ds[1]) == a[1]  # TODO: why do we need ord() here?
     assert ds[:1] == a[:1]
     assert ds[0:10] == a[0:10]
     assert ds[10:20] == a[10:20]
@@ -219,6 +219,20 @@ def test_download_b2frame_slice(slice_, services, examples_dir):
     assert a[slice_] == b[:]
 
 
+def test_index_regular_file(services, examples_dir):
+    myroot = cat2.Root(published_root, host=cat2.sub_host_default)
+    ds = myroot['README.md']
+
+    # Data contents
+    example = examples_dir / ds.name
+    a = open(example).read().encode()
+    assert ds[:] == a[:]
+    assert ord(ds[1]) == a[1]     # TODO: why do we need ord() here?
+    assert ds[:1] == a[:1]
+    assert ds[0:10] == a[0:10]
+    assert ds[10:20] == a[10:20]
+
+
 def test_download_regular_file(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['README.md']

From 5e8d6497358da7ce547211c8b8cc6a08add436d8 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 09:26:02 +0100
Subject: [PATCH 27/38] cli show is using the API now

---
 caterva2/api_utils.py   |  7 +++++++
 caterva2/clients/cli.py | 10 +++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 219636a1..58f7c595 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -13,6 +13,13 @@
 import httpx
 
 
+def split_dsname(dataset):
+    ds = str(dataset)
+    root_sep = ds.find('/')
+    root, dsname = ds[:root_sep], ds[root_sep + 1:]
+    return dsname, root
+
+
 def slice_to_string(key):
     if key is None or key == () or key == slice(None):
         return ''
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 089bff93..9ff27722 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -100,11 +100,14 @@ def cmd_info(args):
 
     rich.print(data)
 
-
 @handle_errors
 def cmd_show(args):
     dataset, params = args.dataset
-    data = api_utils.get_download_url(args.host, dataset, params)
+    dsname, root = api_utils.split_dsname(dataset)
+    root = cat2.Root(root, host=args.host)
+    dataset = root[dsname]
+    slice_ = api_utils.parse_slice(params.get('slice_', None))
+    data = dataset[slice_]
 
     # Display
     if isinstance(data, bytes):
@@ -118,7 +121,7 @@ def cmd_show(args):
 @handle_errors
 def cmd_download(args):
     dataset, params = args.dataset
-    root, dsname = str(dataset).split('/')
+    dsname, root = api_utils.split_dsname(dataset)
     root = cat2.Root(root, host=args.host)
     dataset = root[dsname]
     slice_ = api_utils.parse_slice(params.get('slice_', None))
@@ -127,6 +130,7 @@ def cmd_download(args):
     print(f'Dataset saved to {path}')
 
 
+
 if __name__ == '__main__':
     parser = cli_utils.get_parser()
     parser.add_argument('--host', default='localhost:8002')

From 778561e5de6430d1e4f67963e99c41649bd99cc2 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 13:25:22 +0100
Subject: [PATCH 28/38] Add new API functions and use them in cli

---
 caterva2/__init__.py       |   8 ++-
 caterva2/api.py            | 128 +++++++++++++++++++++++++++++++++----
 caterva2/api_utils.py      |  28 ++++----
 caterva2/clients/cli.py    |  31 ++++-----
 caterva2/tests/test_api.py |   9 +++
 5 files changed, 159 insertions(+), 45 deletions(-)

diff --git a/caterva2/__init__.py b/caterva2/__init__.py
index 9c87b8b9..87f57489 100644
--- a/caterva2/__init__.py
+++ b/caterva2/__init__.py
@@ -12,7 +12,8 @@
 __version__ = "0.1"
 
 from .api import bro_host_default, pub_host_default, sub_host_default
-from .api import get_roots, Root, File, Dataset
+from .api import get_roots, subscribe, list, info, fetch, download
+from .api import Root, File, Dataset
 
 import pytest
 import pathlib
@@ -39,6 +40,11 @@ def test(verbose=False):
     'pub_host_default',
     'sub_host_default',
     'get_roots',
+    'subscribe',
+    'list',
+    'info',
+    'fetch',
+    'download',
     'Root',
     'File',
     'Dataset',
diff --git a/caterva2/api.py b/caterva2/api.py
index 3d33d129..dc653c31 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -40,6 +40,105 @@ def get_roots(host=sub_host_default):
     return api_utils.get(f'http://{host}/api/roots')
 
 
+def subscribe(root, host=sub_host_default):
+    """
+    Subscribe to a root.
+
+    Parameters
+    ----------
+    root : str
+        The name of the root to subscribe to.
+    host : str
+        The host to query.
+
+    Returns
+    -------
+    str
+        The response from the server.
+    """
+    return api_utils.post(f'http://{host}/api/subscribe/{root}')
+
+
+def list(root, host=sub_host_default):
+    """
+    List the nodes in a root.
+
+    Parameters
+    ----------
+    root : str
+        The name of the root to list.
+    host : str
+        The host to query.
+
+    Returns
+    -------
+    list
+        The list of nodes in the root.
+    """
+    return api_utils.get(f'http://{host}/api/list/{root}')
+
+def info(dataset, host=sub_host_default):
+    """
+    Get information about a dataset.
+
+    Parameters
+    ----------
+    dataset : str
+        The name of the dataset.
+    host : str
+        The host to query.
+
+    Returns
+    -------
+    dict
+        The information about the dataset.
+    """
+    return api_utils.get(f'http://{host}/api/info/{dataset}')
+
+def fetch(dataset, host=sub_host_default, slice_=None):
+    """
+    Fetch a slice of a dataset.
+
+    Parameters
+    ----------
+    dataset : str
+        The name of the dataset.
+    host : str
+        The host to query.
+    slice_ : str
+        The slice to fetch.
+
+    Returns
+    -------
+    numpy.ndarray
+        The slice of the dataset.
+    """
+    data = api_utils.get_download_url(dataset, host, {'slice_': slice_})
+    return data
+
+
+def download(dataset, host=sub_host_default, slice_=None):
+    """
+    Download a dataset.
+
+    Parameters
+    ----------
+    dataset : str
+        The name of the dataset.
+    host : str
+        The host to query.
+    slice_ : str
+        The slice to download.
+
+    Returns
+    -------
+    str
+        The path to the downloaded file.
+    """
+    url = api_utils.get_download_url(dataset, host, {'slice_': slice_, 'download': True})
+    return api_utils.download_url(url, dataset, slice_=slice_)
+
+
 class Root:
     """
     A root is a remote repository that can be subscribed to.
@@ -82,6 +181,7 @@ class File:
 
     Examples
     --------
+    >>> root = cat2.Root('foo')
     >>> file = root['README.md']
     >>> file.name
     'README.md'
@@ -108,13 +208,13 @@ def __init__(self, name, root, host):
     def __repr__(self):
         return f'<File: {self.path}>'
 
-    def get_download_url(self, key=None):
+    def get_download_url(self, slice_=None):
         """
         Get the download URL for a slice of the file.
 
         Parameters
         ----------
-        key : int or slice
+        slice_ : int or slice
             The slice to get.
 
         Returns
@@ -124,6 +224,7 @@ def get_download_url(self, key=None):
 
         Examples
         --------
+        >>> root = cat2.Root('foo')
         >>> file = root['ds-1d.b2nd']
         >>> file.get_download_url()
         'http://localhost:8002/files/foo/ds-1d.b2nd'
@@ -132,18 +233,18 @@ def get_download_url(self, key=None):
         >>> file.get_download_url(slice(0, 10))
         'http://localhost:8002/files/downloads/foo/ds-1d[:10].b2nd'
         """
-        slice_ = api_utils.slice_to_string(key)
-        download_path = api_utils.get_download_url(
-            self.host, self.path, {'slice_': slice_, 'download': True})
+        slice_ = api_utils.slice_to_string(slice_)
+        download_path = api_utils.get_download_url(self.path, self.host,
+                                                   {'slice_': slice_, 'download': True})
         return download_path
 
-    def __getitem__(self, key):
+    def __getitem__(self, slice_):
         """
         Get a slice of the dataset.
 
         Parameters
         ----------
-        key : int or slice
+        slice_ : int or slice
             The slice to get.
 
         Returns
@@ -161,17 +262,17 @@ def __getitem__(self, key):
         >>> ds[0:10]
         array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
         """
-        slice_ = api_utils.slice_to_string(key)
-        data = api_utils.get_download_url(self.host, self.path, {'slice_': slice_})
+        slice_ = api_utils.slice_to_string(slice_)
+        data = api_utils.get_download_url(self.path, self.host, {'slice_': slice_})
         return data
 
-    def download(self, key=None):
+    def download(self, slice_=None):
         """
         Download a slice of the file.
 
         Parameters
         ----------
-        key : int or slice
+        slice_ : int or slice
             The slice to get.
 
         Returns
@@ -189,8 +290,8 @@ def download(self, key=None):
         >>> file.download(slice(0, 10))
         PosixPath('foo/ds-1d[:10].b2nd')
         """
-        url = self.get_download_url(key)
-        slice_ = api_utils.slice_to_string(key)
+        url = self.get_download_url(slice_)
+        slice_ = api_utils.slice_to_string(slice_)
         return api_utils.download_url(url, self.path, slice_=slice_)
 
 
@@ -209,6 +310,7 @@ class Dataset(File):
 
     Examples
     --------
+    >>> root = cat2.Root('foo')
     >>> ds = root['ds-1d.b2nd']
     >>> ds.name
     'ds-1d.b2nd'
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 58f7c595..63feccfc 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -20,13 +20,13 @@ def split_dsname(dataset):
     return dsname, root
 
 
-def slice_to_string(key):
-    if key is None or key == () or key == slice(None):
+def slice_to_string(slice_):
+    if slice_ is None or slice_ == () or slice_ == slice(None):
         return ''
     slice_parts = []
-    if not isinstance(key, tuple):
-        key = (key,)
-    for index in key:
+    if not isinstance(slice_, tuple):
+        slice_ = (slice_,)
+    for index in slice_:
         if isinstance(index, int):
             slice_parts.append(str(index))
         elif isinstance(index, slice):
@@ -54,7 +54,7 @@ def parse_slice(string):
     return tuple(obj)
 
 
-def get_download_url(host, path, params):
+def get_download_url(path, host, params):
     response = httpx.get(f'http://{host}/api/download/{path}', params=params)
     response.raise_for_status()
 
@@ -77,21 +77,21 @@ def get_download_url(host, path, params):
 
     return f'http://{host}/files/{path}'
 
-def download_url(url, path, slice_=None):
+def download_url(url, localpath, slice_=None):
     # Build the local filepath
-    path = pathlib.Path(path)
-    suffix = path.suffix
+    localpath = pathlib.Path(localpath)
+    suffix = localpath.suffix
     if slice_:
-        path = path.with_suffix('')
-        path = pathlib.Path(f'{path}[{slice_}]{suffix}')
+        localpath = localpath.with_suffix('')
+        localpath = pathlib.Path(f'{localpath}[{slice_}]{suffix}')
 
     with httpx.stream("GET", url) as r:
         r.raise_for_status()
-        path.parent.mkdir(parents=True, exist_ok=True)
-        with open(path, "wb") as f:
+        localpath.parent.mkdir(parents=True, exist_ok=True)
+        with open(localpath, "wb") as f:
             for data in r.iter_bytes():
                 f.write(data)
-    return path
+    return localpath
 
 
 #
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 9ff27722..6bcfff7e 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -47,7 +47,7 @@ def dataset_with_slice(dataset):
 
 @handle_errors
 def cmd_roots(args):
-    data = api_utils.get(f'http://{args.host}/api/roots')
+    data = cat2.get_roots(host=args.host)
     if args.json:
         print(json.dumps(data))
         return
@@ -60,7 +60,7 @@ def cmd_roots(args):
 
 @handle_errors
 def cmd_subscribe(args):
-    data = api_utils.post(f'http://{args.host}/api/subscribe/{args.root}')
+    data = cat2.subscribe(args.root, host=args.host)
     if args.json:
         print(json.dumps(data))
         return
@@ -69,7 +69,7 @@ def cmd_subscribe(args):
 
 @handle_errors
 def cmd_list(args):
-    data = api_utils.get(f'http://{args.host}/api/list/{args.root}')
+    data = cat2.list(args.root, host=args.host)
     if args.json:
         print(json.dumps(data))
         return
@@ -79,6 +79,8 @@ def cmd_list(args):
 
 @handle_errors
 def cmd_url(args):
+    # TODO: provide a url that can be used to open the dataset in blosc2
+    # TODO: add a new function to the API that returns the url
     data = api_utils.get(f'http://{args.host}/api/url/{args.root}')
     if args.json:
         print(json.dumps(data))
@@ -89,9 +91,8 @@ def cmd_url(args):
 
 @handle_errors
 def cmd_info(args):
-    # Get
-    dataset, params = args.dataset
-    data = api_utils.get(f'http://{args.host}/api/info/{dataset}', params=params)
+    print(f"Getting info for {args.dataset}")
+    data = cat2.info(args.dataset, host=args.host)
 
     # Print
     if args.json:
@@ -103,11 +104,8 @@ def cmd_info(args):
 @handle_errors
 def cmd_show(args):
     dataset, params = args.dataset
-    dsname, root = api_utils.split_dsname(dataset)
-    root = cat2.Root(root, host=args.host)
-    dataset = root[dsname]
-    slice_ = api_utils.parse_slice(params.get('slice_', None))
-    data = dataset[slice_]
+    slice_ = params.get('slice_', None)
+    data = cat2.fetch(dataset, host=args.host, slice_=slice_)
 
     # Display
     if isinstance(data, bytes):
@@ -117,15 +115,14 @@ def cmd_show(args):
             print('Binary data')
     else:
         print(data)
+        # TODO: make rich optional in command line
+        # rich.print(data)
 
 @handle_errors
 def cmd_download(args):
     dataset, params = args.dataset
-    dsname, root = api_utils.split_dsname(dataset)
-    root = cat2.Root(root, host=args.host)
-    dataset = root[dsname]
-    slice_ = api_utils.parse_slice(params.get('slice_', None))
-    path = dataset.download(slice_)
+    slice_ = params.get('slice_', None)
+    path = cat2.download(dataset, host=args.host, slice_=slice_)
 
     print(f'Dataset saved to {path}')
 
@@ -167,7 +164,7 @@ def cmd_download(args):
     help = 'Get metadata about a dataset.'
     subparser = subparsers.add_parser('info', help=help)
     subparser.add_argument('--json', action='store_true')
-    subparser.add_argument('dataset', type=dataset_with_slice)
+    subparser.add_argument('dataset', type=str)
     subparser.set_defaults(func=cmd_info)
 
     # show
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index bdcf804b..d66ea5aa 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -46,17 +46,20 @@ def test_roots(services):
     assert roots[published_root]['name'] == published_root
     assert roots[published_root]['http'] == cat2.pub_host_default
 
+
 def test_root(services):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     assert myroot.name == published_root
     assert myroot.host == cat2.sub_host_default
 
+
 def test_list(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     example = examples_dir
     nodes = set(str(f.relative_to(str(example))) for f in example.rglob("*") if f.is_file())
     assert set(myroot.node_list) == nodes
 
+
 def test_file(services):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     file = myroot['README.md']
@@ -84,6 +87,7 @@ def test_dataset_frame(services, examples_dir):
         assert ds[::2] == a[::2]
         assert str(e_info.value) == 'Only step=1 is supported'
 
+
 def test_dataset_1d(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-1d.b2nd']
@@ -122,6 +126,7 @@ def test_dataset_nd(name, services, examples_dir):
         np.testing.assert_array_equal(ds[::2], a[::2])
         assert str(e_info.value) == 'Only step=1 is supported'
 
+
 @pytest.mark.parametrize("name", ['ds-1d.b2nd', 'dir1/ds-2d.b2nd'])
 def test_download_b2nd(name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
@@ -143,6 +148,7 @@ def test_download_b2nd(name, services, examples_dir):
     b = blosc2.ndarray_from_cframe(data.content)
     np.testing.assert_array_equal(a[:], b[:])
 
+
 # TODO: test slices that exceed the array dimensions
 @pytest.mark.parametrize("slice_", [slice(1,10), slice(4,8), slice(None), 1])
 @pytest.mark.parametrize("name", ['ds-1d.b2nd', 'dir1/ds-2d.b2nd'])
@@ -174,6 +180,7 @@ def test_download_b2nd_slice(slice_, name, services, examples_dir):
     else:
         np.testing.assert_array_equal(a[slice_], b[:])
 
+
 def test_download_b2frame(services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
@@ -194,6 +201,7 @@ def test_download_b2frame(services, examples_dir):
     b = blosc2.schunk_from_cframe(data.content)
     assert a[:] == b[:]
 
+
 # TODO: add an integer slice test when it is supported in blosc2
 @pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
 def test_download_b2frame_slice(slice_, services, examples_dir):
@@ -253,6 +261,7 @@ def test_download_regular_file(services, examples_dir):
     b = data.content.decode()
     assert a[:] == b[:]
 
+
 @pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
 def test_download_regular_file_slice(slice_, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)

From 0fe8b00bbb17675abfa6a9f7fe8428844b5101b5 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 13:50:38 +0100
Subject: [PATCH 29/38] Use names that don't collide with Python names

---
 caterva2/__init__.py    | 6 +++---
 caterva2/api.py         | 4 ++--
 caterva2/clients/cli.py | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/caterva2/__init__.py b/caterva2/__init__.py
index 87f57489..59fdbc61 100644
--- a/caterva2/__init__.py
+++ b/caterva2/__init__.py
@@ -12,7 +12,7 @@
 __version__ = "0.1"
 
 from .api import bro_host_default, pub_host_default, sub_host_default
-from .api import get_roots, subscribe, list, info, fetch, download
+from .api import get_roots, subscribe, get_list, get_info, fetch, download
 from .api import Root, File, Dataset
 
 import pytest
@@ -41,8 +41,8 @@ def test(verbose=False):
     'sub_host_default',
     'get_roots',
     'subscribe',
-    'list',
-    'info',
+    'get_list',
+    'get_info',
     'fetch',
     'download',
     'Root',
diff --git a/caterva2/api.py b/caterva2/api.py
index dc653c31..e9310947 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -59,7 +59,7 @@ def subscribe(root, host=sub_host_default):
     return api_utils.post(f'http://{host}/api/subscribe/{root}')
 
 
-def list(root, host=sub_host_default):
+def get_list(root, host=sub_host_default):
     """
     List the nodes in a root.
 
@@ -77,7 +77,7 @@ def list(root, host=sub_host_default):
     """
     return api_utils.get(f'http://{host}/api/list/{root}')
 
-def info(dataset, host=sub_host_default):
+def get_info(dataset, host=sub_host_default):
     """
     Get information about a dataset.
 
diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index 6bcfff7e..f92b9392 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -69,7 +69,7 @@ def cmd_subscribe(args):
 
 @handle_errors
 def cmd_list(args):
-    data = cat2.list(args.root, host=args.host)
+    data = cat2.get_list(args.root, host=args.host)
     if args.json:
         print(json.dumps(data))
         return
@@ -92,7 +92,7 @@ def cmd_url(args):
 @handle_errors
 def cmd_info(args):
     print(f"Getting info for {args.dataset}")
-    data = cat2.info(args.dataset, host=args.host)
+    data = cat2.get_info(args.dataset, host=args.host)
 
     # Print
     if args.json:

From 4edab935cf5c52f6a546ff42d1e9baf4ebdb13ed Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 17:57:04 +0100
Subject: [PATCH 30/38] Documented HTTP API for sub; code beautification.

---
 caterva2/services/srv_utils.py |  14 ++++
 caterva2/services/sub.py       | 120 +++++++++++++++++++++++++++------
 2 files changed, 115 insertions(+), 19 deletions(-)

diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 017d417e..60e39e57 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -231,3 +231,17 @@ def save(self):
 
     def __getattr__(self, name):
         return getattr(self.data, name)
+
+
+def init_b2(abspath, metadata):
+    suffix = abspath.suffix
+    if suffix == '.b2nd':
+        metadata = models.Metadata(**metadata)
+        init_b2nd(metadata, abspath)
+    elif suffix == '.b2frame':
+        metadata = models.SChunk(**metadata)
+        init_b2frame(metadata, abspath)
+    else:
+        abspath = pathlib.Path(f'{abspath}.b2')
+        metadata = models.SChunk(**metadata)
+        init_b2frame(metadata, abspath)
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 3f3eb652..da93dacf 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -24,7 +24,7 @@
 from caterva2 import utils, api_utils, models
 from caterva2.services import srv_utils
 
-
+# Logging
 logger = logging.getLogger('sub')
 
 # Configuration
@@ -60,20 +60,6 @@ async def new_root(data, topic):
     database.save()
 
 
-def init_b2(abspath, metadata):
-    suffix = abspath.suffix
-    if suffix == '.b2nd':
-        metadata = models.Metadata(**metadata)
-        srv_utils.init_b2nd(metadata, abspath)
-    elif suffix == '.b2frame':
-        metadata = models.SChunk(**metadata)
-        srv_utils.init_b2frame(metadata, abspath)
-    else:
-        abspath = pathlib.Path(f'{abspath}.b2')
-        metadata = models.SChunk(**metadata)
-        srv_utils.init_b2frame(metadata, abspath)
-
-
 async def updated_dataset(data, topic):
     name = topic
     relpath = data['path']
@@ -87,7 +73,7 @@ async def updated_dataset(data, topic):
         if abspath.is_file():
             abspath.unlink()
     else:
-        init_b2(abspath, metadata)
+        srv_utils.init_b2(abspath, metadata)
 
 
 #
@@ -128,7 +114,7 @@ def follow(name: str):
 
         # Save metadata
         abspath = rootdir / relpath
-        init_b2(abspath, metadata)
+        srv_utils.init_b2(abspath, metadata)
 
         # Save etag
         database.etags[key] = response.headers['etag']
@@ -201,6 +187,14 @@ async def lifespan(app: FastAPI):
 
 @app.get('/api/roots')
 async def get_roots():
+    """
+    Get the list of roots.
+
+    Returns
+    -------
+    dict
+        The list of roots.
+    """
     return database.roots
 
 def get_root(name):
@@ -212,12 +206,38 @@ def get_root(name):
 
 @app.post('/api/subscribe/{name}')
 async def post_subscribe(name: str):
+    """
+    Subscribe to a root.
+
+    Parameters
+    ----------
+    name : str
+        The name of the root.
+
+    Returns
+    -------
+    str
+        'Ok' if successful.
+    """
     get_root(name)  # Not Found
     follow(name)
     return 'Ok'
 
 @app.get('/api/list/{name}')
 async def get_list(name: str):
+    """
+    List the datasets in a root.
+
+    Parameters
+    ----------
+    name : str
+        The name of the root.
+
+    Returns
+    -------
+    list
+        The list of datasets in the root.
+    """
     root = get_root(name)
 
     rootdir = cache / root.name
@@ -231,6 +251,19 @@ async def get_list(name: str):
 
 @app.get('/api/url/{path:path}')
 async def get_url(path: str):
+    """
+    Get the URLs to access a dataset.
+
+    Parameters
+    ----------
+    path : str
+        The path to the dataset.
+
+    Returns
+    -------
+    list
+        The URLs to access the dataset.
+    """
     root, *dataset = path.split('/', 1)
     scheme = 'http'
     http = get_root(root).http
@@ -246,11 +279,41 @@ async def get_url(path: str):
 
 @app.get('/api/info/{path:path}')
 async def get_info(path: str):
+    """
+    Get the metadata of a dataset.
+
+    Parameters
+    ----------
+    path : str
+        The path to the dataset.
+
+    Returns
+    -------
+    dict
+        The metadata of the dataset.
+    """
     abspath = lookup_path(path)
     return srv_utils.read_metadata(abspath)
 
 
 async def partial_download(abspath, path, slice_):
+    """
+    Download the necessary chunks of a dataset.
+
+    Parameters
+    ----------
+    abspath : pathlib.Path
+        The absolute path to the dataset.
+    path : str
+        The path to the dataset.
+    slice_ : str
+        The slice to fetch.
+
+    Returns
+    -------
+    None
+        When finished, the dataset is available in cache.
+    """
     # Build the list of chunks we need to download from the publisher
     array, schunk = srv_utils.open_b2(abspath)
     if slice_:
@@ -280,10 +343,29 @@ async def partial_download(abspath, path, slice_):
 
 @app.get('/api/download/{path:path}')
 async def download_data(path: str, slice_: str = None, download: bool = False):
+    """
+    Download or fetch a dataset.
+
+    Parameters
+    ----------
+    path : str
+        The path to the dataset.
+    slice_ : str
+        The slice to fetch.
+    download : bool
+        Whether to download the dataset in the downloads dir.  If False, the data is
+        returned as a StreamingResponse (it is 'fetched').
+
+    Returns
+    -------
+    None or StreamingResponse
+        The data in case of a fetch, None otherwise.
+
+    """
     abspath = lookup_path(path)
     suffix = abspath.suffix
 
-    # Download and update the schunk in cache
+    # Download and update the necessary chunks of the schunk in cache
     await partial_download(abspath, path, slice_)
 
     download_path = None
@@ -333,7 +415,7 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
         if suffix == '.b2':
             # Decompress before delivering
             # TODO: support context manager in blosc2.open()
-            schunk = blosc2.open(abspath, 'rb')
+            schunk = blosc2.open(abspath)
             data = schunk[:]
             with open(download_path, 'wb') as f:
                 f.write(data)

From 45763fdcb4f0777332e98c1074824b0b94cfa11b Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 18:07:30 +0100
Subject: [PATCH 31/38] Fixes for some PEP8 style suggestions

---
 caterva2/api.py                |  4 ++++
 caterva2/api_utils.py          |  2 +-
 caterva2/services/srv_utils.py | 34 ++++++++++++++++++----------------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/caterva2/api.py b/caterva2/api.py
index e9310947..fe25dd67 100644
--- a/caterva2/api.py
+++ b/caterva2/api.py
@@ -77,6 +77,7 @@ def get_list(root, host=sub_host_default):
     """
     return api_utils.get(f'http://{host}/api/list/{root}')
 
+
 def get_info(dataset, host=sub_host_default):
     """
     Get information about a dataset.
@@ -95,6 +96,7 @@ def get_info(dataset, host=sub_host_default):
     """
     return api_utils.get(f'http://{host}/api/info/{dataset}')
 
+
 def fetch(dataset, host=sub_host_default, slice_=None):
     """
     Fetch a slice of a dataset.
@@ -254,6 +256,7 @@ def __getitem__(self, slice_):
 
         Examples
         --------
+        >>> root = cat2.Root('foo')
         >>> ds = root['ds-1d.b2nd']
         >>> ds[1]
         array(1)
@@ -282,6 +285,7 @@ def download(self, slice_=None):
 
         Examples
         --------
+        >>> root = cat2.Root('foo')
         >>> file = root['ds-1d.b2nd']
         >>> file.download()
         PosixPath('foo/ds-1d.b2nd')
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 63feccfc..53992d9a 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -77,6 +77,7 @@ def get_download_url(path, host, params):
 
     return f'http://{host}/files/{path}'
 
+
 def download_url(url, localpath, slice_=None):
     # Build the local filepath
     localpath = pathlib.Path(localpath)
@@ -108,4 +109,3 @@ def post(url, json=None):
     response = httpx.post(url, json=json)
     response.raise_for_status()
     return response.json()
-
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 60e39e57..20ca4988 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -23,8 +23,9 @@
 
 
 def get_model_from_obj(obj, model_class, **kwargs):
-    if type(obj) is dict:
-        getter = lambda o, k: o[k]
+    if isinstance(obj, dict):
+        def getter(o, k):
+            return o[k]
     else:
         getter = getattr
 
@@ -116,6 +117,7 @@ def get_abspath(root, path):
 # Blosc2 related helpers
 #
 
+
 def compress(data, dst=None):
     assert isinstance(data, (bytes, pathlib.Path))
 
@@ -174,6 +176,20 @@ def init_b2frame(metadata, urlpath=None):
     return sc
 
 
+def init_b2(abspath, metadata):
+    suffix = abspath.suffix
+    if suffix == '.b2nd':
+        metadata = models.Metadata(**metadata)
+        init_b2nd(metadata, abspath)
+    elif suffix == '.b2frame':
+        metadata = models.SChunk(**metadata)
+        init_b2frame(metadata, abspath)
+    else:
+        abspath = pathlib.Path(f'{abspath}.b2')
+        metadata = models.SChunk(**metadata)
+        init_b2frame(metadata, abspath)
+
+
 def open_b2(abspath):
     suffix = abspath.suffix
     if suffix == '.b2nd':
@@ -231,17 +247,3 @@ def save(self):
 
     def __getattr__(self, name):
         return getattr(self.data, name)
-
-
-def init_b2(abspath, metadata):
-    suffix = abspath.suffix
-    if suffix == '.b2nd':
-        metadata = models.Metadata(**metadata)
-        init_b2nd(metadata, abspath)
-    elif suffix == '.b2frame':
-        metadata = models.SChunk(**metadata)
-        init_b2frame(metadata, abspath)
-    else:
-        abspath = pathlib.Path(f'{abspath}.b2')
-        metadata = models.SChunk(**metadata)
-        init_b2frame(metadata, abspath)

From 8f687ee516c3b6425f100f927a8a4c16a62fd906 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 18:10:02 +0100
Subject: [PATCH 32/38] Remove duplicated code

---
 caterva2/clients/cli.py       |  6 +++---
 caterva2/clients/cli_utils.py | 39 -----------------------------------
 2 files changed, 3 insertions(+), 42 deletions(-)
 delete mode 100644 caterva2/clients/cli_utils.py

diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index f92b9392..a4e523a0 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -17,7 +17,7 @@
 
 # Project
 from caterva2 import api_utils
-from caterva2.clients import cli_utils
+from caterva2 import utils
 import caterva2 as cat2
 
 
@@ -129,7 +129,7 @@ def cmd_download(args):
 
 
 if __name__ == '__main__':
-    parser = cli_utils.get_parser()
+    parser = utils.get_parser()
     parser.add_argument('--host', default='localhost:8002')
     subparsers = parser.add_subparsers(required=True)
 
@@ -183,5 +183,5 @@ def cmd_download(args):
     subparser.set_defaults(func=cmd_download)
 
     # Go
-    args = cli_utils.run_parser(parser)
+    args = utils.run_parser(parser)
     args.func(args)
diff --git a/caterva2/clients/cli_utils.py b/caterva2/clients/cli_utils.py
deleted file mode 100644
index 9f18b6ec..00000000
--- a/caterva2/clients/cli_utils.py
+++ /dev/null
@@ -1,39 +0,0 @@
-###############################################################################
-# Caterva2 - On demand access to remote Blosc2 data repositories
-#
-# Copyright (c) 2023 The Blosc Developers <blosc@blosc.org>
-# https://www.blosc.org
-# License: GNU Affero General Public License v3.0
-# See LICENSE.txt for details about copyright and rights to use.
-###############################################################################
-
-import argparse
-import logging
-
-#
-# Command line helpers
-#
-def socket_type(string):
-    host, port = string.split(':')
-    port = int(port)
-    return (host, port)
-
-
-def get_parser(broker=None, http=None):
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--loglevel', default='warning')
-    if broker:
-        parser.add_argument('--broker', default=broker)
-    if http:
-        parser.add_argument('--http', default=http, type=socket_type)
-    return parser
-
-
-def run_parser(parser):
-    args = parser.parse_args()
-
-    # Logging
-    loglevel = args.loglevel.upper()
-    logging.basicConfig(level=loglevel)
-
-    return args

From 1bb0a4cd9377f2ce4c51542d09c7e30307b8dc93 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Mon, 15 Jan 2024 18:13:03 +0100
Subject: [PATCH 33/38] Fixes for some PEP8 style suggestions

---
 caterva2/clients/cli.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/caterva2/clients/cli.py b/caterva2/clients/cli.py
index a4e523a0..bc66488b 100644
--- a/caterva2/clients/cli.py
+++ b/caterva2/clients/cli.py
@@ -35,16 +35,18 @@ def wrapper(*args):
 
     return wrapper
 
+
 def dataset_with_slice(dataset):
     match = re.match('(.*)\\[(.*)]', dataset)
     if match is None:
         params = {}
     else:
-        dataset, slice = match.groups()
-        params = {'slice_': slice}
+        dataset, slice_ = match.groups()
+        params = {'slice_': slice_}
 
     return pathlib.Path(dataset), params
 
+
 @handle_errors
 def cmd_roots(args):
     data = cat2.get_roots(host=args.host)
@@ -58,6 +60,7 @@ def cmd_roots(args):
         else:
             print(name)
 
+
 @handle_errors
 def cmd_subscribe(args):
     data = cat2.subscribe(args.root, host=args.host)
@@ -67,6 +70,7 @@ def cmd_subscribe(args):
 
     print(data)
 
+
 @handle_errors
 def cmd_list(args):
     data = cat2.get_list(args.root, host=args.host)
@@ -77,6 +81,7 @@ def cmd_list(args):
     for item in data:
         print(f'{args.root}/{item}')
 
+
 @handle_errors
 def cmd_url(args):
     # TODO: provide a url that can be used to open the dataset in blosc2
@@ -89,6 +94,7 @@ def cmd_url(args):
     for url in data:
         print(url)
 
+
 @handle_errors
 def cmd_info(args):
     print(f"Getting info for {args.dataset}")
@@ -101,6 +107,7 @@ def cmd_info(args):
 
     rich.print(data)
 
+
 @handle_errors
 def cmd_show(args):
     dataset, params = args.dataset
@@ -118,6 +125,7 @@ def cmd_show(args):
         # TODO: make rich optional in command line
         # rich.print(data)
 
+
 @handle_errors
 def cmd_download(args):
     dataset, params = args.dataset
@@ -127,7 +135,6 @@ def cmd_download(args):
     print(f'Dataset saved to {path}')
 
 
-
 if __name__ == '__main__':
     parser = utils.get_parser()
     parser.add_argument('--host', default='localhost:8002')

From 1647a5adf206ae58140dddf1f1b9e18dccc70750 Mon Sep 17 00:00:00 2001
From: Ivan Vilata-i-Balaguer <ivan@selidor.net>
Date: Mon, 15 Jan 2024 18:34:48 +0100
Subject: [PATCH 34/38] Fix subscription with ND datasets consisting of strings

The old code failed to get a proper dtype from metadata to build the
uninitialized dataset.

Allow testing by adding such a dataset to example files.
---
 SPECS.md                       |   5 +++++
 caterva2/services/srv_utils.py |   2 +-
 root-example/ds-1d-b.b2nd      | Bin 0 -> 3969 bytes
 3 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 root-example/ds-1d-b.b2nd

diff --git a/SPECS.md b/SPECS.md
index f09e155f..e656fd69 100644
--- a/SPECS.md
+++ b/SPECS.md
@@ -185,6 +185,11 @@ You can find an example of a data root in the `root-example` folder.  It contain
       a = np.arange(1000, dtype="int64"))
       blosc2.asarray(a, chunks=(100,), blocks=(10,), urlpath="ds-1d.b2nd", mode="w")
 
+- `ds-1d-b.b2nd`: A 1D array (6-byte strings). Constructed as:
+
+      a = np.array([b'foobar'] * 1000)
+      blosc2.asarray(a, chunks=(100,), blocks=(10,), urlpath="ds-1d-b.b2nd", mode="w")
+
 - `dir1/ds-2d.b2nd`: A 2D array (uint16).  Constructed as:
 
       a = np.arange(200, dtype="uint16").reshape(10, 20)
diff --git a/caterva2/services/srv_utils.py b/caterva2/services/srv_utils.py
index 20ca4988..9b05713a 100644
--- a/caterva2/services/srv_utils.py
+++ b/caterva2/services/srv_utils.py
@@ -152,7 +152,7 @@ def init_b2nd(metadata, urlpath=None):
         if urlpath.exists():
             urlpath.unlink()
 
-    dtype = getattr(np, metadata.dtype)
+    dtype = np.dtype(metadata.dtype)
     return blosc2.uninit(metadata.shape, dtype, urlpath=urlpath,
                          chunks=metadata.chunks, blocks=metadata.blocks)
 
diff --git a/root-example/ds-1d-b.b2nd b/root-example/ds-1d-b.b2nd
new file mode 100644
index 0000000000000000000000000000000000000000..a8be5bc25bc8d844abcbc67050e22be88a48afce
GIT binary patch
literal 3969
zcmbQYBFQMNC^0vc;SvJ_!=&>-0tomUmk2S4GF*o6#0#Jdz7CKgHV|P0B$y&DGB5$r
zp&M*qg^a8a^+0m+Sq8y-42(;XjPg=I%Cqk<FdhTaO4At_CqhkQelZawl>#QY7;XdE
z%r(Jgtc<N}5llcEfcz3hAOQkMMtcCc7C`YdAl(AQi-7n55Z?phKR_%6lnVpmS^xk4
zXPgKp=7CuAK|~*j7?nj14PwF^UwDJ!V>HYeNlIhHggGc(BZoV38XpaFaOS3Jn1jL^
zRK9@96;OJ{R<?k`eYEsIE<Nyv1~KUzIgK9xhW%(dr>uq=4RcT(PkQ-)KPBUr9S!r*
zFb5_&lIkP;DQa-aj;8a`bUrfDxhO{fuq^_@E<g+hz_uR@r~nx<Ky1QL@*?Jf%MP6d
qA`Fa7EUfGtT->}24-6Q9YNs#)JHl+h*32CShI2r*$~R!T;WPjp+0<<S

literal 0
HcmV?d00001


From b3a6c3ee601c7998bc519b10f1b67e0d1aed45ad Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 16 Jan 2024 09:22:50 +0100
Subject: [PATCH 35/38] More PEP8 style fixes

---
 caterva2/__init__.py       | 2 ++
 caterva2/api_utils.py      | 1 +
 caterva2/services/bro.py   | 2 ++
 caterva2/services/pub.py   | 4 ++++
 caterva2/services/sub.py   | 8 +++++++-
 caterva2/tests/conftest.py | 5 ++++-
 caterva2/tests/test_api.py | 6 +++---
 caterva2/tests/test_cli.py | 4 ++--
 caterva2/utils.py          | 2 +-
 9 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/caterva2/__init__.py b/caterva2/__init__.py
index 59fdbc61..f2d80293 100644
--- a/caterva2/__init__.py
+++ b/caterva2/__init__.py
@@ -18,6 +18,7 @@
 import pytest
 import pathlib
 
+
 def test(verbose=False):
     """Run the test suite.
 
@@ -35,6 +36,7 @@ def test(verbose=False):
     verb = "-v" if verbose else ""
     return pytest.main([verb, test_dir])
 
+
 __all__ = [
     'bro_host_default',
     'pub_host_default',
diff --git a/caterva2/api_utils.py b/caterva2/api_utils.py
index 53992d9a..7ae4a9a3 100644
--- a/caterva2/api_utils.py
+++ b/caterva2/api_utils.py
@@ -6,6 +6,7 @@
 # License: GNU Affero General Public License v3.0
 # See LICENSE.txt for details about copyright and rights to use.
 ###############################################################################
+
 import pathlib
 import pickle
 
diff --git a/caterva2/services/bro.py b/caterva2/services/bro.py
index a38962f9..91f87ac8 100644
--- a/caterva2/services/bro.py
+++ b/caterva2/services/bro.py
@@ -28,10 +28,12 @@
 # API
 app = FastAPI()
 
+
 @app.get('/api/roots', response_model_exclude_none=True)
 async def get_roots() -> typing.Dict[str, models.Root]:
     return database.roots
 
+
 @app.post('/api/roots')
 async def post_roots(root: models.Root) -> models.Root:
     database.roots[root.name] = root
diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 5bd5b7a2..182c293b 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -42,6 +42,7 @@ def get_etag(abspath):
     stat = abspath.stat()
     return f'{stat.st_mtime}:{stat.st_size}'
 
+
 async def worker(queue):
     while True:
         abspath = await queue.get()
@@ -135,10 +136,12 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(lifespan=lifespan)
 
+
 @app.get("/api/list")
 async def get_list():
     return [relpath for abspath, relpath in utils.walk_files(root)]
 
+
 @app.get("/api/info/{path:path}")
 async def get_info(
     path: str,
@@ -160,6 +163,7 @@ async def get_info(
     response.headers['Etag'] = etag
     return srv_utils.read_metadata(abspath)
 
+
 @app.get("/api/download/{path:path}")
 async def get_download(path: str, nchunk: int = -1):
     if nchunk < 0:
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index da93dacf..23ba4e4f 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -185,6 +185,7 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(lifespan=lifespan)
 
+
 @app.get('/api/roots')
 async def get_roots():
     """
@@ -197,6 +198,7 @@ async def get_roots():
     """
     return database.roots
 
+
 def get_root(name):
     root = database.roots.get(name)
     if root is None:
@@ -204,6 +206,7 @@ def get_root(name):
 
     return root
 
+
 @app.post('/api/subscribe/{name}')
 async def post_subscribe(name: str):
     """
@@ -223,6 +226,7 @@ async def post_subscribe(name: str):
     follow(name)
     return 'Ok'
 
+
 @app.get('/api/list/{name}')
 async def get_list(name: str):
     """
@@ -249,6 +253,7 @@ async def get_list(name: str):
         for path, relpath in utils.walk_files(rootdir)
     ]
 
+
 @app.get('/api/url/{path:path}')
 async def get_url(path: str):
     """
@@ -277,6 +282,7 @@ async def get_url(path: str):
 
     return [http]
 
+
 @app.get('/api/info/{path:path}')
 async def get_info(path: str):
     """
@@ -353,7 +359,7 @@ async def download_data(path: str, slice_: str = None, download: bool = False):
     slice_ : str
         The slice to fetch.
     download : bool
-        Whether to download the dataset in the downloads dir.  If False, the data is
+        Whether to download the dataset in the downloads/ dir.  If False, the data is
         returned as a StreamingResponse (it is 'fetched').
 
     Returns
diff --git a/caterva2/tests/conftest.py b/caterva2/tests/conftest.py
index 58b1fd4f..2a2ef69f 100644
--- a/caterva2/tests/conftest.py
+++ b/caterva2/tests/conftest.py
@@ -6,11 +6,14 @@
 import numpy as np
 import sys
 import platform
-try: # Python-Blosc2 is optional
+
+
+try:  # Python-Blosc2 is optional
     import blosc2
 except ImportError:
     blosc2 = None
 
+
 def pytest_configure(config):
     print('\n' + '-=' * 38)
     print("Caterva2 version:      %s" % cat2.__version__)
diff --git a/caterva2/tests/test_api.py b/caterva2/tests/test_api.py
index d66ea5aa..4e42aa7d 100644
--- a/caterva2/tests/test_api.py
+++ b/caterva2/tests/test_api.py
@@ -150,7 +150,7 @@ def test_download_b2nd(name, services, examples_dir):
 
 
 # TODO: test slices that exceed the array dimensions
-@pytest.mark.parametrize("slice_", [slice(1,10), slice(4,8), slice(None), 1])
+@pytest.mark.parametrize("slice_", [slice(1, 10), slice(4, 8), slice(None), 1])
 @pytest.mark.parametrize("name", ['ds-1d.b2nd', 'dir1/ds-2d.b2nd'])
 def test_download_b2nd_slice(slice_, name, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
@@ -203,7 +203,7 @@ def test_download_b2frame(services, examples_dir):
 
 
 # TODO: add an integer slice test when it is supported in blosc2
-@pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
+@pytest.mark.parametrize("slice_", [slice(1, 10), slice(15, 20), slice(None)])
 def test_download_b2frame_slice(slice_, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['ds-hello.b2frame']
@@ -262,7 +262,7 @@ def test_download_regular_file(services, examples_dir):
     assert a[:] == b[:]
 
 
-@pytest.mark.parametrize("slice_", [slice(1,10), slice(15,20), slice(None)])
+@pytest.mark.parametrize("slice_", [slice(1, 10), slice(15, 20), slice(None)])
 def test_download_regular_file_slice(slice_, services, examples_dir):
     myroot = cat2.Root(published_root, host=cat2.sub_host_default)
     ds = myroot['README.md']
diff --git a/caterva2/tests/test_cli.py b/caterva2/tests/test_cli.py
index 68a5e0b4..3e04ee54 100644
--- a/caterva2/tests/test_cli.py
+++ b/caterva2/tests/test_cli.py
@@ -9,8 +9,6 @@
 
 
 import caterva2 as cat2
-import os
-import pathlib
 import json
 import subprocess
 import sys
@@ -34,10 +32,12 @@ def test_roots(services):
     assert roots[root_default]['name'] == root_default
     assert roots[root_default]['http'] == cat2.pub_host_default
 
+
 def test_url(services):
     out = cli(['url', root_default])
     assert out == ['http://localhost:8001']
 
+
 def test_subscribe(services):
     # Subscribe once
     out = cli(['subscribe', root_default])
diff --git a/caterva2/utils.py b/caterva2/utils.py
index 438f7446..5a812346 100644
--- a/caterva2/utils.py
+++ b/caterva2/utils.py
@@ -45,7 +45,7 @@ def walk_files(root, exclude=None):
 def socket_type(string):
     host, port = string.split(':')
     port = int(port)
-    return (host, port)
+    return host, port
 
 
 def get_parser(broker=None, http=None):

From 8f5df8a7bff3ae7dba829d88922dff555150a97a Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 16 Jan 2024 09:57:18 +0100
Subject: [PATCH 36/38] Small improvements

---
 caterva2/services/pub.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/caterva2/services/pub.py b/caterva2/services/pub.py
index 182c293b..6fcbe148 100644
--- a/caterva2/services/pub.py
+++ b/caterva2/services/pub.py
@@ -92,6 +92,7 @@ async def watchfiles(queue):
 
     # The etags left are those that were deleted
     for key in etags:
+        abspath = root / key
         queue.put_nowait(abspath)
         del database.etags[key]
         database.save()
@@ -121,10 +122,13 @@ async def lifespan(app: FastAPI):
 
     # Watch dataset files (must wait before publishing)
     await client.wait_until_ready()
-    asyncio.create_task(watchfiles(queue))
+    watch_task = asyncio.create_task(watchfiles(queue))
 
     yield
 
+    # Cancel watch task
+    watch_task.cancel()
+
     # Cancel worker tasks
     for task in tasks:
         task.cancel()

From b0b66c703ba06fd7037ac2ceff77eb5351785698 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 16 Jan 2024 09:57:30 +0100
Subject: [PATCH 37/38] More PEP8 style fixes

---
 caterva2/models.py       | 9 ++++++++-
 caterva2/services/sub.py | 3 +--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/caterva2/models.py b/caterva2/models.py
index b292a94d..1db965e8 100644
--- a/caterva2/models.py
+++ b/caterva2/models.py
@@ -19,6 +19,7 @@ class CParams(pydantic.BaseModel):
     typesize: int
     blocksize: int
 
+
 class SChunk(pydantic.BaseModel):
     blocksize: int
     cbytes: int
@@ -35,6 +36,7 @@ class SChunk(pydantic.BaseModel):
 #   vlmeta
     nchunks: int
 
+
 class Metadata(pydantic.BaseModel):
     dtype: str
     ndim: int
@@ -48,21 +50,26 @@ class Metadata(pydantic.BaseModel):
     schunk: SChunk
     size: int
 
+
 class File(pydantic.BaseModel):
     mtime: float
     size: int
 
+
 class Root(pydantic.BaseModel):
     name: str
     http: str
-    subscribed: typing.Optional[bool] = None # Used only by the subscriber program
+    subscribed: typing.Optional[bool] = None  # Used only by the subscriber program
+
 
 class Broker(pydantic.BaseModel):
     roots: typing.Dict[str, Root]
 
+
 class Publisher(pydantic.BaseModel):
     etags: typing.Dict[str, str]
 
+
 class Subscriber(pydantic.BaseModel):
     roots: typing.Dict[str, Root]
     etags: typing.Dict[str, str]
diff --git a/caterva2/services/sub.py b/caterva2/services/sub.py
index 23ba4e4f..ef748098 100644
--- a/caterva2/services/sub.py
+++ b/caterva2/services/sub.py
@@ -83,8 +83,7 @@ async def updated_dataset(data, topic):
 def follow(name: str):
     root = database.roots.get(name)
     if root is None:
-        errors = {}
-        errors[name] = 'This dataset does not exist in the network'
+        errors = {name: 'This dataset does not exist in the network'}
         return errors
 
     if not root.subscribed:

From ce32899f833136c748e223850ebd76b50be70443 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Tue, 16 Jan 2024 09:59:20 +0100
Subject: [PATCH 38/38] Fix for function call in api_utils

---
 caterva2/clients/tbrowser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/caterva2/clients/tbrowser.py b/caterva2/clients/tbrowser.py
index cd2b0494..88e9a63b 100644
--- a/caterva2/clients/tbrowser.py
+++ b/caterva2/clients/tbrowser.py
@@ -12,7 +12,7 @@
 import pathlib
 
 # Project
-from caterva2 import utils
+from caterva2 import utils, api_utils
 
 from textual.app import App, ComposeResult
 from textual.widgets import Tree
@@ -23,7 +23,7 @@ class TreeApp(App):
     def __init__(self, args):
         super().__init__()
         self.root = args.root
-        self.data = utils.get(f'http://{args.host}/api/list/{args.root}')
+        self.data = api_utils.get(f'http://{args.host}/api/list/{args.root}')
 
     def compose(self) -> ComposeResult:
         path = self.root / pathlib.Path(self.data[0])