Skip to content

Commit

Permalink
Merge pull request #64 from CINPLA/feature-git-lfs-plugin
Browse files Browse the repository at this point in the history
Add Git LFS plugin and before_load in plugin_interface.Dataset
  • Loading branch information
dragly authored Dec 11, 2018
2 parents 1fef94f + 50eb8c7 commit 2314f0d
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 1 deletion.
17 changes: 16 additions & 1 deletion exdir/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,27 @@ def __setitem__(self, args, value):
self.meta._set_data(meta)

def _reload_data(self):
for plugin in self.plugin_manager.dataset_plugins.write_order:
plugin.before_load(self.data_filename)

if self.io_mode == self.OpenMode.READ_ONLY:
mmap_mode = "r"
else:
mmap_mode = "r+"

self._data_memmap = np.load(self.data_filename, mmap_mode=mmap_mode)
try:
self._data_memmap = np.load(self.data_filename, mmap_mode=mmap_mode, allow_pickle=False)
except ValueError as e:
# Could be that it is a Git LFS file. Let's see if that is the case and warn if so.
with open(self.data_filename, "r") as f:
test_string = "version https://git-lfs.github.com/spec/v1"
contents = f.read(len(test_string))
if contents == test_string:
raise IOError("The file '{}' is a Git LFS placeholder. "
"Open the the Exdir File with the Git LFS plugin or run "
"`git lfs fetch` first. ".format(self.data_filename))
else:
raise e

def _reset_data(self, value, attrs, meta):
self._data_memmap = np.lib.format.open_memmap(
Expand Down
11 changes: 11 additions & 0 deletions exdir/plugin_interface/plugin_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,17 @@ def __init__(self, attrs, meta):


class Dataset:
def before_load(self, dataset_path):
    """
    Hook invoked before Exdir attempts to load a dataset file.

    The default implementation does nothing; override it in your plugin
    to act on the file first. ``dataset_path`` is the path to the
    dataset's NumPy file. The function should not return anything.
    """
    return None

def prepare_read(self, dataset_data):
"""
Overload this function in your plugin implementation.
Expand Down
52 changes: 52 additions & 0 deletions exdir/plugins/git_lfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import codecs
import pathlib
import subprocess
import sys

import exdir.plugin_interface

class DatasetPlugin(exdir.plugin_interface.Dataset):
    """
    Dataset plugin that pulls the real data for Git LFS placeholder files
    before Exdir tries to load them.
    """

    # Leading bytes that identify a Git LFS placeholder file.
    _LFS_HEADER = b"version https://git-lfs.github.com/spec/v1"

    def __init__(self, verbose):
        """
        Parameters
        ----------
        verbose:
            When true, print which file is being fetched and echo the
            output of ``git lfs pull`` to ``sys.stdout``.
        """
        self.verbose = verbose

    def before_load(self, dataset_path):
        """
        Run ``git lfs pull`` for ``dataset_path`` if it is an LFS placeholder.

        Files that do not start with the Git LFS header are left untouched.
        The exit status of ``git lfs pull`` is not checked; if the pull
        fails, the placeholder simply stays in place.

        Raises
        ------
        subprocess.CalledProcessError
            If ``git rev-parse --show-toplevel`` fails, e.g. because the
            file is not inside a Git working tree.
        """
        with open(dataset_path, "rb") as f:
            header = f.read(len(self._LFS_HEADER))
        if header != self._LFS_HEADER:
            return

        # Git reports the repository root as an absolute path, so the
        # dataset path must be absolute (and symlink-free) as well for
        # relative_to() to succeed.
        path = pathlib.Path(dataset_path).resolve()

        command = ['git', 'rev-parse', '--show-toplevel']
        toplevel = subprocess.check_output(
            command, cwd=str(path.parent), stderr=subprocess.STDOUT
        )
        git_path = pathlib.Path(toplevel.decode('utf-8').rstrip()).resolve()
        relative_path = path.relative_to(git_path)

        if self.verbose:
            print("Fetching Git LFS object for {}".format(relative_path))

        # NOTE(review): no shell is involved, so the two quote characters in
        # 'lfs.fetchexclude=""' reach git literally - presumably meant as an
        # empty exclude pattern; confirm against git-lfs behaviour.
        command = ['git', '-c', 'lfs.fetchexclude=""', 'lfs', 'pull', '-I', str(relative_path)]
        # stderr is merged into stdout so that a single pipe is drained;
        # an unread, separate stderr pipe could fill up and block git.
        process = subprocess.Popen(
            command, cwd=str(git_path), stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )

        if self.verbose:
            # Decode incrementally: output is read one byte at a time to keep
            # progress responsive, and a multi-byte UTF-8 character must not
            # be decoded in pieces.
            decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
            while True:
                chunk = process.stdout.read(1)
                if not chunk:
                    # EOF: the process closed its output.
                    break
                # Git LFS has fancy loading output - this doesn't work well in Jupyter,
                # so just replace carriage return with newline
                text = decoder.decode(chunk).replace('\r', '\n')
                if text:
                    sys.stdout.write(text)
                    sys.stdout.flush()
            remainder = decoder.decode(b'', final=True)
            if remainder:
                sys.stdout.write(remainder)
                sys.stdout.flush()

        # Drain whatever is left and wait for git to exit.
        process.communicate()


class Plugin(exdir.plugin_interface.Plugin):
    """Exdir plugin registered as "git_lfs" with a single Git LFS dataset plugin."""

    def __init__(self, verbose=False):
        """Create the plugin; ``verbose`` is forwarded to the dataset plugin."""
        lfs_dataset_plugin = DatasetPlugin(verbose)
        super(Plugin, self).__init__(
            "git_lfs",
            dataset_plugins=[lfs_dataset_plugin],
        )

def plugins():
    """Return the Git LFS plugin list with verbose output turned off."""
    quiet_plugins = _plugins(verbose=False)
    return quiet_plugins


def _plugins(verbose):
    """
    Build the list holding the single "git_lfs" plugin.

    ``verbose`` is passed on to the ``DatasetPlugin`` the plugin wraps.
    """
    lfs_dataset_plugin = DatasetPlugin(verbose)
    git_lfs_plugin = exdir.plugin_interface.Plugin(
        "git_lfs",
        dataset_plugins=[lfs_dataset_plugin],
    )
    return [git_lfs_plugin]

0 comments on commit 2314f0d

Please sign in to comment.