Merge pull request #5 from jtgilbert/dev

Dev
jtgilbert · Sep 19, 2022 · de89dd8 · de89dd8
2 parents 66f29ec + e32716d
commit de89dd8
Show file tree

Hide file tree

Showing 7 changed files with 639 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,42 @@
 # grain-size
 Estimate grain size distributions across a drainage network using field measurements.
+
+## Setup
+Download the .zip of the repository from the latest release. Navigate to the `scripts` folder, open a shell 
+terminal, and run the bootstrap.sh script. This will set up a virtual environment with all of the necessary 
+Python packages.
+
+## Running the model
+The simplest way to run the model is via command line. Open a command line terminal in the model folder and activate the 
+python environment. (These instructions apply to UNIX systems, Windows still to come).
+
+```commandline
+source .venv/bin/activate
+```
+Then change directories into the `grain_size_distribution` folder, where the python scripts are located.
+
+```commandline
+cd grain_size_distribution
+```
+
+and run the model using the usage below.
+```commandline
+usage: grain_size.py [-h] --measurements MEASUREMENTS [MEASUREMENTS ...] --reach_ids REACH_IDS [REACH_IDS ...] stream_network
+
+positional arguments:
+  stream_network        Path to stream network feature class
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --measurements MEASUREMENTS [MEASUREMENTS ...]
+                        A list of paths to csv files containing grain size measurements; should have header "D" at top of column followed by individual grain size measurements
+  --reach_ids REACH_IDS [REACH_IDS ...]
+                        A list containing the reach IDs from the stream network feature classassociated with the grain size measurements, in the same order as the measurements
+```
+
+for example, if my stream network was 'NHDPlus_Woods_Creek.shp', and I had two measurements,
+associated with segments 46 and 68 of the drainage network, I would enter:
+
+```commandline
+python grain_size.py path_to_NHDPlus_Woods_Creek.shp --measurements path_to_meas_1.csv path_to_meas_2.csv --reachids 46 68
+```
diff --git a/grain_size_distribution/__init__.py b/grain_size_distribution/__init__.py
diff --git a/grain_size_distribution/__version__.py b/grain_size_distribution/__version__.py
@@ -0,0 +1 @@
+__version__ = "1.0.0"
diff --git a/grain_size_distribution/grain_size.py b/grain_size_distribution/grain_size.py
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
@@ -0,0 +1,10 @@
+#! /bin/bash
+
+set -eu
+
+python3 --version
+python3 -m venv ../.venv
+
+../.venv/bin/python -m pip install --upgrade pip
+
+../.venv/bin/pip install -e ../
diff --git a/setup.py b/setup.py
@@ -0,0 +1,37 @@
+#!usr/bin/env python
+
+from setuptools import setup
+import re
+
+# https://packaging.python.org/discussions/install-requires-vs-requirements/
+install_requires = [
+    'numpy>=1.21', 'pandas>=1.4', 'scipy>=1.9', 'scikit-learn>=1.1.2', 'geopandas>=0.11'
+]
+
+with open("README.md", "rb") as f:
+    long_descr = f.read().decode("utf-8")
+
+version = re.search(
+    '^__version__\\s*=\\s*"(.*)"',
+    open('grain_size_distribution/__version__.py').read(),
+    re.M
+).group(1)
+
+setup(name='grain_size_distributions',
+      version=version,
+      author='Jordan Gilbert',
+      license='MIT',
+      python_requires='>=3.8',
+      long_description=long_descr,
+      author_email='[email protected]',
+      install_requires=install_requires,
+      zip_safe=False,
+      entry_points={
+          "console_scripts": [
+              'grain_size = grain_size_distribution.grain_size:main'
+          ]
+      },
+      url='https://github.com/jtgilbert/grain-size',
+      packages=[
+          'grain_size_distribution'
+      ])
diff --git a/testing.py b/testing.py
@@ -0,0 +1,88 @@
+from scipy import stats
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from scipy.optimize import minimize, fmin
+
+
+def med_err(loc, scale, median_obs):
+    dist = stats.norm(loc, scale).rvs(10000)
+    dist = dist**2
+    dist = dist[dist > 0.0005]
+    median_est = np.median(dist)
+    err = (median_est - median_obs)**2
+
+    return err
+
+
+def std_err(scale, loc, d16, d84):
+    if scale <= 0:
+        scale = abs(scale)
+    dist = stats.norm(loc, scale).rvs(10000)
+    dist = dist**2
+    dist = dist[dist > 0.0005]
+    err = (np.percentile(dist, 84)-d84)**2 + (np.percentile(dist, 16)-d16)**2
+
+    return err
+
+
+def shape_err(a, scale, loc, dif):
+    dist = stats.skewnorm(a, loc, scale).rvs(10000)
+    dist = dist[dist > 0.0005]
+    err = (np.percentile(dist, 84) - np.percentile(dist, 16))**2
+
+    return err
+
+
+a, loc, scale = 5, 0.015, 0.05
+
+data = pd.read_csv('./Input_data/Woods_D.csv')
+data = np.sqrt(data['D']/1000)
+
+params = stats.norm.fit(data)
+print(params)
+
+k2, pval = stats.normaltest(data)
+if pval < 1e-3:
+    print('p-value: ', pval, ' suggests distribution may not be normal')
+
+# plt.figure()
+# plt.hist(data, bins=15, density=True, alpha=0.6, color='g')
+# p = stats.norm.pdf(data)
+# plt.plot(data, stats.norm.pdf(data), 'k', linewidth=2)
+# plt.show()
+
+median_d = 0.061
+d_16 = 0.031
+d_84 = 0.111
+
+res = fmin(med_err, params[0], args=(params[1], median_d))
+loc_opt = res[0]
+#print(res.message)
+
+res2 = fmin(std_err, params[1], args=(loc_opt, d_16, d_84))
+scale_opt = res2[0]
+#print(res2.message)
+
+#res3 = fmin(shape_err, ae, args=(scale_opt, loc_opt, d_84-d_16))
+#a_opt = res3[0]
+#print(res3.message)
+
+print(loc_opt, scale_opt)
+new_data = stats.norm(loc_opt, scale_opt).rvs(10000)
+new_data = new_data**2
+new_data = new_data[new_data >= 0.005]
+print(len(new_data))
+plt.figure()
+plt.hist(new_data, bins=15, density=True, alpha=0.6, color='g')
+#xmin, xmax = plt.xlim()
+#x = np.linspace(xmin, xmax, 100)
+#p = stats.skewnorm.pdf(x, ae, loc_opt, scale_opt)
+#plt.plot(x, p, 'k', linewidth=2)
+plt.show()
+
+print(np.percentile(new_data, 50), np.percentile(new_data, 16), np.percentile(new_data, 84))
+
+# use alpha to maintain shape then optimize loc until the mean matches the estimated mean
+# and scale until d16 and d84 match estimated - change function to include mean D and flow
+# required to move it