From cf8be16806495f4bf2c3cf6b9b19612b486e94a2 Mon Sep 17 00:00:00 2001 From: moink Date: Tue, 7 May 2019 08:35:04 +0200 Subject: [PATCH] Fixed setup.py. Added to readme how to run scripts --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ run_epiclass.py | 2 +- setup.py | 5 +++-- 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 095421b..ec0ea45 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,65 @@ In order to demonstrate how one might deploy the model, I wrote a [web API](api. I also wrote a [few tests to ensure the models are working](/test_deployment.py) They only test the PCA-SVM models since these were the best models I developed. For each I test a single prediction, and then test that the confusion matrix has not changed. +## Working with epiclass + +### Installing + +Epiclass requires Python 3. To get epiclass, make sure your python is Python 3, and run: + +`git clone https://github.com/moink/epiclass/` + +`cd epiclass` + +`python setup.py install` + +### Running + +There are three scripts to run: run_epiclass.py, api.py, and test_deployment.py. + +#### run_epiclass.py + +This script is included to show how I created the plots in this readme, trained the models, and tuned the hyperparameters. Running it with all options may take a long time, depending on your hardware -- on the order of hours. 
+ +To run it, provide one or more of the following actions as arguments to the Python script: + +* explore - make several plots of the data, including plots of the + Principal Component Analysis (PCA) transformation of + the features +* pca_svm2 - train a binary classifier (seizure vs + non-seizure) using a pipeline of PCA and a support + vector machine +* pca_svm5 - train a multiclass classifier using a pipeline + of PCA and a support vector machine +* rf - train a multiclass classifier using a random decision + forest +* nn - train a multiclass classifier using an artificial + neural network + All the training methods save the models to the model + directory and additionally save a confusion matrix to the + outputs directory. + +For example, to create the data exploration plots and train the neural network, run: + +`python run_epiclass.py explore nn` + +#### test_deployment.py + +This runs four tests to ensure that the PCA-SVM models are giving expected results. To run it, use: + +`python test_deployment.py` + +It will show some text in the console indicating whether the tests ran. + +#### api.py + +This starts a process serving an API with access to the binary PCA-SVM model. +To run it, use: + +`python api.py` + +The console will show a URL - visit it with your browser. + ## Conclusion I think I could improve the models with further tuning (for example, a finer grid search) and by trying other models (e.g. different neural network architectures, logistic regression, fourier transforms). However I think the improvements would be marginal and that I have gotten pretty close to the limits of what the data can provide. 
diff --git a/run_epiclass.py b/run_epiclass.py index 24278a0..296969c 100644 --- a/run_epiclass.py +++ b/run_epiclass.py @@ -2,7 +2,7 @@ # import modules used here -- sys is a very standard one import sys, argparse, logging -from epiclass import run +from epiclass.epiclass import run if __name__ == '__main__': parser = argparse.ArgumentParser( diff --git a/setup.py b/setup.py index b9f6116..7540371 100644 --- a/setup.py +++ b/setup.py @@ -3,13 +3,14 @@ setup( name='epiclass', version='1.0.0', - packages=['.'], + packages=['epiclass'], url='https://github.com/moink/epiclass', license='MIT', author='moink', author_email='', description='Visualization and prediction of epileptic seizure data set', install_requires=['keras', 'pandas', 'joblib', 'matplotlib', 'seaborn', - 'scikit-learn', 'flask', 'flask_restful', 'tensorflow'], + 'scikit-learn', 'flask>=1.0.2', 'flask_restful', + 'tensorflow'], scripts=['run_epiclass', 'api', 'test_deployment'] ) \ No newline at end of file