functions.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (ConfusionMatrixDisplay, classification_report,
                             precision_score, recall_score, accuracy_score, f1_score)
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from IPython.display import display  # display() is only a builtin inside notebooks

def metrics(model, x_train, y_train, x_test, y_test):
    """Plot train and test confusion matrices side by side, then print a
    classification report on the test set."""
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))
    # plot_confusion_matrix was removed in scikit-learn 1.2;
    # ConfusionMatrixDisplay.from_estimator is its replacement.
    ConfusionMatrixDisplay.from_estimator(model, x_train, y_train, ax=ax[0],
                                          cmap=plt.cm.Blues, xticks_rotation='vertical')
    ax[0].set_title('Train Confusion Matrix')
    ConfusionMatrixDisplay.from_estimator(model, x_test, y_test, ax=ax[1],
                                          cmap=plt.cm.Blues, xticks_rotation='vertical')
    ax[1].set_title('Test Confusion Matrix')
    plt.show()
    print(classification_report(y_test, model.predict(x_test)))
    print('\n')
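
# Usage sketch (hypothetical, assuming x_train/x_test splits already exist):
# metrics() accepts any fitted scikit-learn-style classifier, e.g.
#   clf = RandomForestClassifier().fit(x_train, y_train)
#   metrics(clf, x_train, y_train, x_test, y_test)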

def make_model(model, x_train, y_train):
    """Fit a model (or pipeline) on the training data and return the fitted object."""
    return model.fit(x_train, y_train)

def all_models(x_train, x_test, y_train, y_test, objects=None, index=None):
    """Fit a suite of classifiers, report metrics for each, and return the fitted models."""
    # Build the defaults inside the function so fresh, unfitted estimators are
    # created on every call (mutable default arguments would be fitted in place
    # and reused across calls).
    if objects is None:
        objects = [LogisticRegression(fit_intercept=False, C=1e12),
                   Pipeline([('ss', StandardScaler()), ('knn', KNeighborsClassifier())]),
                   GaussianNB(),
                   DecisionTreeClassifier(),
                   RandomForestClassifier(),
                   XGBClassifier(),
                   Pipeline([('ss', StandardScaler()), ('svm', SVC())])]
    if index is None:
        index = ['Logistic Regression', 'K-Nearest Neighbors', 'Naive Bayes', 'Decision Tree',
                 'Random Forest', 'XGBoost', 'Support Vector Machine']
    models = []
    precision = []
    recall = []
    accuracy = []
    f1 = []
    for i, o in enumerate(objects):
        print(f'{index[i]} Results:')
        models.append(make_model(o, x_train, y_train))
        metrics(models[-1], x_train, y_train, x_test, y_test)
    for m in models:
        prediction = m.predict(x_test)
        precision.append(precision_score(y_test, prediction, average='macro'))
        recall.append(recall_score(y_test, prediction, average='macro'))
        accuracy.append(accuracy_score(y_test, prediction))
        f1.append(f1_score(y_test, prediction, average='macro'))
    df = pd.DataFrame(np.array([precision, recall, accuracy, f1]).T, index=index,
                      columns=['Precision Score', 'Recall Score',
                               'Accuracy Score', 'F1 Score']).style.format('{:.2%}')
    display(df)
    # .data recovers the underlying DataFrame from the Styler; idxmax() returns
    # the best row label (model name) for each metric column.
    best = df.data.idxmax()
    print(f"The model with the highest precision score is {best['Precision Score']}.")
    print(f"The model with the highest recall score is {best['Recall Score']}.")
    print(f"The model with the highest accuracy score is {best['Accuracy Score']}.")
    print(f"The model with the highest F1 score is {best['F1 Score']}.")
    return models
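
# Usage sketch (hypothetical call, assuming preprocessed splits already exist):
# the optional `objects`/`index` parameters let you compare just a subset of models.
#   fitted = all_models(x_train, x_test, y_train, y_test,
#                       objects=[GaussianNB(), RandomForestClassifier()],
#                       index=['Naive Bayes', 'Random Forest'])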

def plot_importances(model, index, title='Feature Importances'):
    """Plot the 20 largest feature importances of a fitted tree-based model."""
    f_import = pd.Series(model.feature_importances_, index=index)
    # Name the column so seaborn can reference it by key rather than by the
    # positional 0 that to_frame() produces by default.
    plot = f_import.nlargest(20).to_frame(name='importance')
    fig, ax = plt.subplots(figsize=(15, 8))
    sns.barplot(data=plot, y=plot.index, x='importance', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Importance')
    ax.set_ylabel('Feature')
    plt.show()
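
# Hypothetical demo (an assumption, not part of the original module): exercises
# all_models and plot_importances end to end on a synthetic binary classification
# problem. The dataset size and train/test split are illustrative choices only.
if __name__ == '__main__':
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=500, n_features=10, n_informative=5,
                               random_state=42)
    feature_names = [f'feature_{i}' for i in range(X.shape[1])]
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                        random_state=42)
    fitted = all_models(x_train, x_test, y_train, y_test)
    # The Random Forest sits at position 4 in the default `objects` list above
    plot_importances(fitted[4], index=feature_names,
                     title='Random Forest Feature Importances')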