Skip to content

Commit 0e15990

Browse files
author
WandrilleD
committed
update of nb03,04,05 - better imbalance presentation + RF regression
1 parent bf6bef2 commit 0e15990

10 files changed

+15653
-1284
lines changed

images/ROC_curve.png

22.9 KB
Loading
316 KB
Loading

images/precision_recall_curve.png

16.2 KB
Loading

images/stringKernel.png

259 KB
Loading

python_notebooks/Chapter_3_Machine_Learning_routine__distance_based_model_for_classification.ipynb

+755-894
Large diffs are not rendered by default.

python_notebooks/Chapter_4_Machine_Learning_based_on_decision_trees_for_classification.ipynb

+374-212
Large diffs are not rendered by default.

python_notebooks/Chapter_5_Machine_Learning_for_regression.ipynb

+14,510-164
Large diffs are not rendered by default.

python_notebooks/solutions/solution_02_FS.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from sklearn.feature_selection import f_classif
33

44

5-
# Creating the object SelectKBest and settling for 10 best features
6-
skb = SelectKBest(f_classif, k=10)
5+
# Creating the object SelectKBest and settling for 5 best features
6+
skb = SelectKBest(f_classif, k=5)
77
skb.fit(
88
X_cancer,
99
y_cancer)
@@ -19,11 +19,11 @@
1919
break
2020
print('\t',feature , ':' , pval )
2121

22-
selected10 = [x for x,p in sortedPvals[:10] ]
22+
selected5 = [x for x,p in sortedPvals[:5] ]
2323
print("selected best:" , selected5 )
2424

2525

26-
sns.pairplot( df_cancer , hue='malignant' , vars=selected10 )
26+
sns.pairplot( df_cancer , hue='malignant' , vars=selected5 )
2727

2828

2929
## that is very nice, but a lot of these are highly correlated...
@@ -42,7 +42,7 @@
4242
## now we can select the best feature among the principal components
4343

4444

45-
skb = SelectKBest(f_classif, k=10)
45+
skb = SelectKBest(f_classif, k=5)
4646
skb.fit(
4747
x_pca,
4848
y_cancer)

python_notebooks/solutions/solution_02_KNN.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
X_penguin_train, X_penguin_test, y_penguin_train, y_penguin_test = train_test_split(
55
X_penguin, y_penguin,
6-
random_state=463390,stratify=y_penguin)
6+
random_state=4212280,stratify=y_penguin)
77

88
knn_i=KNeighborsClassifier(n_jobs=-1)
99

python_notebooks/solutions/solution_02_cancer.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,18 @@
2323
best_model_C = gridsearch_C.fit(X_cancer_train,y_cancer_train)
2424

2525
print(best_model_C.best_params_)
26-
26+
print("Model accuracy:",gridsearch_C.best_score_)
2727

2828

2929
## predicting the labels on the test set
3030
y_pred_test_c=best_model_C.predict(X_cancer_test)
3131

32-
bestC = best_model_C.best_params_['classifier__C']
33-
bestPenalty = best_model_C.best_params_['classifier__penalty']
32+
bestGamma = best_model_C.best_params_['classifier__gamma']
3433

3534

36-
plotTitle = 'logistic regression: {} penalty ; C: {:.1e}\n Accuracy: {:.3f}'.format(bestPenalty,
37-
bestC,
38-
accuracy_score(y_cancer_test,y_pred_test_c) )
35+
36+
plotTitle = 'RBF: gamma: {:.1e}\n Accuracy: {:.3f}'.format(bestGamma,
37+
accuracy_score(y_cancer_test,y_pred_test_c) )
3938

4039

4140
plotConfusionMatrix( y_cancer_test, y_pred_test_c,
@@ -70,7 +69,7 @@
7069

7170

7271

73-
PCA_NCOMPONENTS = 10
72+
PCA_NCOMPONENTS = 5
7473

7574
pipe_pca = Pipeline([('scalar1',StandardScaler()),
7675
('pca',PCA(n_components=PCA_NCOMPONENTS)),
@@ -85,7 +84,8 @@
8584
best_model_c_pca = gridsearch_c_pca.fit(X_cancer_train,y_cancer_train)
8685

8786
print(best_model_c_pca.best_params_)
88-
print("Model accuracy:",best_model_c_pca.score(X_cancer_test,y_cancer_test))
87+
print("Model accuracy:",gridsearch_c_pca.best_score_)
88+
8989

9090
## predicting the labels on the test set
9191
y_pred_test_c=best_model_c_pca.predict(X_cancer_test)

0 commit comments

Comments
 (0)