Skip to content

Commit 0e15990

Browse files
author
WandrilleD
committed
update of nb03,04,05 - better imbalance presentation + RF regression
1 parent bf6bef2 commit 0e15990

10 files changed

+15653
-1284
lines changed

images/ROC_curve.png

22.9 KB
Loading
316 KB
Loading

images/precision_recall_curve.png

16.2 KB
Loading

images/stringKernel.png

259 KB
Loading

python_notebooks/Chapter_3_Machine_Learning_routine__distance_based_model_for_classification.ipynb

+755-894
Large diffs are not rendered by default.

python_notebooks/Chapter_4_Machine_Learning_based_on_decision_trees_for_classification.ipynb

+374-212
Large diffs are not rendered by default.

python_notebooks/Chapter_5_Machine_Learning_for_regression.ipynb

+14,510-164
Large diffs are not rendered by default.

python_notebooks/solutions/solution_02_FS.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from sklearn.feature_selection import f_classif
33

44

5-
# Creating the object SelectKBest and settling for 10 best features
6-
skb = SelectKBest(f_classif, k=10)
5+
# Creating the object SelectKBest and settling for 5 best features
6+
skb = SelectKBest(f_classif, k=5)
77
skb.fit(
88
X_cancer,
99
y_cancer)
@@ -19,11 +19,11 @@
1919
break
2020
print('\t',feature , ':' , pval )
2121

22-
selected10 = [x for x,p in sortedPvals[:10] ]
22+
selected5 = [x for x,p in sortedPvals[:5] ]
2323
print("selected best:" , selected5 )
2424

2525

26-
sns.pairplot( df_cancer , hue='malignant' , vars=selected10 )
26+
sns.pairplot( df_cancer , hue='malignant' , vars=selected5 )
2727

2828

2929
## that is very nice, but a lot of these are highly correlated...
@@ -42,7 +42,7 @@
4242
## now we can select the best feature among the principal components
4343

4444

45-
skb = SelectKBest(f_classif, k=10)
45+
skb = SelectKBest(f_classif, k=5)
4646
skb.fit(
4747
x_pca,
4848
y_cancer)

python_notebooks/solutions/solution_02_KNN.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
X_penguin_train, X_penguin_test, y_penguin_train, y_penguin_test = train_test_split(
55
X_penguin, y_penguin,
6-
random_state=463390,stratify=y_penguin)
6+
random_state=4212280,stratify=y_penguin)
77

88
knn_i=KNeighborsClassifier(n_jobs=-1)
99

python_notebooks/solutions/solution_02_cancer.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,18 @@
2323
best_model_C = gridsearch_C.fit(X_cancer_train,y_cancer_train)
2424

2525
print(best_model_C.best_params_)
26-
26+
print("Model accuracy:",gridsearch_C.best_score_)
2727

2828

2929
## predicting the labels on the test set
3030
y_pred_test_c=best_model_C.predict(X_cancer_test)
3131

32-
bestC = best_model_C.best_params_['classifier__C']
33-
bestPenalty = best_model_C.best_params_['classifier__penalty']
32+
bestGamma = best_model_C.best_params_['classifier__gamma']
3433

3534

36-
plotTitle = 'logistic regression: {} penalty ; C: {:.1e}\n Accuracy: {:.3f}'.format(bestPenalty,
37-
bestC,
38-
accuracy_score(y_cancer_test,y_pred_test_c) )
35+
36+
plotTitle = 'RBF: gamma: {:.1e}\n Accuracy: {:.3f}'.format(bestGamma,
37+
accuracy_score(y_cancer_test,y_pred_test_c) )
3938

4039

4140
plotConfusionMatrix( y_cancer_test, y_pred_test_c,
@@ -70,7 +69,7 @@
7069

7170

7271

73-
PCA_NCOMPONENTS = 10
72+
PCA_NCOMPONENTS = 5
7473

7574
pipe_pca = Pipeline([('scalar1',StandardScaler()),
7675
('pca',PCA(n_components=PCA_NCOMPONENTS)),
@@ -85,7 +84,8 @@
8584
best_model_c_pca = gridsearch_c_pca.fit(X_cancer_train,y_cancer_train)
8685

8786
print(best_model_c_pca.best_params_)
88-
print("Model accuracy:",best_model_c_pca.score(X_cancer_test,y_cancer_test))
87+
print("Model accuracy:",gridsearch_c_pca.best_score_)
88+
8989

9090
## predicting the labels on the test set
9191
y_pred_test_c=best_model_c_pca.predict(X_cancer_test)

0 commit comments

Comments
 (0)