Skip to content

Commit 6d87c24

Browse files
author
nmeheus
committed
Added Tor and changed test splitting
1 parent b651bea commit 6d87c24

9 files changed

+800310
-58
lines changed

Multi_label/CV_hyperparameters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import sys
1717
from sklearn.metrics import confusion_matrix
1818

19-
FEATURE = 'size_IAT' # use burst features or size_IAT ('size_IAT', 'burst' or 'both')
19+
FEATURE = 'both' # use burst features or size_IAT ('size_IAT', 'burst' or 'both')
2020
METHOD = 'RF' # options: 'NB' : Naive Bayes, 'RF' : random forest, 'MLP' : , 'LR': logistic regression
2121
TEST_SIZE = 0.20
2222

Multi_label/trace_classification.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
parameters = {'size_IAT' :
3131
{'RF': {'n_estimators': 48}},
3232
'burst':
33-
{'RF': {'n_estimators': 21}},
34-
'both': {'RF': {'n_estimators': 21}}
33+
{'RF': {'n_estimators': 48}},
34+
'both': {'RF': {'n_estimators': 39}}
3535
}
3636
elif mode == 'ipsec_20':
3737
parameters = {'size_IAT' :

Multi_label/trace_visualization.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import matplotlib.pyplot as plt
77
import numpy.random as nprnd
88

9-
FEATURE = 'size_IAT' # use burst features or size_IAT ('size_IAT' or 'burst')
9+
FEATURE = 'burst' # use burst features or size_IAT ('size_IAT' or 'burst')
1010
modes = ['ipsec', 'ipsec_20','ipsec_50','ipsec_100','ipsec_200','ipsec_300','ipsec_400']
1111

1212
if __name__ == "__main__":

Single_label/CV_hyperparameters.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import sys
1515
from sklearn.metrics import confusion_matrix
1616

17-
FEATURE = 'size_IAT' # use burst features or size_IAT ('size_IAT', 'burst' or 'both')
18-
METHOD = 'RF' # options: 'NB' : Naive Bayes, 'RF' : random forest, 'MLP' : , 'LR': logistic regression
17+
#FEATURE = 'size_IAT' # use burst features or size_IAT ('size_IAT', 'burst' or 'both')
18+
#METHOD = 'LR' # options: 'NB' : Naive Bayes, 'RF' : random forest, 'MLP' : , 'LR': logistic regression
1919
TEST_SIZE = 0.20
2020

2121

@@ -26,13 +26,16 @@ def log(s):
2626

2727
if __name__ == "__main__":
2828
mode = sys.argv[1]
29+
FEATURE = sys.argv[2] # use burst features or size_IAT ('size_IAT', 'burst' or 'both')
30+
METHOD = sys.argv[3] # options: 'NB' : Naive Bayes, 'RF' : random forest, 'MLP' : , 'LR': logistic regression
2931
all_traces = load_pickled_traces(mode)
30-
windowed_traces = window_all_traces(all_traces)
32+
#windowed_traces = window_all_traces(all_traces)
3133

32-
# Split test set
33-
labels = [x.label for x in windowed_traces]
34-
X_train_val, X_test, y_train_val, y_test = train_test_split(windowed_traces,labels, stratify=np.array(labels), test_size=TEST_SIZE, random_state=0)
34+
# Split test set but keep windows from different traces separated from each other
35+
labels = [x.label for x in all_traces]
36+
X_train_val, X_test, y_train_val, y_test = train_test_split(all_traces,labels, stratify=np.array(labels), test_size=TEST_SIZE, random_state=0)
3537

38+
X_train_val = window_all_traces(X_train_val)
3639

3740
if METHOD == 'NB':
3841
clf = MultinomialNB()
@@ -53,8 +56,8 @@ def log(s):
5356
for train, val in kf.split(X_train_val):
5457
log('Started testing hyperparameters for fold ' + str(fold+1)+'.')
5558
# Separate train list from val list
56-
train_list = [windowed_traces[i] for i in train]
57-
val_list = [windowed_traces[i] for i in val]
59+
train_list = [X_train_val[i] for i in train]
60+
val_list = [X_train_val[i] for i in val]
5861

5962
if FEATURE == 'size_IAT':
6063
feature_matrix, classes, train_range = build_feature_matrix_size_IAT(train_list)
@@ -63,7 +66,7 @@ def log(s):
6366
feature_matrix, classes, train_range = build_feature_matrix_burst(train_list)
6467
feature_matrix_val, classes_val, val_range = build_feature_matrix_burst(val_list, train_range)
6568
elif FEATURE == 'both':
66-
feature_matrix, classes, train_range = build_feature_matrix_both(X_train_val)
69+
feature_matrix, classes, train_range = build_feature_matrix_both(train_list)
6770
feature_matrix_val, classes_val, val_range = build_feature_matrix_both(val_list, train_range)
6871

6972
for par in list(parameters):

Single_label/background_traffic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import matplotlib.pyplot as plt
22

33
packetrate_up = [0,20,50,100,150,200,300,400]
4-
accuracy_up = [100,98.72,100,93.75,96.30,97.44,96.30,92.5]
4+
accuracy_up = [96.40,98.88,98.75,95,93.75,90,95,85]
55

66
packetrate_ud = [0,40,100,200,300,400,600,800]
7-
accuracy_ud = [100,98.75,96.25,97.5,96.25,95,95,92.5]
7+
accuracy_ud = [96.40,100,98.75,98.75,87.5,96.25,86.25,81.25]
88

9-
plt.ylim([80,101])
9+
plt.ylim([75,101])
1010
plt.plot(packetrate_up, accuracy_up, label='Only upstream packets')
1111
plt.plot(packetrate_ud, accuracy_ud, label='Upstream and downstream packets')
1212
plt.title('The effect of background traffic on the classification accuracy')
1313
plt.ylabel('Accuracy on the testset')
14-
plt.xlabel('# packets of added background traffic')
14+
plt.xlabel('# packets/s of added background traffic')
1515
plt.legend(loc=4)
1616
plt.show()
1717

Single_label/feature_extraction.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,16 @@
9797
'path': 'traces_400_ud/',
9898
'object_file': 'traces_400_ud/pickled_traces.dat',
9999
'ip': '192.168.0.2'
100+
},
101+
'tor':{
102+
'path': 'tor_traces/',
103+
'object_file': 'tor_traces/pickled_traces.dat',
104+
'ip': '192.168.2.2'
100105
}
101106
}
102107

103108
# Fill this in to determine which kind of traffic to work on
104-
mode = 'ipsec_400_ud'
109+
mode = 'tor'
105110

106111
# Load all traces that match the reg exp
107112
def load_traces():

0 commit comments

Comments
 (0)