-
Notifications
You must be signed in to change notification settings - Fork 0
/
sgd_alg.py
94 lines (77 loc) · 2.85 KB
/
sgd_alg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import math
import os
import pickle

import numpy as np
from sklearn import tree
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
def save_obj(obj, name,
             directory='/Users/manewilliams/Desktop/School/Cis519/Project/'):
    """Pickle ``obj`` into ``<directory>/<name>.pkl``.

    Args:
        obj: Any picklable object.
        name: File name without the ``.pkl`` extension.
        directory: Folder that receives the file. The default is the project
            folder this script was originally hard-wired to, so existing
            two-argument calls keep writing to the same location.

    Raises:
        OSError: If the target file cannot be opened for writing.
    """
    # os.path.join accepts the directory with or without a trailing
    # separator, which plain string concatenation did not.
    path = os.path.join(directory, name + '.pkl')
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name,
             directory='/Users/manewilliams/Desktop/School/Cis519/Project/'):
    """Load and return the object pickled in ``<directory>/<name>.pkl``.

    Args:
        name: File name without the ``.pkl`` extension.
        directory: Folder that contains the file. The default is the project
            folder this script was originally hard-wired to, so existing
            one-argument calls keep reading from the same location.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    # os.path.join accepts the directory with or without a trailing
    # separator, which plain string concatenation did not.
    path = os.path.join(directory, name + '.pkl')
    with open(path, 'rb') as f:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced by a trusted source.
        return pickle.load(f)
# Load both pickled feature tables at import time and report their sizes.
# `users`, `num_users`, `bots` and `num_bots` are module-level names that the
# fold helpers in this file read.
users = load_obj('users_features')
num_users = len(users)
print(f"Number of users: {num_users}")

bots = load_obj('bots_features')
num_bots = len(bots)
print(f"Number of bots: {num_bots}")
def train(fold, num_folds, predictor, user_features=None, bot_features=None):
    """Fit ``predictor`` on a single fold of the user and bot feature tables.

    A fold is the contiguous slice
    ``[int(n * (fold - 1) / num_folds), int(n * fold / num_folds))`` of each
    table's entries, taken in the table's iteration order. User rows are
    labelled 0 and bot rows are labelled 1.

    NOTE: for scikit-learn estimators created without ``warm_start=True``,
    each ``fit`` call discards whatever earlier calls learned, so calling this
    function once per fold does not accumulate training data across folds.

    Args:
        fold: 1-based index of the fold to train on.
        num_folds: Total number of folds each table is divided into.
        predictor: Object exposing ``fit(X, y)``.
        user_features: Mapping of user key -> feature row. Defaults to the
            module-level ``users`` table.
        bot_features: Mapping of bot key -> feature row. Defaults to the
            module-level ``bots`` table.

    Returns:
        The same ``predictor`` object, after ``fit`` has been called on it.
    """
    if user_features is None:
        user_features = users
    if bot_features is None:
        bot_features = bots

    def fold_rows(table):
        # Slice the table's rows with the same integer bounds for every
        # caller so that folds never overlap.
        size = len(table)
        lo = int(size * (fold - 1) / num_folds)
        hi = int(size * fold / num_folds)
        return list(table.values())[lo:hi]

    user_rows = fold_rows(user_features)
    bot_rows = fold_rows(bot_features)

    train_x = user_rows + bot_rows
    train_y = [0] * len(user_rows) + [1] * len(bot_rows)
    predictor.fit(train_x, train_y)
    return predictor
def test(fold, num_folds, predictor, user_features=None, bot_features=None):
    """Score ``predictor`` on a single fold of the user and bot tables.

    A fold is the contiguous slice
    ``[int(n * (fold - 1) / num_folds), int(n * fold / num_folds))`` of each
    table's entries, taken in the table's iteration order. A user row counts
    as correct when the prediction equals 0, a bot row when it equals 1.

    Args:
        fold: 1-based index of the fold to evaluate.
        num_folds: Total number of folds each table is divided into.
        predictor: Object exposing ``predict(X)`` that returns one label per
            row of ``X``.
        user_features: Mapping of user key -> feature row. Defaults to the
            module-level ``users`` table.
        bot_features: Mapping of bot key -> feature row. Defaults to the
            module-level ``bots`` table.

    Returns:
        Tuple ``(correct, incorrect)`` of prediction counts for the fold.
    """
    if user_features is None:
        user_features = users
    if bot_features is None:
        bot_features = bots

    correct = 0
    incorrect = 0
    for table, expected_label in ((user_features, 0), (bot_features, 1)):
        size = len(table)
        lo = int(size * (fold - 1) / num_folds)
        hi = int(size * fold / num_folds)
        rows = list(table.values())[lo:hi]
        if not rows:
            # Nothing to score, and estimators may reject an empty batch.
            continue

        # One batched call instead of one predict() call per sample.
        predictions = predictor.predict(rows)
        hits = sum(1 for label in predictions if label == expected_label)
        correct += hits
        incorrect += len(rows) - hits
    return correct, incorrect
def _sgd_fold_samples(fold, num_folds):
    """Return ``(rows, labels)`` for one fold: users labelled 0, bots 1."""
    rows = []
    labels = []
    for table, label in ((users, 0), (bots, 1)):
        size = len(table)
        lo = int(size * (fold - 1) / num_folds)
        hi = int(size * fold / num_folds)
        chunk = list(table.values())[lo:hi]
        rows.extend(chunk)
        labels.extend([label] * len(chunk))
    return rows, labels


def test_sgd(a, e, t, eta, num_folds=5):
    """Cross-validate an ``SGDClassifier`` and print per-fold accuracy.

    For every held-out fold a fresh classifier is fitted once on the union of
    all remaining folds and then scored on the held-out fold with ``test``.
    Per-fold and overall accuracies are printed.

    Args:
        a: Value for the classifier's ``alpha`` parameter.
        e: Value for the classifier's ``epsilon`` parameter.
            NOTE(review): scikit-learn documents ``epsilon`` as used only by
            the huber / epsilon-insensitive losses, so it presumably has no
            effect with the logistic loss selected here - confirm.
        t: Value for the classifier's ``tol`` parameter.
        eta: Value for ``eta0``, the constant learning rate.
        num_folds: Number of cross-validation folds (default 5).

    Returns:
        Overall accuracy: correct predictions over all held-out folds divided
        by the total number of predictions.
    """
    total_right = 0
    total_wrong = 0
    for held_out in range(1, num_folds + 1):
        # NOTE(review): scikit-learn 1.1 renamed loss='log' to 'log_loss' and
        # 1.3 removed the old spelling; switch the name when upgrading.
        clf = SGDClassifier(loss='log', alpha=a, max_iter=1000, tol=t,
                            epsilon=e, learning_rate='constant', eta0=eta)

        # Gather every training fold first and fit exactly once. Calling
        # fit() fold by fold restarts from scratch each time (warm_start is
        # off), so only the last fold would actually shape the model.
        train_x = []
        train_y = []
        for fold in range(1, num_folds + 1):
            if fold == held_out:
                continue
            fold_x, fold_y = _sgd_fold_samples(fold, num_folds)
            train_x.extend(fold_x)
            train_y.extend(fold_y)
        clf.fit(train_x, train_y)

        right, wrong = test(held_out, num_folds, clf)
        total_right += right
        total_wrong += wrong
        acc = right / (right + wrong)
        print("Fold #" + str(held_out) + " Accuracy: " + str(acc))

    overall_acc = total_right / (total_right + total_wrong)
    print("\nOverall Accuracy: " + str(overall_acc))
    return overall_acc
# TODO: tune these hyper-parameters for better results, or switch to a
# different learning algorithm.
best_alpha, best_epsilon, best_tol, best_eta0 = 0.0076, 1.6, 0.008, 0.03

# Run the cross-validation once with the current best-known settings.
test_sgd(a=best_alpha, e=best_epsilon, t=best_tol, eta=best_eta0)