Skip to content

Commit f455607

Browse files
committed Nov 13, 2016
added invalid data image and handling it in code
1 parent cfc68c3 commit f455607

File tree

4 files changed

+45
-35
lines changed

4 files changed

+45
-35
lines changed
 

‎images/Invalid data.png

401 KB
Loading

‎insight_testsuite/results.txt

+1
Original file line number | Diff line number | Diff line change
@@ -3,3 +3,4 @@
33
[Sat Nov 12 19:07:24 PST 2016] 0 of 3 tests passed
44
[Sat Nov 12 19:08:16 PST 2016] 3 of 3 tests passed
55
[Sat Nov 12 19:08:44 PST 2016] 3 of 3 tests passed
6+
[Sun Nov 13 11:38:44 PST 2016] 3 of 3 tests passed

‎paymo_input/stream_payment.txt

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
DOWNLOAD FILE AT [https://www.dropbox.com/s/vrn4pjlypwa2ki9/stream_payment.csv?dl=0]

‎src/antifraud.py

+43 -35
Original file line number | Diff line number | Diff line change
@@ -121,44 +121,49 @@ def build_graph_from_txt(self, input_path):
121121
return
122122

123123
""" Write to all the output files """
124-
def __write_output(self, feature1_op_path, feature1_op, feature2_op_path, feature2_op, feature3_op_path, feature3_op):
125-
with open(feature1_op_path, 'a') as output1_file:
126-
output1_file.write('\n'.join(feature1_op))
127-
output1_file.write('\n')
128-
with open(feature2_op_path, 'a') as output2_file:
129-
output2_file.write('\n'.join(feature2_op))
130-
output2_file.write('\n')
131-
with open(feature3_op_path, 'a') as output3_file:
132-
output3_file.write('\n'.join(feature3_op))
133-
output3_file.write('\n')
124+
def __write_output(self, output1_file, feature1_op, output2_file, feature2_op, output3_file, feature3_op):
125+
output1_file.write('\n'.join(feature1_op))
126+
output1_file.write('\n')
127+
output2_file.write('\n'.join(feature2_op))
128+
output2_file.write('\n')
129+
output3_file.write('\n'.join(feature3_op))
130+
output3_file.write('\n')
134131

135132
def build_features(self, input_path, feature1_op_path, feature2_op_path, feature3_op_path):
136133
feature1, feature2, feature3 = [], [], []
137134
count = 0
138-
with open(input_path) as file:
139-
next(file)
140-
for line in file:
141-
count += 1
142-
content = line.split(',')
143-
vertex1, vertex2 = content[1].strip(), content[2].strip()
144-
# get feature 1
145-
result = self.__build_feature1(vertex1, vertex2)
146-
feature1.append(result)
147-
path_distance = self.__network.bidirectional_bfs(str(vertex1), str(vertex2))
148-
# get feature 2
149-
result = self.__build_feature_with_distance(vertex1, vertex2, 2, path_distance)
150-
feature2.append(result)
151-
# get feature 3
152-
result = self.__build_feature_with_distance(vertex1, vertex2, self.__maxDistance, path_distance)
153-
feature3.append(result)
154-
# write on batch of 20,000
155-
if count % 20000 == 0:
156-
print "writing.. Progress: ", count, ' trasactions'
157-
self.__write_output(feature1_op_path, feature1, feature2_op_path, feature2, feature3_op_path, feature3)
158-
feature1, feature2, feature3 = [], [], []
159-
160-
# write final remaining features to output
161-
self.__write_output(feature1_op_path, feature1, feature2_op_path, feature2, feature3_op_path, feature3)
135+
with open(feature1_op_path, 'w') as output1_file:
136+
with open(feature2_op_path, 'w') as output2_file:
137+
with open(feature3_op_path, 'w') as output3_file:
138+
with open(input_path) as file:
139+
next(file)
140+
for line in file:
141+
content = line.split(',')
142+
try:
143+
vertex1, vertex2 = content[1].strip(), content[2].strip()
144+
except Exception:
145+
print 'ignoring data as invalid', content
146+
else:
147+
count += 1
148+
# get feature 1
149+
result = self.__build_feature1(vertex1, vertex2)
150+
feature1.append(result)
151+
path_distance = self.__network.bidirectional_bfs(str(vertex1), str(vertex2))
152+
# get feature 2
153+
result = self.__build_feature_with_distance(vertex1, vertex2, 2, path_distance)
154+
feature2.append(result)
155+
# get feature 3
156+
result = self.__build_feature_with_distance(vertex1, vertex2, self.__maxDistance, path_distance)
157+
feature3.append(result)
158+
# write on batch of 20,000
159+
if count % 20000 == 0:
160+
print "writing.. Progress: ", count, ' trasactions'
161+
self.__write_output(output1_file, feature1, output2_file, feature2, output3_file, feature3)
162+
feature1, feature2, feature3 = [], [], []
163+
164+
# write final remaining features to output
165+
self.__write_output(output1_file, feature1, output2_file, feature2, output3_file, feature3)
166+
print "writing.. Progress: ", count, ' trasactions'
162167

163168
def __build_feature_with_distance(self, vertex1, vertex2, distance, path_distance):
164169
if path_distance <= distance and path_distance >= 0:
@@ -194,8 +199,11 @@ def __feature1(self, vertex1, vertex2):
194199
try:
195200
# max distance allowed for feature 3 to be trusted as argument
196201
fraud = AntiFraud(4)
202+
print 'Building the network...'
197203
fraud.build_graph_from_txt(batch_input)
204+
print 'Done. Started predicting fraud transactions...'
198205
fraud.build_features(stream_input, feature1_op, feature2_op, feature3_op)
206+
print 'done..'
199207
except Exception as e:
200208
print 'Program error, contact developer'
201-
raise 'program error'
209+
raise e

0 commit comments

Comments
 (0)
Please sign in to comment.