@@ -121,44 +121,49 @@ def build_graph_from_txt(self, input_path):
121
121
return
122
122
123
123
""" Write to all the output files """
124
def __write_output(self, output1_file, feature1_op, output2_file, feature2_op, output3_file, feature3_op):
    """Write one batch of results to each of the three open output files.

    Each ``featureN_op`` is a list of result strings; it is written to the
    matching ``outputN_file`` one result per line, with a trailing newline
    so the next batch starts on a fresh line.

    Args:
        output1_file, output2_file, output3_file: writable file objects
            (anything exposing ``write``) that the caller has already opened.
        feature1_op, feature2_op, feature3_op: lists of strings to write.
    """
    batches = (
        (output1_file, feature1_op),
        (output2_file, feature2_op),
        (output3_file, feature3_op),
    )
    for out_file, results in batches:
        # An empty batch must not emit anything: writing the bare line
        # terminator would leave a blank line in the output file.
        if results:
            out_file.write('\n'.join(results) + '\n')
134
131
135
132
def build_features(self, input_path, feature1_op_path, feature2_op_path, feature3_op_path):
    """Evaluate every transaction in ``input_path`` and write the three feature files.

    The first line of the input is skipped as a header. Every other line is
    split on commas and the second and third fields are used as the two
    vertices of the transaction. Lines with fewer than three fields are
    reported and ignored. Results are buffered and flushed to the output
    files every 20,000 processed transactions, and once more at the end.

    Args:
        input_path: path of the comma-separated transaction file to read.
        feature1_op_path, feature2_op_path, feature3_op_path: paths of the
            output files; each is opened with mode ``'w'`` (truncated).
    """
    feature1, feature2, feature3 = [], [], []
    count = 0
    with open(feature1_op_path, 'w') as output1_file, \
            open(feature2_op_path, 'w') as output2_file, \
            open(feature3_op_path, 'w') as output3_file, \
            open(input_path) as input_file:
        # Skip the header; the default keeps an empty input file from
        # raising StopIteration.
        next(input_file, None)
        for line in input_file:
            content = line.split(',')
            try:
                vertex1, vertex2 = content[1].strip(), content[2].strip()
            except IndexError:
                # Fewer than three comma-separated fields: not a usable row.
                # Single-string print(...) produces the same output whether
                # print is a statement or a function.
                print('ignoring data as invalid %s' % (content,))
                continue

            count += 1
            # get feature 1
            feature1.append(self.__build_feature1(vertex1, vertex2))
            # The distance is computed once and shared by features 2 and 3.
            path_distance = self.__network.bidirectional_bfs(str(vertex1), str(vertex2))
            # get feature 2
            feature2.append(self.__build_feature_with_distance(vertex1, vertex2, 2, path_distance))
            # get feature 3
            feature3.append(
                self.__build_feature_with_distance(vertex1, vertex2, self.__maxDistance, path_distance))

            # write on batch of 20,000
            if count % 20000 == 0:
                print('writing.. Progress:  %d  trasactions' % count)
                self.__write_output(output1_file, feature1, output2_file, feature2, output3_file, feature3)
                feature1, feature2, feature3 = [], [], []

        # Write the remaining features. The three lists grow in lockstep, so
        # checking one is enough; skipping an empty flush avoids a trailing
        # blank line when the total is an exact multiple of the batch size.
        if feature1:
            self.__write_output(output1_file, feature1, output2_file, feature2, output3_file, feature3)
        print('writing.. Progress:  %d  trasactions' % count)
162
167
163
168
def __build_feature_with_distance (self , vertex1 , vertex2 , distance , path_distance ):
164
169
if path_distance <= distance and path_distance >= 0 :
@@ -194,8 +199,11 @@ def __feature1(self, vertex1, vertex2):
194
199
# Build the network from the batch file, then classify the stream file,
# reporting progress on stdout. Any failure is announced and re-raised.
try:
    # max distance allowed for feature 3 to be trusted as argument
    fraud = AntiFraud(4)
    print('Building the network...')
    fraud.build_graph_from_txt(batch_input)
    print('Done. Started predicting fraud transactions...')
    fraud.build_features(stream_input, feature1_op, feature2_op, feature3_op)
    print('done..')
except Exception:
    print('Program error, contact developer')
    # A bare raise re-raises the active exception with its original
    # traceback; "raise e" would restart the traceback at this line.
    raise
0 commit comments