# -*- coding: utf-8 -*-
"""
Classify restaurant reviews with a bag-of-words model and a Random Forest.

Pipeline: load ``Restaurant_Reviews.tsv``, clean and stem every review,
vectorize the corpus with ``CountVectorizer``, split it into train and test
sets, fit a ``RandomForestClassifier`` and print the accuracy on both sets.

Created on Tue Sep 18 17:56:22 2018
(the original file carried a second, duplicated header dated
Mon Sep 17 21:44:06 2018)

@author: Mohammad Doosti Lakhani
"""

# Importing the libraries
import os
import re
import sys

import pandas as pd
from nltk.corpus import stopwords  # removing useless words
from nltk.stem.porter import PorterStemmer  # getting root of words
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer  # text to sparse count matrix
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

scriptpath = "../../Tools"  # directory holding the accuracy helper module
# Add the directory containing the module to the Python path.
# NOTE: the path is resolved against the current working directory.
sys.path.append(os.path.abspath(scriptpath))
import accuracy as ac  # noqa: E402  (must come after the sys.path tweak)

# Importing the dataset
# The separator is a single tab; quoting=3 is csv.QUOTE_NONE, i.e. quote
# characters inside the reviews are kept as ordinary text.
dataset = pd.read_csv('Restaurant_Reviews.tsv', delimiter='\t', quoting=3)
x = dataset.iloc[:, 0].values  # first column: review text
y = dataset.iloc[:, 1].values  # second column: label

# Cleaning the Text
stopwords_list = stopwords.words('english')
stopwords_set = set(stopwords_list)  # O(1) membership tests
ps = PorterStemmer()
non_letters = re.compile('[^a-zA-Z]')  # compiled once, reused for every review


def _clean_review(text):
    """Return *text* as a space-joined string of stemmed, non-stop words.

    Non-letter characters are replaced by spaces and the text is lowercased.
    Stop words are dropped BEFORE stemming, because the stop-word list holds
    unstemmed words and would not match stemmed tokens.
    """
    words = non_letters.sub(' ', text).lower().split()
    return ' '.join(ps.stem(word) for word in words if word not in stopwords_set)


corpus = [_clean_review(review) for review in x]

# Creating the Bag of Words model
# Stop words were already removed during cleaning, so the vectorizer does not
# need a stop-word list of its own.
cv = CountVectorizer(lowercase=True)
x_sparse = cv.fit_transform(corpus)  # bag-of-words sparse matrix
# toarray() yields a plain ndarray; todense() would yield numpy.matrix, which
# current scikit-learn estimators reject.
x = x_sparse.toarray()

# Splitting dataset into Train set and Test set
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.85, random_state=8)

# Fitting the Random Forest model to the train set
classifier = RandomForestClassifier(
    n_estimators=12, criterion='entropy', random_state=0)
classifier.fit(x_train, y_train)

# Make the prediction on train set
y_train_pred = classifier.predict(x_train)

# Make the prediction on test set
y_test_pred = classifier.predict(x_test)

# Accuracy on test and train set
cm_train = confusion_matrix(y_train, y_train_pred)
cm_test = confusion_matrix(y_test, y_test_pred)

# accuracy_on_cm returns three values, unpacked here as
# (true count, false count, accuracy).
t_train, f_train, acc_train = ac.accuracy_on_cm(cm_train)
print('Train status = #{} True, #{} False, %{} Accuracy'.format(
    t_train, f_train, acc_train * 100))

t_test, f_test, acc_test = ac.accuracy_on_cm(cm_test)
print('Test status = #{} True, #{} False, %{} Accuracy'.format(
    t_test, f_test, acc_test * 100))