Skip to content

Commit b3dc367

Browse files
authored
Add files via upload
1 parent 4fc08cb commit b3dc367

File tree

1 file changed

+110
-0
lines changed

1 file changed

+110
-0
lines changed

WordNetDotNet.py

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
"""Quora question-pair similarity prototype.

Pipeline for the first question pair in ``data/train.csv``:
tokenize -> Stanford NER (extract LOCATION / PERSON entities) ->
POS tag -> Lesk word-sense disambiguation -> pairwise Wu-Palmer
similarity matrix ``R`` between the two questions' senses.
"""
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.wsd import lesk
from scipy.optimize import linear_sum_assignment
from nltk import pos_tag, ne_chunk
import nltk.tag.stanford as st

# Stanford NER model + jar locations.
# TODO(review): hard-coded absolute home-directory paths — move to
# config/environment so the script runs outside the author's machine.
classifier = '/home/gautam/Desktop/Courses/MTL785/project/stanford-ner-2017-06-09/classifiers/english.all.3class.distsim.crf.ser.gz'
jar = '/home/gautam/Desktop/Courses/MTL785/project/stanford-ner-2017-06-09/stanford-ner.jar'
s = st.StanfordNERTagger(classifier, jar)

# nltk.download('wordnet')
stemmer = PorterStemmer()
lemmatiser = WordNetLemmatizer()
stop_word = set(stopwords.words('english'))

df = pd.read_csv('data/train.csv')
# Columns 3 and 4 are assumed to be question1/question2 — TODO confirm
# against the Quora train.csv schema.
question2_total = df.iloc[:, 4].values
question1_total = df.iloc[:, 3].values

# Prototype: only the first pair is processed.
question1 = word_tokenize(question1_total[0])
question2 = word_tokenize(question2_total[0])
print(question1)
print(question2)

nerq1 = s.tag(question1)
nerq2 = s.tag(question2)
print("##########")


def _extract_entities(tagged_tokens):
    """Return (location, person) from Stanford NER output; None when absent.

    Fixes two defects in the original loop:
    * ``loc``/``name`` were unbound (NameError on the later ``print``)
      whenever the sentence contained no matching entity — default to None.
    * The 3-class model emits the label "PERSON", not "NAME", so the old
      ``i[1] == "NAME"`` branch could never match.
    """
    loc = name = None
    for token, label in tagged_tokens:
        if label == "LOCATION":
            loc = token
        if label == "PERSON":
            name = token
    return loc, name


loc1, name1 = _extract_entities(nerq1)
loc2, name2 = _extract_entities(nerq2)
print(loc1)
print("##########")

tagged1 = nltk.pos_tag(question1)
tagged2 = nltk.pos_tag(question2)
# filtered_q1 = [w for w in tagged1 if not w[0] in stop_word]
# filtered_q2 = [w for w in tagged2 if not w[0] in stop_word]

common_words = [word for word in question1 if word in question2]

# Lesk disambiguation: one Synset (or None) per token.
# Fix: lesk() expects the context sentence as a list of word strings; the
# original passed the POS-tagged (word, tag) tuple list, which makes the
# gloss-overlap computation always empty.
list1 = [lesk(question1, word) for word, _tag in tagged1]
print(list1)
list2 = [lesk(question2, word) for word, _tag in tagged2]
print(list2)

# Pairwise Wu-Palmer similarity between the disambiguated senses.
# Entries stay 0.0 when either token had no sense.
R = np.zeros((len(list1), len(list2)))
for i in range(len(list1)):
    for j in range(len(list2)):
        if list1[i] and list2[j]:
            # Fix: wup_similarity() returns None when the synsets share
            # no common ancestor; treat that as zero similarity instead
            # of raising on the numpy assignment.
            R[i][j] = list1[i].wup_similarity(list2[j]) or 0.0
print(R)

# Next step (disabled): optimal 1-1 sense alignment over R.
# row_ind, col_ind = linear_sum_assignment(R)
# print(row_ind)
# print(col_ind)

0 commit comments

Comments
 (0)