-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathABSA_Dependency_Tree.py
122 lines (107 loc) · 4.1 KB
/
ABSA_Dependency_Tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import textblob
from textblob import TextBlob
import numpy as np
from gensim.models.fasttext import FastText
import nltk
from nltk.stem import WordNetLemmatizer
import stanza
from article_preprocessing import *
# Fetch the 'wordnet' resource through NLTK every time this module is imported
# (presumably required by the lemmatizer created below - TODO confirm).
nltk.download('wordnet')
# Module-level WordNet lemmatizer instance. NOTE(review): neither function
# visible in this file references it; it may be used by importers of this module.
lemmatizer = WordNetLemmatizer()
import stanza
from sentiment_config import *
from model import get_word_polarity, get_sentiment, get_senti_coeff_indic, sentiment_coeff
# Fetch the stanza resources for each language code at import time, one
# download call per code, in the order: en, hi, ta, te, mr.
for _stanza_lang_code in ('en', 'hi', 'ta', 'te', 'mr'):
    stanza.download(_stanza_lang_code)
# Do not leave the loop variable behind in the module namespace.
del _stanza_lang_code
# Language codes for which a stanza pipeline can be requested by this module.
_SUPPORTED_LANG_CODES = ('en', 'hi', 'mr', 'te', 'ta')

# Pipelines that have already been constructed, keyed by language code, so
# that repeated calls reuse them instead of rebuilding one per call.
_STANZA_PIPELINES = {}


def _get_stanza_pipeline(lang_code):
    """Return the stanza pipeline for *lang_code*, building it on first use."""
    if lang_code not in _SUPPORTED_LANG_CODES:
        raise ValueError(
            "Unsupported language code %r; expected one of %s"
            % (lang_code, ", ".join(_SUPPORTED_LANG_CODES))
        )
    if lang_code not in _STANZA_PIPELINES:
        _STANZA_PIPELINES[lang_code] = stanza.Pipeline(lang_code)
    return _STANZA_PIPELINES[lang_code]


def polar_dependency_tree(aspect_term, article_line, lang_code):
    """Find candidate polar words for an aspect term using a dependency parse.

    The line is parsed with stanza and only its first parsed sentence is
    inspected. A word is a candidate when its dependency relation is one of
    acl/advcl/advmod/amod/xcomp/neg/parataxis/ccomp or its universal POS tag
    is VERB or ADJ. Candidates are taken from:

    * the direct children of the sentence root, and
    * every ancestor on the path from the aspect term up to (and including)
      the root, when the aspect term is not itself the root.

    Args:
        aspect_term: Exact token text to look for in the parsed sentence. If
            several words match, the last one in the sentence is used.
        article_line: Text of one line/sentence of the article.
        lang_code: One of 'en', 'hi', 'mr', 'te', 'ta'.

    Returns:
        A list of unique candidate word texts in first-seen order. The list
        is empty when the line is blank, nothing was parsed, or the aspect
        term does not occur as a token.

    Raises:
        ValueError: If ``lang_code`` is not one of the supported codes.
    """
    # Validate up front so an unknown code fails with a clear message rather
    # than with an unbound-variable error further down.
    if lang_code not in _SUPPORTED_LANG_CODES:
        raise ValueError(
            "Unsupported language code %r; expected one of %s"
            % (lang_code, ", ".join(_SUPPORTED_LANG_CODES))
        )

    # Blank text has nothing to parse; skip loading the pipeline altogether.
    if isinstance(article_line, str) and not article_line.strip():
        return []

    parsed = _get_stanza_pipeline(lang_code)(article_line)
    if not parsed.sentences:
        return []
    words = parsed.sentences[0].words

    # Dependency relations that can carry polar words.
    relations = ('acl', 'advcl', 'advmod', 'amod', 'xcomp', 'neg',
                 'parataxis', 'ccomp')
    # Parts of speech that can carry polar words.
    pos_tags = ('VERB', 'ADJ')

    def is_candidate(word):
        return word.deprel in relations or word.upos in pos_tags

    # Locate the aspect term; a later match overrides an earlier one.
    entity = None
    for word in words:
        if word.text == aspect_term:
            entity = word
    if entity is None:
        return []

    # Locate the root of the tree (the entity itself when it is the root).
    if entity.deprel == 'root':
        root_node = entity
    else:
        root_node = None
        for word in words:
            if word.deprel == 'root':
                root_node = word

    polar_words = []

    # Direct children of the root.
    if root_node is not None:
        root_id = int(root_node.id)
        polar_words.extend(
            word.text for word in words
            if word.head == root_id and is_candidate(word)
        )

    # Ancestors on the path entity -> root. The loop is bounded by the number
    # of words and stops on head 0 so a malformed tree cannot loop forever.
    current_word = entity
    for _ in range(len(words)):
        if current_word.deprel == 'root' or current_word.head <= 0:
            break
        current_word = words[current_word.head - 1]
        if is_candidate(current_word):
            polar_words.append(current_word.text)

    # De-duplicate while keeping a deterministic, first-seen order.
    return list(dict.fromkeys(polar_words))
def get_polarity_dep_tree(aspect_term, sentences, lang_code, model=None):
    """Return the overall polarity sign of an aspect term across sentences.

    For 'en' and 'hi' the aspect term is first tokenized (``spacy_tokenizer``
    and ``tokenize_hin`` respectively) and ``polar_dependency_tree`` is run
    once per token on each sentence; for any other language code it is run
    once per sentence with the aspect term as given. The polar words found
    in a sentence are scored with ``sentiment_coeff`` for 'en' and with
    ``get_senti_coeff_indic`` otherwise, and the summed scores of all
    sentences are accumulated.

    Args:
        aspect_term: The aspect term whose polarity is wanted.
        sentences: Iterable of sentences, each handed to
            ``polar_dependency_tree``.
        lang_code: Language code of the sentences.
        model: Passed through to ``get_senti_coeff_indic``; not used for 'en'.

    Returns:
        1 if the accumulated score is positive, 0 if it is exactly zero,
        otherwise -1.
    """
    per_token = lang_code in ['en', 'hi']
    if lang_code == 'en':
        aspect_term = spacy_tokenizer(aspect_term)
    elif lang_code == 'hi':
        aspect_term = tokenize_hin(aspect_term)

    total = 0
    for line in sentences:
        if per_token:
            # Pool the polar words of every aspect token, then drop repeats.
            pooled = [
                found
                for token in aspect_term
                for found in polar_dependency_tree(token, line, lang_code)
            ]
            candidates = list(set(pooled))
        else:
            candidates = polar_dependency_tree(aspect_term, line, lang_code)

        # A sentence without polar words contributes nothing.
        if not len(candidates) > 0:
            continue

        if lang_code == 'en':
            scores = sentiment_coeff(candidates)
        else:
            scores = get_senti_coeff_indic(candidates, lang_code, model)
        total = total + sum(scores)

    if total > 0:
        return 1
    return 0 if total == 0 else -1