@@ -23,9 +23,13 @@ public class LinguisticFeatureExtractor implements FeatureExtractor{
23
23
private FastVector attrs ;
24
24
private FastVector pos_relations ;
25
25
private static final int OFFSET = 1 ;
26
+ private static LexicalizedParser lp ;
26
27
27
28
private void setupAttributes (List <Tweet > tweets )
28
29
{
30
+ lp = new LexicalizedParser ("res/englishPCFG.ser.gz" );
31
+ lp .setOptionFlags (new String []{"-maxLength" , "80" , "-retainTmpSubcategories" });
32
+
29
33
attrs = new FastVector ();
30
34
// Determine attributes
31
35
FastVector sentvals = new FastVector ();
@@ -59,14 +63,14 @@ public Instances extractFeatures(List<Tweet> tweets) {
59
63
GrammaticalStructureFactory gsf ;
60
64
GrammaticalStructure gs ;
61
65
Collection tdl ;
62
- LexicalizedParser lp = new LexicalizedParser ("res/englishPCFG.ser.gz" );
63
- lp .setOptionFlags (new String []{"-maxLength" , "80" , "-retainTmpSubcategories" });
66
+
64
67
tlp = new PennTreebankLanguagePack ();
65
68
gsf = tlp .grammaticalStructureFactory ();
66
-
69
+
67
70
for (Tweet t : tweets )
68
71
{
69
72
Instance inst = new Instance (1.0 , new double [attrs .size ()]);
73
+ inst .setDataset (feats );
70
74
inst .setClassValue (t .sentiment );
71
75
72
76
st = new StringTokenizer (t .text );
@@ -87,7 +91,6 @@ public Instances extractFeatures(List<Tweet> tweets) {
87
91
inst .setValue (pos_relations .indexOf (x .reln ().toString ())+OFFSET , 1 );
88
92
}
89
93
90
- inst .setDataset (feats );
91
94
feats .add (inst );
92
95
}
93
96
@@ -114,8 +117,7 @@ private static FastVector get_all_pos_relations(List<Tweet> tweets)
114
117
GrammaticalStructureFactory gsf ;
115
118
GrammaticalStructure gs ;
116
119
Collection tdl ;
117
- LexicalizedParser lp = new LexicalizedParser ("res/englishPCFG.ser.gz" );
118
- lp .setOptionFlags (new String []{"-maxLength" , "80" , "-retainTmpSubcategories" });
120
+
119
121
tlp = new PennTreebankLanguagePack ();
120
122
gsf = tlp .grammaticalStructureFactory ();
121
123
int postags_count = 0 ;
0 commit comments