-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHmmModelBuilder.java
121 lines (109 loc) · 3.93 KB
/
HmmModelBuilder.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import java.io.*;
import java.util.*;
import java.util.regex.*;
/**
 * Command-line tool that converts annotated sentences into symbol sequences
 * for HMM training, split by the sentence-level gold-standard polarity.
 *
 * <p>Inputs (positional arguments):
 * <ol>
 *   <li>the combined SenseRelate / N-gram file, read by {@code NgramSenseRelateReader};</li>
 *   <li>the gold-standard file, read by {@code GoldStandardReader};</li>
 *   <li>the valence dictionary, read by {@code ValenceReader};</li>
 *   <li>the output file for sentences whose gold-standard value is {@code "pos"};</li>
 *   <li>the output file for all other sentences.</li>
 * </ol>
 *
 * <p>Each output line holds one sentence. Every word is replaced by a symbol
 * denoting its combined POS/polarity, in sentence order, and each symbol is
 * followed by {@code "; "}. The line is trimmed before writing, so it has the
 * form {@code a; b; c;}.
 */
public class HmmModelBuilder {

    /**
     * Word field of a triplet: a leading ':', optional whitespace, a lowercase
     * word that may contain '-' and '_', and a trailing '#'. Compiled once
     * instead of once per triplet.
     */
    private static final Pattern WORD_FIELD =
            Pattern.compile(":[\\s]*[a-z]+[\\-a-z]*[_a-z]*#");

    /**
     * Entry point; see the class comment for the meaning of the arguments.
     *
     * @param args the five file paths described in the class comment
     */
    public static void main(String[] args) {
        if (args.length < 5) {
            System.err.println("Usage: java HmmModelBuilder <ngram-senserelate-file> "
                    + "<gold-standard-file> <valence-file> <positive-output> <negative-output>");
            // Non-zero status, as the uncaught ArrayIndexOutOfBoundsException produced before.
            System.exit(1);
        }

        NgramSenseRelateReader nsrr = new NgramSenseRelateReader(args[0]);
        GoldStandardReader gsr = new GoldStandardReader(args[1]);
        ValenceReader vr = new ValenceReader(args[2]);

        PrintWriter positive = null;
        PrintWriter negative = null;
        try {
            positive = new PrintWriter(args[3]);
            negative = new PrintWriter(args[4]);

            Map<String, ArrayList<String>> sentences = nsrr.getSentences();
            for (Map.Entry<String, ArrayList<String>> entry : sentences.entrySet()) {
                String id = entry.getKey();
                StringBuilder mappedSentence = new StringBuilder();

                for (String triplet : entry.getValue()) {
                    String rawWord = findWordField(triplet);
                    System.out.println("ID is: " + id);
                    System.out.println("Word is: " + rawWord);
                    if (rawWord == null) {
                        // No ":word#" field on the first line of the triplet. Skip it
                        // rather than abort the whole run with a NullPointerException.
                        System.err.println("Skipping triplet without a word field (id "
                                + id + "): " + triplet);
                        continue;
                    }

                    // Every match of WORD_FIELD starts with ':' and ends with '#',
                    // so dropping the first and last character is always correct.
                    String word = rawWord.substring(1, rawWord.length() - 1).trim();
                    System.out.print(id + ":" + word);

                    String mapping;
                    if (nsrr.isValence(id, word)) {
                        mapping = vr.getHmmValueFor(word);
                    } else {
                        String pos = nsrr.getPosFor(id, word);
                        String polarity = nsrr.getPolarityFor(id, word);
                        mapping = symbolFor(pos, polarity);
                    }
                    System.out.println(":" + mapping);
                    mappedSentence.append(mapping).append("; ");
                }

                String sentencePolarity = gsr.returnValueFor(id);
                System.out.println(sentencePolarity);
                String line = mappedSentence.toString().trim();
                if (sentencePolarity.equals("pos")) {
                    positive.println(line);
                } else {
                    negative.println(line);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Either writer may still be null if its constructor threw.
            if (positive != null) {
                positive.close();
            }
            if (negative != null) {
                negative.close();
            }
        }
    }

    /**
     * Returns the raw word field (including the leading ':' and trailing '#')
     * found on the first line of {@code triplet}, or {@code null} if there is none.
     */
    private static String findWordField(String triplet) {
        Scanner sc = new Scanner(triplet);
        try {
            return sc.findInLine(WORD_FIELD);
        } finally {
            sc.close();
        }
    }

    /**
     * Maps a part-of-speech tag and a polarity label to the HMM symbol.
     *
     * <p>"n" gives 0/1, "v" gives 2/3, "a" gives 7/8, and any other tag
     * (adverb, "r") gives 9/10.
     * NOTE(review): the adjective branch tests for "neg" (7) while the others
     * test for "pos" first; kept exactly as in the original. Confirm against
     * the HMM symbol table.
     */
    private static String symbolFor(String pos, String polarity) {
        if (pos.equals("n")) {
            return polarity.equals("pos") ? "0" : "1";
        }
        if (pos.equals("v")) {
            return polarity.equals("pos") ? "2" : "3";
        }
        if (pos.equals("a")) {
            return polarity.equals("neg") ? "7" : "8";
        }
        // adverb ("r") and anything else
        return polarity.equals("pos") ? "9" : "10";
    }
}