001 package org.maltparser.core.syntaxgraph.feature;
002
003 import java.util.LinkedHashMap;
004 import java.util.Map;
005 import org.maltparser.core.exception.MaltChainedException;
006 import org.maltparser.core.feature.function.AddressFunction;
007 import org.maltparser.core.feature.function.FeatureFunction;
008 import org.maltparser.core.feature.value.AddressValue;
009 import org.maltparser.core.feature.value.FeatureValue;
010 import org.maltparser.core.feature.value.SingleFeatureValue;
011 import org.maltparser.core.io.dataformat.ColumnDescription;
012 import org.maltparser.core.symbol.SymbolTable;
013 import org.maltparser.core.symbol.SymbolTableHandler;
014 import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
015 import org.maltparser.core.syntaxgraph.SyntaxGraphException;
016 import org.maltparser.core.syntaxgraph.node.DependencyNode;
017
018 public class DistanceFeature implements FeatureFunction {
019 protected AddressFunction addressFunction1;
020 protected AddressFunction addressFunction2;
021 protected SymbolTableHandler tableHandler;
022 protected SymbolTable table;
023 protected SingleFeatureValue featureValue;
024 protected String normalizationString;
025 protected Map<Integer,String> normalization;
026
027
028 public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
029 super();
030 featureValue = new SingleFeatureValue(this);
031 setTableHandler(tableHandler);
032 normalization = new LinkedHashMap<Integer,String>();
033 }
034
035 /**
036 * Initialize the distance feature function
037 *
038 * @param arguments an array of arguments with the type returned by getParameterTypes()
039 * @throws MaltChainedException
040 */
041 public void initialize(Object[] arguments) throws MaltChainedException {
042 if (arguments.length != 3) {
043 throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
044 }
045 // Checks that the two arguments are address functions
046 if (!(arguments[0] instanceof AddressFunction)) {
047 throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
048 }
049 if (!(arguments[1] instanceof AddressFunction)) {
050 throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
051 }
052 if (!(arguments[2] instanceof java.lang.String)) {
053 throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
054 }
055 setAddressFunction1((AddressFunction)arguments[0]);
056 setAddressFunction2((AddressFunction)arguments[1]);
057
058 normalizationString = (String)arguments[2];
059 // Creates a symbol table called "DISTANCE" using one null value
060 setSymbolTable(tableHandler.addSymbolTable("DISTANCE_"+normalizationString, ColumnDescription.INPUT, "one"));
061
062 String[] items = normalizationString.split("\\|");
063
064 if (items.length <= 0 || !items[0].equals("0")) {
065 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
066 }
067 int tmp = -1;
068 for (int i = 0; i < items.length; i++) {
069 int v;
070 try {
071 v = Integer.parseInt(items[i]);
072 } catch (NumberFormatException e) {
073 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
074 }
075 normalization.put(v, ">="+v);
076 table.addSymbol(">="+v);
077 if (tmp != -1 && tmp >= v) {
078 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
079 }
080 tmp = v;
081 }
082 }
083
084 /**
085 * Returns an array of class types used by the feature extraction system to invoke initialize with
086 * correct arguments.
087 *
088 * @return an array of class types
089 */
090 public Class<?>[] getParameterTypes() {
091 Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class,
092 org.maltparser.core.feature.function.AddressFunction.class,
093 java.lang.String.class};
094 return paramTypes;
095 }
096
097 /**
098 * Returns the string representation of the integer <code>code</code> according to the distance feature function.
099 *
100 * @param code the integer representation of the symbol
101 * @return the string representation of the integer <code>code</code> according to the distance feature function.
102 * @throws MaltChainedException
103 */
104 public String getSymbol(int code) throws MaltChainedException {
105 return table.getSymbolCodeToString(code);
106 }
107
108 /**
109 * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
110 *
111 * @param symbol the string representation of the symbol
112 * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
113 * @throws MaltChainedException
114 */
115 public int getCode(String symbol) throws MaltChainedException {
116 return table.getSymbolStringToCode(symbol);
117 }
118
119 /**
120 * Cause the distance feature function to update the cardinality of the feature value.
121 *
122 * @throws MaltChainedException
123 */
124 public void updateCardinality() {
125 // featureValue.setCardinality(table.getValueCounter());
126 }
127
128 /**
129 * Cause the feature function to update the feature value.
130 *
131 * @throws MaltChainedException
132 */
133 public void update() throws MaltChainedException {
134 // Retrieve the address value
135 final AddressValue arg1 = addressFunction1.getAddressValue();
136 final AddressValue arg2 = addressFunction2.getAddressValue();
137 // featureValue.setKnown(true);
138 // if arg1 or arg2 is null, then set a NO_NODE null value as feature value
139 if (arg1.getAddress() == null || arg2.getAddress() == null) {
140 featureValue.setIndexCode(table.getNullValueCode(NullValueId.NO_NODE));
141 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE));
142 featureValue.setValue(1);
143
144 featureValue.setNullValue(true);
145 } else {
146 // Unfortunately this method takes a lot of time arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class);
147 // Cast the address arguments to dependency nodes
148 final DependencyNode node1 = (DependencyNode)arg1.getAddress();
149 final DependencyNode node2 = (DependencyNode)arg2.getAddress();
150
151 if (!node1.isRoot() && !node2.isRoot()) {
152 // Calculates the distance
153 final int index1 = node1.getIndex();
154 final int index2 = node2.getIndex();
155 final int distance = Math.abs(index1-index2);
156
157
158 int lower = -1;
159 boolean f = false;
160 for (Integer upper : normalization.keySet()) {
161 if (distance >= lower && distance < upper) {
162 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower)));
163 featureValue.setSymbol(normalization.get(lower));
164 featureValue.setValue(1);
165 f = true;
166 break;
167 }
168 lower = upper;
169 }
170 if (f == false) {
171 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower)));
172 featureValue.setSymbol(normalization.get(lower));
173 featureValue.setValue(1);
174 }
175
176 // Tells the feature value that the feature is known and is not a null value
177
178 featureValue.setNullValue(false);
179
180 } else {
181 // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value
182 featureValue.setIndexCode(table.getNullValueCode(NullValueId.ROOT_NODE));
183 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.ROOT_NODE));
184 featureValue.setValue(1);
185 featureValue.setNullValue(true);
186 }
187 }
188 }
189
190 /**
191 * Returns the feature value
192 *
193 * @return the feature value
194 */
195 public FeatureValue getFeatureValue() {
196 return featureValue;
197 }
198
199 /**
200 * Returns the symbol table used by the distance feature function
201 *
202 * @return the symbol table used by the distance feature function
203 */
204 public SymbolTable getSymbolTable() {
205 return table;
206 }
207
208 /**
209 * Returns the address function 1 (argument 1)
210 *
211 * @return the address function 1 (argument 1)
212 */
213 public AddressFunction getAddressFunction1() {
214 return addressFunction1;
215 }
216
217
218 /**
219 * Sets the address function 1 (argument 1)
220 *
221 * @param addressFunction1 a address function 1 (argument 1)
222 */
223 public void setAddressFunction1(AddressFunction addressFunction1) {
224 this.addressFunction1 = addressFunction1;
225 }
226
227 /**
228 * Returns the address function 2 (argument 2)
229 *
230 * @return the address function 1 (argument 2)
231 */
232 public AddressFunction getAddressFunction2() {
233 return addressFunction2;
234 }
235
236 /**
237 * Sets the address function 2 (argument 2)
238 *
239 * @param addressFunction2 a address function 2 (argument 2)
240 */
241 public void setAddressFunction2(AddressFunction addressFunction2) {
242 this.addressFunction2 = addressFunction2;
243 }
244
245 /**
246 * Returns symbol table handler
247 *
248 * @return a symbol table handler
249 */
250 public SymbolTableHandler getTableHandler() {
251 return tableHandler;
252 }
253
254 /**
255 * Sets the symbol table handler
256 *
257 * @param tableHandler a symbol table handler
258 */
259 public void setTableHandler(SymbolTableHandler tableHandler) {
260 this.tableHandler = tableHandler;
261 }
262
263 /**
264 * Sets the symbol table used by the distance feature function
265 *
266 * @param table
267 */
268 public void setSymbolTable(SymbolTable table) {
269 this.table = table;
270 }
271
272 public int getType() {
273 return ColumnDescription.STRING;
274 }
275
276 public String getMapIdentifier() {
277 return getSymbolTable().getName();
278 }
279
280 public boolean equals(Object obj) {
281 if (this == obj)
282 return true;
283 if (obj == null)
284 return false;
285 if (getClass() != obj.getClass())
286 return false;
287 return obj.toString().equals(this.toString());
288 }
289
290 public int hashCode() {
291 return 217 + (null == toString() ? 0 : toString().hashCode());
292 }
293
294 public String toString() {
295 final StringBuilder sb = new StringBuilder();
296 sb.append("Distance(");
297 sb.append(addressFunction1.toString());
298 sb.append(", ");
299 sb.append(addressFunction2.toString());
300 sb.append(", ");
301 sb.append(normalizationString);
302 sb.append(')');
303 return sb.toString();
304 }
305 }
306