001 package org.maltparser.transform.pseudo;
002
003 import java.util.SortedSet;
004 import java.util.TreeSet;
005 import java.util.Vector;
006
007 import org.apache.log4j.Logger;
008 import org.maltparser.core.exception.MaltChainedException;
009 import org.maltparser.core.io.dataformat.ColumnDescription;
010 import org.maltparser.core.io.dataformat.DataFormatInstance;
011 import org.maltparser.core.symbol.SymbolTable;
012 import org.maltparser.core.syntaxgraph.DependencyStructure;
013 import org.maltparser.core.syntaxgraph.node.DependencyNode;
014
015 /**
016 * This class contains methods for projectivizing and deprojectivizing
017 *
018 * @author Jens Nilsson
019 */
020 public class PseudoProjectivity {
021 static int id = 0;
022
023 private enum PseudoProjectiveEncoding {
024 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
025 };
026
027 private enum CoveredRootAttachment {
028 NONE, IGNORE, LEFT, RIGHT, HEAD
029 };
030
031 private enum LiftingOrder {
032 SHORTEST, DEEPEST
033 };
034
035 private PseudoProjectiveEncoding markingStrategy;
036 private CoveredRootAttachment rootAttachment;
037 private LiftingOrder liftingOrder;
038 private Logger configLogger;
039
040 private SymbolTable deprelSymbolTable;
041 private SymbolTable pppathSymbolTable;
042 private SymbolTable ppliftedSymbolTable;
043 private SymbolTable ppcoveredRootSymbolTable;
044
045 private ColumnDescription deprelColumn;
046 private ColumnDescription pppathColumn;
047 private ColumnDescription ppliftedColumn;
048 private ColumnDescription ppcoveredRootColumn;
049
050 private Vector<Boolean> nodeLifted;
051 private Vector<Vector<DependencyNode>> nodeTrace;
052 private Vector<DependencyNode> headDeprel;
053 private Vector<Boolean> nodePath;
054 private Vector<Boolean> isCoveredRoot;
055 private Vector<Integer> nodeRelationLength;
056 private Vector<String> synacticHeadDeprel;
057
058
059 public PseudoProjectivity() { }
060
061 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
062 DataFormatInstance dataFormatInstance) throws MaltChainedException {
063 nodeLifted = new Vector<Boolean>();
064 nodeTrace = new Vector<Vector<DependencyNode>>();
065 headDeprel = new Vector<DependencyNode>();
066 nodePath = new Vector<Boolean>();
067 isCoveredRoot = new Vector<Boolean>();
068 nodeRelationLength = new Vector<Integer>();
069 synacticHeadDeprel = new Vector<String>();
070
071 this.configLogger = configLogger;
072 if (markingStrategyString.equalsIgnoreCase("none")) {
073 markingStrategy = PseudoProjectiveEncoding.NONE;
074 } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
075 markingStrategy = PseudoProjectiveEncoding.BASELINE;
076 } else if (markingStrategyString.equalsIgnoreCase("head")) {
077 markingStrategy = PseudoProjectiveEncoding.HEAD;
078 } else if (markingStrategyString.equalsIgnoreCase("path")) {
079 markingStrategy = PseudoProjectiveEncoding.PATH;
080 } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
081 markingStrategy = PseudoProjectiveEncoding.HEADPATH;
082 } else if (markingStrategyString.equalsIgnoreCase("trace")) {
083 markingStrategy = PseudoProjectiveEncoding.TRACE;
084 }
085 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
086 this.deprelSymbolTable = deprelColumn.getSymbolTable();
087 // this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL");
088 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
089 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
090 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
091 this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
092 // this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable);
093 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
094 ppliftedSymbolTable.addSymbol("#true#");
095 ppliftedSymbolTable.addSymbol("#false#");
096 } else {
097 ppliftedSymbolTable.addSymbol("#false#");
098 }
099 }
100
101 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
102 this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
103 this.pppathSymbolTable = pppathColumn.getSymbolTable();
104 pppathSymbolTable.addSymbol("#true#");
105 pppathSymbolTable.addSymbol("#false#");
106 }
107
108 if (coveredRoot.equalsIgnoreCase("none")) {
109 this.rootAttachment = CoveredRootAttachment.NONE;
110 } else if (coveredRoot.equalsIgnoreCase("ignore")) {
111 this.rootAttachment = CoveredRootAttachment.IGNORE;
112 } else if (coveredRoot.equalsIgnoreCase("left")) {
113 this.rootAttachment = CoveredRootAttachment.LEFT;
114 } else if (coveredRoot.equalsIgnoreCase("right")) {
115 this.rootAttachment = CoveredRootAttachment.RIGHT;
116 } else if (coveredRoot.equalsIgnoreCase("head")) {
117 this.rootAttachment = CoveredRootAttachment.HEAD;
118 }
119
120 if (this.rootAttachment != CoveredRootAttachment.NONE) {
121 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
122 this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
123 ppcoveredRootSymbolTable.addSymbol("#true#");
124 ppcoveredRootSymbolTable.addSymbol("#false#");
125 }
126 if (liftingOrder.equalsIgnoreCase("shortest")) {
127 this.liftingOrder = LiftingOrder.SHORTEST;
128 } else if (liftingOrder.equalsIgnoreCase("deepest")) {
129 this.liftingOrder = LiftingOrder.DEEPEST;
130 }
131 }
132
133 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
134 nodeLifted.clear();
135 nodeTrace.clear();
136 headDeprel.clear();
137 nodePath.clear();
138 isCoveredRoot.clear();
139 nodeRelationLength.clear();
140
141 for (int index : pdg.getDependencyIndices()) {
142 nodeLifted.add(false);
143 nodeTrace.add(new Vector<DependencyNode>());
144 headDeprel.add(null);
145 nodePath.add(false);
146 isCoveredRoot.add(false);
147 if (ppliftedSymbolTable != null && index != 0) {
148 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
149 }
150 if (pppathSymbolTable != null && index != 0) {
151 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
152 }
153 if (ppcoveredRootSymbolTable != null && index != 0) {
154 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
155 }
156 }
157 computeRelationLength(pdg);
158 }
159
160 public void projectivize(DependencyStructure pdg) throws MaltChainedException {
161 id++;
162 if (!pdg.isTree()) {
163 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
164 return;
165 }
166 DependencyNode deepestNonProjectiveNode;
167 initProjectivization(pdg);
168 if (rootAttachment == CoveredRootAttachment.IGNORE) {
169 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
170 while (!pdg.isProjective()) {
171 if (liftingOrder == LiftingOrder.DEEPEST) {
172 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
173 } else {
174 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
175 }
176 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
177 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
178 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
179 setPath(deepestNonProjectiveNode.getHead());
180 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
181 }
182 }
183 deattachCoveredRootsForProjectivization(pdg);
184 }
185 } else {
186 if (rootAttachment != CoveredRootAttachment.NONE) {
187 for (int index : pdg.getTokenIndices()) {
188 attachCoveredRoots(pdg, pdg.getTokenNode(index));
189 }
190 }
191 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
192 while (!pdg.isProjective()) {
193 if (liftingOrder == LiftingOrder.DEEPEST) {
194 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
195 } else {
196 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
197 }
198 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
199 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
200 setPath(deepestNonProjectiveNode.getHead());
201 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
202 }
203 }
204 }
205 // collectTraceStatistics(pdg);
206 assignPseudoProjectiveDeprels(pdg);
207 }
208
209 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
210 assignPseudoProjectiveDeprelsForMerge(pdg);
211 }
212
213 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
214 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
215 String label;
216 initDeprojeciviztion(pdg);
217 for (int index : pdg.getTokenIndices()) {
218 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
219 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
220 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
221 label = label.substring(0, pathLabelIndex);
222 setLabel(pdg.getTokenNode(index), label);
223 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
224 }
225 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
226 if (movedLabelIndex + 1 < label.length()) {
227 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
228 } else {
229 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
230 }
231 label = label.substring(0, movedLabelIndex);
232 setLabel(pdg.getTokenNode(index), label);
233 }
234 }
235 }
236 for (int index : pdg.getTokenIndices()) {
237 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
238 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
239 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
240 label = label.substring(0, coveredArcLabelIndex);
241 setLabel(pdg.getTokenNode(index), label);
242 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
243 }
244 }
245 }
246 }
247
248 private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
249 if (headDeprel.get(node.getIndex()) == null) {
250 headDeprel.set(node.getIndex(), parent);
251 }
252 nodeTrace.set(node.getIndex(), headDeprel);
253 }
254
255 private void setPath(DependencyNode node) {
256 nodePath.set(node.getIndex(), true);
257 }
258
259 private boolean isCoveredRoot(DependencyNode node) {
260 return isCoveredRoot.get(node.getIndex());
261 }
262
263 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
264 for (int index : pdg.getTokenIndices()) {
265 if (isCoveredRoot(pdg.getTokenNode(index))) {
266 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
267 }
268 }
269 }
270
271 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
272 int i;
273 boolean foundCoveredRoot = false;
274 DependencyNode coveredRootHead;
275 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
276 .getIndex()); i++) {
277 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
278 if (leftMostIndex == -1) {
279 leftMostIndex = i;
280 }
281 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
282 if (rightMostIndex == -1) {
283 rightMostIndex = i;
284 }
285 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
286 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
287 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
288 if (rootAttachment == CoveredRootAttachment.LEFT) {
289 if (deepest.getHead().getIndex() < deepest.getIndex()) {
290 coveredRootHead = deepest.getHead();
291 } else {
292 coveredRootHead = deepest;
293 }
294 } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
295 if (deepest.getIndex() < deepest.getHead().getIndex()) {
296 coveredRootHead = deepest.getHead();
297 } else {
298 coveredRootHead = deepest;
299 }
300 } else {
301 coveredRootHead = deepest.getHead();
302 }
303 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
304 setCoveredRoot(pdg.getDependencyNode(i));
305 foundCoveredRoot = true;
306 }
307 }
308 return foundCoveredRoot;
309 }
310
311 private void setCoveredRoot(DependencyNode node) {
312 isCoveredRoot.set(node.getIndex(), true);
313 }
314
315 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
316 DependencyNode deepestNonProjectiveNode = null;
317 for (int index : pdg.getDependencyIndices()) {
318 if (!pdg.getDependencyNode(index).isProjective()
319 && (deepestNonProjectiveNode == null
320 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
321 deepestNonProjectiveNode = pdg.getDependencyNode(index);
322 }
323 }
324
325 return deepestNonProjectiveNode;
326 }
327
328 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
329 DependencyNode shortestNonProjectiveNode = null;
330 for (int index : pdg.getDependencyIndices()) {
331 if (!pdg.getDependencyNode(index).isProjective()
332 && (shortestNonProjectiveNode == null
333 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex())
334 )) {
335 // || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
336 shortestNonProjectiveNode = pdg.getDependencyNode(index);
337 }
338 }
339 return shortestNonProjectiveNode;
340 }
341
342
343 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
344 nodeRelationLength.add(0);
345 for (int index : pdg.getTokenIndices()) {
346 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
347 }
348 }
349
350 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
351 int newLabelCode;
352 for (int index : pdg.getTokenIndices()) {
353 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
354 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
355 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
356 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
357 if (nodeLifted.get(index)) {
358 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
359 } else {
360 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
361 }
362 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
363 } else {
364 if (nodeLifted.get(index)) {
365 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
366 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
367 } else {
368 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
369 }
370 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
371 }
372 }
373
374 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
375 if (nodePath.get(index)) {
376 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
377 } else {
378 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
379 }
380 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
381 }
382
383 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
384 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
385 }
386 }
387 }
388
389 private void setLabel(DependencyNode node, String label) throws MaltChainedException {
390 // node.getLabelCode().clear();
391 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
392 }
393
394 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
395 Vector<String> originalDeprel = new Vector<String>();
396 String newLabel;
397 originalDeprel.add(null);
398 for (int index : pdg.getTokenIndices()) {
399 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
400 }
401 for (int index : pdg.getTokenIndices()) {
402 newLabel = null;
403 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
404 if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
405 if (nodeLifted.get(index)) {
406 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
407 + originalDeprel.get(headDeprel.get(index).getIndex());
408 // } else {
409 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
410 }
411 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
412 if (nodeLifted.get(index) && nodePath.get(index)) {
413 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
414 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
416 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
417 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
418 }
419 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
420 if (nodeLifted.get(index) && nodePath.get(index)) {
421 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
422 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
423 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
424 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
425 + originalDeprel.get(headDeprel.get(index).getIndex());
426 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
427 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
428 }
429 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
430 if (nodeLifted.get(index)) {
431 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
432 }
433 }
434 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
435 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
436 }
437 if (newLabel != null) {
438 setLabel(pdg.getDependencyNode(index), newLabel);
439 }
440 }
441 }
442
443 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
444 initDeprojeciviztion(pdg);
445
446 for (int index : pdg.getTokenIndices()) {
447 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
448 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
449 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
450 setPath(pdg.getDependencyNode(index));
451 }
452 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
453 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
454 nodeLifted.set(index, true);
455 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
456 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
457 .getLabelCode(ppliftedSymbolTable)));
458 }
459 }
460 }
461 }
462 deattachCoveredRootsForDeprojectivization(pdg);
463 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
464 deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
465 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
466 deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
467 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
468 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
469 }
470 }
471
472 private void initDeprojeciviztion(DependencyStructure pdg) {
473 nodeLifted.clear();
474 nodePath.clear();
475 synacticHeadDeprel.clear();
476 for (int index : pdg.getDependencyIndices()) {
477 nodeLifted.add(false);
478 nodePath.add(false);
479 synacticHeadDeprel.add(null);
480 }
481 }
482
483 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
484 for (int index : pdg.getTokenIndices()) {
485 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
486 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
487 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
488 "#true#")) {
489 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
490 }
491 }
492 }
493 }
494
495 // Check whether there is at least one node in the specified dependency structure that can be lifted.
496 // If this is not the case, there is no need to call deprojectivizeWithHead.
497
498 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
499 for (int index : pdg.getDependencyIndices()) {
500 if (nodeLifted.get(index)) {
501 DependencyNode node = pdg.getDependencyNode(index);
502 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
503 return true;
504 }
505 }
506 }
507 return false;
508 }
509
510 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
511 boolean success = true, childSuccess = false;
512 int i, childAttempts = 2;
513 DependencyNode child, possibleSyntacticHead;
514 String syntacticHeadDeprel;
515 if (nodeLifted.get(node.getIndex())) {
516 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
517 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
518 if (possibleSyntacticHead != null) {
519 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
520 nodeLifted.set(node.getIndex(), false);
521 } else {
522 success = false;
523 }
524 }
525 while (!childSuccess && childAttempts > 0) {
526 childSuccess = true;
527 Vector<DependencyNode> children = new Vector<DependencyNode>();
528 i = 0;
529 while ((child = node.getLeftDependent(i)) != null) {
530 children.add(child);
531 i++;
532 }
533 i = 0;
534 while ((child = node.getRightDependent(i)) != null) {
535 children.add(child);
536 i++;
537 }
538 for (i = 0; i < children.size(); i++) {
539 child = children.get(i);
540 if (!deprojectivizeWithHead(pdg, child)) {
541 childSuccess = false;
542 }
543 }
544 childAttempts--;
545 }
546 return childSuccess && success;
547 }
548
549 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
550 throws MaltChainedException {
551 DependencyNode dependent;
552 String dependentDeprel;
553 Vector<DependencyNode> nodes = new Vector<DependencyNode>();
554 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
555 while (nodes.size() > 0) {
556 dependent = nodes.remove(0);
557 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
558 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
559 if (dependentDeprel.equals(syntacticHeadDeprel)) {
560 return dependent;
561 }
562 }
563 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
564 }
565 return null;
566 }
567
568
569 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
570 boolean percentOnly) {
571 Vector<DependencyNode> output = new Vector<DependencyNode>();
572 SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
573 dependents.addAll(governor.getLeftDependents());
574 dependents.addAll(governor.getRightDependents());
575
576
577 DependencyNode[] deps = new DependencyNode[dependents.size()];
578 int[] distances = new int[dependents.size()];
579 int i = 0;
580 for (DependencyNode dep : dependents) {
581 distances[i] = Math.abs(dep.getIndex() - avoid.getIndex());
582 deps[i] = dep;
583 i++;
584 }
585 if (distances.length > 1) {
586 int smallest;
587 int n = distances.length;
588 int tmpDist;
589 DependencyNode tmpDep;
590 for (i=0; i < n; i++) {
591 smallest = i;
592 for (int j=i; j < n; j++) {
593 if (distances[j] < distances[smallest]) {
594 smallest = j;
595 }
596 }
597 if (smallest != i) {
598 tmpDist = distances[smallest];
599 distances[smallest] = distances[i];
600 distances[i] = tmpDist;
601 tmpDep = deps[smallest];
602 deps[smallest] = deps[i];
603 deps[i] = tmpDep;
604 }
605 }
606 }
607 for (i=0; i<distances.length;i++) {
608 if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath.get(deps[i].getIndex())))) {
609 output.add(deps[i]);
610 }
611 }
612 return output;
613 }
614
615 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
616 boolean percentOnly) {
617 int i, j;
618 Vector<DependencyNode> dependents = new Vector<DependencyNode>();
619 DependencyNode leftChild, rightChild;
620
621 i = governor.getLeftDependentCount() - 1;
622 j = 0;
623 leftChild = governor.getLeftDependent(i--);
624 rightChild = governor.getRightDependent(j++);
625
626 while (leftChild != null && rightChild != null) {
627 if (leftChild == avoid) {
628 leftChild = governor.getLeftDependent(i--);
629 } else if (rightChild == avoid) {
630 rightChild = governor.getRightDependent(j++);
631 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
632 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
633 dependents.add(leftChild);
634 }
635 leftChild = governor.getLeftDependent(i--);
636 } else {
637 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
638 dependents.add(rightChild);
639 }
640 rightChild = governor.getRightDependent(j++);
641 }
642 }
643 while (leftChild != null) {
644 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
645 dependents.add(leftChild);
646 }
647 leftChild = governor.getLeftDependent(i--);
648 }
649 while (rightChild != null) {
650 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
651 dependents.add(rightChild);
652 }
653 rightChild = governor.getRightDependent(j++);
654 }
655 return dependents;
656 }
657
658 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
659 boolean success = true, childSuccess = false;
660 int i, childAttempts = 2;
661 DependencyNode child, possibleSyntacticHead;
662 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
663 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
664 if (possibleSyntacticHead != null) {
665 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
666 nodeLifted.set(node.getIndex(), false);
667 } else {
668 success = false;
669 }
670 }
671 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
672 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
673 if (possibleSyntacticHead != null) {
674 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
675 nodeLifted.set(node.getIndex(), false);
676 } else {
677 success = false;
678 }
679 }
680 while (!childSuccess && childAttempts > 0) {
681 childSuccess = true;
682 Vector<DependencyNode> children = new Vector<DependencyNode>();
683 i = 0;
684 while ((child = node.getLeftDependent(i)) != null) {
685 children.add(child);
686 i++;
687 }
688 i = 0;
689 while ((child = node.getRightDependent(i)) != null) {
690 children.add(child);
691 i++;
692 }
693 for (i = 0; i < children.size(); i++) {
694 child = children.get(i);
695 if (!deprojectivizeWithPath(pdg, child)) {
696 childSuccess = false;
697 }
698 }
699 childAttempts--;
700 }
701 return childSuccess && success;
702 }
703
704 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
705 DependencyNode dependent;
706 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
707 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
708 while (nodes.size() > 0) {
709 dependent = nodes.remove(0);
710 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
711 return dependent;
712 }
713 nodes.addAll(newNodes);
714 }
715 return null;
716 }
717
718 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
719 boolean success = true, childSuccess = false;
720 int i, childAttempts = 2;
721 DependencyNode child, possibleSyntacticHead;
722 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
723 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
724 .getIndex()));
725 if (possibleSyntacticHead != null) {
726 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
727 nodeLifted.set(node.getIndex(), false);
728 } else {
729 success = false;
730 }
731 }
732 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
733 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
734 .getIndex()));
735 if (possibleSyntacticHead != null) {
736 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
737 nodeLifted.set(node.getIndex(), false);
738 } else {
739 success = false;
740 }
741 }
742 while (!childSuccess && childAttempts > 0) {
743 childSuccess = true;
744 Vector<DependencyNode> children = new Vector<DependencyNode>();
745 i = 0;
746 while ((child = node.getLeftDependent(i)) != null) {
747 children.add(child);
748 i++;
749 }
750 i = 0;
751 while ((child = node.getRightDependent(i)) != null) {
752 children.add(child);
753 i++;
754 }
755 for (i = 0; i < children.size(); i++) {
756 child = children.get(i);
757 if (!deprojectivizeWithHeadAndPath(pdg, child)) {
758 childSuccess = false;
759 }
760 }
761 childAttempts--;
762 }
763 return childSuccess && success;
764 }
765
766 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
767 throws MaltChainedException {
768 DependencyNode dependent;
769 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
770 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
771 while (nodes.size() > 0) {
772 dependent = nodes.remove(0);
773 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
774 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
775 return dependent;
776 }
777 nodes.addAll(newNodes);
778 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
779 && newNodes.size() != 0) {
780 secondChance.add(dependent);
781 }
782 }
783 if (secondChance.size() > 0) {
784 return secondChance.firstElement();
785 }
786 return null;
787 }
788 }