001 package org.maltparser.parser;
002
003 import java.io.File;
004 import java.io.IOException;
005 import java.net.URL;
006 import java.util.Formatter;
007 import java.util.regex.Pattern;
008
009 import org.apache.log4j.FileAppender;
010 import org.apache.log4j.Level;
011 import org.apache.log4j.Logger;
012 import org.apache.log4j.PatternLayout;
013 import org.maltparser.core.config.ConfigurationDir;
014 import org.maltparser.core.config.ConfigurationException;
015 import org.maltparser.core.config.ConfigurationRegistry;
016 import org.maltparser.core.exception.MaltChainedException;
017 import org.maltparser.core.helper.SystemLogger;
018 import org.maltparser.core.helper.URLFinder;
019 import org.maltparser.core.io.dataformat.DataFormatInstance;
020 import org.maltparser.core.options.OptionManager;
021 import org.maltparser.core.propagation.PropagationManager;
022 import org.maltparser.core.symbol.SymbolTableHandler;
023 import org.maltparser.core.syntaxgraph.DependencyStructure;
024 import org.maltparser.parser.guide.ClassifierGuide;
025
026 /**
027 * @author Johan Hall
028 *
029 */
030 public class SingleMalt implements DependencyParserConfig {
031 public static final int LEARN = 0;
032 public static final int PARSE = 1;
033 protected ConfigurationDir configDir;
034 protected Logger configLogger;
035 protected int optionContainerIndex;
036 protected Algorithm parsingAlgorithm = null;
037 protected int mode;
038 protected ConfigurationRegistry registry;
039 protected SymbolTableHandler symbolTableHandler;
040 protected DataFormatInstance dataFormatInstance;
041 protected long startTime;
042 protected long endTime;
043 protected int nIterations = 0;
044 protected PropagationManager propagationManager;
045 private Parser parser;
046 private Trainer trainer;
047
048 public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
049
050 this.optionContainerIndex = containerIndex;
051 this.mode = mode;
052 setConfigurationDir(configDir);
053 startTime = System.currentTimeMillis();
054 configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
055 registry = new ConfigurationRegistry();
056 this.dataFormatInstance = dataFormatInstance;
057 symbolTableHandler = dataFormatInstance.getSymbolTables();
058
059 if (mode == SingleMalt.LEARN) {
060 checkOptionDependency();
061 }
062 registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
063 registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
064 // registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
065 initPropagation();
066 initParsingAlgorithm();
067 if (configLogger.isInfoEnabled()) {
068 URL inputFormatURL = configDir.getInputFormatURL();
069 URL outputFormatURL = configDir.getOutputFormatURL();
070 if (inputFormatURL != null) {
071 if (outputFormatURL == null || outputFormatURL.toString().equals(inputFormatURL.toString())) {
072 int index = inputFormatURL.toString().indexOf('!');
073 if (index == -1) {
074 configLogger.info(" Data Format : "+inputFormatURL.toString()+"\n");
075 } else {
076 configLogger.info(" Data Format : "+inputFormatURL.toString().substring(index+1)+"\n");
077 }
078 } else {
079 int indexIn = inputFormatURL.toString().indexOf('!');
080 int indexOut = outputFormatURL.toString().indexOf('!');
081 if (indexIn == -1) {
082 configLogger.info(" Input Data Format : "+inputFormatURL.toString()+"\n");
083 } else {
084 configLogger.info(" Input Data Format : "+inputFormatURL.toString().substring(indexIn+1)+"\n");
085 }
086 if (indexOut == -1) {
087 configLogger.info(" Output Data Format : "+outputFormatURL.toString()+"\n");
088 } else {
089 configLogger.info(" Output Data Format : "+outputFormatURL.toString().substring(indexOut+1)+"\n");
090 }
091 }
092 }
093 }
094 }
095
096 private void initPropagation() throws MaltChainedException {
097 String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
098 if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
099 return;
100 }
101 propagationManager = new PropagationManager(configDir);
102 if (mode == SingleMalt.LEARN) {
103 propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
104 OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
105 }
106 getConfigLogger().info(" Propagation : " + propagationSpecFileName+"\n");
107 propagationManager.loadSpecification(propagationSpecFileName);
108 }
109
110 /**
111 * Initialize the parsing algorithm
112 *
113 * @throws MaltChainedException
114 */
115 protected void initParsingAlgorithm() throws MaltChainedException {
116 if (mode == LEARN) {
117 parsingAlgorithm = trainer = new BatchTrainer(this);
118 } else if (mode == PARSE) {
119 parsingAlgorithm = parser = new DeterministicParser(this);
120 }
121 }
122
123 public void addRegistry(Class<?> clazz, Object o) {
124 registry.put(clazz, o);
125 }
126
127 public void process(Object[] arguments) throws MaltChainedException {
128 if (mode == LEARN) {
129 if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
130 throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
131 }
132 DependencyStructure systemGraph = (DependencyStructure)arguments[0];
133 DependencyStructure goldGraph = (DependencyStructure)arguments[1];
134 if (systemGraph.hasTokens() && getGuide() != null) {
135 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
136 }
137 } else if (mode == PARSE) {
138 if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
139 throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
140 }
141 DependencyStructure processGraph = (DependencyStructure)arguments[0];
142 if (processGraph.hasTokens()) {
143 parser.parse(processGraph);
144 // ((Parser)getAlgorithm()).parse(processGraph);
145 }
146 }
147 }
148
149 public void parse(DependencyStructure graph) throws MaltChainedException {
150 if (graph.hasTokens()) {
151 // ((Parser)getAlgorithm()).parse(graph);
152 parser.parse(graph);
153 }
154 }
155
156 public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
157 if (oracleGraph.hasTokens()) {
158 if (getGuide() != null) {
159 getGuide().finalizeSentence(trainer.parse(goldGraph, oracleGraph));
160 } else {
161 trainer.parse(goldGraph, oracleGraph);
162 }
163 }
164 }
165
166 public void train() throws MaltChainedException {
167 if (getGuide() == null) {
168 ((Trainer)getAlgorithm()).train();
169 }
170 }
171
172 public void terminate(Object[] arguments) throws MaltChainedException {
173 // if (getAlgorithm() instanceof Trainer) {
174 // ((Trainer)getAlgorithm()).terminate();
175 // }
176 getAlgorithm().terminate();
177 if (getGuide() != null) {
178 getGuide().terminate();
179 }
180 if (mode == LEARN) {
181 endTime = System.currentTimeMillis();
182 long elapsed = endTime - startTime;
183 if (configLogger.isInfoEnabled()) {
184 configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
185 }
186 } else if (mode == PARSE) {
187 endTime = System.currentTimeMillis();
188 long elapsed = endTime - startTime;
189 if (configLogger.isInfoEnabled()) {
190 configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
191 }
192 }
193 if (SystemLogger.logger() != configLogger && configLogger != null) {
194 configLogger.removeAllAppenders();
195 }
196 }
197
198 /**
199 * Initialize the configuration logger
200 *
201 * @return the configuration logger
202 * @throws MaltChainedException
203 */
204 public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
205 if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
206 configLogger = Logger.getLogger(logfile);
207 FileAppender fileAppender = null;
208 try {
209 fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
210 } catch(IOException e) {
211 throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
212 }
213 fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
214 configLogger.addAppender(fileAppender);
215 configLogger.setLevel(Level.toLevel(level, Level.INFO));
216 } else {
217 configLogger = SystemLogger.logger();
218 }
219
220 return configLogger;
221 }
222
223 public Logger getConfigLogger() {
224 return configLogger;
225 }
226
227 public void setConfigLogger(Logger logger) {
228 configLogger = logger;
229 }
230
231 public ConfigurationDir getConfigurationDir() {
232 return configDir;
233 }
234
235 public void setConfigurationDir(ConfigurationDir configDir) {
236 this.configDir = configDir;
237 }
238
239 public int getMode() {
240 return mode;
241 }
242
243 public ConfigurationRegistry getRegistry() {
244 return registry;
245 }
246
247 public void setRegistry(ConfigurationRegistry registry) {
248 this.registry = registry;
249 }
250
251 public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
252 return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
253 }
254
255 public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
256 return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
257 }
258
259 public OptionManager getOptionManager() throws MaltChainedException {
260 return OptionManager.instance();
261 }
262 /******************************** MaltParserConfiguration specific ********************************/
263
264 /**
265 * Returns the list of symbol tables
266 *
267 * @return the list of symbol tables
268 */
269 public SymbolTableHandler getSymbolTables() {
270 return symbolTableHandler;
271 }
272
273 public PropagationManager getPropagationManager() {
274 return propagationManager;
275 }
276
277 public Algorithm getAlgorithm() {
278 return parsingAlgorithm;
279 }
280 /**
281 * Returns the guide
282 *
283 * @return the guide
284 */
285 public ClassifierGuide getGuide() {
286 return parsingAlgorithm.getGuide();
287 }
288
289 public void checkOptionDependency() throws MaltChainedException {
290 try {
291 if (configDir.getInfoFileWriter() != null) {
292 configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
293 }
294
295 // Copy the feature model file into the configuration directory
296 String featureModelFileName = getOptionValue("guide", "features").toString().trim();
297 if (featureModelFileName.equals("")) {
298 // use default feature model depending on the selected parser algorithm
299 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
300 featureModelFileName = getOptionValue("guide", "features").toString().trim();
301 /* START: Temp fix during development of new liblinear and libsvm interface */
302 String learner = getOptionValueString("guide", "learner");
303 if (!learner.startsWith("lib")) {
304 learner = "lib"+learner;
305 }
306 /* END: Temp fix during development of new liblinear and libsvm interface */
307 featureModelFileName = featureModelFileName.replace("{learner}", learner);
308 final URLFinder f = new URLFinder();
309 featureModelFileName = configDir.copyToConfig(f.findURLinJars(featureModelFileName));
310 } else {
311 featureModelFileName = configDir.copyToConfig(featureModelFileName);
312 }
313 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
314 if (configDir.getInfoFileWriter() != null) {
315 configDir.getInfoFileWriter().write("--guide-features ( -F) "+getOptionValue("guide", "features").toString()+"\n");
316 }
317
318 if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
319 configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
320 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
321 if (configDir.getInfoFileWriter() != null) {
322 configDir.getInfoFileWriter().write("--guide-data_split_structure ( -s)\n");
323 }
324 }
325 if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
326 configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
327 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
328 if (configDir.getInfoFileWriter() != null) {
329 configDir.getInfoFileWriter().write("--guide-data_split_column ( -d)\n");
330 }
331 }
332
333 String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
334 String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
335 String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
336 StringBuilder newDecisionSettings = new StringBuilder();
337
338 if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
339 decisionSettings = "T.TRANS+A.DEPREL";
340 } else {
341 decisionSettings = decisionSettings.toUpperCase();
342 }
343
344 if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
345 if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
346 newDecisionSettings.append("+A.PPLIFTED");
347 }
348 }
349 if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
350 if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
351 newDecisionSettings.append("+A.PPPATH");
352 }
353 }
354 if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
355 newDecisionSettings.append("+A.PPCOVERED");
356 }
357 if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
358 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
359 if (configDir.getInfoFileWriter() != null) {
360 configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) "+getOptionValue("guide", "decision_settings").toString()+"\n");
361 }
362 }
363 if (configDir.getInfoFileWriter() != null) {
364 configDir.getInfoFileWriter().flush();
365 }
366 } catch (IOException e) {
367 throw new ConfigurationException("Could not write to the configuration information file. ", e);
368 }
369 }
370 }