001 package org.maltparser.core.config;
002
003 import java.io.BufferedInputStream;
004 import java.io.BufferedOutputStream;
005 import java.io.BufferedReader;
006 import java.io.BufferedWriter;
007 import java.io.File;
008 import java.io.FileInputStream;
009 import java.io.FileNotFoundException;
010 import java.io.FileOutputStream;
011 import java.io.FileReader;
012 import java.io.FileWriter;
013 import java.io.IOException;
014 import java.io.InputStream;
015 import java.io.InputStreamReader;
016 import java.io.OutputStreamWriter;
017 import java.io.UnsupportedEncodingException;
018 import java.net.MalformedURLException;
019 import java.net.URL;
020 import java.util.Date;
021 import java.util.Enumeration;
022 import java.util.HashMap;
023 import java.util.Set;
024 import java.util.SortedSet;
025 import java.util.TreeSet;
026 import java.util.jar.JarEntry;
027 import java.util.jar.JarFile;
028 import java.util.jar.JarInputStream;
029 import java.util.jar.JarOutputStream;
030
031 import org.maltparser.core.config.version.Versioning;
032 import org.maltparser.core.exception.MaltChainedException;
033 import org.maltparser.core.helper.HashSet;
034 import org.maltparser.core.helper.SystemInfo;
035 import org.maltparser.core.helper.SystemLogger;
036 import org.maltparser.core.helper.URLFinder;
037 import org.maltparser.core.io.dataformat.DataFormatInstance;
038 import org.maltparser.core.io.dataformat.DataFormatManager;
039 import org.maltparser.core.io.dataformat.DataFormatSpecification.DataStructure;
040 import org.maltparser.core.io.dataformat.DataFormatSpecification.Dependency;
041 import org.maltparser.core.options.OptionManager;
042 import org.maltparser.core.symbol.SymbolTableHandler;
043 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
044
045
/**
 * This class contains methods for handling the configuration directory.
 *
 * @author Johan Hall
 */
051 public class ConfigurationDir {
052 protected static final int BUFFER = 4096;
053 protected File configDirectory;
054 protected String name;
055 protected String type;
056 protected File workingDirectory;
057 protected URL url = null;
058 protected int containerIndex;
059 protected BufferedWriter infoFile = null;
060 protected String createdByMaltParserVersion;
061
062 private SymbolTableHandler symbolTables;
063 private DataFormatManager dataFormatManager;
064 private HashMap<String,DataFormatInstance> dataFormatInstances;
065 private URL inputFormatURL;
066 private URL outputFormatURL;
067
/**
 * Builds a configuration directory description from a packed configuration
 * (mco-file) that is addressed by an URL.
 * <p>
 * The working directory is initialised first, then the URL is stored and
 * finally the configuration name and type are read from the info entry of
 * the mco-file.
 *
 * @param url an URL to a mco-file
 * @throws MaltChainedException if the name and type cannot be read from the mco-file
 */
public ConfigurationDir(URL url) throws MaltChainedException {
    this.initWorkingDirectory();
    this.setUrl(url);
    this.initNameNTypeFromInfoFile(url);
}
080
081 /**
082 * Creates a new configuration directory or a configuration directory from a mco-file
083 *
084 * @param name the name of the configuration
085 * @param type the type of configuration
086 * @param containerIndex the container index
087 * @throws MaltChainedException
088 */
089 public ConfigurationDir(String name, String type, int containerIndex) throws MaltChainedException {
090 setContainerIndex(containerIndex);
091
092 initWorkingDirectory();
093 if (name != null && name.length() > 0 && type != null && type.length() > 0) {
094 setName(name);
095 setType(type);
096 } else {
097 throw new ConfigurationException("The configuration name is not specified. ");
098 }
099
100 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName()));
101
102 String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim();
103 if (mode.equals("parse")) {
104 // During parsing also search for the MaltParser configuration file in the class path
105 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
106 if (!mcoPath.exists()) {
107 String classpath = System.getProperty("java.class.path");
108 String[] items = classpath.split(System.getProperty("path.separator"));
109 boolean found = false;
110 for (String item : items) {
111 File candidateDir = new File(item);
112 if (candidateDir.exists() && candidateDir.isDirectory()) {
113 File candidateConfigFile = new File(candidateDir.getPath()+File.separator+getName()+".mco");
114 if (candidateConfigFile.exists()) {
115 initWorkingDirectory(candidateDir.getPath());
116 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName()));
117 found = true;
118 break;
119 }
120 }
121 }
122 if (found == false) {
123 throw new ConfigurationException("Couldn't find the MaltParser configuration file: " + getName()+".mco");
124 }
125 }
126 }
127 }
128
129 public void initDataFormat() throws MaltChainedException {
130 String inputFormatName = OptionManager.instance().getOptionValue(containerIndex, "input", "format").toString().trim();
131 String outputFormatName = OptionManager.instance().getOptionValue(containerIndex, "output", "format").toString().trim();
132 final URLFinder f = new URLFinder();
133
134 if (configDirectory != null && configDirectory.exists()) {
135 if (outputFormatName.length() == 0 || inputFormatName.equals(outputFormatName)) {
136 URL inputFormatURL = f.findURLinJars(inputFormatName);
137 if (inputFormatURL != null) {
138 outputFormatName = inputFormatName = this.copyToConfig(inputFormatURL);
139 } else {
140 outputFormatName = inputFormatName = this.copyToConfig(inputFormatName);
141 }
142 } else {
143 URL inputFormatURL = f.findURLinJars(inputFormatName);
144 if (inputFormatURL != null) {
145 inputFormatName = this.copyToConfig(inputFormatURL);
146 } else {
147 inputFormatName = this.copyToConfig(inputFormatName);
148 }
149 URL outputFormatURL = f.findURLinJars(outputFormatName);
150 if (inputFormatURL != null) {
151 outputFormatName = this.copyToConfig(outputFormatURL);
152 } else {
153 outputFormatName = this.copyToConfig(outputFormatName);
154 }
155 }
156 OptionManager.instance().overloadOptionValue(containerIndex, "input", "format", inputFormatName);
157 } else {
158 if (outputFormatName.length() == 0) {
159 outputFormatName = inputFormatName;
160 }
161 }
162 dataFormatInstances = new HashMap<String, DataFormatInstance>(3);
163
164 inputFormatURL = findURL(inputFormatName);
165 outputFormatURL = findURL(outputFormatName);
166 if (outputFormatURL != null) {
167 try {
168 InputStream is = outputFormatURL.openStream();
169 } catch (FileNotFoundException e) {
170 outputFormatURL = f.findURL(outputFormatName);
171 } catch (IOException e) {
172 outputFormatURL = f.findURL(outputFormatName);
173 }
174 } else {
175 outputFormatURL = f.findURL(outputFormatName);
176 }
177 dataFormatManager = new DataFormatManager(inputFormatURL, outputFormatURL);
178
179 String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim();
180 if (mode.equals("parse")) {
181 symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE);
182 // symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
183 } else {
184 symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
185 }
186 if (dataFormatManager.getInputDataFormatSpec().getDataStructure() == DataStructure.PHRASE) {
187 if (mode.equals("learn")) {
188 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies();
189 for (Dependency dep : deps) {
190 URL depFormatURL = f.findURLinJars(dep.getUrlString());
191 if (depFormatURL != null) {
192 this.copyToConfig(depFormatURL);
193 } else {
194 this.copyToConfig(dep.getUrlString());
195 }
196 }
197 }
198 else if (mode.equals("parse")) {
199 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies();
200 String nullValueStategy = OptionManager.instance().getOptionValue(containerIndex, "singlemalt", "null_value").toString();
201 for (Dependency dep : deps) {
202 // URL depFormatURL = f.findURLinJars(dep.getUrlString());
203 DataFormatInstance dataFormatInstance = dataFormatManager.getDataFormatSpec(dep.getDependentOn()).createDataFormatInstance(symbolTables, nullValueStategy);
204 addDataFormatInstance(dataFormatManager.getDataFormatSpec(dep.getDependentOn()).getDataFormatName(), dataFormatInstance);
205 dataFormatManager.setInputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn()));
206 // dataFormatManager.setOutputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn()));
207 }
208 }
209 }
210 }
211
212 private URL findURL(String specModelFileName) throws MaltChainedException {
213 URL url = null;
214 File specFile = this.getFile(specModelFileName);
215 if (specFile.exists()) {
216 try {
217 url = new URL("file:///"+specFile.getAbsolutePath());
218 } catch (MalformedURLException e) {
219 throw new MaltChainedException("Malformed URL: "+specFile, e);
220 }
221 } else {
222 url = this.getConfigFileEntryURL(specModelFileName);
223 }
224 return url;
225 }
226
227 /**
228 * Creates an output stream writer, where the corresponding file will be included in the configuration directory
229 *
230 * @param fileName a file name
231 * @param charSet a char set
232 * @return an output stream writer for writing to a file within the configuration directory
233 * @throws MaltChainedException
234 */
235 public OutputStreamWriter getOutputStreamWriter(String fileName, String charSet) throws MaltChainedException {
236 try {
237 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName), charSet);
238 } catch (FileNotFoundException e) {
239 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e);
240 } catch (UnsupportedEncodingException e) {
241 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e);
242 }
243 }
244
245 /**
246 * Creates an output stream writer, where the corresponding file will be included in the
247 * configuration directory. Uses UTF-8 for character encoding.
248 *
249 * @param fileName a file name
250 * @return an output stream writer for writing to a file within the configuration directory
251 * @throws MaltChainedException
252 */
253 public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException {
254 try {
255 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8");
256 } catch (FileNotFoundException e) {
257 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e);
258 } catch (UnsupportedEncodingException e) {
259 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e);
260 }
261 }
262 /**
263 * This method acts the same as getOutputStreamWriter with the difference that the writer append in the file
264 * if it already exists instead of deleting the previous content before starting to write.
265 *
266 * @param fileName a file name
267 * @return an output stream writer for writing to a file within the configuration directory
268 * @throws MaltChainedException
269 */
270 public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException {
271 try {
272 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8");
273 } catch (FileNotFoundException e) {
274 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e);
275 } catch (UnsupportedEncodingException e) {
276 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e);
277 }
278 }
279
280 /**
281 * Creates an input stream reader for reading a file within the configuration directory
282 *
283 * @param fileName a file name
284 * @param charSet a char set
285 * @return an input stream reader for reading a file within the configuration directory
286 * @throws MaltChainedException
287 */
288 public InputStreamReader getInputStreamReader(String fileName, String charSet) throws MaltChainedException {
289 try {
290 return new InputStreamReader(new FileInputStream(configDirectory.getPath()+File.separator+fileName), charSet);
291 } catch (FileNotFoundException e) {
292 throw new ConfigurationException("The file '"+fileName+"' cannot be found. ", e);
293 } catch (UnsupportedEncodingException e) {
294 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e);
295 }
296 }
297
298 /**
299 * Creates an input stream reader for reading a file within the configuration directory.
300 * Uses UTF-8 for character encoding.
301 *
302 * @param fileName a file name
303 * @return an input stream reader for reading a file within the configuration directory
304 * @throws MaltChainedException
305 */
306 public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException {
307 return getInputStreamReader(fileName, "UTF-8");
308 }
309
310 public JarEntry getConfigFileEntry(String fileName) throws MaltChainedException {
311 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
312 try {
313 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath());
314 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName);
315 if (entry == null) {
316 entry = mcoFile.getJarEntry(getName()+'\\'+fileName);
317 }
318 return entry;
319 } catch (FileNotFoundException e) {
320 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e);
321 } catch (IOException e) {
322 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e);
323 }
324 }
325
326 public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException {
327 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
328 try {
329 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath());
330 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName);
331 if (entry == null) {
332 entry = mcoFile.getJarEntry(getName()+'\\'+fileName);
333 }
334 if (entry == null) {
335 throw new FileNotFoundException();
336 }
337 return mcoFile.getInputStream(entry);
338 } catch (FileNotFoundException e) {
339 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e);
340 } catch (IOException e) {
341 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e);
342 }
343 }
344
345 public InputStreamReader getInputStreamReaderFromConfigFileEntry(String fileName, String charSet) throws MaltChainedException {
346 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
347 try {
348 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath());
349 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName);
350 if (entry == null) {
351 entry = mcoFile.getJarEntry(getName()+'\\'+fileName);
352 }
353 if (entry == null) {
354 throw new FileNotFoundException();
355 }
356 return new InputStreamReader(mcoFile.getInputStream(entry), charSet);
357 } catch (FileNotFoundException e) {
358 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e);
359 } catch (UnsupportedEncodingException e) {
360 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e);
361 } catch (IOException e) {
362 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e);
363 }
364 }
365
366 public InputStreamReader getInputStreamReaderFromConfigFile(String fileName) throws MaltChainedException {
367 return getInputStreamReaderFromConfigFileEntry(fileName, "UTF-8");
368 }
369
370 /**
371 * Returns a file handler object of a file within the configuration directory
372 *
373 * @param fileName a file name
374 * @return a file handler object of a file within the configuration directory
375 * @throws MaltChainedException
376 */
377 public File getFile(String fileName) throws MaltChainedException {
378 return new File(configDirectory.getPath()+File.separator+fileName);
379 }
380
381 public URL getConfigFileEntryURL(String fileName) throws MaltChainedException {
382 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
383 try {
384 if (!mcoPath.exists()) {
385 throw new ConfigurationException("Couldn't find mco-file '" +mcoPath.getAbsolutePath()+ "'");
386 }
387 // new URL("file", null, mcoPath.getAbsolutePath());
388 URL url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'/'+fileName + "\n");
389 try {
390 InputStream is = url.openStream();
391 is.close();
392 } catch (IOException e) {
393 url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'\\'+fileName + "\n");
394 }
395 return url;
396 } catch (MalformedURLException e) {
397 throw new ConfigurationException("Couldn't find the URL '" +"jar:"+mcoPath.getAbsolutePath()+"!/"+getName()+'/'+fileName+ "'", e);
398 }
399 }
400
401 /**
402 * Copies a file into the configuration directory.
403 *
404 * @param source a path to file
405 * @throws MaltChainedException
406 */
407 public String copyToConfig(File source) throws MaltChainedException {
408 byte[] readBuffer = new byte[BUFFER];
409 String destination = configDirectory.getPath()+File.separator+source.getName();
410 try {
411 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source));
412 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
413
414 int n = 0;
415 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
416 bos.write(readBuffer, 0, n);
417 }
418 bos.flush();
419 bos.close();
420 bis.close();
421 } catch (FileNotFoundException e) {
422 throw new ConfigurationException("The source file '"+source+"' cannot be found or the destination file '"+destination+"' cannot be created when coping the file. ", e);
423 } catch (IOException e) {
424 throw new ConfigurationException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e);
425 }
426 return source.getName();
427 }
428
429
430 public String copyToConfig(String fileUrl) throws MaltChainedException {
431 final URLFinder f = new URLFinder();
432 URL url = f.findURL(fileUrl);
433 if (url == null) {
434 throw new ConfigurationException("The file or URL '"+fileUrl+"' could not be found. ");
435 }
436 return copyToConfig(url);
437 }
438
439 public String copyToConfig(URL url) throws MaltChainedException {
440 if (url == null) {
441 throw new ConfigurationException("URL could not be found. ");
442 }
443 byte[] readBuffer = new byte[BUFFER];
444 String destFileName = url.getPath();
445 int indexSlash = destFileName.lastIndexOf('/');
446 if (indexSlash == -1) {
447 indexSlash = destFileName.lastIndexOf('\\');
448 }
449
450 if (indexSlash != -1) {
451 destFileName = destFileName.substring(indexSlash+1);
452 }
453
454 String destination = configDirectory.getPath()+File.separator+destFileName;
455 try {
456 BufferedInputStream bis = new BufferedInputStream(url.openStream());
457 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
458
459 int n = 0;
460 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
461 bos.write(readBuffer, 0, n);
462 }
463 bos.flush();
464 bos.close();
465 bis.close();
466 } catch (FileNotFoundException e) {
467 throw new ConfigurationException("The destination file '"+destination+"' cannot be created when coping the file. ", e);
468 } catch (IOException e) {
469 throw new ConfigurationException("The URL '"+url+"' cannot be copied to destination '"+destination+"'. ", e);
470 }
471 return destFileName;
472 }
473
474
475 /**
476 * Removes the configuration directory, if it exists and it contains a .info file.
477 *
478 * @throws MaltChainedException
479 */
480 public void deleteConfigDirectory() throws MaltChainedException {
481 if (!configDirectory.exists()) {
482 return;
483 }
484 File infoFile = new File(configDirectory.getPath()+File.separator+getName()+"_"+getType()+".info");
485 if (infoFile.exists()) {
486 deleteConfigDirectory(configDirectory);
487 } else {
488 throw new ConfigurationException("There exists a directory that is not a MaltParser configuration directory. ");
489 }
490 }
491
492 private void deleteConfigDirectory(File directory) throws MaltChainedException {
493 if (directory.exists()) {
494 File[] files = directory.listFiles();
495 for (int i = 0; i < files.length; i++) {
496 if (files[i].isDirectory()) {
497 deleteConfigDirectory(files[i]);
498 } else {
499 files[i].delete();
500 }
501 }
502 } else {
503 throw new ConfigurationException("The directory '"+directory.getPath()+ "' cannot be found. ");
504 }
505 directory.delete();
506 }
507
508 /**
509 * Returns a file handler object for the configuration directory
510 *
511 * @return a file handler object for the configuration directory
512 */
513 public File getConfigDirectory() {
514 return configDirectory;
515 }
516
517 protected void setConfigDirectory(File dir) {
518 this.configDirectory = dir;
519 }
520
521 /**
522 * Creates the configuration directory
523 *
524 * @throws MaltChainedException
525 */
526 public void createConfigDirectory() throws MaltChainedException {
527 checkConfigDirectory();
528 configDirectory.mkdir();
529 createInfoFile();
530 }
531
532 protected void checkConfigDirectory() throws MaltChainedException {
533 if (configDirectory.exists() && !configDirectory.isDirectory()) {
534 throw new ConfigurationException("The configuration directory name already exists and is not a directory. ");
535 }
536
537 if (configDirectory.exists()) {
538 deleteConfigDirectory();
539 }
540 }
541
542 protected void createInfoFile() throws MaltChainedException {
543 infoFile = new BufferedWriter(getOutputStreamWriter(getName()+"_"+getType()+".info"));
544 try {
545 infoFile.write("CONFIGURATION\n");
546 infoFile.write("Configuration name: "+getName()+"\n");
547 infoFile.write("Configuration type: "+getType()+"\n");
548 infoFile.write("Created: "+new Date(System.currentTimeMillis())+"\n");
549
550 infoFile.write("\nSYSTEM\n");
551 infoFile.write("Operating system architecture: "+System.getProperty("os.arch")+"\n");
552 infoFile.write("Operating system name: "+System.getProperty("os.name")+"\n");
553 infoFile.write("JRE vendor name: "+System.getProperty("java.vendor")+"\n");
554 infoFile.write("JRE version number: "+System.getProperty("java.version")+"\n");
555
556 infoFile.write("\nMALTPARSER\n");
557 infoFile.write("Version: "+SystemInfo.getVersion()+"\n");
558 infoFile.write("Build date: "+SystemInfo.getBuildDate()+"\n");
559 Set<String> excludeGroups = new HashSet<String>();
560 excludeGroups.add("system");
561 infoFile.write("\nSETTINGS\n");
562 infoFile.write(OptionManager.instance().toStringPrettyValues(containerIndex, excludeGroups));
563 infoFile.flush();
564 } catch (IOException e) {
565 throw new ConfigurationException("Could not create the maltparser info file. ");
566 }
567 }
568
569 /**
570 * Returns a writer to the configuration information file
571 *
572 * @return a writer to the configuration information file
573 * @throws MaltChainedException
574 */
575 public BufferedWriter getInfoFileWriter() throws MaltChainedException {
576 return infoFile;
577 }
578
579 /**
580 * Creates the malt configuration file (.mco). This file is compressed.
581 *
582 * @throws MaltChainedException
583 */
584 public void createConfigFile() throws MaltChainedException {
585 try {
586 JarOutputStream jos = new JarOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+getName()+".mco"));
587 // configLogger.info("Creates configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco' ...\n");
588 createConfigFile(configDirectory.getPath(), jos);
589 jos.close();
590 } catch (FileNotFoundException e) {
591 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e);
592 } catch (IOException e) {
593 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be created. ", e);
594 }
595 }
596
597 private void createConfigFile(String directory, JarOutputStream jos) throws MaltChainedException {
598 byte[] readBuffer = new byte[BUFFER];
599 try {
600 File zipDir = new File(directory);
601 String[] dirList = zipDir.list();
602
603 int bytesIn = 0;
604
605 for (int i = 0; i < dirList.length; i++) {
606 File f = new File(zipDir, dirList[i]);
607 if (f.isDirectory()) {
608 String filePath = f.getPath();
609 createConfigFile(filePath, jos);
610 continue;
611 }
612
613 FileInputStream fis = new FileInputStream(f);
614
615 String entryPath = f.getPath().substring(workingDirectory.getPath().length()+1);
616 entryPath = entryPath.replace('\\', '/');
617 JarEntry entry = new JarEntry(entryPath);
618 jos.putNextEntry(entry);
619
620 while ((bytesIn = fis.read(readBuffer)) != -1) {
621 jos.write(readBuffer, 0, bytesIn);
622 }
623
624 fis.close();
625 }
626 } catch (FileNotFoundException e) {
627 throw new ConfigurationException("The directory '"+directory+"' cannot be found. ", e);
628 } catch (IOException e) {
629 throw new ConfigurationException("The directory '"+directory+"' cannot be compressed into a mco file. ", e);
630 }
631 }
632
633
634 public void copyConfigFile(File in, File out, Versioning versioning) throws MaltChainedException {
635 try {
636 JarFile jar = new JarFile(in);
637 JarOutputStream tempJar = new JarOutputStream(new FileOutputStream(out));
638 byte[] buffer = new byte[BUFFER];
639 int bytesRead;
640 final StringBuilder sb = new StringBuilder();
641 final URLFinder f = new URLFinder();
642
643 for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements(); ) {
644 JarEntry inEntry = (JarEntry) entries.nextElement();
645 InputStream entryStream = jar.getInputStream(inEntry);
646 JarEntry outEntry = versioning.getJarEntry(inEntry);
647
648 if (!versioning.hasChanges(inEntry, outEntry)) {
649 tempJar.putNextEntry(outEntry);
650 while ((bytesRead = entryStream.read(buffer)) != -1) {
651 tempJar.write(buffer, 0, bytesRead);
652 }
653 } else {
654 tempJar.putNextEntry(outEntry);
655 BufferedReader br = new BufferedReader(new InputStreamReader(entryStream));
656 String line = null;
657 sb.setLength(0);
658 while ((line = br.readLine()) != null) {
659 sb.append(line);
660 sb.append('\n');
661 }
662 String outString = versioning.modifyJarEntry(inEntry, outEntry, sb);
663 tempJar.write(outString.getBytes());
664 }
665 }
666 if (versioning.getFeatureModelXML() != null && versioning.getFeatureModelXML().startsWith("/appdata")) {
667 int index = versioning.getFeatureModelXML().lastIndexOf('/');
668 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getFeatureModelXML()).openStream());
669 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getFeatureModelXML().substring(index+1)));
670 int n = 0;
671 while ((n = bis.read(buffer, 0, BUFFER)) != -1) {
672 tempJar.write(buffer, 0, n);
673 }
674 bis.close();
675 }
676 if (versioning.getInputFormatXML() != null && versioning.getInputFormatXML().startsWith("/appdata")) {
677 int index = versioning.getInputFormatXML().lastIndexOf('/');
678 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getInputFormatXML()).openStream());
679 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getInputFormatXML().substring(index+1)));
680 int n = 0;
681 while ((n = bis.read(buffer, 0, BUFFER)) != -1) {
682 tempJar.write(buffer, 0, n);
683 }
684 bis.close();
685 }
686 tempJar.flush();
687 tempJar.close();
688 jar.close();
689 } catch (IOException e) {
690 throw new ConfigurationException("", e);
691 }
692 }
693
694 protected void initNameNTypeFromInfoFile(URL url) throws MaltChainedException {
695 if (url == null) {
696 throw new ConfigurationException("The URL cannot be found. ");
697 }
698 try {
699 JarEntry je;
700 JarInputStream jis = new JarInputStream(url.openConnection().getInputStream());
701 while ((je = jis.getNextJarEntry()) != null) {
702 String entryName = je.getName();
703 if (entryName.endsWith(".info")) {
704 int indexUnderScore = entryName.lastIndexOf('_');
705 int indexSeparator = entryName.lastIndexOf(File.separator);
706 if (indexSeparator == -1) {
707 indexSeparator = entryName.lastIndexOf('/');
708 }
709 if (indexSeparator == -1) {
710 indexSeparator = entryName.lastIndexOf('\\');
711 }
712 int indexDot = entryName.lastIndexOf('.');
713 if (indexUnderScore == -1 || indexDot == -1) {
714 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ");
715 }
716 setName(entryName.substring(indexSeparator+1, indexUnderScore));
717 setType(entryName.substring(indexUnderScore+1, indexDot));
718 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName()));
719 jis.close();
720 return;
721 }
722 }
723
724 } catch (IOException e) {
725 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ", e);
726 }
727 }
728
729 /**
730 * Prints the content of the configuration information file to the system logger
731 *
732 * @throws MaltChainedException
733 */
734 public void echoInfoFile() throws MaltChainedException {
735 checkConfigDirectory();
736 JarInputStream jis;
737 try {
738 if (url == null) {
739 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco"));
740 } else {
741 jis = new JarInputStream(url.openConnection().getInputStream());
742 }
743 JarEntry je;
744
745 while ((je = jis.getNextJarEntry()) != null) {
746 String entryName = je.getName();
747
748 if (entryName.endsWith(getName()+"_"+getType()+".info")) {
749 int c;
750 while ((c = jis.read()) != -1) {
751 SystemLogger.logger().info((char)c);
752 }
753 }
754 }
755 jis.close();
756 } catch (FileNotFoundException e) {
757 throw new ConfigurationException("Could not print configuration information file. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e);
758 } catch (IOException e) {
759 throw new ConfigurationException("Could not print configuration information file. ", e);
760 }
761
762 }
763
764 /**
765 * Unpacks the malt configuration file (.mco).
766 *
767 * @throws MaltChainedException
768 */
769 public void unpackConfigFile() throws MaltChainedException {
770 checkConfigDirectory();
771 JarInputStream jis;
772 try {
773 if (url == null) {
774 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco"));
775 } else {
776 jis = new JarInputStream(url.openConnection().getInputStream());
777 }
778 unpackConfigFile(jis);
779 jis.close();
780 } catch (FileNotFoundException e) {
781 throw new ConfigurationException("Could not unpack configuration. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e);
782 } catch (IOException e) {
783 if (configDirectory.exists()) {
784 deleteConfigDirectory();
785 }
786 throw new ConfigurationException("Could not unpack configuration. ", e);
787 }
788 initCreatedByMaltParserVersionFromInfoFile();
789 }
790
791 protected void unpackConfigFile(JarInputStream jis) throws MaltChainedException {
792 try {
793 JarEntry je;
794 byte[] readBuffer = new byte[BUFFER];
795 SortedSet<String> directoryCache = new TreeSet<String>();
796 while ((je = jis.getNextJarEntry()) != null) {
797 String entryName = je.getName();
798
799 if (entryName.startsWith("/")) {
800 entryName = entryName.substring(1);
801 }
802 if (entryName.endsWith(File.separator) || entryName.endsWith("/")) {
803 return;
804 }
805 int index = -1;
806 if (File.separator.equals("\\")) {
807 entryName = entryName.replace('/', '\\');
808 index = entryName.lastIndexOf("\\");
809 } else if (File.separator.equals("/")) {
810 entryName = entryName.replace('\\', '/');
811 index = entryName.lastIndexOf("/");
812 }
813 if (index > 0) {
814 String dirName = entryName.substring(0, index);
815 if (!directoryCache.contains(dirName)) {
816 File directory = new File(workingDirectory.getPath()+File.separator+dirName);
817 if (!(directory.exists() && directory.isDirectory())) {
818 if (!directory.mkdirs()) {
819 throw new ConfigurationException("Unable to make directory '" + dirName +"'. ");
820 }
821 directoryCache.add(dirName);
822 }
823 }
824 }
825
826 if (new File(workingDirectory.getPath()+File.separator+entryName).isDirectory() && new File(workingDirectory.getPath()+File.separator+entryName).exists()) {
827 continue;
828 }
829 BufferedOutputStream bos;
830 try {
831 bos = new BufferedOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+entryName), BUFFER);
832 } catch (FileNotFoundException e) {
833 throw new ConfigurationException("Could not unpack configuration. The file '"+workingDirectory.getPath()+File.separator+entryName+"' cannot be unpacked. ", e);
834 }
835 int n = 0;
836 while ((n = jis.read(readBuffer, 0, BUFFER)) != -1) {
837 bos.write(readBuffer, 0, n);
838 }
839 bos.flush();
840 bos.close();
841 }
842 } catch (IOException e) {
843 throw new ConfigurationException("Could not unpack configuration. ", e);
844 }
845 }
846
847 /**
848 * Returns the name of the configuration directory
849 *
850 * @return the name of the configuration directory
851 */
852 public String getName() {
853 return name;
854 }
855
856 protected void setName(String name) {
857 this.name = name;
858 }
859
860 /**
861 * Returns the type of the configuration directory
862 *
863 * @return the type of the configuration directory
864 */
865 public String getType() {
866 return type;
867 }
868
869 protected void setType(String type) {
870 this.type = type;
871 }
872
873 /**
874 * Returns a file handler object for the working directory
875 *
876 * @return a file handler object for the working directory
877 */
878 public File getWorkingDirectory() {
879 return workingDirectory;
880 }
881
882 /**
883 * Initialize the working directory
884 *
885 * @throws MaltChainedException
886 */
887 public void initWorkingDirectory() throws MaltChainedException {
888 try {
889 initWorkingDirectory(OptionManager.instance().getOptionValue(containerIndex, "config", "workingdir").toString());
890 } catch (NullPointerException e) {
891 throw new ConfigurationException("The configuration cannot be found.", e);
892 }
893 }
894
895 /**
896 * Initialize the working directory according to the path. If the path is equals to "user.dir" or current directory, then the current directory
897 * will be the working directory.
898 *
899 * @param pathPrefixString the path to the working directory
900 * @throws MaltChainedException
901 */
902 public void initWorkingDirectory(String pathPrefixString) throws MaltChainedException {
903 if (pathPrefixString == null || pathPrefixString.equalsIgnoreCase("user.dir") || pathPrefixString.equalsIgnoreCase(".")) {
904 workingDirectory = new File(System.getProperty("user.dir"));
905 } else {
906 workingDirectory = new File(pathPrefixString);
907 }
908
909 if (workingDirectory == null || !workingDirectory.isDirectory()) {
910 new ConfigurationException("The specified working directory '"+pathPrefixString+"' is not a directory. ");
911 }
912 }
913
914 /**
915 * Returns the URL to the malt configuration file (.mco)
916 *
917 * @return the URL to the malt configuration file (.mco)
918 */
919 public URL getUrl() {
920 return url;
921 }
922
923 protected void setUrl(URL url) {
924 this.url = url;
925 }
926
927 /**
928 * Returns the option container index
929 *
930 * @return the option container index
931 */
932 public int getContainerIndex() {
933 return containerIndex;
934 }
935
936 /**
937 * Sets the option container index
938 *
939 * @param containerIndex a option container index
940 */
941 public void setContainerIndex(int containerIndex) {
942 this.containerIndex = containerIndex;
943 }
944
945 /**
946 * Returns the version number of MaltParser which created the malt configuration file (.mco)
947 *
948 * @return the version number of MaltParser which created the malt configuration file (.mco)
949 */
950 public String getCreatedByMaltParserVersion() {
951 return createdByMaltParserVersion;
952 }
953
954 /**
955 * Sets the version number of MaltParser which created the malt configuration file (.mco)
956 *
957 * @param createdByMaltParserVersion a version number of MaltParser
958 */
959 public void setCreatedByMaltParserVersion(String createdByMaltParserVersion) {
960 this.createdByMaltParserVersion = createdByMaltParserVersion;
961 }
962
963 public void initCreatedByMaltParserVersionFromInfoFile() throws MaltChainedException {
964 try {
965 BufferedReader br = new BufferedReader(getInputStreamReaderFromConfigFileEntry(getName()+"_"+getType()+".info", "UTF-8"));
966 String line = null;
967 while ((line = br.readLine()) != null) {
968 if (line.startsWith("Version: ")) {
969 setCreatedByMaltParserVersion(line.substring(31));
970 break;
971 }
972 }
973 br.close();
974 } catch (FileNotFoundException e) {
975 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e);
976 } catch (IOException e) {
977 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e);
978 }
979 }
980
981 public void versioning() throws MaltChainedException {
982 initCreatedByMaltParserVersionFromInfoFile();
983 SystemLogger.logger().info("\nCurrent version : " + SystemInfo.getVersion() + "\n");
984 SystemLogger.logger().info("Parser model version : " + createdByMaltParserVersion + "\n");
985 if (SystemInfo.getVersion() == null) {
986 throw new ConfigurationException("Couln't determine the version of MaltParser");
987 } else if (createdByMaltParserVersion == null) {
988 throw new ConfigurationException("Couln't determine the version of the parser model");
989 } else if (SystemInfo.getVersion().equals(createdByMaltParserVersion)) {
990 SystemLogger.logger().info("The parser model "+getName()+".mco has already the same version as the current version of MaltParser. \n");
991 return;
992 }
993
994 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
995 File newMcoPath = new File(workingDirectory.getPath()+File.separator+getName()+"."+SystemInfo.getVersion().trim()+".mco");
996 Versioning versioning = new Versioning(name, type, mcoPath, createdByMaltParserVersion);
997 if (!versioning.support(createdByMaltParserVersion)) {
998 SystemLogger.logger().warn("The parser model '"+ name+ ".mco' is created by MaltParser "+getCreatedByMaltParserVersion()+", which cannot be converted to a MaltParser "+SystemInfo.getVersion()+" parser model.\n");
999 SystemLogger.logger().warn("Please retrain the parser model with MaltParser "+SystemInfo.getVersion() +" or download MaltParser "+getCreatedByMaltParserVersion()+" from http://maltparser.org/download.html\n");
1000 return;
1001 }
1002 SystemLogger.logger().info("Converts the parser model '"+ mcoPath.getName()+ "' into '"+newMcoPath.getName()+"'....\n");
1003 copyConfigFile(mcoPath, newMcoPath, versioning);
1004 }
1005
1006 protected void checkNConvertConfigVersion() throws MaltChainedException {
1007 if (createdByMaltParserVersion.startsWith("1.0")) {
1008 SystemLogger.logger().info(" Converts the MaltParser configuration ");
1009 SystemLogger.logger().info("1.0");
1010 SystemLogger.logger().info(" to ");
1011 SystemLogger.logger().info(SystemInfo.getVersion());
1012 SystemLogger.logger().info("\n");
1013 File[] configFiles = configDirectory.listFiles();
1014 for (int i = 0, n = configFiles.length; i < n; i++) {
1015 if (configFiles[i].getName().endsWith(".mod")) {
1016 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0."+configFiles[i].getName()));
1017 }
1018 if (configFiles[i].getName().endsWith(getName()+".dsm")) {
1019 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0.dsm"));
1020 }
1021 if (configFiles[i].getName().equals("savedoptions.sop")) {
1022 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1023 }
1024 if (configFiles[i].getName().equals("symboltables.sym")) {
1025 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"symboltables.sym.old"));
1026 }
1027 }
1028 try {
1029 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1030 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop"));
1031 String line;
1032 while ((line = br.readLine()) != null) {
1033 if (line.startsWith("0\tguide\tprediction_strategy")) {
1034 bw.write("0\tguide\tdecision_settings\tT.TRANS+A.DEPREL\n");
1035 } else {
1036 bw.write(line);
1037 bw.write('\n');
1038 }
1039 }
1040 br.close();
1041 bw.flush();
1042 bw.close();
1043 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete();
1044 } catch (FileNotFoundException e) {
1045 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1046 } catch (IOException e) {
1047 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1048 }
1049 try {
1050 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"symboltables.sym.old"));
1051 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"symboltables.sym"));
1052 String line;
1053 while ((line = br.readLine()) != null) {
1054 if (line.startsWith("AllCombinedClassTable")) {
1055 bw.write("T.TRANS+A.DEPREL\n");
1056 } else {
1057 bw.write(line);
1058 bw.write('\n');
1059 }
1060 }
1061 br.close();
1062 bw.flush();
1063 bw.close();
1064 new File(configDirectory.getPath()+File.separator+"symboltables.sym.old").delete();
1065 } catch (FileNotFoundException e) {
1066 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e);
1067 } catch (IOException e) {
1068 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e);
1069 }
1070 }
1071 if (!createdByMaltParserVersion.startsWith("1.3")) {
1072 SystemLogger.logger().info(" Converts the MaltParser configuration ");
1073 SystemLogger.logger().info(createdByMaltParserVersion);
1074 SystemLogger.logger().info(" to ");
1075 SystemLogger.logger().info(SystemInfo.getVersion());
1076 SystemLogger.logger().info("\n");
1077
1078
1079 new File(configDirectory.getPath()+File.separator+"savedoptions.sop").renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1080 try {
1081 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1082 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop"));
1083 String line;
1084 while ((line = br.readLine()) != null) {
1085 int index = line.indexOf('\t');
1086 int container = 0;
1087 if (index > -1) {
1088 container = Integer.parseInt(line.substring(0,index));
1089 }
1090
1091 if (line.startsWith(container+"\tnivre\tpost_processing")) {
1092 } else if (line.startsWith(container+"\tmalt0.4\tbehavior")) {
1093 if (line.endsWith("true")) {
1094 SystemLogger.logger().info("MaltParser 1.3 doesn't support MaltParser 0.4 emulation.");
1095 br.close();
1096 bw.flush();
1097 bw.close();
1098 deleteConfigDirectory();
1099 System.exit(0);
1100 }
1101 } else if (line.startsWith(container+"\tsinglemalt\tparsing_algorithm")) {
1102 bw.write(container);
1103 bw.write("\tsinglemalt\tparsing_algorithm\t");
1104 if (line.endsWith("NivreStandard")) {
1105 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcStandardFactory");
1106 } else if (line.endsWith("NivreEager")) {
1107 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcEagerFactory");
1108 } else if (line.endsWith("CovingtonNonProjective")) {
1109 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonNonProjFactory");
1110 } else if (line.endsWith("CovingtonProjective")) {
1111 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonProjFactory");
1112 }
1113 bw.write('\n');
1114 } else {
1115 bw.write(line);
1116 bw.write('\n');
1117 }
1118 }
1119 br.close();
1120 bw.flush();
1121 bw.close();
1122 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete();
1123 } catch (FileNotFoundException e) {
1124 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1125 } catch (IOException e) {
1126 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1127 }
1128 }
1129 }
1130
1131 /**
1132 * Terminates the configuration directory
1133 *
1134 * @throws MaltChainedException
1135 */
1136 public void terminate() throws MaltChainedException {
1137 if (infoFile != null) {
1138 try {
1139 infoFile.flush();
1140 infoFile.close();
1141 } catch (IOException e) {
1142 throw new ConfigurationException("Could not close configuration information file. ", e);
1143 }
1144 }
1145 symbolTables = null;
1146 // configuration = null;
1147 }
1148
1149 /* (non-Javadoc)
1150 * @see java.lang.Object#finalize()
1151 */
1152 protected void finalize() throws Throwable {
1153 try {
1154 if (infoFile != null) {
1155 infoFile.flush();
1156 infoFile.close();
1157 }
1158 } finally {
1159 super.finalize();
1160 }
1161 }
1162
1163 public SymbolTableHandler getSymbolTables() {
1164 return symbolTables;
1165 }
1166
1167 public void setSymbolTables(SymbolTableHandler symbolTables) {
1168 this.symbolTables = symbolTables;
1169 }
1170
1171 public DataFormatManager getDataFormatManager() {
1172 return dataFormatManager;
1173 }
1174
1175 public void setDataFormatManager(DataFormatManager dataFormatManager) {
1176 this.dataFormatManager = dataFormatManager;
1177 }
1178
1179 public Set<String> getDataFormatInstanceKeys() {
1180 return dataFormatInstances.keySet();
1181 }
1182
1183 public boolean addDataFormatInstance(String key, DataFormatInstance dataFormatInstance) {
1184 if (!dataFormatInstances.containsKey(key)) {
1185 dataFormatInstances.put(key, dataFormatInstance);
1186 return true;
1187 }
1188 return false;
1189 }
1190
1191 public DataFormatInstance getDataFormatInstance(String key) {
1192 return dataFormatInstances.get(key);
1193 }
1194
1195 public int sizeDataFormatInstance() {
1196 return dataFormatInstances.size();
1197 }
1198
1199 public DataFormatInstance getInputDataFormatInstance() {
1200 return dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName());
1201 }
1202
1203 public URL getInputFormatURL() {
1204 return inputFormatURL;
1205 }
1206
1207 public URL getOutputFormatURL() {
1208 return outputFormatURL;
1209 }
1210
1211
1212 }