001 package org.maltparser.core.io.dataformat;
002
003 import java.net.URL;
004 import java.util.LinkedHashMap;
005 import java.util.Map;
006
007 import javax.xml.parsers.DocumentBuilder;
008 import javax.xml.parsers.DocumentBuilderFactory;
009 import javax.xml.parsers.ParserConfigurationException;
010
011 import org.maltparser.core.exception.MaltChainedException;
012 import org.maltparser.core.helper.HashSet;
013 import org.maltparser.core.helper.URLFinder;
014 import org.maltparser.core.symbol.SymbolTableHandler;
015 import org.w3c.dom.Element;
016 import org.w3c.dom.NodeList;
017 import org.xml.sax.SAXException;
018
019 /**
020 *
021 *
022 * @author Johan Hall
023 * @since 1.0
024 **/
025 public class DataFormatSpecification {
026 public enum DataStructure {
027 DEPENDENCY, // Dependency structure
028 PHRASE, // Phrase structure
029 };
030 // private int entryPositionCounter;
031 private String dataFormatName;
032 private DataStructure dataStructure;
033 private final Map<String, DataFormatEntry> entries;
034 private final HashSet<Dependency> dependencies;
035 // private final HashSet<SyntaxGraphReader> supportedReaders;
036 // private final HashSet<SyntaxGraphWriter> supportedWriters;
037
038 public DataFormatSpecification() {
039 entries = new LinkedHashMap<String, DataFormatEntry>();
040 // entryPositionCounter = 0;
041 dependencies = new HashSet<Dependency>();
042 // supportedReaders = new HashSet<SyntaxGraphReader>();
043 // supportedWriters = new HashSet<SyntaxGraphWriter>();
044 }
045
046 public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException {
047 return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this);
048
049 }
050
051 public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
052 final URLFinder f = new URLFinder();
053 URL url = f.findURL(fileName);
054 if (url == null) {
055 throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
056 }
057 parseDataFormatXMLfile(url);
058 }
059
060 public HashSet<Dependency> getDependencies() {
061 return dependencies;
062 }
063
064 public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
065 if (url == null) {
066 throw new DataFormatException("The data format specifcation file cannot be found. ");
067 }
068
069 try {
070 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
071 DocumentBuilder db = dbf.newDocumentBuilder();
072
073 Element root = db.parse(url.openStream()).getDocumentElement();
074 if (root.getNodeName().equals("dataformat")) {
075 dataFormatName = root.getAttribute("name");
076 if (root.getAttribute("datastructure").length() > 0) {
077 dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
078 } else {
079 dataStructure = DataStructure.DEPENDENCY;
080 }
081 } else {
082 throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
083 }
084 NodeList cols = root.getElementsByTagName("column");
085 Element col = null;
086 for (int i = 0, n = cols.getLength(); i < n; i++) {
087 col = (Element)cols.item(i);
088 DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
089 entries.put(entry.getDataFormatEntryName(), entry);
090 }
091 NodeList deps = root.getElementsByTagName("dependencies");
092 if (deps.getLength() > 0) {
093 NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
094 for (int i = 0, n = dep.getLength(); i < n; i++) {
095 Element e = (Element)dep.item(i);
096 dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
097 }
098 }
099 } catch (java.io.IOException e) {
100 throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
101 } catch (ParserConfigurationException e) {
102 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
103 } catch (SAXException e) {
104 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
105 }
106 }
107
108 public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
109 DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput);
110 entries.put(entry.getDataFormatEntryName(), entry);
111 }
112
113 public DataFormatEntry getEntry(String dataFormatEntryName) {
114 return entries.get(dataFormatEntryName);
115 }
116
117 public String getDataFormatName() {
118 return dataFormatName;
119 }
120
121 public DataStructure getDataStructure() {
122 return dataStructure;
123 }
124
125 public String toString() {
126 final StringBuilder sb = new StringBuilder();
127 sb.append("Data format specification: ");
128 sb.append(dataFormatName);
129 sb.append('\n');
130 for (DataFormatEntry dfe : entries.values()) {
131 sb.append(dfe);
132 sb.append('\n');
133 }
134 return sb.toString();
135 }
136
137 public class Dependency {
138 protected String dependentOn;
139 protected String urlString;
140 protected String map;
141 protected String mapUrl;
142
143 public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
144 setDependentOn(dependentOn);
145 setUrlString(urlString);
146 setMap(map);
147 setMapUrl(mapUrl);
148 }
149
150 public String getDependentOn() {
151 return dependentOn;
152 }
153 protected void setDependentOn(String dependentOn) {
154 this.dependentOn = dependentOn;
155 }
156
157 public String getUrlString() {
158 return urlString;
159 }
160
161 public void setUrlString(String urlString) {
162 this.urlString = urlString;
163 }
164
165 public String getMap() {
166 return map;
167 }
168 protected void setMap(String map) {
169 this.map = map;
170 }
171
172 public String getMapUrl() {
173 return mapUrl;
174 }
175
176 public void setMapUrl(String mapUrl) {
177 this.mapUrl = mapUrl;
178 }
179
180 @Override
181 public String toString() {
182 return "Dependency [dependentOn=" + dependentOn + ", map=" + map
183 + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]";
184 }
185 }
186 }