001 package org.maltparser.core.io.dataformat;
002
003 import java.util.Iterator;
004 import java.util.Map;
005 import java.util.SortedMap;
006 import java.util.SortedSet;
007 import java.util.TreeMap;
008 import java.util.TreeSet;
009
010 import org.maltparser.core.exception.MaltChainedException;
011 import org.maltparser.core.symbol.SymbolTable;
012 import org.maltparser.core.symbol.SymbolTableHandler;
013
014 /**
015 *
016 *
017 * @author Johan Hall
018 * @since 1.0
019 **/
020 public class DataFormatInstance implements Iterable<ColumnDescription> {
021 private final SortedSet<ColumnDescription> columnDescriptions;
022 private SortedMap<String,ColumnDescription> headColumnDescriptions;
023 private SortedMap<String,ColumnDescription> dependencyEdgeLabelColumnDescriptions;
024 private SortedMap<String,ColumnDescription> phraseStructureEdgeLabelColumnDescriptions;
025 private SortedMap<String,ColumnDescription> phraseStructureNodeLabelColumnDescriptions;
026 private SortedMap<String,ColumnDescription> secondaryEdgeLabelColumnDescriptions;
027 private SortedMap<String,ColumnDescription> inputColumnDescriptions;
028 private SortedMap<String,ColumnDescription> ignoreColumnDescriptions;
029
030 private SortedSet<ColumnDescription> headColumnDescriptionSet;
031 private SortedSet<ColumnDescription> dependencyEdgeLabelColumnDescriptionSet;
032 private SortedSet<ColumnDescription> phraseStructureEdgeLabelColumnDescriptionSet;
033 private SortedSet<ColumnDescription> phraseStructureNodeLabelColumnDescriptionSet;
034 private SortedSet<ColumnDescription> secondaryEdgeLabelColumnDescriptionSet;
035 private SortedSet<ColumnDescription> inputColumnDescriptionSet;
036 private SortedSet<ColumnDescription> ignoreColumnDescriptionSet;
037
038 private SortedMap<String,SymbolTable> dependencyEdgeLabelSymbolTables;
039 private SortedMap<String,SymbolTable> phraseStructureEdgeLabelSymbolTables;
040 private SortedMap<String,SymbolTable> phraseStructureNodeLabelSymbolTables;
041 private SortedMap<String,SymbolTable> secondaryEdgeLabelSymbolTables;
042 private SortedMap<String,SymbolTable> inputSymbolTables;
043
044 // Internal
045 private SortedMap<String,ColumnDescription> internalColumnDescriptions;
046 private SortedSet<ColumnDescription> internalColumnDescriptionSet;
047
048 private SymbolTableHandler symbolTables;
049 private DataFormatSpecification dataFormarSpec;
050
051 public DataFormatInstance(Map<String, DataFormatEntry> entries, SymbolTableHandler symbolTables, String nullValueStrategy, DataFormatSpecification spec) throws MaltChainedException {
052 this.columnDescriptions = new TreeSet<ColumnDescription>();
053 this.symbolTables = symbolTables;
054 createColumnDescriptions(entries, nullValueStrategy);
055 setDataFormarSpec(spec);
056 }
057
058 public ColumnDescription addInternalColumnDescription(String name, String category, String type, String defaultOutput, String nullValueStrategy) throws MaltChainedException {
059 if (internalColumnDescriptions == null) {
060 internalColumnDescriptions = new TreeMap<String,ColumnDescription>();
061 internalColumnDescriptionSet = new TreeSet<ColumnDescription>();
062 }
063
064 if (!internalColumnDescriptions.containsKey(name)) {
065 ColumnDescription internalColumn = new ColumnDescription(name, category, type, defaultOutput, symbolTables, nullValueStrategy, true);
066 internalColumnDescriptions.put(name, internalColumn);
067 internalColumnDescriptionSet.add(internalColumn);
068 return internalColumn;
069 } else {
070 return internalColumnDescriptions.get(name);
071 }
072 }
073
074 public ColumnDescription addInternalColumnDescription(String name, int category, int type, String defaultOutput, String nullValueStrategy) throws MaltChainedException {
075 if (internalColumnDescriptions == null) {
076 internalColumnDescriptions = new TreeMap<String,ColumnDescription>();
077 internalColumnDescriptionSet = new TreeSet<ColumnDescription>();
078 }
079
080 if (!internalColumnDescriptions.containsKey(name)) {
081 ColumnDescription internalColumn = new ColumnDescription(name, category, type, defaultOutput, symbolTables, nullValueStrategy, true);
082 internalColumnDescriptions.put(name, internalColumn);
083 internalColumnDescriptionSet.add(internalColumn);
084 return internalColumn;
085 } else {
086 return internalColumnDescriptions.get(name);
087 }
088 }
089
090 public ColumnDescription addInternalColumnDescription(String name, ColumnDescription column) throws MaltChainedException {
091 return addInternalColumnDescription(name, column.getCategory(), column.getType(), column.getDefaultOutput(), column.getNullValueStrategy());
092 }
093
094 private void createColumnDescriptions(Map<String, DataFormatEntry> entries, String nullValueStrategy) throws MaltChainedException {
095 for (DataFormatEntry entry : entries.values()) {
096 columnDescriptions.add(new ColumnDescription(entry.getDataFormatEntryName(), entry.getCategory(), entry.getType(), entry.getDefaultOutput(), symbolTables, nullValueStrategy, false));
097 }
098 }
099
100 public ColumnDescription getColumnDescriptionByName(String name) {
101 for (ColumnDescription column : columnDescriptions) {
102 if (column.getName().equals(name)) {
103 return column;
104 }
105 }
106 if (internalColumnDescriptionSet != null) {
107 for (ColumnDescription internalColumn : internalColumnDescriptionSet) {
108 if (internalColumn.getName().equals(name)) {
109 return internalColumn;
110 }
111 }
112 }
113 return null;
114 }
115
116 // public int getNumberOfColumnDescriptions() {
117 // return columnDescriptions.size();
118 // }
119
120 public Iterator<ColumnDescription> iterator() {
121 return columnDescriptions.iterator();
122 }
123
124 public DataFormatSpecification getDataFormarSpec() {
125 return dataFormarSpec;
126 }
127
128 private void setDataFormarSpec(DataFormatSpecification dataFormarSpec) {
129 this.dataFormarSpec = dataFormarSpec;
130 }
131
132 protected void createHeadColumnDescriptions() {
133 headColumnDescriptions = new TreeMap<String,ColumnDescription>();
134 for (ColumnDescription column : columnDescriptions) {
135 if (column.getCategory() == ColumnDescription.HEAD) {
136 headColumnDescriptions.put(column.getName(), column);
137 }
138 }
139 }
140
141 public ColumnDescription getHeadColumnDescription() {
142 if (headColumnDescriptions == null) {
143 createHeadColumnDescriptions();
144 }
145 return headColumnDescriptions.get(headColumnDescriptions.firstKey());
146 }
147
148 public SortedMap<String,ColumnDescription> getHeadColumnDescriptions() {
149 if (headColumnDescriptions == null) {
150 createHeadColumnDescriptions();
151 }
152 return headColumnDescriptions;
153 }
154
155 protected void createDependencyEdgeLabelSymbolTables() {
156 dependencyEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>();
157 for (ColumnDescription column : columnDescriptions) {
158 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
159 dependencyEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable());
160 }
161 }
162 }
163
164 public SortedMap<String,SymbolTable> getDependencyEdgeLabelSymbolTables() {
165 if (dependencyEdgeLabelSymbolTables == null) {
166 createDependencyEdgeLabelSymbolTables();
167 }
168 return dependencyEdgeLabelSymbolTables;
169 }
170
171 protected void createDependencyEdgeLabelColumnDescriptions() {
172 dependencyEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>();
173 for (ColumnDescription column : columnDescriptions) {
174 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
175 dependencyEdgeLabelColumnDescriptions.put(column.getName(), column);
176 }
177 }
178 }
179
180 public SortedMap<String,ColumnDescription> getDependencyEdgeLabelColumnDescriptions() {
181 if (dependencyEdgeLabelColumnDescriptions == null) {
182 createDependencyEdgeLabelColumnDescriptions();
183 }
184 return dependencyEdgeLabelColumnDescriptions;
185 }
186
187
188
189 protected void createPhraseStructureEdgeLabelSymbolTables() {
190 phraseStructureEdgeLabelSymbolTables = new TreeMap<String, SymbolTable>();
191 for (ColumnDescription column : columnDescriptions) {
192 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
193 phraseStructureEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable());
194 }
195 }
196 }
197
198 public SortedMap<String,SymbolTable> getPhraseStructureEdgeLabelSymbolTables() {
199 if (phraseStructureEdgeLabelSymbolTables == null) {
200 createPhraseStructureEdgeLabelSymbolTables();
201 }
202 return phraseStructureEdgeLabelSymbolTables;
203 }
204
205 protected void createPhraseStructureEdgeLabelColumnDescriptions() {
206 phraseStructureEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>();
207 for (ColumnDescription column : columnDescriptions) {
208 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
209 phraseStructureEdgeLabelColumnDescriptions.put(column.getName(), column);
210 }
211 }
212 }
213
214 public SortedMap<String,ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptions() {
215 if (phraseStructureEdgeLabelColumnDescriptions == null) {
216 createPhraseStructureEdgeLabelColumnDescriptions();
217 }
218 return phraseStructureEdgeLabelColumnDescriptions;
219 }
220
221 protected void createPhraseStructureNodeLabelSymbolTables() {
222 phraseStructureNodeLabelSymbolTables = new TreeMap<String,SymbolTable>();
223 for (ColumnDescription column : columnDescriptions) {
224 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) {
225 phraseStructureNodeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable());
226 }
227 }
228 }
229
230 public SortedMap<String,SymbolTable> getPhraseStructureNodeLabelSymbolTables() {
231 if (phraseStructureNodeLabelSymbolTables == null) {
232 createPhraseStructureNodeLabelSymbolTables();
233 }
234 return phraseStructureNodeLabelSymbolTables;
235 }
236
237 protected void createPhraseStructureNodeLabelColumnDescriptions() {
238 phraseStructureNodeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>();
239 for (ColumnDescription column : columnDescriptions) {
240 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) {
241 phraseStructureNodeLabelColumnDescriptions.put(column.getName(), column);
242 }
243 }
244 }
245
246 public SortedMap<String,ColumnDescription> getPhraseStructureNodeLabelColumnDescriptions() {
247 if (phraseStructureNodeLabelColumnDescriptions == null) {
248 createPhraseStructureNodeLabelColumnDescriptions();
249 }
250 return phraseStructureNodeLabelColumnDescriptions;
251 }
252
253 protected void createSecondaryEdgeLabelSymbolTables() {
254 secondaryEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>();
255 for (ColumnDescription column : columnDescriptions) {
256 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
257 secondaryEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable());
258 }
259 }
260 }
261
262 public SortedMap<String,SymbolTable> getSecondaryEdgeLabelSymbolTables() {
263 if (secondaryEdgeLabelSymbolTables == null) {
264 createSecondaryEdgeLabelSymbolTables();
265 }
266 return secondaryEdgeLabelSymbolTables;
267 }
268
269 protected void createSecondaryEdgeLabelColumnDescriptions() {
270 secondaryEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>();
271 for (ColumnDescription column : columnDescriptions) {
272 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
273 secondaryEdgeLabelColumnDescriptions.put(column.getName(), column);
274 }
275 }
276 }
277
278 public SortedMap<String,ColumnDescription> getSecondaryEdgeLabelColumnDescriptions() {
279 if (secondaryEdgeLabelColumnDescriptions == null) {
280 createSecondaryEdgeLabelColumnDescriptions();
281 }
282 return secondaryEdgeLabelColumnDescriptions;
283 }
284
285 protected void createInputSymbolTables() {
286 inputSymbolTables = new TreeMap<String,SymbolTable>();
287 for (ColumnDescription column : columnDescriptions) {
288 if (column.getCategory() == ColumnDescription.INPUT) {
289 inputSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable());
290 }
291 }
292 }
293
294 public SortedMap<String,SymbolTable> getInputSymbolTables() {
295 if (inputSymbolTables == null) {
296 createInputSymbolTables();
297 }
298 return inputSymbolTables;
299 }
300
301 protected void createInputColumnDescriptions() {
302 inputColumnDescriptions = new TreeMap<String,ColumnDescription>();
303 for (ColumnDescription column : columnDescriptions) {
304 if (column.getCategory() == ColumnDescription.INPUT) {
305 inputColumnDescriptions.put(column.getName(), column);
306 }
307 }
308 }
309
310 public SortedMap<String,ColumnDescription> getInputColumnDescriptions() {
311 if (inputColumnDescriptions == null) {
312 createInputColumnDescriptions();
313 }
314 return inputColumnDescriptions;
315 }
316
317 protected void createIgnoreColumnDescriptions() {
318 ignoreColumnDescriptions = new TreeMap<String,ColumnDescription>();
319 for (ColumnDescription column : columnDescriptions) {
320 if (column.getCategory() == ColumnDescription.IGNORE) {
321 // if (column.getType() == ColumnDescription.IGNORE) {
322 ignoreColumnDescriptions.put(column.getName(), column);
323 }
324 }
325 }
326
327 public SortedMap<String,ColumnDescription> getIgnoreColumnDescriptions() {
328 if (ignoreColumnDescriptions == null) {
329 createIgnoreColumnDescriptions();
330 }
331 return ignoreColumnDescriptions;
332 }
333
334 public SortedSet<ColumnDescription> getHeadColumnDescriptionSet() {
335 if (headColumnDescriptionSet == null) {
336 headColumnDescriptionSet = new TreeSet<ColumnDescription>();
337 for (ColumnDescription column : columnDescriptions) {
338 if (column.getCategory() == ColumnDescription.HEAD) {
339 headColumnDescriptionSet.add(column);
340 }
341 }
342 }
343 return headColumnDescriptionSet;
344 }
345
346 public SortedSet<ColumnDescription> getDependencyEdgeLabelColumnDescriptionSet() {
347 if (dependencyEdgeLabelColumnDescriptionSet == null) {
348 dependencyEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>();
349 for (ColumnDescription column : columnDescriptions) {
350 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
351 dependencyEdgeLabelColumnDescriptionSet.add(column);
352 }
353 }
354 }
355 return dependencyEdgeLabelColumnDescriptionSet;
356 }
357
358 public SortedSet<ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptionSet() {
359 if (phraseStructureEdgeLabelColumnDescriptionSet == null) {
360 phraseStructureEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>();
361 for (ColumnDescription column : columnDescriptions) {
362 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
363 phraseStructureEdgeLabelColumnDescriptionSet.add(column);
364 }
365 }
366 }
367 return phraseStructureEdgeLabelColumnDescriptionSet;
368 }
369
370 public SortedSet<ColumnDescription> getPhraseStructureNodeLabelColumnDescriptionSet() {
371 if (phraseStructureNodeLabelColumnDescriptionSet == null) {
372 phraseStructureNodeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>();
373 for (ColumnDescription column : columnDescriptions) {
374 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) {
375 phraseStructureNodeLabelColumnDescriptionSet.add(column);
376 }
377 }
378 }
379 return phraseStructureNodeLabelColumnDescriptionSet;
380 }
381
382 public SortedSet<ColumnDescription> getSecondaryEdgeLabelColumnDescriptionSet() {
383 if (secondaryEdgeLabelColumnDescriptionSet == null) {
384 secondaryEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>();
385 for (ColumnDescription column : columnDescriptions) {
386 if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL) {
387 secondaryEdgeLabelColumnDescriptionSet.add(column);
388 }
389 }
390 }
391 return secondaryEdgeLabelColumnDescriptionSet;
392 }
393
394 public SortedSet<ColumnDescription> getInputColumnDescriptionSet() {
395 if (inputColumnDescriptionSet == null) {
396 inputColumnDescriptionSet = new TreeSet<ColumnDescription>();
397 for (ColumnDescription column : columnDescriptions) {
398 if (column.getCategory() == ColumnDescription.INPUT) {
399 inputColumnDescriptionSet.add(column);
400 }
401 }
402 }
403 return inputColumnDescriptionSet;
404 }
405
406 public SortedSet<ColumnDescription> getIgnoreColumnDescriptionSet() {
407 if (ignoreColumnDescriptionSet == null) {
408 ignoreColumnDescriptionSet = new TreeSet<ColumnDescription>();
409 for (ColumnDescription column : columnDescriptions) {
410 if (column.getCategory() == ColumnDescription.IGNORE) {
411 ignoreColumnDescriptionSet.add(column);
412 }
413 }
414 }
415 return ignoreColumnDescriptionSet;
416 }
417
418 public SymbolTableHandler getSymbolTables() {
419 return symbolTables;
420 }
421
422 public String toString() {
423 final StringBuilder sb = new StringBuilder();
424 for (ColumnDescription column : columnDescriptions) {
425 sb.append(column);
426 sb.append('\n');
427 }
428 return sb.toString();
429 }
430 }