001 package org.maltparser.core.syntaxgraph.reader;
002
003 import java.io.File;
004
005 import org.maltparser.core.config.ConfigurationDir;
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.flow.FlowChartInstance;
008 import org.maltparser.core.flow.item.ChartItem;
009 import org.maltparser.core.flow.spec.ChartItemSpecification;
010 import org.maltparser.core.helper.URLFinder;
011 import org.maltparser.core.io.dataformat.DataFormatException;
012 import org.maltparser.core.io.dataformat.DataFormatInstance;
013 import org.maltparser.core.io.dataformat.DataFormatManager;
014 import org.maltparser.core.options.OptionManager;
015 import org.maltparser.core.symbol.SymbolTableHandler;
016 import org.maltparser.core.syntaxgraph.TokenStructure;
017
018 public class ReadChartItem extends ChartItem {
019 private String idName;
020 private String inputFormatName;
021 private String inputFileName;
022 private String inputCharSet;
023 private String readerOptions;
024 private int iterations;
025 private Class<? extends SyntaxGraphReader> graphReaderClass;
026
027 private String nullValueStrategy;
028
029 private SyntaxGraphReader reader;
030 private String targetName;
031 private String optiongroupName;
032 private DataFormatInstance inputDataFormatInstance;
033 private TokenStructure cachedGraph = null;
034
035 public ReadChartItem() { super(); }
036
037 public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
038 super.initialize(flowChartinstance, chartItemSpecification);
039
040 for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
041 if (key.equals("id")) {
042 idName = chartItemSpecification.getChartItemAttributes().get(key);
043 } else if (key.equals("target")) {
044 targetName = chartItemSpecification.getChartItemAttributes().get(key);
045 } else if (key.equals("optiongroup")) {
046 optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
047 }
048 }
049
050 if (idName == null) {
051 idName = getChartElement("read").getAttributes().get("id").getDefaultValue();
052 } else if (targetName == null) {
053 targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
054 } else if (optiongroupName == null) {
055 optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
056 }
057
058 setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
059 setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
060 setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
061 setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
062 if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) {
063 setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations"));
064 } else {
065 setIterations(1);
066 }
067 setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
068
069 setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
070
071 initInput(getNullValueStrategy());
072 initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
073 }
074
075 public int preprocess(int signal) throws MaltChainedException {
076 return signal;
077 }
078
079 public int process(int signal) throws MaltChainedException {
080 if (cachedGraph == null) {
081 cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
082 }
083 int prevIterationCounter = reader.getIterationCounter();
084 boolean moreInput = reader.readSentence(cachedGraph);
085 if (!moreInput) {
086 return ChartItem.TERMINATE;
087 } else if (prevIterationCounter < reader.getIterationCounter()) {
088 return ChartItem.NEWITERATION;
089 }
090 return ChartItem.CONTINUE;
091 // return continueNextSentence && moreInput;
092 }
093
094 public int postprocess(int signal) throws MaltChainedException {
095 return signal;
096 }
097
098 public void terminate() throws MaltChainedException {
099 if (reader != null) {
100 reader.close();
101 reader = null;
102 }
103 cachedGraph = null;
104 inputDataFormatInstance = null;
105 }
106
107 public String getInputFormatName() {
108 if (inputFormatName == null) {
109 return "/appdata/dataformat/conllx.xml";
110 }
111 return inputFormatName;
112 }
113
114 public void setInputFormatName(String inputFormatName) {
115 this.inputFormatName = inputFormatName;
116 }
117
118 public String getInputFileName() {
119 if (inputFileName == null) {
120 return "/dev/stdin";
121 }
122 return inputFileName;
123 }
124
125 public void setInputFileName(String inputFileName) {
126 this.inputFileName = inputFileName;
127 }
128
129 public String getInputCharSet() {
130 if (inputCharSet == null) {
131 return "UTF-8";
132 }
133 return inputCharSet;
134 }
135
136 public void setInputCharSet(String inputCharSet) {
137 this.inputCharSet = inputCharSet;
138 }
139
140 public String getReaderOptions() {
141 if (readerOptions == null) {
142 return "";
143 }
144 return readerOptions;
145 }
146
147 public void setReaderOptions(String readerOptions) {
148 this.readerOptions = readerOptions;
149 }
150
151
152 public int getIterations() {
153 return iterations;
154 }
155
156 public void setIterations(int iterations) {
157 this.iterations = iterations;
158 }
159
160 public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
161 return graphReaderClass;
162 }
163
164 public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
165 try {
166 if (graphReaderClass != null) {
167 this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
168 }
169 } catch (ClassCastException e) {
170 throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
171 }
172 }
173
174 public String getNullValueStrategy() {
175 if (nullValueStrategy == null) {
176 return "one";
177 }
178 return nullValueStrategy;
179 }
180
181 public void setNullValueStrategy(String nullValueStrategy) {
182 this.nullValueStrategy = nullValueStrategy;
183 }
184
185 public String getTargetName() {
186 return targetName;
187 }
188
189 public void setTargetName(String targetName) {
190 this.targetName = targetName;
191 }
192
193 public SyntaxGraphReader getReader() {
194 return reader;
195 }
196
197 public DataFormatInstance getInputDataFormatInstance() {
198 return inputDataFormatInstance;
199 }
200
201 public void initInput(String nullValueStategy) throws MaltChainedException {
202 ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
203 DataFormatManager dataFormatManager = configDir.getDataFormatManager();
204 SymbolTableHandler symbolTables = configDir.getSymbolTables();
205 inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy);
206 configDir.addDataFormatInstance(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
207
208 }
209
210 public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
211 try {
212 final URLFinder f = new URLFinder();
213 reader = syntaxGraphReader.newInstance();
214 if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
215 reader.open(System.in, inputCharSet);
216 } else if (new File(inputFile).exists()) {
217 reader.setNIterations(iterations);
218 reader.open(inputFile, inputCharSet);
219 } else {
220 reader.setNIterations(iterations);
221 reader.open(f.findURL(inputFile), inputCharSet);
222 }
223 reader.setDataFormatInstance(inputDataFormatInstance);
224 reader.setOptions(readerOptions);
225 } catch (InstantiationException e) {
226 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
227 } catch (IllegalAccessException e) {
228 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
229 }
230 }
231
232 public boolean equals(Object obj) {
233 if (this == obj)
234 return true;
235 if (obj == null)
236 return false;
237 if (getClass() != obj.getClass())
238 return false;
239 return obj.toString().equals(this.toString());
240 }
241
242 public int hashCode() {
243 return 217 + (null == toString() ? 0 : toString().hashCode());
244 }
245
246 public String toString() {
247 final StringBuilder sb = new StringBuilder();
248 sb.append(" read ");
249 sb.append("id:");sb.append(idName);
250 sb.append(' ');
251 sb.append("target:");
252 sb.append(targetName);
253 sb.append(' ');
254 sb.append("optiongroup:");
255 sb.append(optiongroupName);
256 return sb.toString();
257 }
258 }