001 package org.maltparser.core.feature.map;
002
003 import java.util.regex.Pattern;
004 import java.util.regex.PatternSyntaxException;
005
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.feature.FeatureException;
008 import org.maltparser.core.feature.function.FeatureFunction;
009 import org.maltparser.core.feature.function.FeatureMapFunction;
010 import org.maltparser.core.feature.value.FeatureValue;
011 import org.maltparser.core.feature.value.FunctionValue;
012 import org.maltparser.core.feature.value.MultipleFeatureValue;
013 import org.maltparser.core.feature.value.SingleFeatureValue;
014 import org.maltparser.core.io.dataformat.ColumnDescription;
015 import org.maltparser.core.io.dataformat.DataFormatInstance;
016 import org.maltparser.core.symbol.SymbolTable;
017 import org.maltparser.core.symbol.SymbolTableHandler;
018
019 /**
020 *
021 *
022 * @author Johan Hall
023 */
024 public class SplitFeature implements FeatureMapFunction {
025 protected FeatureFunction parentFeature;
026 protected MultipleFeatureValue multipleFeatureValue;
027 protected DataFormatInstance dataFormatInstance;
028 protected ColumnDescription column;
029 protected SymbolTable table;
030 protected String separators;
031 protected Pattern separatorsPattern;
032
033 public SplitFeature(DataFormatInstance dataFormatInstance) throws MaltChainedException {
034 super();
035 setDataFormatInstance(dataFormatInstance);
036 multipleFeatureValue = new MultipleFeatureValue(this);
037 }
038
039 public void initialize(Object[] arguments) throws MaltChainedException {
040 if (arguments.length != 2) {
041 throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
042 }
043 if (!(arguments[0] instanceof FeatureFunction)) {
044 throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
045 }
046 if (!(arguments[1] instanceof String)) {
047 throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
048 }
049 setParentFeature((FeatureFunction)arguments[0]);
050 setSeparators((String)arguments[1]);
051 ColumnDescription parentColumn = dataFormatInstance.getColumnDescriptionByName(parentFeature.getSymbolTable().getName());
052 if (parentColumn.getType() != ColumnDescription.STRING) {
053 throw new FeatureException("Could not initialize SplitFeature: the first argument must be a string. ");
054 }
055 setColumn(dataFormatInstance.addInternalColumnDescription("SPLIT_"+parentFeature.getSymbolTable().getName(), parentColumn));
056 setSymbolTable(column.getSymbolTable());
057 // setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
058 }
059
060 public Class<?>[] getParameterTypes() {
061 Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class };
062 return paramTypes;
063 }
064
065 public FeatureValue getFeatureValue() {
066 return multipleFeatureValue;
067 }
068
069 public String getSymbol(int code) throws MaltChainedException {
070 return table.getSymbolCodeToString(code);
071 }
072
073 public int getCode(String symbol) throws MaltChainedException {
074 return table.getSymbolStringToCode(symbol);
075 }
076
077 public void update() throws MaltChainedException {
078 multipleFeatureValue.reset();
079 parentFeature.update();
080 FunctionValue value = parentFeature.getFeatureValue();
081 if (value instanceof SingleFeatureValue) {
082 String symbol = ((SingleFeatureValue)value).getSymbol();
083 if (((FeatureValue)value).isNullValue()) {
084 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol);
085 multipleFeatureValue.setNullValue(true);
086 } else {
087 String items[];
088 try {
089 items = separatorsPattern.split(symbol);
090 } catch (PatternSyntaxException e) {
091 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'",e);
092 }
093 for (int i = 0; i < items.length; i++) {
094 if (items[i].length() > 0) {
095 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
096 }
097 }
098 multipleFeatureValue.setNullValue(false);
099 }
100 } else if (value instanceof MultipleFeatureValue) {
101 if (((MultipleFeatureValue)value).isNullValue()) {
102 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue)value).getFirstSymbol()), ((MultipleFeatureValue)value).getFirstSymbol());
103 multipleFeatureValue.setNullValue(true);
104 } else {
105 for (String symbol : ((MultipleFeatureValue)value).getSymbols()) {
106 String items[];
107 try {
108 items = separatorsPattern.split(symbol);
109 } catch (PatternSyntaxException e) {
110 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'", e);
111 }
112 for (int i = 0; i < items.length; i++) {
113 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
114 }
115 multipleFeatureValue.setNullValue(false);
116 }
117 }
118 }
119 }
120
121 public void updateCardinality() throws MaltChainedException {
122 // parentFeature.updateCardinality();
123 // multipleFeatureValue.setCardinality(table.getValueCounter());
124 }
125
126 public boolean equals(Object obj) {
127 if (this == obj)
128 return true;
129 if (obj == null)
130 return false;
131 if (getClass() != obj.getClass())
132 return false;
133 return obj.toString().equals(this.toString());
134 }
135
136 public FeatureFunction getParentFeature() {
137 return parentFeature;
138 }
139
140 public void setParentFeature(FeatureFunction parentFeature) {
141 this.parentFeature = parentFeature;
142 }
143
144 public String getSeparators() {
145 return separators;
146 }
147
148 public void setSeparators(String separators) {
149 this.separators = separators;
150 separatorsPattern = Pattern.compile(separators);
151 }
152
153 public SymbolTable getSymbolTable() {
154 return table;
155 }
156
157 public void setSymbolTable(SymbolTable table) {
158 this.table = table;
159 }
160
161 public SymbolTableHandler getTableHandler() {
162 return dataFormatInstance.getSymbolTables();
163 }
164
165 public DataFormatInstance getDataFormatInstance() {
166 return dataFormatInstance;
167 }
168
169 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
170 this.dataFormatInstance = dataFormatInstance;
171 }
172
173 public ColumnDescription getColumn() {
174 return column;
175 }
176
177 protected void setColumn(ColumnDescription column) {
178 this.column = column;
179 }
180
181 public int getType() {
182 return column.getType();
183 }
184
185 public String getMapIdentifier() {
186 return getSymbolTable().getName();
187 }
188
189 public String toString() {
190 final StringBuilder sb = new StringBuilder();
191 sb.append("Split(");
192 sb.append(parentFeature.toString());
193 sb.append(", ");
194 sb.append(separators);
195 sb.append(')');
196 return sb.toString();
197 }
198 }
199