SubstrFunctionExtension.java
/*
* Copyright (c) 2016, WSO2 Inc. (http://www.wso2.org) All Rights Reserved.
*
* WSO2 Inc. licenses this file to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file except
* in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.wso2.extension.siddhi.execution.string;
import org.wso2.siddhi.annotation.Example;
import org.wso2.siddhi.annotation.Extension;
import org.wso2.siddhi.annotation.Parameter;
import org.wso2.siddhi.annotation.ReturnAttribute;
import org.wso2.siddhi.annotation.util.DataType;
import org.wso2.siddhi.core.config.SiddhiAppContext;
import org.wso2.siddhi.core.exception.SiddhiAppRuntimeException;
import org.wso2.siddhi.core.executor.ConstantExpressionExecutor;
import org.wso2.siddhi.core.executor.ExpressionExecutor;
import org.wso2.siddhi.core.executor.function.FunctionExecutor;
import org.wso2.siddhi.core.util.config.ConfigReader;
import org.wso2.siddhi.query.api.definition.Attribute;
import org.wso2.siddhi.query.api.exception.SiddhiAppValidationException;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Siddhi function extension {@code str:substr}, which extracts a substring from a source string.
 * <p>
 * Supported call forms:
 * <ul>
 * <li>{@code substr(sourceText, beginIndex)} - everything from {@code beginIndex} to the end</li>
 * <li>{@code substr(sourceText, beginIndex, length)} - {@code length} characters starting at
 * {@code beginIndex}</li>
 * <li>{@code substr(sourceText, regex)} - the first region of the source matched by {@code regex}</li>
 * <li>{@code substr(sourceText, regex, groupNumber)} - the given capturing group of the first match</li>
 * </ul>
 * Accept Type(s): (STRING,INT) or (STRING,INT,INT) or (STRING,STRING) or (STRING,STRING,INT)
 * Return Type(s): STRING
 */
@Extension(
        name = "substr",
        namespace = "str",
        description = "This returns a new string that is a substring of this string",
        parameters = {
                @Parameter(name = "input.string",
                        description = "The input string to be processed.",
                        type = {DataType.STRING}),
                @Parameter(name = "begin.index",
                        description = "Starting index to consider for the substring.",
                        type = {DataType.INT}),
                @Parameter(name = "length",
                        description = "The length of the substring.",
                        type = {DataType.INT}),
                @Parameter(name = "regex",
                        description = "The regular expression that should be matched with the input string..",
                        type = {DataType.STRING}),
                @Parameter(name = "group.number",
                        description = "The regex group number",
                        type = {DataType.INT})
        },
        returnAttributes = @ReturnAttribute(
                description = "This returns a new string that is a substring of the `input.string`.",
                type = {DataType.STRING}),
        examples = {
                @Example(description = "This outputs the substring based on the given `begin.index`. In this " +
                        "scenario, the output is \"efghiJ KLMN\".", syntax = "substr(\"AbCDefghiJ KLMN\", 4)"),
                @Example(description = "This outputs the substring based on the given `begin.index` and length. In " +
                        "this scenario, the output is \"CDef\".", syntax = "substr(\"AbCDefghiJ KLMN\", 2, 4) "),
                @Example(description = "This outputs the substring by applying the regex. In this scenario, the " +
                        "output is \"WSO2D efghiJ KLMN\".", syntax = "substr(\"WSO2D efghiJ KLMN\", '^WSO2(.*)')"),
                // The first (.*) is greedy, so it runs up to the LAST 'A' in the input; group 2 is what follows it.
                @Example(description = "This outputs the substring by applying the regex and considering the " +
                        "`group.number`. In this scenario, the output is \"llo\".",
                        syntax = "substr(\"WSO2 cep WSO2 XX E hi hA WSO2 heAllo\", 'WSO2(.*)A(.*)', 2)")
        }
)
public class SubstrFunctionExtension extends FunctionExecutor {

    Attribute.Type returnType = Attribute.Type.STRING;

    //state-variables
    // True when the regex argument is a constant expression; its compiled pattern is then reused.
    private boolean isRegexConstant = false;
    private String regexConstant;
    private Pattern patternConstant;
    // Which of the four supported call forms this executor was initialised for.
    private SubstrType substrType;

    /**
     * Validates the number and types of the arguments, selects the matching {@link SubstrType} and,
     * when the regex argument is a constant, compiles it once up front.
     *
     * @param attributeExpressionExecutors executors of the function arguments
     * @param configReader                 not used by this extension
     * @param siddhiAppContext             not used by this extension
     * @throws SiddhiAppValidationException if the argument count is not 2 or 3, or an argument has an
     *                                      unsupported type
     */
    @Override
    protected void init(ExpressionExecutor[] attributeExpressionExecutors, ConfigReader configReader,
                        SiddhiAppContext siddhiAppContext) {
        // Validate the count before touching any element; otherwise a zero-argument call would fail
        // with an ArrayIndexOutOfBoundsException instead of a validation error.
        int argumentCount = attributeExpressionExecutors.length;
        if (argumentCount != 2 && argumentCount != 3) {
            throw new SiddhiAppValidationException("Invalid no of Arguments passed to str:substr() function, " +
                    "required 2 or 3, but found "
                    + argumentCount);
        }
        if (attributeExpressionExecutors[0].getReturnType() != Attribute.Type.STRING) {
            throw new SiddhiAppValidationException("Invalid parameter type found for the first argument of " +
                    "str:substr() function, " + "required " + Attribute.Type.STRING + ", but found " +
                    attributeExpressionExecutors[0].getReturnType().toString());
        }
        boolean hasThirdArgument = argumentCount == 3;
        if (hasThirdArgument && attributeExpressionExecutors[2].getReturnType() != Attribute.Type.INT) {
            throw new SiddhiAppValidationException("Invalid parameter type found for the third argument of " +
                    "str:substr() function, " + "required " + Attribute.Type.INT + ", but found " +
                    attributeExpressionExecutors[2].getReturnType().toString());
        }

        ExpressionExecutor secondExecutor = attributeExpressionExecutors[1];
        Attribute.Type secondType = secondExecutor.getReturnType();
        if (secondType == Attribute.Type.INT) {
            substrType = hasThirdArgument ? SubstrType.TWO : SubstrType.ONE;
        } else if (secondType == Attribute.Type.STRING) {
            substrType = hasThirdArgument ? SubstrType.FOUR : SubstrType.THREE;
            cacheConstantRegex(secondExecutor);
        } else {
            throw new SiddhiAppValidationException("Invalid parameter type found for the second argument of " +
                    "str:substr() function, " + "required " + Attribute.Type.STRING + " or " + Attribute.Type.INT +
                    ", but found " + secondType.toString());
        }
    }

    /**
     * Compiles and stores the regex when it is supplied as a constant expression. Does nothing for
     * non-constant executors.
     */
    private void cacheConstantRegex(ExpressionExecutor regexExecutor) {
        if (regexExecutor instanceof ConstantExpressionExecutor) {
            isRegexConstant = true;
            regexConstant = (String) ((ConstantExpressionExecutor) regexExecutor).getValue();
            patternConstant = Pattern.compile(regexConstant);
        }
    }

    /**
     * Computes the substring for one event according to the call form chosen in {@code init}.
     *
     * @param data argument values: source text, then begin index or regex, then optionally length or
     *             group number
     * @return the extracted substring; an empty string when a regex form finds no match. For the
     * group-number form the value is whatever {@link Matcher#group(int)} returns for that group.
     * @throws SiddhiAppRuntimeException if a required argument value is {@code null}
     */
    @Override
    protected Object execute(Object[] data) {
        if (data[0] == null) {
            throw new SiddhiAppRuntimeException("Invalid input given to str:substr() function. " +
                    "First argument cannot be null");
        }
        if (data[1] == null) {
            throw new SiddhiAppRuntimeException("Invalid input given to str:substr() function. " +
                    "Second argument cannot be null");
        }
        String source = (String) data[0];
        switch (substrType) {
            case ONE:
                return source.substring((Integer) data[1]);
            case TWO: {
                requireThirdArgument(data);
                int beginIndex = (Integer) data[1];
                int length = (Integer) data[2];
                return source.substring(beginIndex, beginIndex + length);
            }
            case THREE:
                // Group 0 is the whole matched region.
                return extractGroup(source, data[1], 0);
            case FOUR:
                requireThirdArgument(data);
                return extractGroup(source, data[1], (Integer) data[2]);
            default:
                return "";
        }
    }

    /**
     * Rejects a {@code null} third argument value.
     */
    private void requireThirdArgument(Object[] data) {
        if (data[2] == null) {
            throw new SiddhiAppRuntimeException("Invalid input given to str:substr() function. " +
                    "Third argument cannot be null");
        }
    }

    /**
     * Finds the first match of the regex in {@code source} and returns the requested group, or an
     * empty string when nothing matches. The pre-compiled pattern is used when the regex is constant;
     * otherwise {@code regex} is compiled for this call.
     */
    private String extractGroup(String source, Object regex, int groupNumber) {
        Pattern pattern = isRegexConstant ? patternConstant : Pattern.compile((String) regex);
        Matcher matcher = pattern.matcher(source);
        return matcher.find() ? matcher.group(groupNumber) : "";
    }

    @Override
    protected Object execute(Object data) {
        return null; //Since the substr function takes in at least 2 parameters, this method does not get called.
        // Hence, not implemented.
    }

    @Override
    public void start() {
        //Nothing to start.
    }

    @Override
    public void stop() {
        //Nothing to stop.
    }

    @Override
    public Attribute.Type getReturnType() {
        return returnType;
    }

    @Override
    public Map<String, Object> currentState() {
        return null; //No need to maintain a state.
    }

    @Override
    public void restoreState(Map<String, Object> map) {
        //Nothing to restore since no state is maintained.
    }

    /*
     * Sub-string Types are as follows:
     * ONE: str:substr(<string sourceText> , <int beginIndex>)
     * TWO: str:substr(<string sourceText> , <int beginIndex>, <int length>)
     * THREE: str:substr(<string sourceText> , <string regex>)
     * FOUR: str:substr(<string sourceText> , <string regex>, <int groupNumber>)
     * */
    private enum SubstrType {
        ONE, TWO, THREE, FOUR
    }
}