package org.wso2.extension.siddhi.execution.tokenizer;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.wso2.siddhi.annotation.Example;
import org.wso2.siddhi.annotation.Extension;
import org.wso2.siddhi.annotation.Parameter;
import org.wso2.siddhi.annotation.util.DataType;
import org.wso2.siddhi.core.config.SiddhiAppContext;
import org.wso2.siddhi.core.event.ComplexEventChunk;
import org.wso2.siddhi.core.event.stream.StreamEvent;
import org.wso2.siddhi.core.event.stream.StreamEventCloner;
import org.wso2.siddhi.core.event.stream.populater.ComplexEventPopulater;
import org.wso2.siddhi.core.exception.SiddhiAppCreationException;
import org.wso2.siddhi.core.executor.ExpressionExecutor;
import org.wso2.siddhi.core.query.processor.Processor;
import org.wso2.siddhi.core.query.processor.stream.StreamProcessor;
import org.wso2.siddhi.core.util.config.ConfigReader;
import org.wso2.siddhi.query.api.definition.AbstractDefinition;
import org.wso2.siddhi.query.api.definition.Attribute;

@Extension(name = "tokenize", namespace = "text", description = "This splits a string into words", parameters = {@Parameter(name = "text", description = "The input text which should be split.", type = {DataType.STRING})}, examples = {@Example(syntax = "define stream inputStream (text string);\n@info(name = 'query1')\nfrom inputStream#text:tokenize(text)\nselect text\ninsert into outputStream;", description = "This query performs tokenization for the given string.")})
/* loaded from: input_file:org/wso2/extension/siddhi/execution/tokenizer/TweetTextTokenizer.class */
public class TweetTextTokenizer extends StreamProcessor {
    private static final Logger log = Logger.getLogger(TweetTextTokenizer.class);
    private List<String> wordList = new ArrayList();

    protected void process(ComplexEventChunk<StreamEvent> complexEventChunk, Processor processor, StreamEventCloner streamEventCloner, ComplexEventPopulater complexEventPopulater) {
        Pattern compile = Pattern.compile("[\\s+'“”‘’\\\".?!,:;&]|[<>«»{}\\(\\)\\[\\]]|\\d+:\\d+|\\d+\\.\\d+|[♫♪]+");
        String str = "(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]|@(.*)|#(.*)|[0-9]+|‼|…";
        while (complexEventChunk.hasNext()) {
            StreamEvent next = complexEventChunk.next();
            for (String str2 : compile.split(removeEmojis((String) this.attributeExpressionExecutors[0].execute(next)).replaceAll(str, ""))) {
                if (!str2.equals("") && isMeaningful(str2)) {
                    complexEventPopulater.populateComplexEvent(next, new Object[]{str2});
                    processor.process(complexEventChunk);
                }
            }
        }
    }

    protected List<Attribute> init(AbstractDefinition abstractDefinition, ExpressionExecutor[] expressionExecutorArr, ConfigReader configReader, SiddhiAppContext siddhiAppContext) {
        if (expressionExecutorArr.length != 1) {
            throw new IllegalArgumentException("Invalid no of arguments passed to text:tokenize() function, required 1, but found " + expressionExecutorArr.length);
        }
        if (expressionExecutorArr[0].getReturnType() != Attribute.Type.STRING) {
            throw new SiddhiAppCreationException("Text should be of type string. But found " + expressionExecutorArr[0].getReturnType());
        }
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(TweetTextTokenizer.class.getResourceAsStream("/words.csv"), StandardCharsets.UTF_8));
            Throwable th = null;
            while (true) {
                try {
                    try {
                        String readLine = bufferedReader.readLine();
                        if (readLine == null) {
                            break;
                        }
                        this.wordList.add(readLine);
                    } finally {
                    }
                } catch (Throwable th2) {
                    if (bufferedReader != null) {
                        if (th != null) {
                            try {
                                bufferedReader.close();
                            } catch (Throwable th3) {
                                th.addSuppressed(th3);
                            }
                        } else {
                            bufferedReader.close();
                        }
                    }
                    throw th2;
                }
            }
            if (bufferedReader != null) {
                if (0 != 0) {
                    try {
                        bufferedReader.close();
                    } catch (Throwable th4) {
                        th.addSuppressed(th4);
                    }
                } else {
                    bufferedReader.close();
                }
            }
        } catch (FileNotFoundException e) {
            log.error("File is not found : " + e.getMessage());
        } catch (IOException e2) {
            log.error("Error occurred while reading file : " + e2.getMessage());
        }
        ArrayList arrayList = new ArrayList();
        arrayList.add(new Attribute("token", Attribute.Type.STRING));
        return arrayList;
    }

    public void start() {
    }

    public void stop() {
    }

    public Map<String, Object> currentState() {
        return null;
    }

    public void restoreState(Map<String, Object> map) {
    }

    private boolean isMeaningful(String str) {
        Iterator<String> it = this.wordList.iterator();
        while (it.hasNext()) {
            if (it.next().equalsIgnoreCase(str)) {
                return false;
            }
        }
        return true;
    }

    private String removeEmojis(String str) {
        return Pattern.compile("[��-��]|[��-��]|[☀-⟿]", 194).matcher(str).replaceAll("");
    }
}
