package org.apache.nifi.processors.standard;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.util.StringUtils;

@CapabilityDescription("Counts various metrics on incoming text. The requested results will be recorded as attributes. The resulting flowfile will not have its content modified.")
@SupportsBatching
@WritesAttributes({@WritesAttribute(attribute = "text.line.count", description = "The number of lines of text present in the FlowFile content"), @WritesAttribute(attribute = CountText.TEXT_LINE_NONEMPTY_COUNT, description = "The number of lines of text (with at least one non-whitespace character) present in the original FlowFile"), @WritesAttribute(attribute = CountText.TEXT_WORD_COUNT, description = "The number of words present in the original FlowFile"), @WritesAttribute(attribute = CountText.TEXT_CHARACTER_COUNT, description = "The number of characters (given the specified character encoding) present in the original FlowFile")})
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"count", "text", "line", "word", "character"})
@SeeAlso({SplitText.class})
@SideEffectFree
/* loaded from: input_file:org/apache/nifi/processors/standard/CountText.class */
public class CountText extends AbstractProcessor {
    public static final String TEXT_LINE_COUNT = "text.line.count";
    public static final String TEXT_LINE_NONEMPTY_COUNT = "text.line.nonempty.count";
    public static final String TEXT_WORD_COUNT = "text.word.count";
    public static final String TEXT_CHARACTER_COUNT = "text.character.count";
    private volatile boolean countLines;
    private volatile boolean countLinesNonEmpty;
    private volatile boolean countWords;
    private volatile boolean countCharacters;
    private volatile boolean splitWordsOnSymbols;
    private volatile boolean adjustImmediately;
    private volatile String characterEncoding = StandardCharsets.UTF_8.name();
    private static final List<Charset> STANDARD_CHARSETS = Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.US_ASCII, StandardCharsets.ISO_8859_1, StandardCharsets.UTF_16, StandardCharsets.UTF_16LE, StandardCharsets.UTF_16BE);
    private static final Pattern SYMBOL_PATTERN = Pattern.compile("[\\s-\\._]");
    private static final Pattern WHITESPACE_ONLY_PATTERN = Pattern.compile("\\s");
    public static final PropertyDescriptor TEXT_LINE_COUNT_PD = new PropertyDescriptor.Builder().name("text-line-count").displayName("Count Lines").description("If enabled, will count the number of lines present in the incoming text.").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("true").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    public static final PropertyDescriptor TEXT_LINE_NONEMPTY_COUNT_PD = new PropertyDescriptor.Builder().name("text-line-nonempty-count").displayName("Count Non-Empty Lines").description("If enabled, will count the number of lines that contain a non-whitespace character present in the incoming text.").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("false").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    public static final PropertyDescriptor TEXT_WORD_COUNT_PD = new PropertyDescriptor.Builder().name("text-word-count").displayName("Count Words").description("If enabled, will count the number of words (alphanumeric character groups bounded by whitespace) present in the incoming text. Common logical delimiters [_-.] do not bound a word unless 'Split Words on Symbols' is true.").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("false").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    public static final PropertyDescriptor TEXT_CHARACTER_COUNT_PD = new PropertyDescriptor.Builder().name("text-character-count").displayName("Count Characters").description("If enabled, will count the number of characters (including whitespace and symbols, but not including newlines and carriage returns) present in the incoming text.").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("false").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    public static final PropertyDescriptor SPLIT_WORDS_ON_SYMBOLS_PD = new PropertyDescriptor.Builder().name("split-words-on-symbols").displayName("Split Words on Symbols").description("If enabled, the word count will identify strings separated by common logical delimiters [ _ - . ] as independent words (ex. split-words-on-symbols = 4 words).").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("false").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    public static final PropertyDescriptor CHARACTER_ENCODING_PD = new PropertyDescriptor.Builder().name("character-encoding").displayName("Character Encoding").description("Specifies a character encoding to use.").required(true).allowableValues(getStandardCharsetNames()).defaultValue(StandardCharsets.UTF_8.displayName()).build();
    public static final PropertyDescriptor ADJUST_IMMEDIATELY = new PropertyDescriptor.Builder().name("ajust-immediately").displayName("Call Immediate Adjustment").description("If true, the counter will be updated immediately, without regard to whether the ProcessSession is commit or rolled back;otherwise, the counter will be incremented only if and when the ProcessSession is committed.").required(true).allowableValues(new String[]{"true", "false"}).defaultValue("false").addValidator(StandardValidators.BOOLEAN_VALIDATOR).build();
    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").description("The flowfile contains the original content with one or more attributes added containing the respective counts").build();
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description("If the flowfile text cannot be counted for some reason, the original file will be routed to this destination and nothing will be routed elsewhere").build();
    private static final List<PropertyDescriptor> properties = Collections.unmodifiableList(Arrays.asList(TEXT_LINE_COUNT_PD, TEXT_LINE_NONEMPTY_COUNT_PD, TEXT_WORD_COUNT_PD, TEXT_CHARACTER_COUNT_PD, SPLIT_WORDS_ON_SYMBOLS_PD, CHARACTER_ENCODING_PD, ADJUST_IMMEDIATELY));
    private static final Set<Relationship> relationships = Collections.unmodifiableSet(new HashSet(Arrays.asList(REL_SUCCESS, REL_FAILURE)));

    private static Set<String> getStandardCharsetNames() {
        return (Set) STANDARD_CHARSETS.stream().map(charset -> {
            return charset.displayName();
        }).collect(Collectors.toSet());
    }

    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @OnScheduled
    public void onSchedule(ProcessContext processContext) {
        this.countLines = processContext.getProperty(TEXT_LINE_COUNT_PD).isSet() ? processContext.getProperty(TEXT_LINE_COUNT_PD).asBoolean().booleanValue() : false;
        this.countLinesNonEmpty = processContext.getProperty(TEXT_LINE_NONEMPTY_COUNT_PD).isSet() ? processContext.getProperty(TEXT_LINE_NONEMPTY_COUNT_PD).asBoolean().booleanValue() : false;
        this.countWords = processContext.getProperty(TEXT_WORD_COUNT_PD).isSet() ? processContext.getProperty(TEXT_WORD_COUNT_PD).asBoolean().booleanValue() : false;
        this.countCharacters = processContext.getProperty(TEXT_CHARACTER_COUNT_PD).isSet() ? processContext.getProperty(TEXT_CHARACTER_COUNT_PD).asBoolean().booleanValue() : false;
        this.splitWordsOnSymbols = processContext.getProperty(SPLIT_WORDS_ON_SYMBOLS_PD).isSet() ? processContext.getProperty(SPLIT_WORDS_ON_SYMBOLS_PD).asBoolean().booleanValue() : false;
        this.adjustImmediately = processContext.getProperty(ADJUST_IMMEDIATELY).isSet() ? processContext.getProperty(ADJUST_IMMEDIATELY).asBoolean().booleanValue() : false;
        this.characterEncoding = processContext.getProperty(CHARACTER_ENCODING_PD).getValue();
    }

    public void onTrigger(ProcessContext processContext, ProcessSession processSession) throws ProcessException {
        FlowFile flowFile = processSession.get();
        if (flowFile == null) {
            return;
        }
        AtomicBoolean atomicBoolean = new AtomicBoolean();
        AtomicInteger atomicInteger = new AtomicInteger(0);
        AtomicInteger atomicInteger2 = new AtomicInteger(0);
        AtomicInteger atomicInteger3 = new AtomicInteger(0);
        AtomicInteger atomicInteger4 = new AtomicInteger(0);
        processSession.read(flowFile, inputStream -> {
            long nanoTime = System.nanoTime();
            try {
                BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, this.characterEncoding));
                while (true) {
                    String readLine = bufferedReader.readLine();
                    if (readLine == null) {
                        break;
                    }
                    if (this.countLines) {
                        atomicInteger.incrementAndGet();
                    }
                    if (this.countLinesNonEmpty && readLine.trim().length() > 0) {
                        atomicInteger2.incrementAndGet();
                    }
                    if (this.countWords) {
                        atomicInteger3.addAndGet(countWordsInLine(readLine, this.splitWordsOnSymbols));
                    }
                    if (this.countCharacters) {
                        atomicInteger4.addAndGet(readLine.length());
                    }
                }
                long nanoTime2 = System.nanoTime();
                if (getLogger().isDebugEnabled()) {
                    long j = nanoTime2 - nanoTime;
                    getLogger().debug("Computed metrics in {} nanoseconds ({} seconds).", new Object[]{Long.valueOf(j), new DecimalFormat("#.###").format(j / 1.0E9d)});
                }
                if (getLogger().isInfoEnabled()) {
                    getLogger().info(generateMetricsMessage(atomicInteger.get(), atomicInteger2.get(), atomicInteger3.get(), atomicInteger4.get()));
                }
                processSession.adjustCounter("Lines Counted", atomicInteger.get(), this.adjustImmediately);
                processSession.adjustCounter("Lines (non-empty) Counted", atomicInteger2.get(), this.adjustImmediately);
                processSession.adjustCounter("Words Counted", atomicInteger3.get(), this.adjustImmediately);
                processSession.adjustCounter("Characters Counted", atomicInteger4.get(), this.adjustImmediately);
            } catch (IOException e) {
                atomicBoolean.set(true);
                getLogger().error("Routing to failure.", e);
            }
        });
        if (atomicBoolean.get()) {
            processSession.transfer(flowFile, REL_FAILURE);
            return;
        }
        HashMap hashMap = new HashMap();
        if (this.countLines) {
            hashMap.put("text.line.count", String.valueOf(atomicInteger.get()));
        }
        if (this.countLinesNonEmpty) {
            hashMap.put(TEXT_LINE_NONEMPTY_COUNT, String.valueOf(atomicInteger2.get()));
        }
        if (this.countWords) {
            hashMap.put(TEXT_WORD_COUNT, String.valueOf(atomicInteger3.get()));
        }
        if (this.countCharacters) {
            hashMap.put(TEXT_CHARACTER_COUNT, String.valueOf(atomicInteger4.get()));
        }
        processSession.transfer(processSession.putAllAttributes(flowFile, hashMap), REL_SUCCESS);
    }

    private String generateMetricsMessage(int i, int i2, int i3, int i4) {
        StringBuilder sb = new StringBuilder("Counted ");
        ArrayList arrayList = new ArrayList();
        if (this.countLines) {
            arrayList.add(i + " lines");
        }
        if (this.countLinesNonEmpty) {
            arrayList.add(i2 + " non-empty lines");
        }
        if (this.countWords) {
            arrayList.add(i3 + " words");
        }
        if (this.countCharacters) {
            arrayList.add(i4 + " characters");
        }
        sb.append(StringUtils.join(arrayList, ", "));
        return sb.toString();
    }

    int countWordsInLine(String str, boolean z) throws IOException {
        if (str == null || str.trim().length() == 0) {
            return 0;
        }
        Stream<String> filter = (z ? SYMBOL_PATTERN : WHITESPACE_ONLY_PATTERN).splitAsStream(str).filter(str2 -> {
            return !str2.trim().isEmpty();
        });
        if (!getLogger().isDebugEnabled()) {
            return Math.toIntExact(filter.count());
        }
        List list = (List) filter.collect(Collectors.toList());
        getLogger().debug("Split [{}] to [{}] ({})", new Object[]{str, StringUtils.join(list, ", "), Integer.valueOf(list.size())});
        return Math.toIntExact(list.size());
    }

    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }
}
