package org.apache.nifi.processors.standard;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.flowfile.attributes.FragmentAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.stream.io.util.TextLineDemarcator;

/**
 * Processor that splits the text content of an incoming FlowFile into several
 * FlowFiles on line boundaries, bounded by a line count and/or a maximum
 * fragment size, optionally repeating header lines at the start of each split.
 *
 * <p>The attribute names listed in {@code @WritesAttributes} must match the keys
 * actually written by {@code updateAttributes}; in particular the original
 * filename attribute is documented without any surrounding whitespace.</p>
 */
@CapabilityDescription("Splits a text file into multiple smaller text files on line boundaries limited by maximum number of lines or total size of fragment. Each output split file will contain no more than the configured number of lines or bytes. If both Line Split Count and Maximum Fragment Size are specified, the split occurs at whichever limit is reached first. If the first line of a fragment exceeds the Maximum Fragment Size, that line will be output in a single split file which exceeds the configured maximum size limit. This component also allows one to specify that each split should include a header lines. Header lines can be computed by either specifying the amount of lines that should constitute a header or by using header marker to match against the read lines. If such match happens then the corresponding line will be treated as header. Keep in mind that upon the first failure of header marker match, no more matches will be performed and the rest of the data will be parsed as regular lines for a given split. If after computation of the header there are no more data, the resulting split will consists of only header lines.")
@SupportsBatching
@WritesAttributes({
    @WritesAttribute(attribute = SplitText.SPLIT_LINE_COUNT, description = "The number of lines of text from the original FlowFile that were copied to this FlowFile"),
    @WritesAttribute(attribute = "fragment.size", description = "The number of bytes from the original FlowFile that were copied to this FlowFile, including header, if applicable, which is duplicated in each split FlowFile"),
    @WritesAttribute(attribute = "fragment.identifier", description = "All split FlowFiles produced from the same parent FlowFile will have the same randomly generated UUID added for this attribute"),
    @WritesAttribute(attribute = "fragment.index", description = "A one-up number that indicates the ordering of the split FlowFiles that were created from a single parent FlowFile"),
    @WritesAttribute(attribute = "fragment.count", description = "The number of split FlowFiles generated from the parent FlowFile"),
    @WritesAttribute(attribute = "segment.original.filename", description = "The filename of the parent FlowFile")
})
@EventDriven
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"split", "text"})
@SeeAlso({MergeContent.class})
@SideEffectFree
public class SplitText extends AbstractProcessor {
    // ---- Names of the attributes written to every split FlowFile (see updateAttributes) ----

    /** Attribute holding the number of lines copied into a split. */
    public static final String SPLIT_LINE_COUNT = "text.line.count";
    /** Attribute holding the size of a split, as reported by the split FlowFile. */
    public static final String FRAGMENT_SIZE = FragmentAttributes.FRAGMENT_SIZE.key();
    /** Attribute holding the identifier shared by all splits of one parent FlowFile. */
    public static final String FRAGMENT_ID = FragmentAttributes.FRAGMENT_ID.key();
    /** Attribute holding the index of a split among its siblings. */
    public static final String FRAGMENT_INDEX = FragmentAttributes.FRAGMENT_INDEX.key();
    /** Attribute holding the number of splits produced from the parent FlowFile. */
    public static final String FRAGMENT_COUNT = FragmentAttributes.FRAGMENT_COUNT.key();
    /** Attribute holding the filename of the parent FlowFile. */
    public static final String SEGMENT_ORIGINAL_FILENAME = FragmentAttributes.SEGMENT_ORIGINAL_FILENAME.key();

    // ---- Supported properties ----

    public static final PropertyDescriptor LINE_SPLIT_COUNT = new PropertyDescriptor.Builder().name("Line Split Count").description("The number of lines that will be added to each split file, excluding header lines. A value of zero requires Maximum Fragment Size to be set, and line count will not be considered in determining splits.").required(true).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR).build();
    public static final PropertyDescriptor FRAGMENT_MAX_SIZE = new PropertyDescriptor.Builder().name("Maximum Fragment Size").description("The maximum size of each split file, including header lines. NOTE: in the case where a single line exceeds this property (including headers, if applicable), that line will be output in a split of its own which exceeds this Maximum Fragment Size setting.").required(false).addValidator(StandardValidators.DATA_SIZE_VALIDATOR).build();
    public static final PropertyDescriptor HEADER_LINE_COUNT = new PropertyDescriptor.Builder().name("Header Line Count").description("The number of lines that should be considered part of the header; the header lines will be duplicated to all split files").required(true).addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR).defaultValue("0").build();
    public static final PropertyDescriptor HEADER_MARKER = new PropertyDescriptor.Builder().name("Header Line Marker Characters").description("The first character(s) on the line of the datafile which signifies a header line. This value is ignored when Header Line Count is non-zero. The first line not containing the Header Line Marker Characters and all subsequent lines are considered non-header").required(false).addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();
    public static final PropertyDescriptor REMOVE_TRAILING_NEWLINES = new PropertyDescriptor.Builder().name("Remove Trailing Newlines").description("Whether to remove newlines at the end of each split file. This should be false if you intend to merge the split files later. If this is set to 'true' and a FlowFile is generated that contains only 'empty lines' (i.e., consists only of \r and \n characters), the FlowFile will not be emitted. Note, however, that if header lines are specified, the resultant FlowFile will never be empty as it will consist of the header lines, so a FlowFile may be emitted that contains only the header lines.").required(true).addValidator(StandardValidators.BOOLEAN_VALIDATOR).allowableValues(new String[]{"true", "false"}).defaultValue("true").build();

    // ---- Relationships ----

    public static final Relationship REL_ORIGINAL = new Relationship.Builder().name("original").description("The original input file will be routed to this destination when it has been successfully split into 1 or more files").build();
    public static final Relationship REL_SPLITS = new Relationship.Builder().name("splits").description("The split files will be routed to this destination when an input file is successfully split into 1 or more split files").build();
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description("If a file cannot be split for some reason, the original file will be routed to this destination and nothing will be routed elsewhere").build();

    /** Immutable list returned by getSupportedPropertyDescriptors(). */
    private static final List<PropertyDescriptor> properties = Collections.unmodifiableList(Arrays.asList(LINE_SPLIT_COUNT, FRAGMENT_MAX_SIZE, HEADER_LINE_COUNT, HEADER_MARKER, REMOVE_TRAILING_NEWLINES));
    /** Immutable set returned by getRelationships(); built with a typed HashSet to avoid an unchecked conversion. */
    private static final Set<Relationship> relationships = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(REL_ORIGINAL, REL_SPLITS, REL_FAILURE)));

    // ---- Configuration snapshot, assigned in onSchedule(ProcessContext) and read while splitting ----

    /** Value of "Remove Trailing Newlines"; false when the property is not set. */
    private volatile boolean removeTrailingNewLines;
    /** Value of "Maximum Fragment Size" in bytes; Long.MAX_VALUE when the property is not set. */
    private volatile long maxSplitSize;
    /** Value of "Line Split Count". */
    private volatile int lineCount;
    /** Value of "Header Line Count". */
    private volatile int headerLineCount;
    /** Value of "Header Line Marker Characters", as returned by the property (may be null). */
    private volatile String headerMarker;

    /**
     * Immutable-by-convention description of one region of the source content:
     * either the header or a single computed split.
     *
     * <p>{@code nextSplit} stores in {@code trimmedLength} the split length minus
     * its trailing line-terminator bytes, whereas {@code computeHeader} stores the
     * line-terminator length of the last line it read in that same slot.</p>
     */
    public class SplitInfo {
        /** Offset value supplied by the creator of this instance. */
        final long startOffset;
        /** Number of bytes covered by this region. */
        final long length;
        /** See the class comment: meaning depends on which method created the instance. */
        final long trimmedLength;
        /** Number of lines counted into this region. */
        final long lineCount;
        /** A line that was read while computing this region but not counted into it; may be null. */
        TextLineDemarcator.OffsetInfo remaningOffsetInfo;

        SplitInfo(long startOffset, long length, long trimmedLength, long lineCount, TextLineDemarcator.OffsetInfo remaining) {
            this.startOffset = startOffset;
            this.length = length;
            this.trimmedLength = trimmedLength;
            this.lineCount = lineCount;
            this.remaningOffsetInfo = remaining;
        }

        /** Renders offset, length and line count; the trimmed length is not included. */
        public String toString() {
            final StringBuilder text = new StringBuilder();
            text.append("offset:").append(this.startOffset);
            text.append("; length:").append(this.length);
            text.append("; lineCount:").append(this.lineCount);
            return text.toString();
        }
    }

    /**
     * Returns the relationships of this processor.
     *
     * @return the unmodifiable set holding REL_ORIGINAL, REL_SPLITS and REL_FAILURE
     */
    public Set<Relationship> getRelationships() {
        return SplitText.relationships;
    }

    /**
     * Copies the configured property values into the processor's fields so the
     * splitting code can read them without going back to the context.
     *
     * @param processContext context supplying the property values
     */
    @OnScheduled
    public void onSchedule(ProcessContext processContext) {
        // Unset "Remove Trailing Newlines" is treated as false.
        boolean trimNewlines = false;
        if (processContext.getProperty(REMOVE_TRAILING_NEWLINES).isSet()) {
            trimNewlines = processContext.getProperty(REMOVE_TRAILING_NEWLINES).asBoolean();
        }
        this.removeTrailingNewLines = trimNewlines;

        // Unset "Maximum Fragment Size" means "no size limit".
        long maxBytes = Long.MAX_VALUE;
        if (processContext.getProperty(FRAGMENT_MAX_SIZE).isSet()) {
            maxBytes = processContext.getProperty(FRAGMENT_MAX_SIZE).asDataSize(DataUnit.B).longValue();
        }
        this.maxSplitSize = maxBytes;

        this.lineCount = processContext.getProperty(LINE_SPLIT_COUNT).asInteger();
        this.headerLineCount = processContext.getProperty(HEADER_LINE_COUNT).asInteger();
        this.headerMarker = processContext.getProperty(HEADER_MARKER).getValue();
    }

    /**
     * Splits one incoming FlowFile.
     *
     * <p>The content is read once to compute the header region (if a header line
     * count or header marker is configured) and the list of split regions. On
     * success the split FlowFiles go to REL_SPLITS and the original, annotated
     * with the fragment identifier and split count, goes to REL_ORIGINAL. If the
     * header cannot be computed the original goes to REL_FAILURE and no splits
     * are created.</p>
     *
     * @param processContext not used directly; configuration was captured in onSchedule
     * @param processSession session used to read, clone and transfer FlowFiles
     * @throws ProcessException declared by the processor contract
     */
    public void onTrigger(ProcessContext processContext, ProcessSession processSession) throws ProcessException {
        final FlowFile sourceFlowFile = processSession.get();
        if (sourceFlowFile == null) {
            return;
        }

        final AtomicBoolean error = new AtomicBoolean();
        final List<SplitInfo> computedSplits = new ArrayList<>();
        final AtomicReference<SplitInfo> headerSplitInfoRef = new AtomicReference<>();

        processSession.read(sourceFlowFile, new InputStreamCallback() {
            public void process(InputStream inputStream) throws IOException {
                final TextLineDemarcator demarcator = new TextLineDemarcator(inputStream);
                SplitInfo splitInfo = null;
                long startOffset = 0;
                final long startNanos = System.nanoTime();

                try {
                    if (SplitText.this.headerLineCount > 0) {
                        splitInfo = SplitText.this.computeHeader(demarcator, 0L, SplitText.this.headerLineCount, null, null);
                        if (splitInfo != null && splitInfo.lineCount < SplitText.this.headerLineCount) {
                            error.set(true);
                            SplitText.this.getLogger().error("Unable to split " + sourceFlowFile + " due to insufficient amount of header lines. Required " + SplitText.this.headerLineCount + " but was " + splitInfo.lineCount + ". Routing to failure.");
                        }
                    } else if (SplitText.this.headerMarker != null) {
                        splitInfo = SplitText.this.computeHeader(demarcator, 0L, Long.MAX_VALUE, SplitText.this.headerMarker.getBytes(StandardCharsets.UTF_8), null);
                    }
                    headerSplitInfoRef.set(splitInfo);
                } catch (IllegalStateException e) {
                    // computeHeader throws this when the header alone exceeds the maximum split size.
                    error.set(true);
                    SplitText.this.getLogger().error(e.getMessage() + " Routing to failure.", e);
                }
                if (error.get()) {
                    return;
                }

                // Splits start right after the header; the header length also counts
                // against the size limit of every split.
                final SplitInfo header = headerSplitInfoRef.get();
                if (header != null) {
                    startOffset = header.length;
                }
                final long headerLength = startOffset;

                // Each call receives the previous region so a line it left over can be carried forward.
                while ((splitInfo = SplitText.this.nextSplit(demarcator, startOffset, SplitText.this.lineCount, splitInfo, headerLength)) != null) {
                    computedSplits.add(splitInfo);
                    startOffset += splitInfo.length;
                }

                final long stopNanos = System.nanoTime();
                if (SplitText.this.getLogger().isDebugEnabled()) {
                    // System.nanoTime() yields nanoseconds; convert so the value matches the unit in the message.
                    final long elapsedMillis = (stopNanos - startNanos) / 1_000_000L;
                    SplitText.this.getLogger().debug("Computed splits in " + elapsedMillis + " milliseconds.");
                }
            }
        });

        if (error.get()) {
            processSession.transfer(sourceFlowFile, REL_FAILURE);
            return;
        }

        final String fragmentId = UUID.randomUUID().toString();
        final List<FlowFile> splitFlowFiles = generateSplitFlowFiles(fragmentId, sourceFlowFile, headerSplitInfoRef.get(), computedSplits, processSession);
        processSession.transfer(FragmentAttributes.copyAttributesToOriginal(processSession, sourceFlowFile, fragmentId, splitFlowFiles.size()), REL_ORIGINAL);
        if (!splitFlowFiles.isEmpty()) {
            processSession.transfer(splitFlowFiles, REL_SPLITS);
        }
    }

    /**
     * Cross-property validation: when "Line Split Count" is 0, "Maximum Fragment
     * Size" must be set, otherwise nothing would bound a split.
     *
     * @param validationContext context giving access to the configured property values
     * @return a collection with a single result whose subject is "Maximum Fragment Size"
     */
    protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
        final boolean splitByLineCount = validationContext.getProperty(LINE_SPLIT_COUNT).asInteger().intValue() != 0;
        // Equivalent to the negation of (line count == 0 AND max size not set).
        final boolean valid = splitByLineCount || validationContext.getProperty(FRAGMENT_MAX_SIZE).isSet();

        final Collection<ValidationResult> results = new ArrayList<>();
        results.add(new ValidationResult.Builder()
                .subject("Maximum Fragment Size")
                .valid(valid)
                .explanation("Property must be specified when Line Split Count is 0")
                .build());
        return results;
    }

    /**
     * Returns the properties supported by this processor.
     *
     * @return the unmodifiable list of property descriptors declared on this class
     */
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return SplitText.properties;
    }

    /**
     * Creates the split FlowFiles described by the computed split regions.
     *
     * <p>If a header region is present it is cloned once and prepended to every
     * split. A split whose effective length is zero is skipped when there is no
     * header; with a header it is emitted as the header alone, minus the header's
     * final line terminator. When there are no splits at all but a header exists,
     * a single header-only FlowFile is produced.</p>
     *
     * <p>The {@code fragment.count} attribute is set to the number of FlowFiles
     * actually emitted, so it agrees with the count written to the original
     * FlowFile by {@code onTrigger} even when empty splits are skipped.</p>
     *
     * @param str            fragment identifier shared by all splits
     * @param flowFile       the source FlowFile being split
     * @param splitInfo      header region, or null when no header was computed
     * @param list           split regions, in content order
     * @param processSession session used to clone and remove FlowFiles
     * @return the split FlowFiles with their fragment attributes set; possibly empty
     */
    private List<FlowFile> generateSplitFlowFiles(String str, FlowFile flowFile, SplitInfo splitInfo, List<SplitInfo> list, ProcessSession processSession) {
        final List<FlowFile> splitFlowFiles = new ArrayList<>();
        final String originalFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
        final boolean trimNewlines = this.removeTrailingNewLines;

        FlowFile headerFlowFile = null;
        long headerCrlfLength = 0;
        if (splitInfo != null) {
            headerFlowFile = processSession.clone(flowFile, splitInfo.startOffset, splitInfo.length);
            // For a header region this slot carries the line-terminator length of the last line read.
            headerCrlfLength = splitInfo.trimmedLength;
        }

        if (list.isEmpty() && headerFlowFile != null) {
            // Header only: emit exactly one FlowFile, so index and count are both 1.
            final FlowFile headerOnly = processSession.clone(flowFile, 0L, headerFlowFile.getSize() - headerCrlfLength);
            splitFlowFiles.add(updateAttributes(processSession, headerOnly, 0L, headerOnly.getSize(), str, 1, 1, originalFilename));
        } else {
            // First pass: determine how many splits will really be emitted.
            int fragmentCount = 0;
            for (SplitInfo candidate : list) {
                final long candidateLength = trimNewlines ? candidate.trimmedLength : candidate.length;
                if (headerFlowFile != null || candidateLength > 0) {
                    fragmentCount++;
                }
            }

            // Second pass: create the FlowFiles.
            int fragmentIndex = 1;
            for (SplitInfo computed : list) {
                final long length = trimNewlines ? computed.trimmedLength : computed.length;
                if (headerFlowFile == null && length <= 0) {
                    continue; // nothing to emit for an empty split without a header
                }

                final FlowFile split;
                if (headerFlowFile == null) {
                    split = processSession.clone(flowFile, computed.startOffset, length);
                } else if (length > 0) {
                    split = concatenateContents(flowFile, processSession, headerFlowFile,
                            processSession.clone(flowFile, computed.startOffset, length));
                } else {
                    // Split consists of the header only; drop the header's last line terminator.
                    split = processSession.clone(flowFile, 0L, headerFlowFile.getSize() - headerCrlfLength);
                }

                splitFlowFiles.add(updateAttributes(processSession, split, computed.lineCount, split.getSize(), str,
                        fragmentIndex++, fragmentCount, originalFilename));
            }
        }

        getLogger().info("Split " + flowFile + " into " + splitFlowFiles.size() + " flow files" + (headerFlowFile != null ? " containing headers." : "."));
        if (headerFlowFile != null) {
            // The header clone was only a building block and is not transferred anywhere.
            processSession.remove(headerFlowFile);
        }
        return splitFlowFiles;
    }

    /**
     * Creates a new child of {@code flowFile} whose content is the content of the
     * given FlowFiles appended in order.
     *
     * <p>After merging, the FlowFile at index 1 of {@code flowFileArr} is removed
     * from the session; the others are left untouched. Callers must therefore
     * pass at least two FlowFiles.</p>
     *
     * @param flowFile       parent for the newly created FlowFile
     * @param processSession session used to create, read, append and remove
     * @param flowFileArr    FlowFiles whose contents are concatenated, in order
     * @return the merged FlowFile
     */
    private FlowFile concatenateContents(FlowFile flowFile, final ProcessSession processSession, FlowFile... flowFileArr) {
        FlowFile merged = processSession.create(flowFile);
        for (final FlowFile part : flowFileArr) {
            merged = processSession.append(merged, new OutputStreamCallback() {
                public void process(OutputStream outputStream) throws IOException {
                    // try-with-resources closes the stream on both normal and exceptional exit.
                    try (InputStream in = processSession.read(part)) {
                        IOUtils.copy(in, outputStream);
                    }
                }
            });
        }
        processSession.remove(flowFileArr[1]);
        return merged;
    }

    /**
     * Writes the split-related attributes onto a split FlowFile.
     *
     * @param processSession session used to apply the attributes
     * @param flowFile       the split FlowFile to annotate
     * @param j              number of lines in the split (SPLIT_LINE_COUNT)
     * @param j2             not used; FRAGMENT_SIZE is taken from {@code flowFile.getSize()}
     * @param str            fragment identifier (FRAGMENT_ID)
     * @param i              fragment index (FRAGMENT_INDEX)
     * @param i2             fragment count (FRAGMENT_COUNT)
     * @param str2           filename of the parent FlowFile (SEGMENT_ORIGINAL_FILENAME)
     * @return the FlowFile returned by the session after the attributes were applied
     */
    private FlowFile updateAttributes(ProcessSession processSession, FlowFile flowFile, long j, long j2, String str, int i, int i2, String str2) {
        final HashMap<String, String> attributes = new HashMap<>();
        attributes.put(SPLIT_LINE_COUNT, String.valueOf(j));
        attributes.put(FRAGMENT_SIZE, String.valueOf(flowFile.getSize()));
        attributes.put(FRAGMENT_ID, str);
        attributes.put(FRAGMENT_INDEX, String.valueOf(i));
        attributes.put(FRAGMENT_COUNT, String.valueOf(i2));
        attributes.put(SEGMENT_ORIGINAL_FILENAME, str2);
        return processSession.putAllAttributes(flowFile, attributes);
    }

    /**
     * Reads header lines from the demarcator and describes them as a SplitInfo.
     *
     * <p>Without a marker ({@code bArr == null}) every line is a header line until
     * {@code j2} lines have been read. With a marker, lines are header lines only
     * while they start with the marker; reading stops at the first line that does
     * not match, and that line is handed on through the returned SplitInfo's
     * {@code remaningOffsetInfo} so the first split can include it.</p>
     *
     * @param textLineDemarcator source of line offset information
     * @param j                  start offset to record in the returned SplitInfo
     * @param j2                 maximum number of header lines to read
     * @param bArr               header marker bytes, or null to count lines only
     * @param splitInfo          not used
     * @return the header description, or null when no header line was found
     * @throws IOException           if reading from the demarcator fails
     * @throws IllegalStateException if the header grows beyond the maximum split size
     */
    public SplitInfo computeHeader(TextLineDemarcator textLineDemarcator, long j, long j2, byte[] bArr, SplitInfo splitInfo) throws IOException {
        long headerLength = 0;
        long headerLines = 0;
        long lastCrlfLength = 0;
        TextLineDemarcator.OffsetInfo firstNonHeaderLine = null;

        TextLineDemarcator.OffsetInfo current;
        while ((current = textLineDemarcator.nextOffsetInfo(bArr)) != null) {
            lastCrlfLength = current.getCrlfLength();

            if (bArr != null && !current.isStartsWithMatch()) {
                // First line that is not a header: remember it for the next split and stop.
                // Continuing would consume the data lines that nextSplit has to see.
                // NOTE(review): a CRLF length of -1 presumably marks a line that must not be carried over - confirm.
                if (current.getCrlfLength() != -1) {
                    firstNonHeaderLine = current;
                }
                break;
            }

            if (headerLength + current.getLength() > this.maxSplitSize) {
                throw new IllegalStateException("Computing header resulted in header size being > MAX split size of " + this.maxSplitSize + ".");
            }
            headerLength += current.getLength();
            headerLines++;
            if (headerLines == j2) {
                break;
            }
        }

        if (headerLines == 0) {
            // NOTE(review): a non-matching first line read above is not handed on in this case - verify whether it is lost.
            return null;
        }
        // The third argument deliberately carries the last line-terminator length; generateSplitFlowFiles
        // subtracts it from the header size when a split consists of the header only.
        return new SplitInfo(j, headerLength, lastCrlfLength, headerLines, firstNonHeaderLine);
    }

    /**
     * Computes the next split region by reading lines from the demarcator.
     *
     * <p>Lines are added until either {@code j2} lines have been collected (when
     * {@code j2 > 0}) or adding another line would push the split, together with
     * the header length {@code j3}, beyond the maximum split size. In the latter
     * case reading stops immediately: if the split is still empty the oversized
     * line becomes a split of its own, otherwise the line is carried over to the
     * following split through {@code remaningOffsetInfo}.</p>
     *
     * @param textLineDemarcator source of line offset information
     * @param j                  start offset to record in the returned SplitInfo
     * @param j2                 maximum number of lines per split; 0 or less disables the line limit
     * @param splitInfo          previous region; a line it carried over is counted first. May be null
     * @param j3                 number of header bytes that count against the size limit
     * @return the next split, or null when no line is left
     * @throws IOException if reading from the demarcator fails
     */
    public SplitInfo nextSplit(TextLineDemarcator textLineDemarcator, long j, long j2, SplitInfo splitInfo, long j3) throws IOException {
        long length = 0;
        long trailingCrlfLength = 0;
        long linesInSplit = 0;

        // A line the previous call read but could not fit is the first line of this split.
        if (splitInfo != null && splitInfo.remaningOffsetInfo != null) {
            length += splitInfo.remaningOffsetInfo.getLength();
            linesInSplit++;
        }

        TextLineDemarcator.OffsetInfo carriedOver = null;
        long lastCrlfLength = 0;

        TextLineDemarcator.OffsetInfo current;
        while ((current = textLineDemarcator.nextOffsetInfo()) != null) {
            lastCrlfLength = current.getCrlfLength();

            // Track the run of line-terminator-only lines at the end of the split.
            if (current.getLength() == current.getCrlfLength()) {
                trailingCrlfLength += current.getCrlfLength();
            } else if (current.getLength() > current.getCrlfLength()) {
                trailingCrlfLength = 0; // a non-empty line ends the run
            }

            if (length + current.getLength() + j3 > this.maxSplitSize) {
                if (length == 0) {
                    // A single line larger than the limit is emitted as a split of its own.
                    length += current.getLength();
                    linesInSplit++;
                } else {
                    carriedOver = current;
                }
                // Size limit reached: stop here so no further line is read and dropped.
                break;
            }

            length += current.getLength();
            linesInSplit++;
            if (j2 > 0 && linesInSplit >= j2) {
                break;
            }
        }

        if (linesInSplit == 0) {
            return null;
        }
        if (length - trailingCrlfLength >= lastCrlfLength) {
            trailingCrlfLength += lastCrlfLength; // also trim the terminator of the last line
        }
        return new SplitInfo(j, length, length - trailingCrlfLength, linesInSplit, carriedOver);
    }
}
