package io.quarkus.tika;

import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.apache.tika.sax.ToTextContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:io/quarkus/tika/TikaParser.class */
/**
 * Thin facade over a Tika {@link Parser} that parses an {@link InputStream} and returns the
 * extracted text and/or metadata as {@link TikaContent} / {@link TikaMetadata}.
 *
 * <p>The {@code appendEmbeddedContent} flag selects between two modes:
 * <ul>
 *   <li>{@code true}: the text is read from the content handler's {@code toString()} and a single
 *       {@link TikaContent} is returned;</li>
 *   <li>{@code false}: the content handler must be a {@link RecursiveParserWrapperHandler}; the
 *       first entry of its metadata list becomes the main content and every following entry that
 *       carries text becomes a separate embedded {@link TikaContent}.</li>
 * </ul>
 */
public class TikaParser {
    private final Parser parser;
    private final boolean appendEmbeddedContent;

    /**
     * @param parser the Tika parser every call is delegated to
     * @param z      the {@code appendEmbeddedContent} flag (see the class description)
     */
    public TikaParser(Parser parser, boolean z) {
        this.parser = parser;
        this.appendEmbeddedContent = z;
    }

    /**
     * Parses the stream without a content type hint, using a handler created by this class.
     *
     * @param inputStream the document to parse
     * @return the parsed content
     * @throws TikaParseException if the stream cannot be parsed
     */
    public TikaContent parse(InputStream inputStream) throws TikaParseException {
        return parse(inputStream, (String) null);
    }

    /**
     * Parses the stream without a content type hint, using the supplied handler.
     *
     * @param inputStream    the document to parse
     * @param contentHandler the SAX handler receiving the parse events
     * @return the parsed content
     * @throws TikaParseException    if the stream cannot be parsed
     * @throws IllegalStateException if the handler is rejected by the handler validation
     */
    public TikaContent parse(InputStream inputStream, ContentHandler contentHandler) throws TikaParseException {
        return parse(inputStream, null, validateContentHandler(contentHandler));
    }

    /**
     * Parses the stream using a handler created by this class.
     *
     * @param inputStream the document to parse
     * @param str         the content type of the document, or {@code null} if unknown
     * @return the parsed content
     * @throws TikaParseException if the stream cannot be parsed
     */
    public TikaContent parse(InputStream inputStream, String str) throws TikaParseException {
        return parse(inputStream, str, createContentHandler());
    }

    /**
     * Parses the stream using the supplied handler.
     *
     * @param inputStream    the document to parse
     * @param str            the content type of the document, or {@code null} if unknown
     * @param contentHandler the SAX handler receiving the parse events
     * @return the parsed content
     * @throws TikaParseException    if the stream cannot be parsed
     * @throws IllegalStateException if the handler is rejected by the handler validation
     */
    public TikaContent parse(InputStream inputStream, String str, ContentHandler contentHandler) throws TikaParseException {
        return parseStream(inputStream, str, validateContentHandler(contentHandler));
    }

    /**
     * Convenience for {@code parse(inputStream).getText()}.
     *
     * @throws TikaParseException if the stream cannot be parsed
     */
    public String getText(InputStream inputStream) throws TikaParseException {
        return parse(inputStream).getText();
    }

    /**
     * Convenience for {@code parse(inputStream, contentHandler).getText()}.
     *
     * @throws TikaParseException    if the stream cannot be parsed
     * @throws IllegalStateException if the handler is rejected by the handler validation
     */
    public String getText(InputStream inputStream, ContentHandler contentHandler) throws TikaParseException {
        return parse(inputStream, validateContentHandler(contentHandler)).getText();
    }

    /**
     * Convenience for {@code parse(inputStream, str).getText()}.
     *
     * @throws TikaParseException if the stream cannot be parsed
     */
    public String getText(InputStream inputStream, String str) throws TikaParseException {
        return parse(inputStream, str).getText();
    }

    /**
     * Convenience for {@code parse(inputStream, str, contentHandler).getText()}.
     *
     * @throws TikaParseException    if the stream cannot be parsed
     * @throws IllegalStateException if the handler is rejected by the handler validation
     */
    public String getText(InputStream inputStream, String str, ContentHandler contentHandler) throws TikaParseException {
        return parse(inputStream, str, validateContentHandler(contentHandler)).getText();
    }

    /**
     * Extracts the metadata of the document without a content type hint.
     *
     * @throws TikaParseException if the stream cannot be parsed
     */
    public TikaMetadata getMetadata(InputStream inputStream) throws TikaParseException {
        return getMetadata(inputStream, null);
    }

    /**
     * Extracts the metadata of the document using a metadata-only content handler.
     *
     * @param inputStream the document to parse
     * @param str         the content type of the document, or {@code null} if unknown
     * @return the metadata of the parsed content
     * @throws TikaParseException if the stream cannot be parsed
     */
    public TikaMetadata getMetadata(InputStream inputStream, String str) throws TikaParseException {
        return parseStream(inputStream, str, createContentHandlerForMetadataOnly(str)).getMetadata();
    }

    /**
     * Runs the parser over the stream and converts the outcome into a {@link TikaContent}.
     *
     * <p>The stream is wrapped in a {@link TikaInputStream} which is always closed before this
     * method returns, whether parsing succeeds or fails.
     *
     * @param inputStream    the document to parse
     * @param str            the content type of the document, or {@code null} if unknown
     * @param contentHandler the SAX handler; may be {@code null} when {@code appendEmbeddedContent}
     *                       is set, in which case the returned text is {@code null}
     * @return the parsed content
     * @throws TikaParseException wrapping any exception raised while parsing or collecting results
     */
    protected TikaContent parseStream(InputStream inputStream, String str, ContentHandler contentHandler) throws TikaParseException {
        try {
            ParseContext parseContext = new ParseContext();
            // NOTE(review): getWrappedParser() is invoked on a field declared as Parser; confirm the
            // Parser type in use declares it, otherwise a cast to the wrapper type is required here.
            parseContext.set(Parser.class, this.appendEmbeddedContent ? this.parser : this.parser.getWrappedParser());
            Metadata metadata = new Metadata();
            if (str != null) {
                metadata.set("Content-Type", str);
            }
            // try-with-resources guarantees the stream is released on the failure path as well.
            try (TikaInputStream tikaInputStream = TikaInputStream.get(inputStream)) {
                this.parser.parse(tikaInputStream, contentHandler, metadata, parseContext);
                if (this.appendEmbeddedContent) {
                    String text = contentHandler == null ? null : contentHandler.toString().trim();
                    return new TikaContent(text, convert(metadata));
                }
                return toRecursiveContent((RecursiveParserWrapperHandler) contentHandler);
            }
        } catch (Exception e) {
            throw new TikaParseException("Unable to parse the stream" + (str == null ? "" : " for content-type: " + str), e);
        }
    }

    /**
     * Builds the result from a recursive handler: entry 0 of its metadata list is the main
     * document, entries 1..n are turned into embedded contents.
     */
    private static TikaContent toRecursiveContent(RecursiveParserWrapperHandler handler) {
        List<Metadata> allMetadata = handler.getMetadataList();
        Metadata mainMetadata = allMetadata.get(0);
        String mainText = mainMetadata.get(TikaCoreProperties.TIKA_CONTENT);
        List<TikaContent> embedded = new LinkedList<>();
        for (Metadata embeddedMetadata : allMetadata.subList(1, allMetadata.size())) {
            String embeddedText = embeddedMetadata.get(TikaCoreProperties.TIKA_CONTENT);
            // Entries without any text are skipped entirely (their metadata is not reported).
            if (embeddedText != null) {
                embedded.add(new TikaContent(embeddedText.trim(), convert(embeddedMetadata)));
            }
        }
        return new TikaContent(mainText, convert(mainMetadata), embedded);
    }

    /**
     * Returns the handler unchanged if it is acceptable for the current mode: any handler when
     * {@code appendEmbeddedContent} is set, otherwise only a {@link RecursiveParserWrapperHandler}.
     *
     * @throws IllegalStateException if the handler is not acceptable
     */
    private ContentHandler validateContentHandler(ContentHandler contentHandler) {
        if (this.appendEmbeddedContent || (contentHandler instanceof RecursiveParserWrapperHandler)) {
            return contentHandler;
        }
        throw new IllegalStateException("The main document and every embedded document will require a unique ContentHandler instance");
    }

    /** Creates the default text-collecting handler for the current mode. */
    private ContentHandler createContentHandler() {
        return this.appendEmbeddedContent ? new ToTextContentHandler() : new RecursiveParserWrapperHandler(new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
    }

    /**
     * Creates the handler used when only metadata is requested.
     *
     * @param str the content type of the document, or {@code null} if unknown
     * @return a recursive handler with an IGNORE handler factory when embedded content is not
     *         appended; otherwise a no-op {@link DefaultHandler}, or {@code null} when the content
     *         type contains {@code "pdf"}
     */
    private ContentHandler createContentHandlerForMetadataOnly(String str) {
        if (!this.appendEmbeddedContent) {
            return new RecursiveParserWrapperHandler(new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1));
        }
        if (str == null || !str.contains("pdf")) {
            return new DefaultHandler();
        }
        // NOTE(review): PDF input deliberately gets no handler at all; presumably the PDF parser
        // needs a null handler to skip text extraction - confirm against the Tika PDF parser.
        return null;
    }

    /** Copies every name and its values from the Tika metadata into a {@link TikaMetadata}. */
    private static TikaMetadata convert(Metadata metadata) {
        HashMap<String, List<String>> valuesByName = new HashMap<>();
        for (String name : metadata.names()) {
            valuesByName.put(name, Arrays.asList(metadata.getValues(name)));
        }
        return new TikaMetadata(valuesByName);
    }
}
