package org.apache.tika.parser.microsoft;

import com.ecyrd.jspwiki.VariableManager;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.hslf.dev.PPTXMLDump;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.Ole10Native;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/tika-parsers.jar:org/apache/tika/parser/microsoft/OfficeParser.class */
/**
 * Parser for OLE2 (POIFS) based Microsoft Office documents.
 *
 * <p>The parser opens the input as a {@link POIFSFileSystem}, extracts the
 * summary information into the supplied {@link Metadata}, and then walks the
 * entries of the root directory. Every entry whose name identifies a known
 * document kind (see {@link POIFSDocumentType}) sets the {@code Content-Type}
 * metadata and, where this class has an extractor for it, is handed to that
 * extractor.
 */
public class OfficeParser implements Parser {
    private static final long serialVersionUID = 7393462244028653479L;

    /**
     * Media types advertised by {@link #getSupportedTypes(ParseContext)}.
     * Note that {@link POIFSDocumentType#WORKS} is detected but deliberately
     * not listed here, matching the absence of a WORKS branch in
     * {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)}.
     */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(
                    POIFSDocumentType.WORKBOOK.type,
                    POIFSDocumentType.OLE10_NATIVE.type,
                    POIFSDocumentType.WORDDOCUMENT.type,
                    POIFSDocumentType.UNKNOWN.type,
                    POIFSDocumentType.ENCRYPTED.type,
                    POIFSDocumentType.POWERPOINT.type,
                    POIFSDocumentType.PUBLISHER.type,
                    POIFSDocumentType.VISIO.type,
                    POIFSDocumentType.OUTLOOK.type,
                    MediaType.application("vnd.ms-excel.sheet.binary.macroenabled.12"))));

    /* loaded from: input_file:WEB-INF/lib/tika-parsers.jar:org/apache/tika/parser/microsoft/OfficeParser$POIFSDocumentType.class */
    /**
     * The document kinds that can be recognised from the name of an entry in
     * a POIFS directory, each paired with a file extension and a media type.
     */
    public enum POIFSDocumentType {
        WORKBOOK("xls", MediaType.application("vnd.ms-excel")),
        OLE10_NATIVE("ole", MediaType.application("x-tika-msoffice")),
        WORDDOCUMENT("doc", MediaType.application("msword")),
        UNKNOWN("unknown", MediaType.application("x-tika-msoffice")),
        ENCRYPTED("ole", MediaType.application("x-tika-msoffice")),
        POWERPOINT("ppt", MediaType.application("vnd.ms-powerpoint")),
        PUBLISHER("pub", MediaType.application("x-mspublisher")),
        VISIO("vsd", MediaType.application("vnd.visio")),
        WORKS("wps", MediaType.application("vnd.ms-works")),
        // NOTE(review): the extension is taken from an unrelated library constant,
        // which looks like a decompiler constant-matching artifact - confirm the
        // intended literal before replacing it.
        OUTLOOK(VariableManager.VAR_MSG, MediaType.application("vnd.ms-outlook"));

        private final String extension;
        private final MediaType type;

        POIFSDocumentType(String str, MediaType mediaType) {
            this.extension = str;
            this.type = mediaType;
        }

        /**
         * @return the file extension associated with this document kind
         */
        public String getExtension() {
            return this.extension;
        }

        /**
         * @return the media type associated with this document kind
         */
        public MediaType getType() {
            return this.type;
        }

        /**
         * Detects the document kind of a whole file system by inspecting its
         * root directory.
         *
         * @param pOIFSFileSystem the file system to inspect
         * @return the first recognised kind, or {@link #UNKNOWN}
         */
        public static POIFSDocumentType detectType(POIFSFileSystem pOIFSFileSystem) {
            return detectType((DirectoryEntry) pOIFSFileSystem.getRoot());
        }

        /**
         * Detects the document kind of a directory by checking each of its
         * direct children in iteration order.
         *
         * @param directoryEntry the directory whose children are inspected
         * @return the kind of the first child that is not {@link #UNKNOWN},
         *         or {@link #UNKNOWN} when no child is recognised
         */
        public static POIFSDocumentType detectType(DirectoryEntry directoryEntry) {
            for (Entry child : directoryEntry) {
                POIFSDocumentType detected = detectType(child);
                if (detected != UNKNOWN) {
                    return detected;
                }
            }
            return UNKNOWN;
        }

        /**
         * Maps a single entry to a document kind based solely on its name.
         *
         * @param entry the entry whose name is examined
         * @return the matching kind, or {@link #UNKNOWN} when the name is not
         *         one of the recognised marker names
         */
        public static POIFSDocumentType detectType(Entry entry) {
            String name = entry.getName();
            // The order of the checks mirrors the original decision chain.
            if ("Workbook".equals(name)) {
                return WORKBOOK;
            }
            if ("EncryptedPackage".equals(name)) {
                return ENCRYPTED;
            }
            if ("WordDocument".equals(name)) {
                return WORDDOCUMENT;
            }
            if ("Quill".equals(name)) {
                return PUBLISHER;
            }
            if (PPTXMLDump.PPDOC_ENTRY.equals(name)) {
                return POWERPOINT;
            }
            if ("VisioDocument".equals(name)) {
                return VISIO;
            }
            if ("CONTENTS".equals(name)) {
                return WORKS;
            }
            if (name.startsWith(Chunk.DEFAULT_NAME_PREFIX)) {
                return OUTLOOK;
            }
            if (Ole10Native.OLE10_NATIVE.equals(name)) {
                return OLE10_NATIVE;
            }
            return UNKNOWN;
        }
    }

    /**
     * Returns the fixed, unmodifiable set of media types this parser declares.
     *
     * @param parseContext ignored
     * @return the supported media types
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses an OLE2 document and emits its text as XHTML SAX events.
     *
     * <p>Each root entry is classified individually, so a container holding
     * several recognised entries has each of them processed in turn; Outlook
     * messages are the exception and are extracted only once because many of
     * their entries share the recognised name prefix.
     *
     * @param inputStream    the document stream; an already opened
     *                       {@link POIFSFileSystem} attached to a
     *                       {@link TikaInputStream} is reused when present
     * @param contentHandler receiver of the XHTML output
     * @param metadata       updated with summary data and {@code Content-Type}
     * @param parseContext   passed on to the individual extractors
     * @throws IOException   if the container cannot be read
     * @throws SAXException  if the content handler fails
     * @throws TikaException if the document is encrypted with a non-default
     *                       password or decryption fails
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();

        POIFSFileSystem filesystem = openFileSystem(inputStream);
        new SummaryExtractor(metadata).parseSummaries(filesystem);

        boolean outlookExtracted = false;
        for (Entry entry : filesystem.getRoot()) {
            POIFSDocumentType type = POIFSDocumentType.detectType(entry);
            if (type != POIFSDocumentType.UNKNOWN) {
                setType(metadata, type.getType());
            }
            switch (type) {
                case PUBLISHER:
                    xhtml.element("p", new PublisherTextExtractor(filesystem).getText());
                    break;
                case WORDDOCUMENT:
                    new WordExtractor(parseContext).parse(filesystem, xhtml);
                    break;
                case POWERPOINT:
                    new HSLFExtractor(parseContext).parse(filesystem, xhtml);
                    break;
                case WORKBOOK:
                    Locale locale = (Locale) parseContext.get(Locale.class, Locale.getDefault());
                    new ExcelExtractor(parseContext).parse(filesystem, xhtml, locale);
                    break;
                case VISIO:
                    for (String text : new VisioTextExtractor(filesystem).getAllText()) {
                        xhtml.element("p", text);
                    }
                    break;
                case OUTLOOK:
                    // Many entries of a message match the OUTLOOK prefix; extract once.
                    if (!outlookExtracted) {
                        outlookExtracted = true;
                        new OutlookExtractor(filesystem, parseContext).parse(xhtml, metadata);
                    }
                    break;
                case ENCRYPTED:
                    parseEncrypted(filesystem, xhtml, metadata, parseContext);
                    break;
                default:
                    // UNKNOWN, OLE10_NATIVE and WORKS have no extractor here.
                    break;
            }
        }
        xhtml.endDocument();
    }

    /**
     * Obtains the POIFS file system for the given stream, reusing a container
     * that was already opened on a {@link TikaInputStream} when it is of the
     * right type and otherwise reading the stream.
     */
    private static POIFSFileSystem openFileSystem(InputStream inputStream) throws IOException {
        if (inputStream instanceof TikaInputStream) {
            Object container = ((TikaInputStream) inputStream).getOpenContainer();
            // Guard the cast: a container of another type must not surface as a
            // ClassCastException, which is outside this parser's declared contract.
            if (container instanceof POIFSFileSystem) {
                return (POIFSFileSystem) container;
            }
        }
        return new POIFSFileSystem(inputStream);
    }

    /**
     * Decrypts an encrypted package with the default password and delegates
     * the decrypted content to {@link OOXMLParser}; the decrypted stream is
     * always closed afterwards.
     */
    private static void parseEncrypted(POIFSFileSystem filesystem, XHTMLContentHandler xhtml, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        Decryptor decryptor = new Decryptor(new EncryptionInfo(filesystem));
        try {
            if (!decryptor.verifyPassword(Decryptor.DEFAULT_PASSWORD)) {
                throw new TikaException("Unable to process: document is encrypted");
            }
            InputStream decrypted = decryptor.getDataStream(filesystem);
            try {
                new OOXMLParser().parse(decrypted, new EmbeddedContentHandler(new BodyContentHandler(xhtml)), metadata, parseContext);
            } finally {
                // Release the decryption stream even when the delegate parser fails.
                decrypted.close();
            }
        } catch (GeneralSecurityException e) {
            throw new TikaException("Unable to process encrypted document", e);
        }
    }

    /**
     * Convenience overload that parses with a fresh, empty {@link ParseContext}.
     *
     * @param inputStream    the document stream
     * @param contentHandler receiver of the XHTML output
     * @param metadata       updated with summary data and {@code Content-Type}
     * @throws IOException   if the container cannot be read
     * @throws SAXException  if the content handler fails
     * @throws TikaException if the document cannot be processed
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        parse(inputStream, contentHandler, metadata, new ParseContext());
    }

    /** Records the detected media type under the {@code Content-Type} key. */
    private void setType(Metadata metadata, MediaType mediaType) {
        metadata.set("Content-Type", mediaType.toString());
    }
}
