package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.utils.Utils;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/html/HtmlParser.class */
/**
 * {@link Parser} implementation for HTML input.
 *
 * <p>The input is run through a NekoHTML {@link SAXParser}; all SAX events are
 * forwarded to the caller's {@link ContentHandler}, and the text of the
 * document's {@code <title>} element is recorded in the supplied
 * {@link Metadata} under {@link DublinCore#TITLE}.
 */
public class HtmlParser implements Parser {

    /**
     * Decorator that forwards every SAX event to the wrapped handler while
     * collecting the character data of the first {@code TITLE} element found
     * inside {@code HTML > HEAD}. The collected text is written to the
     * metadata when the document ends.
     *
     * <p>Element names are compared against upper-case constants; this
     * presumably relies on the parser reporting normalized upper-case element
     * names -- TODO confirm against the NekoHTML configuration in use.
     */
    private static class TitleExtractingContentHandler extends ContentHandlerDecorator {
        private static final String TAG_TITLE = "TITLE";
        private static final String TAG_HEAD = "HEAD";
        private static final String TAG_HTML = "HTML";

        /** Receives the extracted title at end of document. */
        private final Metadata metadata;

        /** Accumulates title text; SAX may deliver it in several chunks. */
        private final StringBuilder title = new StringBuilder();

        /** Current position of the title-tracking state machine. */
        private Phase phase = Phase.START;

        /**
         * States of the title search. The machine only moves forward:
         * {@code START -> HTML -> HEAD -> TITLE -> IGNORE}, or
         * {@code HEAD -> IGNORE} when the head closes without a title.
         */
        private enum Phase {
            /** Nothing seen yet; waiting for the HTML element. */
            START,
            /** Inside HTML; waiting for the HEAD element. */
            HTML,
            /** Inside HEAD; waiting for the TITLE element. */
            HEAD,
            /** Inside TITLE; character data is being collected. */
            TITLE,
            /** Title search is over; all further events are only forwarded. */
            IGNORE
        }

        /**
         * @param contentHandler handler that receives all forwarded SAX events
         * @param metadata       metadata object that receives the title
         */
        public TitleExtractingContentHandler(ContentHandler contentHandler, Metadata metadata) {
            super(contentHandler);
            this.metadata = metadata;
        }

        /**
         * Advances the state machine when the next expected element opens,
         * then forwards the event.
         */
        @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.ContentHandler
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            switch (this.phase) {
                case START:
                    if (TAG_HTML.equals(localName)) {
                        this.phase = Phase.HTML;
                    }
                    break;
                case HTML:
                    if (TAG_HEAD.equals(localName)) {
                        this.phase = Phase.HEAD;
                    }
                    break;
                case HEAD:
                    if (TAG_TITLE.equals(localName)) {
                        this.phase = Phase.TITLE;
                    }
                    break;
                default:
                    // TITLE and IGNORE: no transition on element start.
                    break;
            }
            super.startElement(uri, localName, qName, attributes);
        }

        /**
         * Collects character data while inside the title element, then
         * forwards the event.
         */
        @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.ContentHandler
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (this.phase == Phase.TITLE) {
                this.title.append(ch, start, length);
            }
            super.characters(ch, start, length);
        }

        /**
         * Ends the title search when the title element closes, or when the
         * head closes without having contained a title, then forwards the
         * event.
         */
        @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.ContentHandler
        public void endElement(String uri, String localName, String qName) throws SAXException {
            switch (this.phase) {
                case TITLE:
                    if (TAG_TITLE.equals(localName)) {
                        this.phase = Phase.IGNORE;
                    }
                    break;
                case HEAD:
                    // Without this transition the machine would stay in HEAD
                    // for the rest of the document, and a TITLE element that
                    // appears later (outside the head) would be mistaken for
                    // the document title.
                    if (TAG_HEAD.equals(localName)) {
                        this.phase = Phase.IGNORE;
                    }
                    break;
                default:
                    break;
            }
            super.endElement(uri, localName, qName);
        }

        /**
         * Stores the collected title (an empty string if none was found) in
         * the metadata, then forwards the event.
         */
        @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
            this.metadata.set(DublinCore.TITLE, this.title.toString());
            super.endDocument();
        }
    }

    /**
     * Parses the given HTML stream, forwarding SAX events to
     * {@code contentHandler} and recording the document title in
     * {@code metadata}.
     *
     * @param inputStream    HTML document to parse; not closed by this method
     * @param contentHandler receiver of the SAX events produced by the parser
     * @param metadata       passed to {@code Utils.getUTF8Reader} and updated
     *                       with the document title
     * @throws IOException   if the underlying parser fails to read the input
     * @throws SAXException  if the parser or the content handler reports an error
     * @throws TikaException declared by the {@link Parser} interface; a failure
     *                       to create the reader is handled locally and not
     *                       propagated
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata)
            throws IOException, SAXException, TikaException {
        SAXParser htmlParser = new SAXParser();

        Reader reader;
        try {
            reader = Utils.getUTF8Reader(inputStream, metadata);
        } catch (TikaException ignored) {
            // Deliberate best effort: if no reader can be created, hand the
            // raw byte stream to the SAX parser instead of failing the parse.
            // NOTE(review): assumes the stream is still usable after the
            // failed attempt -- confirm against Utils.getUTF8Reader.
            reader = null;
        }

        InputSource inputSource;
        if (reader != null) {
            inputSource = new InputSource(reader);
        } else {
            inputSource = new InputSource(inputStream);
        }

        htmlParser.setContentHandler(new TitleExtractingContentHandler(contentHandler, metadata));
        htmlParser.parse(inputSource);
    }
}
