package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.Image;
import de.l3s.boilerpipe.document.Media;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.document.VimeoVideo;
import de.l3s.boilerpipe.document.YoutubeVideo;
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/l3s/boilerpipe/sax/MediaExtractor.class */
public final class MediaExtractor {
    /** Shared instance returned by {@link #getInstance()}; this class declares no instance fields. */
    public static final MediaExtractor INSTANCE = new MediaExtractor();

    /**
     * Tag action that tracks nesting depth inside elements whose content must be skipped:
     * the depth counter is raised before the start tag is handled and lowered after the
     * end tag has been handled.
     */
    private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction() { // from class: de.l3s.boilerpipe.sax.MediaExtractor.1
        @Override // de.l3s.boilerpipe.sax.MediaExtractor.TagAction
        void beforeStart(Implementation implementation, String str) {
            implementation.inIgnorableElement++;
        }

        @Override // de.l3s.boilerpipe.sax.MediaExtractor.TagAction
        void afterEnd(Implementation implementation, String str) {
            implementation.inIgnorableElement--;
        }
    };

    /**
     * Element name to action lookup consulted on every start/end tag. The reference is
     * final so the table can only be populated, never swapped out; entries are registered
     * by the class's static initializer.
     */
    private static final Map<String, TagAction> TAG_ACTIONS = new HashMap<String, TagAction>();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/l3s/boilerpipe/sax/MediaExtractor$Implementation.class */
    /**
     * SAX parser and content handler (backed by an {@link HTMLConfiguration}) that collects
     * media elements located between pieces of extracted content.
     *
     * <p>While the most recent significant text was content, {@code IMG} elements and
     * YouTube/Vimeo {@code IFRAME} embeds are placed in a buffer. The buffer is moved to
     * {@link #linksHighlight} only when a later content text event arrives, so media that is
     * never followed by content text is not reported.
     */
    public final class Implementation extends AbstractSAXParser implements ContentHandler {
        /** Media confirmed by a following content text event; read by the enclosing class after parsing. */
        List<Media> linksHighlight;
        /** Media seen while in a highlighted region, waiting for the next content text event. */
        private List<Media> linksBuffer;
        /** Nesting depth inside ignorable elements; maintained by the registered tag actions. */
        private int inIgnorableElement;
        /** Running count of {@code characters} events, used as the index into {@link #contentBitSet}. */
        private int characterElementIdx;
        /** Union of the text-element indices of every content block of the processed document. */
        private final BitSet contentBitSet;
        /** Whether the last significant text event belonged to content. */
        private boolean inHighlight;

        Implementation() {
            super(new HTMLConfiguration());
            this.linksHighlight = new ArrayList<Media>();
            this.linksBuffer = new ArrayList<Media>();
            this.inIgnorableElement = 0;
            this.characterElementIdx = 0;
            this.contentBitSet = new BitSet();
            this.inHighlight = false;
            setContentHandler(this);
        }

        /**
         * Records which text elements belong to content blocks of {@code textDocument} and
         * then parses {@code inputSource}, filling {@link #linksHighlight}.
         *
         * @param textDocument the already classified document whose content blocks are used
         * @param inputSource the HTML to scan for media
         * @throws BoilerpipeProcessingException wrapping any {@link IOException} or
         *     {@link SAXException} raised while parsing
         */
        void process(TextDocument textDocument, InputSource inputSource) throws BoilerpipeProcessingException {
            for (TextBlock textBlock : textDocument.getTextBlocks()) {
                if (!textBlock.isContent()) {
                    continue;
                }
                BitSet containedTextElements = textBlock.getContainedTextElements();
                if (containedTextElements != null) {
                    this.contentBitSet.or(containedTextElements);
                }
            }
            try {
                parse(inputSource);
            } catch (IOException e) {
                throw new BoilerpipeProcessingException(e);
            } catch (SAXException e) {
                throw new BoilerpipeProcessingException(e);
            }
        }

        @Override // org.xml.sax.ContentHandler
        public void endDocument() throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void endPrefixMapping(String prefix) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void processingInstruction(String target, String data) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void setDocumentLocator(Locator locator) {
        }

        @Override // org.xml.sax.ContentHandler
        public void skippedEntity(String name) throws SAXException {
        }

        @Override // org.xml.sax.ContentHandler
        public void startDocument() throws SAXException {
        }

        /**
         * Runs the tag action registered for {@code localName} (if any) and, when currently
         * inside highlighted content and outside ignorable elements, buffers media described
         * by {@code IFRAME} and {@code IMG} elements.
         */
        @Override // org.xml.sax.ContentHandler
        public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
            // NOTE(review): the lookup is case-sensitive and all registered keys are upper case;
            // this presumably relies on the parser reporting upper-case element names - confirm.
            TagAction tagAction = MediaExtractor.TAG_ACTIONS.get(localName);
            if (tagAction != null) {
                tagAction.beforeStart(this, localName);
            }
            try {
                if (this.inIgnorableElement == 0 && this.inHighlight) {
                    if ("IFRAME".equalsIgnoreCase(localName)) {
                        bufferEmbeddedVideo(atts.getValue("src"));
                    } else if ("IMG".equalsIgnoreCase(localName)) {
                        bufferImage(atts);
                    }
                }
            } finally {
                if (tagAction != null) {
                    tagAction.afterStart(this, localName);
                }
            }
        }

        /** Runs the before-end and after-end hooks of the tag action registered for {@code localName}. */
        @Override // org.xml.sax.ContentHandler
        public void endElement(String uri, String localName, String qName) throws SAXException {
            TagAction tagAction = MediaExtractor.TAG_ACTIONS.get(localName);
            if (tagAction != null) {
                tagAction.beforeEnd(this, localName);
                tagAction.afterEnd(this, localName);
            }
        }

        /**
         * Advances the text-element index and updates the highlight state. Empty or
         * whitespace-only text that is not content leaves the state untouched; content text
         * moves all buffered media to {@link #linksHighlight}.
         */
        @Override // org.xml.sax.ContentHandler
        public void characters(char[] ch, int start, int length) throws SAXException {
            // The index advances for every event, including those inside ignorable elements.
            this.characterElementIdx++;
            if (this.inIgnorableElement != 0) {
                return;
            }
            boolean isContent = this.contentBitSet.get(this.characterElementIdx);
            if (!isContent && isWhitespaceOnly(ch, start, length)) {
                return;
            }
            this.inHighlight = isContent;
            if (isContent) {
                this.linksHighlight.addAll(this.linksBuffer);
                this.linksBuffer.clear();
            }
        }

        @Override // org.xml.sax.ContentHandler
        public void startPrefixMapping(String prefix, String uri) throws SAXException {
        }

        /**
         * Buffers a YouTube and/or Vimeo video for an iframe {@code src}. Sources that are
         * missing, empty, not parseable as a URL, or that carry no final path segment (and
         * therefore no video id) are skipped instead of failing the whole parse.
         */
        private void bufferEmbeddedVideo(String rawSrc) {
            if (rawSrc == null) {
                return;
            }
            // Drop backslash-escaped quotes that can surround the attribute value.
            String src = rawSrc.replaceAll("\\\\\"", "");
            if (src.length() == 0) {
                return;
            }
            boolean youtube = src.contains("youtube.com/embed/");
            boolean vimeo = src.contains("player.vimeo.com");
            if (!youtube && !vimeo) {
                return;
            }
            String videoId = lastPathSegment(src);
            if (videoId == null) {
                return;
            }
            if (youtube) {
                this.linksBuffer.add(new YoutubeVideo("http://www.youtube.com/watch?v=" + videoId, src));
            }
            if (vimeo) {
                this.linksBuffer.add(new VimeoVideo("http://vimeo.com/" + videoId, src));
            }
        }

        /**
         * Buffers an image for an {@code IMG} element. The {@code src} is checked for
         * presence before it is handed to {@link URI}, whose constructor rejects
         * {@code null} with a {@link NullPointerException}.
         */
        private void bufferImage(Attributes atts) {
            String src = atts.getValue("src");
            if (src == null || src.length() == 0) {
                return;
            }
            try {
                new URI(src);
            } catch (URISyntaxException ignored) {
                // Best effort: an image whose source is not a valid URI is not reported.
                return;
            }
            this.linksBuffer.add(new Image(src, atts.getValue("width"), atts.getValue("height"), atts.getValue("alt")));
        }

        /**
         * Returns the last non-empty {@code /}-separated segment of the URL's path, or
         * {@code null} when the URL is malformed or its path has no such segment.
         */
        private String lastPathSegment(String url) {
            String[] segments;
            try {
                segments = new URL(url).getPath().split("/");
            } catch (MalformedURLException ignored) {
                // Best effort: embeds with unparseable sources are skipped.
                return null;
            }
            // split() drops trailing empty strings, so a path of "/" yields an empty array.
            if (segments.length == 0) {
                return null;
            }
            String last = segments[segments.length - 1];
            return last.length() == 0 ? null : last;
        }

        /** Returns {@code true} when the given range is empty or holds only whitespace characters. */
        private boolean isWhitespaceOnly(char[] ch, int start, int length) {
            for (int i = start; i < start + length; i++) {
                if (!Character.isWhitespace(ch[i])) {
                    return false;
                }
            }
            return true;
        }
    }

    /* loaded from: input_file:de/l3s/boilerpipe/sax/MediaExtractor$TagAction.class */
    /**
     * Hooks invoked by {@link Implementation} around the handling of an element's start and
     * end tags. Every hook is a no-op by default; subclasses override only what they need.
     */
    private static abstract class TagAction {
        private TagAction() {
        }

        /** Called before a start tag is handled. */
        void beforeStart(Implementation implementation, String str) {
        }

        /** Called after a start tag has been handled. */
        void afterStart(Implementation implementation, String str) {
        }

        /** Called before an end tag is handled. */
        void beforeEnd(Implementation implementation, String str) {
        }

        /** Called after an end tag has been handled. */
        void afterEnd(Implementation implementation, String str) {
        }
    }

    static {
        TAG_ACTIONS.put("STYLE", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("SCRIPT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("OPTION", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("NOSCRIPT", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("EMBED", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("APPLET", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("LINK", TA_IGNORABLE_ELEMENT);
        TAG_ACTIONS.put("HEAD", TA_IGNORABLE_ELEMENT);
    }

    /**
     * Returns the shared extractor instance.
     *
     * @return the object held in {@link #INSTANCE}
     */
    public static MediaExtractor getInstance() {
        return MediaExtractor.INSTANCE;
    }

    /**
     * Extracts media from HTML supplied as a string by wrapping it in an
     * {@link InputSource} and delegating to {@link #process(TextDocument, InputSource)}.
     *
     * @param textDocument the already classified document
     * @param str the HTML markup to scan
     * @return the media collected by the delegate
     * @throws BoilerpipeProcessingException if the delegate fails
     */
    public List<Media> process(TextDocument textDocument, String str) throws BoilerpipeProcessingException {
        final StringReader markup = new StringReader(str);
        final InputSource source = new InputSource(markup);
        return process(textDocument, source);
    }

    /**
     * Runs a fresh handler over {@code inputSource} and returns the media it collected.
     *
     * @param textDocument the already classified document whose content blocks decide what is reported
     * @param inputSource the HTML to scan
     * @return the handler's list of collected media
     * @throws BoilerpipeProcessingException if parsing fails
     */
    public List<Media> process(TextDocument textDocument, InputSource inputSource) throws BoilerpipeProcessingException {
        final Implementation handler = new Implementation();
        handler.process(textDocument, inputSource);
        final List<Media> collected = handler.linksHighlight;
        return collected;
    }

    /**
     * Fetches the page at {@code url}, builds and classifies its text document with the
     * given extractor, then scans the fetched HTML again for media.
     *
     * @param url the page to fetch
     * @param boilerpipeExtractor the extractor applied to the text document
     * @return the media collected from the fetched page
     * @throws IOException declared for failures while fetching the page
     * @throws BoilerpipeProcessingException if extraction or media parsing fails
     * @throws SAXException declared for failures while building the text document
     */
    public List<Media> process(URL url, BoilerpipeExtractor boilerpipeExtractor) throws IOException, BoilerpipeProcessingException, SAXException {
        final HTMLDocument page = HTMLFetcher.fetch(url);
        final InputSource segmentationSource = page.toInputSource();
        final TextDocument classified = new BoilerpipeSAXInput(segmentationSource).getTextDocument();
        boilerpipeExtractor.process(classified);
        // A second input source is requested for the media pass over the same page.
        final InputSource mediaSource = page.toInputSource();
        return process(classified, mediaSource);
    }

    /**
     * Builds and classifies a text document from the given HTML with the supplied
     * extractor, then scans the same HTML for media.
     *
     * <p>This overload is best effort: any exception raised while building the text
     * document, running the extractor, or scanning for media is swallowed and reported as
     * a {@code null} result, so callers must null-check the return value.
     *
     * @param str the HTML markup to process
     * @param boilerpipeExtractor the extractor applied to the text document
     * @return the collected media, or {@code null} if any processing step failed
     */
    public List<Media> process(String str, BoilerpipeExtractor boilerpipeExtractor) {
        HTMLDocument hTMLDocument = new HTMLDocument(str);
        try {
            TextDocument textDocument = new BoilerpipeSAXInput(hTMLDocument.toInputSource()).getTextDocument();
            boilerpipeExtractor.process(textDocument);
            return process(textDocument, hTMLDocument.toInputSource());
        } catch (Exception ignored) {
            // Kept for backward compatibility: failure is signalled by null, not by throwing.
            return null;
        }
    }
}
