package edu.stanford.nlp.util;

import edu.stanford.nlp.io.IOUtils;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.validation.SchemaFactory;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/* loaded from: input_file:edu/stanford/nlp/util/XMLUtils.class */
public class XMLUtils {
    /**
     * Names of the tags that are treated as line-breaking. The {@code isBreaking}
     * overloads test membership in this set. The set is a plain mutable
     * {@link HashSet}, so callers are able to modify it.
     */
    public static final Set<String> breakingTags = new HashSet<String>(Arrays.asList("blockquote", "br", "div", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "li", "ol", "p", "pre", "ul", "tr", "td"));

    /**
     * Matches an ampersand, followed by the shortest non-empty run of characters
     * (excluding line terminators), followed by a semicolon.
     */
    static final Pattern xmlEscapingPattern = Pattern.compile("\\&.+?;");

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/util/XMLUtils$SAXErrorHandler.class */
    public static class SAXErrorHandler implements ErrorHandler {
        /** Instances are created only from within the enclosing class. */
        private SAXErrorHandler() {
        }

        /**
         * Builds a one-sentence description of a parse problem.
         *
         * <p>The result has the form
         * {@code "<str>: <message> at document line L, column C[ in entity from ...]."}.
         * A single trailing period on the exception's message is removed so the
         * sentence does not contain a stray period in the middle. The system ID is
         * reported when present; otherwise the public ID is reported when present.
         *
         * @param str prefix describing the severity, e.g. {@code "Warning"}
         * @param sAXParseException the exception supplying message and location
         * @return the formatted description
         */
        public static String makeBetterErrorString(String str, SAXParseException sAXParseException) {
            String message = sAXParseException.getMessage();
            if (message == null) {
                // getMessage() may legitimately be null; treat it like an empty message.
                message = "";
            }
            // endsWith is safe for the empty string, unlike comparing lastIndexOf(".")
            // with length() - 1 (both are -1 for "", which led to substring(0, -1)).
            if (message.endsWith(".")) {
                message = message.substring(0, message.length() - 1);
            }

            StringBuilder sb = new StringBuilder(str);
            sb.append(": ");
            sb.append(message);
            sb.append(" at document line ").append(sAXParseException.getLineNumber());
            sb.append(", column ").append(sAXParseException.getColumnNumber());
            if (sAXParseException.getSystemId() != null) {
                sb.append(" in entity from systemID ").append(sAXParseException.getSystemId());
            } else if (sAXParseException.getPublicId() != null) {
                sb.append(" in entity from publicID ").append(sAXParseException.getPublicId());
            }
            sb.append(".");
            return sb.toString();
        }

        /** Prints the formatted warning to standard error and continues. */
        @Override // org.xml.sax.ErrorHandler
        public void warning(SAXParseException sAXParseException) {
            System.err.println(makeBetterErrorString("Warning", sAXParseException));
        }

        /** Prints the formatted error to standard error and continues. */
        @Override // org.xml.sax.ErrorHandler
        public void error(SAXParseException sAXParseException) {
            System.err.println(makeBetterErrorString("Error", sAXParseException));
        }

        /**
         * Rethrows a fatal error as a new exception that carries the formatted
         * message and the same public ID, system ID, line and column.
         *
         * @throws SAXParseException always
         */
        @Override // org.xml.sax.ErrorHandler
        public void fatalError(SAXParseException sAXParseException) throws SAXParseException {
            throw new SAXParseException(makeBetterErrorString("Fatal Error", sAXParseException), sAXParseException.getPublicId(), sAXParseException.getSystemId(), sAXParseException.getLineNumber(), sAXParseException.getColumnNumber());
        }
    }

    /* loaded from: input_file:edu/stanford/nlp/util/XMLUtils$XMLTag.class */
    /**
     * A single tag split into its name and attributes. Parsing is done by the
     * constructor using simple string scanning, not a full XML parser.
     */
    public static class XMLTag {
        /** The complete tag text exactly as passed to the constructor. */
        public String text;
        /** The tag name: the content up to the first space, or all of it if there is none. */
        public String name;
        /** Attribute names mapped to their values (quotes removed for quoted values). */
        public Map<String, String> attributes;
        /** True when the character right after the opening {@code <} is {@code /}. */
        public boolean isEndTag;
        /** True when the character right before the closing {@code >} is {@code /}. */
        public boolean isSingleTag;

        /**
         * Parses the given tag text.
         *
         * @param str the full tag, which must start with {@code <} and end with {@code >}
         * @throws NullPointerException if {@code str} is null or empty
         * @throws IllegalArgumentException if {@code str} does not start with
         *         {@code <} or does not end with {@code >}
         */
        public XMLTag(String str) {
            if (str == null || str.length() == 0) {
                throw new NullPointerException("Attempted to parse empty/null tag");
            }
            if (str.charAt(0) != '<') {
                throw new IllegalArgumentException("Tag did not start with <");
            }
            if (str.charAt(str.length() - 1) != '>') {
                throw new IllegalArgumentException("Tag did not end with >");
            }
            this.text = str;
            // i is the index where the tag content begins: after "<" or after "</".
            int i = 1;
            if (str.charAt(1) == '/') {
                i = 2;
                this.isEndTag = true;
            } else {
                this.isEndTag = false;
            }
            // length is the exclusive end of the tag content: before ">" or before "/>".
            int length = str.length() - 1;
            if (str.charAt(str.length() - 2) == '/') {
                length = str.length() - 2;
                this.isSingleTag = true;
            } else {
                this.isSingleTag = false;
            }
            // Everything between the opening and closing markers: name plus attributes.
            String substring = str.substring(i, length);
            this.attributes = new HashMap();
            int findSpace = XMLUtils.findSpace(substring, 0);
            if (findSpace < 0) {
                // No space at all: the whole content is the name and there are no attributes.
                this.name = substring;
                return;
            }
            this.name = substring.substring(0, findSpace);
            // Each pass of this loop consumes one attribute; findSpace tracks the scan position.
            do {
                // Skip characters below '!' (space and control characters) before the attribute name.
                int i2 = findSpace + 1;
                while (i2 < substring.length() && substring.charAt(i2) < '!') {
                    i2++;
                }
                if (i2 == substring.length()) {
                    return;
                }
                // 61 is '='; it separates the attribute name from its value.
                findSpace = substring.indexOf(61, i2);
                if (findSpace < 0) {
                    // No '=' remains: the rest is stored as one attribute with an empty value.
                    this.attributes.put(substring.substring(i2), "");
                    return;
                }
                String trim = substring.substring(i2, findSpace).trim();
                int i3 = findSpace + 1;
                // Stays null when nothing follows the '='; the attribute is then stored with a null value.
                String str2 = null;
                if (substring.length() > i3) {
                    // Skip characters below '!' between '=' and the value.
                    while (i3 < substring.length() && substring.charAt(i3) < '!') {
                        i3++;
                    }
                    if (i3 >= substring.length() || substring.charAt(i3) != '\"') {
                        // Unquoted value: runs up to the position reported by findSpace, or to the end.
                        findSpace = XMLUtils.findSpace(substring, i3);
                        findSpace = findSpace < 0 ? substring.length() : findSpace;
                        str2 = substring.substring(i3, findSpace);
                    } else {
                        // Quoted value: 34 is '"'; take the text up to the closing quote.
                        int i4 = i3 + 1;
                        int indexOf = substring.indexOf(34, i4);
                        if (indexOf < 0) {
                            // Unterminated quote: stop parsing without storing this attribute.
                            return;
                        }
                        str2 = substring.substring(i4, indexOf);
                        findSpace = indexOf + 1;
                    }
                }
                this.attributes.put(trim, str2);
                // Continue only while the scan position is more than three characters from the end.
            } while (findSpace < substring.length() - 3);
        }

        /** Returns the original tag text unchanged. */
        public String toString() {
            return this.text;
        }
    }

    /** Private constructor: the class exposes only static members and is not instantiated. */
    private XMLUtils() {
    }

    /**
     * Creates a non-validating DOM parser with DTD grammar loading and external
     * DTD loading switched off, and with a {@code SAXErrorHandler} installed.
     *
     * @return the configured parser, or {@code null} if the parser could not be
     *         set up (the problem is reported on standard error)
     */
    public static DocumentBuilder getXmlParser() {
        DocumentBuilder parser = null;
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setValidating(false);
            // Both features are turned off, in this order.
            String[] disabledFeatures = {
                "http://apache.org/xml/features/nonvalidating/load-dtd-grammar",
                "http://apache.org/xml/features/nonvalidating/load-external-dtd"
            };
            for (String feature : disabledFeatures) {
                factory.setFeature(feature, false);
            }
            parser = factory.newDocumentBuilder();
            parser.setErrorHandler(new SAXErrorHandler());
        } catch (UnsupportedOperationException apiProblem) {
            System.err.printf("%s: API error while setting up XML parser. Check your JAXP version\n", XMLUtils.class.getName());
            apiProblem.printStackTrace();
        } catch (ParserConfigurationException configProblem) {
            System.err.printf("%s: Unable to create XML parser\n", XMLUtils.class.getName());
            configProblem.printStackTrace();
        }
        return parser;
    }

    /**
     * Creates a DOM parser that validates against the W3C XML Schema stored in
     * the given file, with a {@code SAXErrorHandler} installed.
     *
     * @param file the schema file
     * @return the configured parser, or {@code null} if the parser or the schema
     *         could not be set up (the problem is reported on standard error)
     */
    public static DocumentBuilder getValidatingXmlParser(File file) {
        DocumentBuilder parser = null;
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            SchemaFactory schemaFactory = SchemaFactory.newInstance("http://www.w3.org/2001/XMLSchema");
            factory.setSchema(schemaFactory.newSchema(file));
            parser = factory.newDocumentBuilder();
            parser.setErrorHandler(new SAXErrorHandler());
        } catch (UnsupportedOperationException apiProblem) {
            System.err.printf("%s: API error while setting up XML parser. Check your JAXP version\n", XMLUtils.class.getName());
            apiProblem.printStackTrace();
        } catch (ParserConfigurationException configProblem) {
            System.err.printf("%s: Unable to create XML parser\n", XMLUtils.class.getName());
            configProblem.printStackTrace();
        } catch (SAXException schemaProblem) {
            System.err.printf("%s: XML parsing exception while loading schema %s\n", XMLUtils.class.getName(), file.getPath());
            schemaProblem.printStackTrace();
        }
        return parser;
    }

    /**
     * Returns the text read from {@code reader} with all tags removed.
     *
     * <p>When {@code list} is non-null it is cleared and then filled with one
     * entry per character of the result: for a kept character, its offset in the
     * input (tag characters are counted); for a newline inserted in place of a
     * breaking tag, the negated offset of that tag.
     *
     * <p>An {@link IOException} ends processing: it is reported on standard
     * error and the text collected so far is returned. The read loop is wrapped
     * by a single try block so that a reader which keeps failing (for example a
     * closed one) cannot make this method loop forever.
     *
     * @param reader source of the marked-up text
     * @param list optional output list receiving the offset mapping; may be null
     * @param z if true, a newline is emitted for every tag whose name is breaking
     * @return the text with tags stripped
     */
    public static String stripTags(Reader reader, List<Integer> list, boolean z) {
        if (list != null) {
            list.clear();
        }
        StringBuilder sb = new StringBuilder();
        int position = 0;
        try {
            while (true) {
                String text = readUntilTag(reader);
                for (int k = 0; k < text.length(); k++) {
                    sb.append(text.charAt(k));
                    if (list != null) {
                        list.add(Integer.valueOf(position + k));
                    }
                }
                position += text.length();

                String tag = readTag(reader);
                if (tag == null) {
                    break;
                }
                if (z) {
                    // parseTag returns null for text that is not a complete tag
                    // (e.g. an unterminated tag at end of input); such text never breaks.
                    XMLTag parsed = parseTag(tag);
                    if (parsed != null && isBreaking(parsed)) {
                        sb.append("\n");
                        if (list != null) {
                            list.add(Integer.valueOf(-position));
                        }
                    }
                }
                position += tag.length();
            }
        } catch (IOException e) {
            System.err.println("Error reading string");
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Tells whether the given tag name is one of the line-breaking tags.
     *
     * @param str the tag name to look up
     * @return true if {@code breakingTags} contains {@code str}
     */
    public static boolean isBreaking(String str) {
        final boolean listedAsBreaking = breakingTags.contains(str);
        return listedAsBreaking;
    }

    /**
     * Tells whether the given tag is one of the line-breaking tags, judged by
     * its name.
     *
     * @param xMLTag the parsed tag; may be null (as returned by {@code parseTag}
     *        for malformed text), in which case the result is false
     * @return true if the tag is non-null and {@code breakingTags} contains its name
     */
    public static boolean isBreaking(XMLTag xMLTag) {
        if (xMLTag == null) {
            return false;
        }
        return breakingTags.contains(xMLTag.name);
    }

    /**
     * Reads characters up to the next {@code <} or the end of the stream.
     * The {@code <} itself is consumed but is not part of the result.
     *
     * @param reader the source to read from
     * @return the characters read before the {@code <}; the empty string if the
     *         reader reports that it is not ready
     * @throws IOException if reading fails
     */
    public static String readUntilTag(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        if (reader.ready()) {
            for (int c = reader.read(); c >= 0 && c != '<'; c = reader.read()) {
                text.append((char) c);
            }
        }
        return text.toString();
    }

    /**
     * Reads the next tag with {@code readTag} and parses it into an {@code XMLTag}.
     *
     * @param reader the source to read from
     * @return the parsed tag, or {@code null} if no tag could be read or if
     *         parsing it failed (a failure is reported on standard error)
     * @throws IOException if reading fails
     */
    public static XMLTag readAndParseTag(Reader reader) throws IOException {
        String rawTag = readTag(reader);
        if (rawTag != null) {
            try {
                return new XMLTag(rawTag);
            } catch (Exception e) {
                System.err.println("Failed to handle |" + rawTag + "|");
            }
        }
        return null;
    }

    /**
     * Replaces every match of {@code xmlEscapingPattern} in the input with the
     * single character that {@code translate} returns for it; all other text is
     * copied through unchanged.
     *
     * @param str the text containing entity references
     * @return the text with the matched references replaced
     */
    public static String unescapeStringForXML(String str) {
        StringBuilder out = new StringBuilder();
        Matcher entities = xmlEscapingPattern.matcher(str);
        int copiedUpTo = 0;
        while (entities.find()) {
            // Copy the literal text before the match, then the replacement character.
            out.append(str, copiedUpTo, entities.start());
            out.append(translate(entities.group()));
            copiedUpTo = entities.end();
        }
        out.append(str, copiedUpTo, str.length());
        return out.toString();
    }

    /**
     * Lookup table from entity reference text (including the leading {@code &}
     * and trailing {@code ;}) to its replacement character.
     */
    private static final Map<String, Character> ENTITY_TABLE = buildEntityTable();

    /**
     * Maps an entity reference such as {@code "&amp;"} to a single replacement
     * character. Accented letters without an entry of their own in the Latin-1
     * range are mapped to the plain ASCII letter.
     *
     * <p>Entity names are matched exactly, including the trailing semicolon.
     * Earlier revisions compared against {@code "&laquo; "}, {@code "&shy; "},
     * {@code "&frac14; "}, {@code "&frac34; "} and {@code "&euml; "} (with a
     * trailing space) and {@code "&Sacute"} (without the semicolon), so those
     * references were never recognised; they are now registered under their
     * correct spelling.
     *
     * @param str the entity reference text
     * @return the replacement character, or a space if the reference is unknown
     */
    private static char translate(String str) {
        Character replacement = ENTITY_TABLE.get(str);
        return replacement == null ? ' ' : replacement.charValue();
    }

    /** Registers each name for the given character code, keeping any earlier registration of a name. */
    private static void registerEntity(Map<String, Character> table, int code, String... names) {
        Character replacement = Character.valueOf((char) code);
        for (String name : names) {
            if (!table.containsKey(name)) {
                table.put(name, replacement);
            }
        }
    }

    /** Builds the entity table consulted by {@code translate}. */
    private static Map<String, Character> buildEntityTable() {
        Map<String, Character> t = new HashMap<String, Character>(512);

        // Markup-significant and basic punctuation characters.
        registerEntity(t, '&', "&amp;");
        registerEntity(t, '<', "&lt;", "&Lt;");
        registerEntity(t, '>', "&gt;", "&Gt;");
        registerEntity(t, '\"', "&quot;");
        registerEntity(t, '\'', "&apos;");
        registerEntity(t, '-', "&ast;", "&sharp;");
        registerEntity(t, '=', "&equals;");

        // Latin-1 symbols (codes 160-191).
        registerEntity(t, 160, "&nbsp;");
        registerEntity(t, 161, "&iexcl;");
        registerEntity(t, 162, "&cent;", "&shilling;");
        registerEntity(t, 163, "&pound;");
        registerEntity(t, 164, "&curren;");
        registerEntity(t, 165, "&yen;");
        registerEntity(t, 166, "&brvbar;");
        registerEntity(t, 167, "&sect;");
        registerEntity(t, 168, "&uml;");
        registerEntity(t, 169, "&copy;");
        registerEntity(t, 170, "&ordf;");
        registerEntity(t, 171, "&laquo;");
        registerEntity(t, 172, "&not;");
        registerEntity(t, 173, "&shy;");
        registerEntity(t, 174, "&reg;");
        registerEntity(t, 175, "&macr;");
        registerEntity(t, 176, "&deg;");
        registerEntity(t, 177, "&plusmn;");
        registerEntity(t, 178, "&sup2;");
        registerEntity(t, 179, "&sup3;");
        registerEntity(t, 180, "&acute;");
        registerEntity(t, 181, "&micro;");
        registerEntity(t, 183, "&middot;");
        registerEntity(t, 184, "&cedil;");
        registerEntity(t, 185, "&sup1;");
        registerEntity(t, 186, "&ordm;");
        registerEntity(t, 187, "&raquo;");
        registerEntity(t, 188, "&frac14;");
        registerEntity(t, 189, "&frac12;");
        registerEntity(t, 190, "&frac34;");
        registerEntity(t, 191, "&iquest;");

        // Latin-1 upper-case letters (codes 192-223).
        registerEntity(t, 192, "&Agrave;");
        registerEntity(t, 193, "&Aacute;");
        registerEntity(t, 194, "&Acirc;");
        registerEntity(t, 195, "&Atilde;");
        registerEntity(t, 196, "&Auml;");
        registerEntity(t, 197, "&Aring;");
        registerEntity(t, 198, "&AElig;");
        registerEntity(t, 199, "&Ccedil;");
        registerEntity(t, 200, "&Egrave;");
        registerEntity(t, 201, "&Eacute;");
        registerEntity(t, 202, "&Ecirc;");
        registerEntity(t, 203, "&Euml;");
        registerEntity(t, 204, "&Igrave;");
        registerEntity(t, 205, "&Iacute;");
        registerEntity(t, 206, "&Icirc;");
        registerEntity(t, 207, "&Iuml;");
        registerEntity(t, 208, "&ETH;");
        registerEntity(t, 209, "&Ntilde;");
        registerEntity(t, 210, "&Ograve;");
        registerEntity(t, 211, "&Oacute;");
        registerEntity(t, 212, "&Ocirc;");
        registerEntity(t, 213, "&Otilde;");
        registerEntity(t, 214, "&Ouml;");
        registerEntity(t, 215, "&times;");
        registerEntity(t, 216, "&Oslash;");
        registerEntity(t, 217, "&Ugrave;");
        registerEntity(t, 218, "&Uacute;");
        registerEntity(t, 219, "&Ucirc;");
        registerEntity(t, 220, "&Uuml;");
        registerEntity(t, 221, "&Yacute;");
        registerEntity(t, 222, "&THORN;");
        registerEntity(t, 223, "&szlig;");

        // Latin-1 lower-case letters (codes 224-255).
        registerEntity(t, 224, "&agrave;");
        registerEntity(t, 225, "&aacute;");
        registerEntity(t, 226, "&acirc;");
        registerEntity(t, 227, "&atilde;");
        registerEntity(t, 228, "&auml;");
        registerEntity(t, 229, "&aring;");
        registerEntity(t, 230, "&aelig;");
        registerEntity(t, 231, "&ccedil;");
        registerEntity(t, 232, "&egrave;");
        registerEntity(t, 233, "&eacute;");
        registerEntity(t, 234, "&ecirc;");
        registerEntity(t, 235, "&euml;");
        registerEntity(t, 236, "&igrave;");
        registerEntity(t, 237, "&iacute;");
        registerEntity(t, 238, "&icirc;");
        registerEntity(t, 239, "&iuml;");
        registerEntity(t, 240, "&eth;");
        registerEntity(t, 241, "&ntilde;");
        registerEntity(t, 242, "&ograve;");
        registerEntity(t, 243, "&oacute;");
        registerEntity(t, 244, "&ocirc;");
        registerEntity(t, 245, "&otilde;");
        registerEntity(t, 246, "&ouml;");
        registerEntity(t, 247, "&divide;");
        registerEntity(t, 248, "&oslash;");
        registerEntity(t, 249, "&ugrave;");
        registerEntity(t, 250, "&uacute;");
        registerEntity(t, 251, "&ucirc;");
        registerEntity(t, 252, "&uuml;");
        registerEntity(t, 253, "&yacute;");
        registerEntity(t, 254, "&thorn;");
        registerEntity(t, 255, "&yuml;");

        // Extended Latin letters, spacing modifiers, directional marks and typographic punctuation.
        registerEntity(t, 338, "&OElig;");
        registerEntity(t, 339, "&oelig;");
        registerEntity(t, 352, "&Scaron;");
        registerEntity(t, 353, "&scaron;");
        registerEntity(t, 376, "&Yuml;");
        registerEntity(t, 710, "&circ;");
        registerEntity(t, 732, "&tilde;");
        registerEntity(t, 8206, "&lrm;");
        registerEntity(t, 8207, "&rlm;");
        registerEntity(t, 8211, "&ndash;");
        registerEntity(t, 8212, "&mdash;");
        registerEntity(t, 8216, "&lsquo;");
        registerEntity(t, 8217, "&rsquo;");
        registerEntity(t, 8218, "&sbquo;");
        registerEntity(t, 8220, "&ldquo;", "&bquo;", "&bq;");
        registerEntity(t, 8221, "&rdquo;", "&equo;");
        registerEntity(t, 8222, "&bdquo;");

        // Mathematical symbols, arrows and miscellaneous marks.
        registerEntity(t, 8764, "&sim;");
        registerEntity(t, 8730, "&radic;");
        registerEntity(t, 8804, "&le;");
        registerEntity(t, 8805, "&ge;");
        registerEntity(t, 8592, "&larr;");
        registerEntity(t, 8595, "&darr;");
        registerEntity(t, 8594, "&rarr;");
        registerEntity(t, 8230, "&hellip;");
        registerEntity(t, 8242, "&prime;");
        registerEntity(t, 8243, "&Prime;", "&ins;");
        registerEntity(t, 8482, "&trade;");

        // Greek upper-case letters.
        registerEntity(t, 913, "&Alpha;", "&Agr;");
        registerEntity(t, 914, "&Beta;", "&Bgr;");
        registerEntity(t, 915, "&Gamma;", "&Ggr;");
        registerEntity(t, 916, "&Delta;", "&Dgr;");
        registerEntity(t, 917, "&Epsilon;", "&Egr;");
        registerEntity(t, 918, "&Zeta;", "&Zgr;");
        registerEntity(t, 919, "&Eta;");
        registerEntity(t, 920, "&Theta;", "&THgr;");
        registerEntity(t, 921, "&Iota;", "&Igr;");
        registerEntity(t, 922, "&Kappa;", "&Kgr;");
        registerEntity(t, 923, "&Lambda;", "&Lgr;");
        registerEntity(t, 924, "&Mu;", "&Mgr;");
        registerEntity(t, 925, "&Nu;", "&Ngr;");
        registerEntity(t, 926, "&Xi;", "&Xgr;");
        registerEntity(t, 927, "&Omicron;", "&Ogr;");
        registerEntity(t, 928, "&Pi;", "&Pgr;");
        registerEntity(t, 929, "&Rho;", "&Rgr;");
        registerEntity(t, 931, "&Sigma;", "&Sgr;");
        registerEntity(t, 932, "&Tau;", "&Tgr;");
        registerEntity(t, 933, "&Upsilon;", "&Ugr;");
        registerEntity(t, 934, "&Phi;", "&PHgr;");
        registerEntity(t, 935, "&Chi;", "&KHgr;");
        registerEntity(t, 936, "&Psi;", "&PSgr;");
        registerEntity(t, 937, "&Omega;", "&OHgr;");

        // Greek lower-case letters.
        registerEntity(t, 945, "&alpha;", "&agr;");
        registerEntity(t, 946, "&beta;", "&bgr;");
        registerEntity(t, 947, "&gamma;", "&ggr;");
        registerEntity(t, 948, "&delta;", "&dgr;");
        registerEntity(t, 949, "&epsilon;", "&egr;");
        registerEntity(t, 950, "&zeta;", "&zgr;");
        registerEntity(t, 951, "&eta;", "&eegr;");
        registerEntity(t, 952, "&theta;", "&thgr;");
        registerEntity(t, 953, "&iota;", "&igr;");
        registerEntity(t, 954, "&kappa;", "&kgr;");
        registerEntity(t, 955, "&lambda;", "&lgr;");
        registerEntity(t, 956, "&mu;", "&mgr;");
        registerEntity(t, 957, "&nu;", "&ngr;");
        registerEntity(t, 958, "&xi;", "&xgr;");
        registerEntity(t, 959, "&omicron;", "&ogr;");
        registerEntity(t, 960, "&pi;", "&pgr;");
        registerEntity(t, 961, "&rho;", "&rgr;");
        registerEntity(t, 963, "&sigma;", "&sgr;");
        registerEntity(t, 964, "&tau;", "&tgr;");
        registerEntity(t, 965, "&upsilon;", "&ugr;");
        registerEntity(t, 966, "&phi;", "&phgr;");
        registerEntity(t, 967, "&chi;", "&khgr;");
        registerEntity(t, 968, "&psi;", "&psgr;");
        registerEntity(t, 969, "&omega;", "&ohgr;");

        registerEntity(t, 8226, "&bull;");
        registerEntity(t, '%', "&percnt;");
        registerEntity(t, '+', "&plus;");
        registerEntity(t, '-', "&dash;");

        // Accented letters reduced to their plain ASCII base letter.
        // "&aring;" is already registered above as code 229 and is therefore not repeated here.
        registerEntity(t, 'a', "&abreve;", "&amacr;", "&ape;", "&aogon;");
        registerEntity(t, 'A', "&Amacr;");
        registerEntity(t, 'c', "&cacute;", "&ccaron;", "&ccirc;");
        registerEntity(t, 'C', "&Ccaron;");
        registerEntity(t, 'd', "&dcaron;");
        registerEntity(t, 'e', "&ecaron;", "&emacr;", "&eogon;");
        registerEntity(t, 'E', "&Emacr;", "&Ecaron;");
        registerEntity(t, 'l', "&lacute;");
        registerEntity(t, 'L', "&Lacute;");
        registerEntity(t, 'n', "&nacute;", "&ncaron;", "&ncedil;");
        registerEntity(t, 'r', "&rcaron;", "&racute;");
        registerEntity(t, 'R', "&Rcaron;");
        registerEntity(t, 'o', "&omacr;");
        registerEntity(t, 'i', "&imacr;");
        registerEntity(t, 's', "&sacute;", "&scedil;", "&scirc;");
        registerEntity(t, 'S', "&Sacute;", "&Scedil;");
        registerEntity(t, 't', "&tcaron;", "&tcedil;");
        registerEntity(t, 'u', "&umacr;", "&uring;");
        registerEntity(t, 'w', "&wcirc;");
        registerEntity(t, 'Y', "&Ycirc;");
        registerEntity(t, 'y', "&ycirc;");
        registerEntity(t, 'z', "&zcaron;", "&zacute;");
        registerEntity(t, 'Z', "&Zcaron;");

        registerEntity(t, 9829, "&hearts;");
        registerEntity(t, 8734, "&infin;");
        registerEntity(t, '$', "&dollar;");
        registerEntity(t, 8834, "&sub;", "&lcub;");
        registerEntity(t, 8835, "&sup;", "&rcub;");
        registerEntity(t, '[', "&lsqb;");
        registerEntity(t, ']', "&rsqb;");
        return t;
    }

    /**
     * Escapes the five characters {@code & < > " '} as {@code &amp; &lt; &gt;
     * &quot; &apos;}. Every other character is copied unchanged.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapeXML(String str) {
        StringBuilder escaped = new StringBuilder(str.length());
        for (char c : str.toCharArray()) {
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '\"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes {@code & < >} as {@code &amp; &lt; &gt;}. Quote characters and
     * everything else are copied unchanged.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapeElementXML(String str) {
        StringBuilder escaped = new StringBuilder(str.length());
        for (int pos = 0; pos < str.length(); pos++) {
            char c = str.charAt(pos);
            String entity = c == '&' ? "&amp;" : c == '<' ? "&lt;" : c == '>' ? "&gt;" : null;
            if (entity == null) {
                escaped.append(c);
            } else {
                escaped.append(entity);
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes {@code &} as {@code &amp;} and {@code "} as {@code &quot;}.
     * All other characters, including {@code <}, {@code >} and {@code '}, are
     * copied unchanged.
     *
     * @param str the text to escape
     * @return the escaped text
     */
    public static String escapeAttributeXML(String str) {
        StringBuilder escaped = new StringBuilder(str.length());
        for (char c : str.toCharArray()) {
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '\"':
                    escaped.append("&quot;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes the text between tags with {@code escapeXML} while copying the
     * tags themselves through unchanged.
     *
     * <p>Processing stops when {@code readAndParseTag} returns null, which
     * happens at the end of the input and also when a tag fails to parse. An
     * {@link IOException} likewise ends processing: it is reported on standard
     * error and the text built so far is returned. The loop is enclosed by a
     * single try block so that a persistent read failure cannot cause endless
     * retries.
     *
     * @param str text containing tags
     * @return the text with everything outside tags escaped
     */
    public static String escapeTextAroundXMLTags(String str) {
        StringBuilder sb = new StringBuilder();
        StringReader stringReader = new StringReader(str);
        try {
            while (true) {
                sb.append(escapeXML(readUntilTag(stringReader)));
                XMLTag tag = readAndParseTag(stringReader);
                if (tag == null) {
                    break;
                }
                sb.append(tag.toString());
            }
        } catch (IOException e) {
            System.err.println("Error reading string");
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Finds the first space (U+0020) or no-break space (U+00A0) at or after the
     * given index.
     *
     * <p>The search previously ignored {@code i} and always started at index 0,
     * so callers scanning forward were handed positions behind their current
     * offset.
     *
     * @param str the text to search
     * @param i the index at which the search starts
     * @return the index of the first space or no-break space at or after
     *         {@code i}, or -1 if there is none
     */
    public static int findSpace(String str, int i) {
        int space = str.indexOf(' ', i);
        int nbsp = str.indexOf(160, i); // 160 is U+00A0, the no-break space
        if (space < 0) {
            return nbsp;
        }
        if (nbsp < 0) {
            return space;
        }
        return Math.min(space, nbsp);
    }

    /**
     * Reads the remainder of a tag whose opening {@code <} has already been
     * consumed, up to and including the next {@code >} or the end of the stream.
     *
     * @param reader the source to read from
     * @return the characters read, prefixed with {@code <}; {@code null} if the
     *         reader reports that it is not ready or if nothing could be read
     * @throws IOException if reading fails
     */
    public static String readTag(Reader reader) throws IOException {
        if (!reader.ready()) {
            return null;
        }
        StringBuilder tag = new StringBuilder("<");
        int c;
        while ((c = reader.read()) >= 0) {
            tag.append((char) c);
            if (c == '>') {
                break;
            }
        }
        // Only the "<" prefix is present when the stream was already exhausted.
        return tag.length() == 1 ? null : tag.toString();
    }

    /**
     * Parses tag text into an {@code XMLTag} after checking its delimiters.
     *
     * @param str the tag text
     * @return the parsed tag, or {@code null} if {@code str} is null, empty,
     *         does not start with {@code <} or does not end with {@code >}
     */
    public static XMLTag parseTag(String str) {
        boolean delimited = str != null
                && str.length() != 0
                && str.charAt(0) == '<'
                && str.charAt(str.length() - 1) == '>';
        return delimited ? new XMLTag(str) : null;
    }

    /**
     * Parses the named file into a DOM document using a parser that is not
     * namespace-aware and that has a {@code SAXErrorHandler} installed.
     *
     * <p>The file reader is always closed before returning, whether parsing
     * succeeds or fails; previously it was left open.
     *
     * @param str path of the file to parse
     * @return the parsed document
     * @throws Exception if the file cannot be opened or read, the parser cannot
     *         be created, or the content cannot be parsed
     */
    public static Document readDocumentFromFile(String str) throws Exception {
        FileReader fileReader = new FileReader(str);
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(false);
            DocumentBuilder builder = factory.newDocumentBuilder();
            builder.setErrorHandler(new SAXErrorHandler());
            return builder.parse(new InputSource(fileReader));
        } finally {
            fileReader.close();
        }
    }

    /**
     * Parses the given XML text into a DOM document using a parser that is not
     * namespace-aware. No custom error handler is installed.
     *
     * @param str the XML text
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the text cannot be parsed
     */
    public static Document readDocumentFromString(String str) throws Exception {
        StringReader content = new StringReader(str);
        InputSource source = new InputSource(content);
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(false);
        DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(source);
    }

    /**
     * Command-line entry point for manual testing.
     *
     * <p>With {@code -readDoc file} the file is parsed with
     * {@code readDocumentFromFile} and the resulting document is printed.
     * With a single {@code file} argument the file is scanned and each tag
     * found after the first one is printed.
     *
     * <p>Missing arguments now produce a usage message instead of an
     * {@link ArrayIndexOutOfBoundsException}, and reaching the end of the input
     * ends the scan instead of raising a {@link NullPointerException}
     * ({@code readTag} returns null there).
     *
     * @param strArr command-line arguments
     * @throws Exception if reading or parsing the file fails
     */
    public static void main(String[] strArr) throws Exception {
        boolean readDoc = strArr.length > 0 && strArr[0].equals("-readDoc");
        if (strArr.length == 0 || (readDoc && strArr.length < 2)) {
            System.err.println("Usage: java " + XMLUtils.class.getName() + " [-readDoc] file");
            return;
        }
        if (readDoc) {
            System.out.println(readDocumentFromFile(strArr[1]));
            return;
        }
        StringReader stringReader = new StringReader(IOUtils.slurpFile(strArr[0]));
        String tag = readTag(stringReader);
        while (tag != null && tag.length() > 0) {
            readUntilTag(stringReader);
            tag = readTag(stringReader);
            if (tag == null || tag.length() == 0) {
                return;
            }
            System.out.println("got tag=" + new XMLTag(tag));
        }
    }
}
