001/*
002 * (c) Copyright 2009 University of Bristol
003 * All rights reserved.
004 * [See end of file]
005 */
006package net.rootdev.javardfa;
007
008import net.rootdev.javardfa.uri.URIExtractor10;
009import net.rootdev.javardfa.uri.URIExtractor;
010import net.rootdev.javardfa.uri.URIExtractor11;
011import net.rootdev.javardfa.uri.IRIResolver;
012import javax.xml.stream.XMLEventFactory;
013import javax.xml.stream.XMLOutputFactory;
014import nu.validator.htmlparser.common.XmlViolationPolicy;
015import nu.validator.htmlparser.sax.HtmlParser;
016import org.xml.sax.SAXException;
017import org.xml.sax.XMLReader;
018import org.xml.sax.helpers.XMLReaderFactory;
019
020/**
021 * I use these in a few places. stuck here for simplicity
022 *
023 * @author pldms
024 */
025public class ParserFactory {
026
027    public enum Format {
028
029        HTML, XHTML;
030
031        public static Format lookup(String format) {
032            if ("xhtml".equalsIgnoreCase(format)) {
033                return XHTML;
034            }
035            if ("html".equalsIgnoreCase(format)) {
036                return HTML;
037            }
038            return null;
039        }
040    }
041
042    /**
043     *
044     * @return An XMLReader with validation turned off
045     * @throws SAXException
046     */
047    public static XMLReader createNonvalidatingReader() throws SAXException {
048        XMLReader reader = XMLReaderFactory.createXMLReader();
049        reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
050        try {
051            reader.setFeature("http://www.xml.org/sax/features/validation", false);
052        } catch (Exception e) {} // continue whether this is recognised
053        return reader;
054    }
055
056    /**
057     *
058     * @return An HTML 5 XMLReader set up to by fairly forgiving.
059     */
060    public static XMLReader createHTML5Reader() {
061        HtmlParser reader = new HtmlParser();
062        reader.setXmlPolicy(XmlViolationPolicy.ALLOW);
063        reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
064        reader.setMappingLangToXmlLang(false);
065        return reader;
066    }
067
068    /**
069     * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
070     * to the StatementSink sink. Uses IRI resolver.
071     *
072     * @param sink
073     * @param format
074     * @return
075     * @throws SAXException
076     */
077    public static XMLReader createReaderForFormat(StatementSink sink,
078            Format format, Setting... settings) throws SAXException {
079        return createReaderForFormat(sink, format, new IRIResolver(), settings);
080    }
081
082    /**
083     * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
084     * to the StatementSink sink.
085     *
086     * @param sink
087     * @param format
088     * @param resolver
089     * @return
090     * @throws SAXException
091     */
092    public static XMLReader createReaderForFormat(StatementSink sink,
093            Format format, Resolver resolver, Setting... settings) throws SAXException {
094        XMLReader reader = getReader(format);
095        boolean is11 = false;
096        for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true;
097        URIExtractor extractor = (is11) ?
098            new URIExtractor11(resolver) : new URIExtractor10(resolver);
099        Parser parser = getParser(format, sink, extractor);
100        for (Setting setting: settings) parser.enable(setting);
101        reader.setContentHandler(parser);
102        reader.setErrorHandler(parser);
103        return reader;
104    }
105
106    private static XMLReader getReader(Format format) throws SAXException {
107        switch (format) {
108            case XHTML:
109                return ParserFactory.createNonvalidatingReader();
110            default:
111                return ParserFactory.createHTML5Reader();
112        }
113    }
114
115    private static Parser getParser(Format format, StatementSink sink,
116            URIExtractor extractor) {
117        return getParser(format, sink, XMLOutputFactory.newInstance(), 
118                XMLEventFactory.newInstance(), extractor);
119    }
120
121    private static Parser getParser(Format format, StatementSink sink,
122            XMLOutputFactory outputFactory, XMLEventFactory eventFactory,
123            URIExtractor extractor) {
124        switch (format) {
125            case XHTML:
126                return new Parser(sink, outputFactory, eventFactory, extractor);
127            default:
128                Parser p = new Parser(sink, outputFactory, eventFactory, extractor);
129                p.enable(Setting.ManualNamespaces);
130                return p;
131        }
132    }
133}
134
135/*
136 * (c) Copyright 2009 University of Bristol
137 * All rights reserved.
138 *
139 * Redistribution and use in source and binary forms, with or without
140 * modification, are permitted provided that the following conditions
141 * are met:
142 * 1. Redistributions of source code must retain the above copyright
143 *    notice, this list of conditions and the following disclaimer.
144 * 2. Redistributions in binary form must reproduce the above copyright
145 *    notice, this list of conditions and the following disclaimer in the
146 *    documentation and/or other materials provided with the distribution.
147 * 3. The name of the author may not be used to endorse or promote products
148 *    derived from this software without specific prior written permission.
149 *
150 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
151 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
152 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
153 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
154 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
155 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
156 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
157 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
158 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
159 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
160 */