001/* 002 * (c) Copyright 2009 University of Bristol 003 * All rights reserved. 004 * [See end of file] 005 */ 006package net.rootdev.javardfa; 007 008import net.rootdev.javardfa.uri.URIExtractor10; 009import net.rootdev.javardfa.uri.URIExtractor; 010import net.rootdev.javardfa.uri.URIExtractor11; 011import net.rootdev.javardfa.uri.IRIResolver; 012import javax.xml.stream.XMLEventFactory; 013import javax.xml.stream.XMLOutputFactory; 014import nu.validator.htmlparser.common.XmlViolationPolicy; 015import nu.validator.htmlparser.sax.HtmlParser; 016import org.xml.sax.SAXException; 017import org.xml.sax.XMLReader; 018import org.xml.sax.helpers.XMLReaderFactory; 019 020/** 021 * I use these in a few places. stuck here for simplicity 022 * 023 * @author pldms 024 */ 025public class ParserFactory { 026 027 public enum Format { 028 029 HTML, XHTML; 030 031 public static Format lookup(String format) { 032 if ("xhtml".equalsIgnoreCase(format)) { 033 return XHTML; 034 } 035 if ("html".equalsIgnoreCase(format)) { 036 return HTML; 037 } 038 return null; 039 } 040 } 041 042 /** 043 * 044 * @return An XMLReader with validation turned off 045 * @throws SAXException 046 */ 047 public static XMLReader createNonvalidatingReader() throws SAXException { 048 XMLReader reader = XMLReaderFactory.createXMLReader(); 049 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 050 try { 051 reader.setFeature("http://www.xml.org/sax/features/validation", false); 052 } catch (Exception e) {} // continue whether this is recognised 053 return reader; 054 } 055 056 /** 057 * 058 * @return An HTML 5 XMLReader set up to by fairly forgiving. 059 */ 060 public static XMLReader createHTML5Reader() { 061 HtmlParser reader = new HtmlParser(); 062 reader.setXmlPolicy(XmlViolationPolicy.ALLOW); 063 reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW); 064 reader.setMappingLangToXmlLang(false); 065 return reader; 066 } 067 068 /** 069 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed 070 * to the StatementSink sink. Uses IRI resolver. 071 * 072 * @param sink 073 * @param format 074 * @return 075 * @throws SAXException 076 */ 077 public static XMLReader createReaderForFormat(StatementSink sink, 078 Format format, Setting... settings) throws SAXException { 079 return createReaderForFormat(sink, format, new IRIResolver(), settings); 080 } 081 082 /** 083 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed 084 * to the StatementSink sink. 085 * 086 * @param sink 087 * @param format 088 * @param resolver 089 * @return 090 * @throws SAXException 091 */ 092 public static XMLReader createReaderForFormat(StatementSink sink, 093 Format format, Resolver resolver, Setting... settings) throws SAXException { 094 XMLReader reader = getReader(format); 095 boolean is11 = false; 096 for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true; 097 URIExtractor extractor = (is11) ? 098 new URIExtractor11(resolver) : new URIExtractor10(resolver); 099 Parser parser = getParser(format, sink, extractor); 100 for (Setting setting: settings) parser.enable(setting); 101 reader.setContentHandler(parser); 102 reader.setErrorHandler(parser); 103 return reader; 104 } 105 106 private static XMLReader getReader(Format format) throws SAXException { 107 switch (format) { 108 case XHTML: 109 return ParserFactory.createNonvalidatingReader(); 110 default: 111 return ParserFactory.createHTML5Reader(); 112 } 113 } 114 115 private static Parser getParser(Format format, StatementSink sink, 116 URIExtractor extractor) { 117 return getParser(format, sink, XMLOutputFactory.newInstance(), 118 XMLEventFactory.newInstance(), extractor); 119 } 120 121 private static Parser getParser(Format format, StatementSink sink, 122 XMLOutputFactory outputFactory, XMLEventFactory eventFactory, 123 URIExtractor extractor) { 124 switch (format) { 125 case XHTML: 126 return new Parser(sink, outputFactory, eventFactory, extractor); 127 default: 128 Parser p = new Parser(sink, outputFactory, eventFactory, extractor); 129 p.enable(Setting.ManualNamespaces); 130 return p; 131 } 132 } 133} 134 135/* 136 * (c) Copyright 2009 University of Bristol 137 * All rights reserved. 138 * 139 * Redistribution and use in source and binary forms, with or without 140 * modification, are permitted provided that the following conditions 141 * are met: 142 * 1. Redistributions of source code must retain the above copyright 143 * notice, this list of conditions and the following disclaimer. 144 * 2. Redistributions in binary form must reproduce the above copyright 145 * notice, this list of conditions and the following disclaimer in the 146 * documentation and/or other materials provided with the distribution. 147 * 3. The name of the author may not be used to endorse or promote products 148 * derived from this software without specific prior written permission. 149 * 150 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 151 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 152 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 153 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 154 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 155 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 156 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 157 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 158 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 159 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 160 */