001/* 002 * (c) Copyright 2010 University of Bristol 003 * All rights reserved. 004 * [See end of file] 005 */ 006package net.rootdev.javardfa.literal; 007 008import java.io.StringWriter; 009import java.util.Collection; 010import java.util.LinkedList; 011import java.util.List; 012import java.util.Stack; 013import javax.xml.XMLConstants; 014import javax.xml.stream.XMLEventFactory; 015import javax.xml.stream.XMLEventWriter; 016import javax.xml.stream.XMLOutputFactory; 017import javax.xml.stream.XMLStreamException; 018import javax.xml.stream.XMLStreamWriter; 019import javax.xml.stream.events.Attribute; 020import javax.xml.stream.events.StartElement; 021import javax.xml.stream.events.XMLEvent; 022import net.rootdev.javardfa.Parser; 023import net.rootdev.javardfa.Setting; 024 025/** 026 * 027 * @author pldms 028 */ 029public class LiteralCollector { 030 031 final String XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"; 032 033 private final Stack<Collector> collectors; 034 private List<XMLEvent> queuedEvents; 035 private int level; 036 private final Parser parser; 037 private final StartElement fakeEnvelope; 038 private final XMLEventFactory eventFactory; 039 private final XMLOutputFactory outputFactory; 040 041 public LiteralCollector(Parser parser, XMLEventFactory eventFactory, XMLOutputFactory outputFactory) { 042 this.parser = parser; 043 this.collectors = new Stack<Collector>(); 044 this.queuedEvents = null; 045 this.eventFactory = eventFactory; 046 this.outputFactory = outputFactory; 047 this.fakeEnvelope = eventFactory.createStartElement(XMLConstants.DEFAULT_NS_PREFIX, XMLConstants.NULL_NS_URI, "fake"); 048 } 049 050 public boolean isCollecting() { return !collectors.isEmpty(); } 051 052 public boolean isCollectingXML() { 053 if (!isCollecting()) return false; 054 return XMLLiteral.equals(collectors.peek().datatype); 055 } 056 057 public void collect(String subject, Collection<String> props, String datatype, String lang) { 058 if (!isCollecting()) { // set up collection 059 queuedEvents = new LinkedList<XMLEvent>(); 060 level = 0; 061 } 062 063 Collector coll = new Collector(subject, props, datatype, lang, level, queuedEvents.size()); 064 collectors.push(coll); 065 } 066 067 public void handleEvent(XMLEvent event) { 068 if (!isCollecting()) return; // nothing to do 069 if (event.isStartElement()) handleStartEvent(event); 070 else if (event.isEndElement()) handleEndEvent(event); 071 else queuedEvents.add(event); 072 } 073 074 private void handleStartEvent(XMLEvent event) { 075 level++; 076 queuedEvents.add(event); 077 // In 1.0 if no explicit dt given dt determined by content 078 // i.e. if it contains tags we have an xml literal 079 if (!parser.isEnabled(Setting.OnePointOne) && 080 collectors.peek().datatype == null) { // undecided so far 081 collectors.peek().datatype = XMLLiteral; 082 } 083 } 084 085 private void handleEndEvent(XMLEvent event) { 086 queuedEvents.add(event); 087 if (collectors.peek().level == level) { 088 Collector coll = collectors.pop(); 089 emitTriples(coll, queuedEvents.subList(coll.start, queuedEvents.size())); 090 } 091 level--; 092 } 093 094 private void emitTriples(Collector coll, List<XMLEvent> subList) { 095 String lex = (XMLLiteral.equals(coll.datatype)) ? 096 gatherXML(subList, coll.lang) : 097 gatherText(subList) ; 098 if ((coll.datatype != null) && !"".equals(coll.datatype)) // not plain 099 parser.emitTriplesDatatypeLiteral(coll.subject, 100 coll.props, lex, coll.datatype); 101 else 102 parser.emitTriplesPlainLiteral(coll.subject, 103 coll.props, lex, coll.lang); 104 } 105 106 private String gatherXML(List<XMLEvent> subList, String lang) { 107 try { 108 return gatherXMLEx(subList, lang); 109 } catch (XMLStreamException ex) { 110 throw new RuntimeException("Problem gathering XML", ex); 111 } 112 } 113 114 private String gatherXMLEx(List<XMLEvent> subList, String lang) 115 throws XMLStreamException { 116 Attribute xmlLang = (lang == null) ? 117 null : 118 eventFactory.createAttribute("xml:lang", lang); 119 StringWriter sw = new StringWriter(); 120 XMLStreamWriter out = outputFactory.createXMLStreamWriter(sw); 121 XMLEventWriter xmlWriter = new CanonicalXMLEventWriter(out, xmlLang); 122 xmlWriter.add(fakeEnvelope); // Some libraries dislike xml fragements 123 for (XMLEvent e: subList) { 124 xmlWriter.add(e); 125 } 126 xmlWriter.flush(); 127 String xml = sw.toString(); 128 int start = xml.indexOf('>') + 1; 129 int end = xml.lastIndexOf('<'); 130 return xml.substring(start, end); // remove <fake ...></fake> 131 } 132 133 private String gatherText(List<XMLEvent> subList) { 134 StringBuilder sb = new StringBuilder(); 135 for (XMLEvent e: subList) { 136 if (e.isCharacters()) sb.append(e.asCharacters().getData()); 137 } 138 return sb.toString(); 139 } 140 141 final static class Collector { 142 private final String subject; 143 private final Collection<String> props; 144 private String datatype; 145 private final String lang; 146 private final int level; 147 private final int start; 148 149 private Collector(String subject, Collection<String> props, String datatype, 150 String lang, int level, int start) { 151 this.subject = subject; 152 this.props = props; 153 this.datatype = datatype; 154 this.lang = lang; 155 this.level = level; 156 this.start = start; 157 } 158 159 } 160 161} 162 163/* 164 * (c) Copyright 2009 University of Bristol 165 * All rights reserved. 166 * 167 * Redistribution and use in source and binary forms, with or without 168 * modification, are permitted provided that the following conditions 169 * are met: 170 * 1. Redistributions of source code must retain the above copyright 171 * notice, this list of conditions and the following disclaimer. 172 * 2. Redistributions in binary form must reproduce the above copyright 173 * notice, this list of conditions and the following disclaimer in the 174 * documentation and/or other materials provided with the distribution. 175 * 3. The name of the author may not be used to endorse or promote products 176 * derived from this software without specific prior written permission. 177 * 178 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 179 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 180 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 181 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 182 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 183 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 184 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 185 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 186 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 187 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 188 */