001/*
002 * (c) Copyright 2010 University of Bristol
003 * All rights reserved.
004 * [See end of file]
005 */
006package net.rootdev.javardfa.literal;
007
008import java.io.StringWriter;
009import java.util.Collection;
010import java.util.LinkedList;
011import java.util.List;
012import java.util.Stack;
013import javax.xml.XMLConstants;
014import javax.xml.stream.XMLEventFactory;
015import javax.xml.stream.XMLEventWriter;
016import javax.xml.stream.XMLOutputFactory;
017import javax.xml.stream.XMLStreamException;
018import javax.xml.stream.XMLStreamWriter;
019import javax.xml.stream.events.Attribute;
020import javax.xml.stream.events.StartElement;
021import javax.xml.stream.events.XMLEvent;
022import net.rootdev.javardfa.Parser;
023import net.rootdev.javardfa.Setting;
024
025/**
026 *
027 * @author pldms
028 */
029public class LiteralCollector {
030
031    final String XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";
032
033    private final Stack<Collector> collectors;
034    private List<XMLEvent> queuedEvents;
035    private int level;
036    private final Parser parser;
037    private final StartElement fakeEnvelope;
038    private final XMLEventFactory eventFactory;
039    private final XMLOutputFactory outputFactory;
040
041    public LiteralCollector(Parser parser, XMLEventFactory eventFactory, XMLOutputFactory outputFactory) {
042        this.parser = parser;
043        this.collectors = new Stack<Collector>();
044        this.queuedEvents = null;
045        this.eventFactory = eventFactory;
046        this.outputFactory = outputFactory;
047        this.fakeEnvelope = eventFactory.createStartElement(XMLConstants.DEFAULT_NS_PREFIX, XMLConstants.NULL_NS_URI, "fake");
048    }
049
050    public boolean isCollecting() { return !collectors.isEmpty(); }
051
052    public boolean isCollectingXML() {
053        if (!isCollecting()) return false;
054        return XMLLiteral.equals(collectors.peek().datatype);
055    }
056
057    public void collect(String subject, Collection<String> props, String datatype, String lang) {
058        if (!isCollecting()) { // set up collection
059            queuedEvents = new LinkedList<XMLEvent>();
060            level = 0;
061        }
062
063        Collector coll = new Collector(subject, props, datatype, lang, level, queuedEvents.size());
064        collectors.push(coll);
065    }
066
067    public void handleEvent(XMLEvent event) {
068        if (!isCollecting()) return; // nothing to do
069        if (event.isStartElement()) handleStartEvent(event);
070        else if (event.isEndElement()) handleEndEvent(event);
071        else queuedEvents.add(event);
072    }
073
074    private void handleStartEvent(XMLEvent event) {
075        level++;
076        queuedEvents.add(event);
077        // In 1.0 if no explicit dt given dt determined by content
078        // i.e. if it contains tags we have an xml literal
079        if (!parser.isEnabled(Setting.OnePointOne) &&
080            collectors.peek().datatype == null) { // undecided so far
081            collectors.peek().datatype = XMLLiteral;
082        }
083    }
084
085    private void handleEndEvent(XMLEvent event) {
086        queuedEvents.add(event);
087        if (collectors.peek().level == level) { 
088            Collector coll = collectors.pop();
089            emitTriples(coll, queuedEvents.subList(coll.start, queuedEvents.size()));
090        }
091        level--;
092    }
093
094    private void emitTriples(Collector coll, List<XMLEvent> subList) {
095        String lex = (XMLLiteral.equals(coll.datatype)) ?
096            gatherXML(subList, coll.lang) :
097            gatherText(subList) ;
098        if ((coll.datatype != null) && !"".equals(coll.datatype)) // not plain
099            parser.emitTriplesDatatypeLiteral(coll.subject,
100                    coll.props, lex, coll.datatype);
101        else
102            parser.emitTriplesPlainLiteral(coll.subject,
103                    coll.props, lex, coll.lang);
104    }
105
106    private String gatherXML(List<XMLEvent> subList, String lang) {
107        try {
108            return gatherXMLEx(subList, lang);
109        } catch (XMLStreamException ex) {
110            throw new RuntimeException("Problem gathering XML", ex);
111        }
112    }
113
114    private String gatherXMLEx(List<XMLEvent> subList, String lang)
115            throws XMLStreamException {
116        Attribute xmlLang = (lang == null) ?
117            null :
118            eventFactory.createAttribute("xml:lang", lang);
119        StringWriter sw = new StringWriter();
120        XMLStreamWriter out = outputFactory.createXMLStreamWriter(sw);
121        XMLEventWriter xmlWriter = new CanonicalXMLEventWriter(out, xmlLang);
122        xmlWriter.add(fakeEnvelope); // Some libraries dislike xml fragements
123        for (XMLEvent e: subList) {
124            xmlWriter.add(e);
125        }
126        xmlWriter.flush();
127        String xml = sw.toString();
128        int start = xml.indexOf('>') + 1;
129        int end = xml.lastIndexOf('<');
130        return xml.substring(start, end); // remove <fake ...></fake>
131    }
132
133    private String gatherText(List<XMLEvent> subList) {
134        StringBuilder sb = new StringBuilder();
135        for (XMLEvent e: subList) {
136            if (e.isCharacters()) sb.append(e.asCharacters().getData());
137        }
138        return sb.toString();
139    }
140
141    final static class Collector {
142        private final String subject;
143        private final Collection<String> props;
144        private String datatype;
145        private final String lang;
146        private final int level;
147        private final int start;
148
149        private Collector(String subject, Collection<String> props, String datatype,
150                String lang, int level, int start) {
151            this.subject = subject;
152            this.props = props;
153            this.datatype = datatype;
154            this.lang = lang;
155            this.level = level;
156            this.start = start;
157        }
158
159    }
160
161}
162
163/*
164 * (c) Copyright 2009 University of Bristol
165 * All rights reserved.
166 *
167 * Redistribution and use in source and binary forms, with or without
168 * modification, are permitted provided that the following conditions
169 * are met:
170 * 1. Redistributions of source code must retain the above copyright
171 *    notice, this list of conditions and the following disclaimer.
172 * 2. Redistributions in binary form must reproduce the above copyright
173 *    notice, this list of conditions and the following disclaimer in the
174 *    documentation and/or other materials provided with the distribution.
175 * 3. The name of the author may not be used to endorse or promote products
176 *    derived from this software without specific prior written permission.
177 *
178 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
179 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
180 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
181 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
182 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
183 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
184 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
185 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
186 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
187 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
188 */