001 /*
002 * (c) Copyright 2010 University of Bristol
003 * All rights reserved.
004 * [See end of file]
005 */
006 package net.rootdev.javardfa.literal;
007
008 import java.io.StringWriter;
009 import java.util.Collection;
010 import java.util.LinkedList;
011 import java.util.List;
012 import java.util.Stack;
013 import javax.xml.XMLConstants;
014 import javax.xml.stream.XMLEventFactory;
015 import javax.xml.stream.XMLEventWriter;
016 import javax.xml.stream.XMLOutputFactory;
017 import javax.xml.stream.XMLStreamException;
018 import javax.xml.stream.XMLStreamWriter;
019 import javax.xml.stream.events.Attribute;
020 import javax.xml.stream.events.StartElement;
021 import javax.xml.stream.events.XMLEvent;
022 import net.rootdev.javardfa.Parser;
023
024 /**
025 *
026 * @author pldms
027 */
028 public class LiteralCollector {
029
030 final String XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";
031
032 private final Stack<Collector> collectors;
033 private List<XMLEvent> queuedEvents;
034 private int level;
035 private final Parser parser;
036 private final StartElement fakeEnvelope;
037 private final XMLEventFactory eventFactory;
038 private final XMLOutputFactory outputFactory;
039
040 public LiteralCollector(Parser parser, XMLEventFactory eventFactory, XMLOutputFactory outputFactory) {
041 this.parser = parser;
042 this.collectors = new Stack<Collector>();
043 this.queuedEvents = null;
044 this.eventFactory = eventFactory;
045 this.outputFactory = outputFactory;
046 this.fakeEnvelope = eventFactory.createStartElement(XMLConstants.DEFAULT_NS_PREFIX, XMLConstants.NULL_NS_URI, "fake");
047 }
048
049 public boolean isCollecting() { return !collectors.isEmpty(); }
050
051 public boolean isCollectingXML() {
052 if (!isCollecting()) return false;
053 return XMLLiteral.equals(collectors.peek().datatype);
054 }
055
056 public void collect(String subject, Collection<String> props, String datatype, String lang) {
057 if (!isCollecting()) { // set up collection
058 queuedEvents = new LinkedList<XMLEvent>();
059 level = 0;
060 }
061
062 Collector coll = new Collector(subject, props, datatype, lang, level, queuedEvents.size());
063 collectors.push(coll);
064 }
065
066 public void handleEvent(XMLEvent event) {
067 if (!isCollecting()) return; // nothing to do
068 if (event.isStartElement()) handleStartEvent(event);
069 else if (event.isEndElement()) handleEndEvent(event);
070 else queuedEvents.add(event);
071 }
072
073 private void handleStartEvent(XMLEvent event) {
074 level++;
075 queuedEvents.add(event);
076 if (collectors.peek().datatype == null) { // undecided so far
077 collectors.peek().datatype = XMLLiteral;
078 }
079 }
080
081 private void handleEndEvent(XMLEvent event) {
082 queuedEvents.add(event);
083 if (collectors.peek().level == level) {
084 Collector coll = collectors.pop();
085 emitTriples(coll, queuedEvents.subList(coll.start, queuedEvents.size()));
086 }
087 level--;
088 }
089
090 private void emitTriples(Collector coll, List<XMLEvent> subList) {
091 String lex = (XMLLiteral.equals(coll.datatype)) ?
092 gatherXML(subList, coll.lang) :
093 gatherText(subList) ;
094 if ((coll.datatype != null) && !"".equals(coll.datatype)) // not plain
095 parser.emitTriplesDatatypeLiteral(coll.subject,
096 coll.props, lex, coll.datatype);
097 else
098 parser.emitTriplesPlainLiteral(coll.subject,
099 coll.props, lex, coll.lang);
100 }
101
102 private String gatherXML(List<XMLEvent> subList, String lang) {
103 try {
104 return gatherXMLEx(subList, lang);
105 } catch (XMLStreamException ex) {
106 throw new RuntimeException("Problem gathering XML", ex);
107 }
108 }
109
110 private String gatherXMLEx(List<XMLEvent> subList, String lang)
111 throws XMLStreamException {
112 Attribute xmlLang = (lang == null) ?
113 null :
114 eventFactory.createAttribute("xml:lang", lang);
115 StringWriter sw = new StringWriter();
116 XMLStreamWriter out = outputFactory.createXMLStreamWriter(sw);
117 XMLEventWriter xmlWriter = new CanonicalXMLEventWriter(out, xmlLang);
118 xmlWriter.add(fakeEnvelope); // Some libraries dislike xml fragements
119 for (XMLEvent e: subList) {
120 xmlWriter.add(e);
121 }
122 xmlWriter.flush();
123 String xml = sw.toString();
124 int start = xml.indexOf('>') + 1;
125 int end = xml.lastIndexOf('<');
126 return xml.substring(start, end); // remove <fake ...></fake>
127 }
128
129 private String gatherText(List<XMLEvent> subList) {
130 StringBuilder sb = new StringBuilder();
131 for (XMLEvent e: subList) {
132 if (e.isCharacters()) sb.append(e.asCharacters().getData());
133 }
134 return sb.toString();
135 }
136
137 final static class Collector {
138 private final String subject;
139 private final Collection<String> props;
140 private String datatype;
141 private final String lang;
142 private final int level;
143 private final int start;
144
145 private Collector(String subject, Collection<String> props, String datatype,
146 String lang, int level, int start) {
147 this.subject = subject;
148 this.props = props;
149 this.datatype = datatype;
150 this.lang = lang;
151 this.level = level;
152 this.start = start;
153 }
154
155 }
156
157 }
158
159 /*
160 * (c) Copyright 2009 University of Bristol
161 * All rights reserved.
162 *
163 * Redistribution and use in source and binary forms, with or without
164 * modification, are permitted provided that the following conditions
165 * are met:
166 * 1. Redistributions of source code must retain the above copyright
167 * notice, this list of conditions and the following disclaimer.
168 * 2. Redistributions in binary form must reproduce the above copyright
169 * notice, this list of conditions and the following disclaimer in the
170 * documentation and/or other materials provided with the distribution.
171 * 3. The name of the author may not be used to endorse or promote products
172 * derived from this software without specific prior written permission.
173 *
174 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
175 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
176 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
177 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
178 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
179 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
180 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
181 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
182 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
183 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
184 */