/*
 * (c) Copyright 2010 University of Bristol
 * All rights reserved.
 * [See end of file]
 */
package net.rootdev.javardfa.literal;

import java.io.StringWriter;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import javax.xml.XMLConstants;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import net.rootdev.javardfa.Parser;

/**
 * Buffers XML events while one or more literal values are being collected and,
 * when the end element matching a collection arrives, turns the buffered slice
 * into a lexical form and hands it to the {@link Parser}.
 *
 * <p>Collections may nest: every call to {@link #collect} pushes a new
 * {@code Collector} that remembers the nesting level and the position in the
 * shared event queue at which it started.</p>
 *
 * @author pldms
 */
public class LiteralCollector {

    /** Datatype URI identifying an rdf:XMLLiteral. */
    final String XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";

    /** Active collections, innermost on top. */
    private final Stack<Collector> collectors;
    /** Events seen since the outermost collection began; {@code null} when idle. */
    private List<XMLEvent> queuedEvents;
    /** Element nesting depth relative to the outermost collection. */
    private int level;
    private final Parser parser;
    /** Synthetic start element wrapped around the fragment when serialising XML. */
    private final StartElement fakeEnvelope;
    private final XMLEventFactory eventFactory;
    private final XMLOutputFactory outputFactory;

    /**
     * Creates an idle collector.
     *
     * @param parser        receives the emitted literal triples
     * @param eventFactory  used to build the envelope element and the
     *                      {@code xml:lang} attribute
     * @param outputFactory used to create the stream writer for XML literals
     */
    public LiteralCollector(Parser parser, XMLEventFactory eventFactory, XMLOutputFactory outputFactory) {
        this.parser = parser;
        this.collectors = new Stack<Collector>();
        this.queuedEvents = null;
        this.eventFactory = eventFactory;
        this.outputFactory = outputFactory;
        this.fakeEnvelope = eventFactory.createStartElement(
                XMLConstants.DEFAULT_NS_PREFIX, XMLConstants.NULL_NS_URI, "fake");
    }

    /**
     * @return {@code true} while at least one collection is in progress
     */
    public boolean isCollecting() {
        return !collectors.isEmpty();
    }

    /**
     * @return {@code true} if a collection is in progress and the innermost
     *         one currently has the XMLLiteral datatype
     */
    public boolean isCollectingXML() {
        if (!isCollecting()) {
            return false;
        }
        return XMLLiteral.equals(collectors.peek().datatype);
    }

    /**
     * Starts collecting a literal. If nothing is being collected yet, the event
     * queue and nesting level are reset first.
     *
     * @param subject  subject of the triples to emit
     * @param props    properties for which a triple is emitted
     * @param datatype datatype URI; {@code null} means undecided (it becomes
     *                 XMLLiteral if a start element is seen while this
     *                 collection is innermost), and {@code ""} or {@code null}
     *                 at emission time yields a plain literal
     * @param lang     language passed on for plain literals and XML literals
     */
    public void collect(String subject, Collection<String> props, String datatype, String lang) {
        if (!isCollecting()) { // set up collection
            queuedEvents = new LinkedList<XMLEvent>();
            level = 0;
        }

        Collector coll = new Collector(subject, props, datatype, lang, level, queuedEvents.size());
        collectors.push(coll);
    }

    /**
     * Feeds one event to the collector. Ignored when nothing is being
     * collected; otherwise the event is queued, and start/end elements also
     * update the nesting level and may complete a collection.
     *
     * @param event the next event from the document
     */
    public void handleEvent(XMLEvent event) {
        if (!isCollecting()) {
            return; // nothing to do
        }
        if (event.isStartElement()) {
            handleStartEvent(event);
        } else if (event.isEndElement()) {
            handleEndEvent(event);
        } else {
            queuedEvents.add(event);
        }
    }

    /**
     * Queues a start element and descends one level. An element inside a
     * literal whose datatype is still undecided makes it an XMLLiteral.
     */
    private void handleStartEvent(XMLEvent event) {
        level++;
        queuedEvents.add(event);
        if (collectors.peek().datatype == null) { // undecided so far
            collectors.peek().datatype = XMLLiteral;
        }
    }

    /**
     * Queues an end element; if it is at the level recorded by the innermost
     * collector, that collection is finished and its triples are emitted.
     * The end element is queued before the check, so it is the last event of
     * the emitted slice.
     */
    private void handleEndEvent(XMLEvent event) {
        queuedEvents.add(event);
        if (collectors.peek().level == level) {
            Collector coll = collectors.pop();
            emitTriples(coll, queuedEvents.subList(coll.start, queuedEvents.size()));
            if (collectors.isEmpty()) {
                // Outermost collection is done: drop the buffered events rather
                // than keeping them reachable until the next collect() call.
                queuedEvents = null;
            }
        }
        level--;
    }

    /**
     * Builds the lexical form for a finished collection and passes it to the
     * parser, as a datatyped literal when a non-empty datatype is set and as a
     * plain literal otherwise.
     */
    private void emitTriples(Collector coll, List<XMLEvent> subList) {
        String lex = XMLLiteral.equals(coll.datatype)
                ? gatherXML(subList, coll.lang)
                : gatherText(subList);
        if ((coll.datatype != null) && !"".equals(coll.datatype)) { // not plain
            parser.emitTriplesDatatypeLiteral(coll.subject,
                    coll.props, lex, coll.datatype);
        } else {
            parser.emitTriplesPlainLiteral(coll.subject,
                    coll.props, lex, coll.lang);
        }
    }

    /**
     * Serialises the events as XML, rethrowing any {@link XMLStreamException}
     * as an unchecked exception.
     */
    private String gatherXML(List<XMLEvent> subList, String lang) {
        try {
            return gatherXMLEx(subList, lang);
        } catch (XMLStreamException ex) {
            throw new RuntimeException("Problem gathering XML", ex);
        }
    }

    /**
     * Writes the events inside a synthetic {@code fake} element and returns
     * the text between the first {@code '>'} and the last {@code '<'} of the
     * output, i.e. the serialisation without the envelope tags.
     *
     * <p>No end element is written for the envelope here; presumably the
     * trailing end event of {@code subList} closes it (see
     * {@code handleEndEvent}) - confirm against the caller.</p>
     *
     * @throws XMLStreamException if the writer fails
     */
    private String gatherXMLEx(List<XMLEvent> subList, String lang)
            throws XMLStreamException {
        Attribute xmlLang = (lang == null)
                ? null
                : eventFactory.createAttribute("xml:lang", lang);
        StringWriter sw = new StringWriter();
        XMLStreamWriter out = outputFactory.createXMLStreamWriter(sw);
        XMLEventWriter xmlWriter = new CanonicalXMLEventWriter(out, xmlLang);
        xmlWriter.add(fakeEnvelope); // Some libraries dislike xml fragments
        for (XMLEvent e : subList) {
            xmlWriter.add(e);
        }
        xmlWriter.flush();
        String xml = sw.toString();
        int start = xml.indexOf('>') + 1;
        int end = xml.lastIndexOf('<');
        return xml.substring(start, end); // remove <fake ...></fake>
    }

    /**
     * Concatenates the data of every character event, ignoring all others.
     */
    private String gatherText(List<XMLEvent> subList) {
        StringBuilder sb = new StringBuilder();
        for (XMLEvent e : subList) {
            if (e.isCharacters()) {
                sb.append(e.asCharacters().getData());
            }
        }
        return sb.toString();
    }

    /**
     * State of one in-progress collection: what to emit, plus the nesting
     * level and queue offset at which the collection started.
     */
    final static class Collector {
        private final String subject;
        private final Collection<String> props;
        /** Mutable: may switch from {@code null} to XMLLiteral while collecting. */
        private String datatype;
        private final String lang;
        private final int level;
        /** Index into the shared event queue of this collection's first event. */
        private final int start;

        private Collector(String subject, Collection<String> props, String datatype,
                String lang, int level, int start) {
            this.subject = subject;
            this.props = props;
            this.datatype = datatype;
            this.lang = lang;
            this.level = level;
            this.start = start;
        }

    }

}

/*
 * (c) Copyright 2009 University of Bristol
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */