001    /*
002     * (c) Copyright 2010 University of Bristol
003     * All rights reserved.
004     * [See end of file]
005     */
006    package net.rootdev.javardfa.literal;
007    
import java.io.StringWriter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import javax.xml.XMLConstants;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import net.rootdev.javardfa.Parser;
023    
024    /**
025     *
026     * @author pldms
027     */
028    public class LiteralCollector {
029    
030        final String XMLLiteral = "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";
031    
032        private final Stack<Collector> collectors;
033        private List<XMLEvent> queuedEvents;
034        private int level;
035        private final Parser parser;
036        private final StartElement fakeEnvelope;
037        private final XMLEventFactory eventFactory;
038        private final XMLOutputFactory outputFactory;
039    
040        public LiteralCollector(Parser parser, XMLEventFactory eventFactory, XMLOutputFactory outputFactory) {
041            this.parser = parser;
042            this.collectors = new Stack<Collector>();
043            this.queuedEvents = null;
044            this.eventFactory = eventFactory;
045            this.outputFactory = outputFactory;
046            this.fakeEnvelope = eventFactory.createStartElement(XMLConstants.DEFAULT_NS_PREFIX, XMLConstants.NULL_NS_URI, "fake");
047        }
048    
049        public boolean isCollecting() { return !collectors.isEmpty(); }
050    
051        public boolean isCollectingXML() {
052            if (!isCollecting()) return false;
053            return XMLLiteral.equals(collectors.peek().datatype);
054        }
055    
056        public void collect(String subject, Collection<String> props, String datatype, String lang) {
057            if (!isCollecting()) { // set up collection
058                queuedEvents = new LinkedList<XMLEvent>();
059                level = 0;
060            }
061    
062            Collector coll = new Collector(subject, props, datatype, lang, level, queuedEvents.size());
063            collectors.push(coll);
064        }
065    
066        public void handleEvent(XMLEvent event) {
067            if (!isCollecting()) return; // nothing to do
068            if (event.isStartElement()) handleStartEvent(event);
069            else if (event.isEndElement()) handleEndEvent(event);
070            else queuedEvents.add(event);
071        }
072    
073        private void handleStartEvent(XMLEvent event) {
074            level++;
075            queuedEvents.add(event);
076            if (collectors.peek().datatype == null) { // undecided so far
077                collectors.peek().datatype = XMLLiteral;
078            }
079        }
080    
081        private void handleEndEvent(XMLEvent event) {
082            queuedEvents.add(event);
083            if (collectors.peek().level == level) { 
084                Collector coll = collectors.pop();
085                emitTriples(coll, queuedEvents.subList(coll.start, queuedEvents.size()));
086            }
087            level--;
088        }
089    
090        private void emitTriples(Collector coll, List<XMLEvent> subList) {
091            String lex = (XMLLiteral.equals(coll.datatype)) ?
092                gatherXML(subList, coll.lang) :
093                gatherText(subList) ;
094            if ((coll.datatype != null) && !"".equals(coll.datatype)) // not plain
095                parser.emitTriplesDatatypeLiteral(coll.subject,
096                        coll.props, lex, coll.datatype);
097            else
098                parser.emitTriplesPlainLiteral(coll.subject,
099                        coll.props, lex, coll.lang);
100        }
101    
102        private String gatherXML(List<XMLEvent> subList, String lang) {
103            try {
104                return gatherXMLEx(subList, lang);
105            } catch (XMLStreamException ex) {
106                throw new RuntimeException("Problem gathering XML", ex);
107            }
108        }
109    
110        private String gatherXMLEx(List<XMLEvent> subList, String lang)
111                throws XMLStreamException {
112            Attribute xmlLang = (lang == null) ?
113                null :
114                eventFactory.createAttribute("xml:lang", lang);
115            StringWriter sw = new StringWriter();
116            XMLStreamWriter out = outputFactory.createXMLStreamWriter(sw);
117            XMLEventWriter xmlWriter = new CanonicalXMLEventWriter(out, xmlLang);
118            xmlWriter.add(fakeEnvelope); // Some libraries dislike xml fragements
119            for (XMLEvent e: subList) {
120                xmlWriter.add(e);
121            }
122            xmlWriter.flush();
123            String xml = sw.toString();
124            int start = xml.indexOf('>') + 1;
125            int end = xml.lastIndexOf('<');
126            return xml.substring(start, end); // remove <fake ...></fake>
127        }
128    
129        private String gatherText(List<XMLEvent> subList) {
130            StringBuilder sb = new StringBuilder();
131            for (XMLEvent e: subList) {
132                if (e.isCharacters()) sb.append(e.asCharacters().getData());
133            }
134            return sb.toString();
135        }
136    
137        final static class Collector {
138            private final String subject;
139            private final Collection<String> props;
140            private String datatype;
141            private final String lang;
142            private final int level;
143            private final int start;
144    
145            private Collector(String subject, Collection<String> props, String datatype,
146                    String lang, int level, int start) {
147                this.subject = subject;
148                this.props = props;
149                this.datatype = datatype;
150                this.lang = lang;
151                this.level = level;
152                this.start = start;
153            }
154    
155        }
156    
157    }
158    
159    /*
160     * (c) Copyright 2009 University of Bristol
161     * All rights reserved.
162     *
163     * Redistribution and use in source and binary forms, with or without
164     * modification, are permitted provided that the following conditions
165     * are met:
166     * 1. Redistributions of source code must retain the above copyright
167     *    notice, this list of conditions and the following disclaimer.
168     * 2. Redistributions in binary form must reproduce the above copyright
169     *    notice, this list of conditions and the following disclaimer in the
170     *    documentation and/or other materials provided with the distribution.
171     * 3. The name of the author may not be used to endorse or promote products
172     *    derived from this software without specific prior written permission.
173     *
174     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
175     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
176     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
177     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
178     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
179     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
180     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
181     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
182     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
183     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
184     */