001    /*
002     * (c) Copyright 2009 University of Bristol
003     * All rights reserved.
004     * [See end of file]
005     */
006    package net.rootdev.javardfa;
007    
008    import net.rootdev.javardfa.uri.URIExtractor10;
009    import net.rootdev.javardfa.uri.URIExtractor;
010    import net.rootdev.javardfa.uri.URIExtractor11;
011    import net.rootdev.javardfa.uri.IRIResolver;
012    import javax.xml.stream.XMLEventFactory;
013    import javax.xml.stream.XMLOutputFactory;
014    import nu.validator.htmlparser.common.XmlViolationPolicy;
015    import nu.validator.htmlparser.sax.HtmlParser;
016    import org.xml.sax.SAXException;
017    import org.xml.sax.XMLReader;
018    import org.xml.sax.helpers.XMLReaderFactory;
019    
020    /**
021     * I use these in a few places. stuck here for simplicity
022     *
023     * @author pldms
024     */
025    public class ParserFactory {
026    
027        public enum Format {
028    
029            HTML, XHTML;
030    
031            public static Format lookup(String format) {
032                if ("xhtml".equalsIgnoreCase(format)) {
033                    return XHTML;
034                }
035                if ("html".equalsIgnoreCase(format)) {
036                    return HTML;
037                }
038                return null;
039            }
040        }
041    
042        /**
043         *
044         * @return An XMLReader with validation turned off
045         * @throws SAXException
046         */
047        public static XMLReader createNonvalidatingReader() throws SAXException {
048            XMLReader reader = XMLReaderFactory.createXMLReader();
049            reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
050            try {
051                reader.setFeature("http://www.xml.org/sax/features/validation", false);
052            } catch (Exception e) {} // continue whether this is recognised
053            return reader;
054        }
055    
056        /**
057         *
058         * @return An HTML 5 XMLReader set up to by fairly forgiving.
059         */
060        public static XMLReader createHTML5Reader() {
061            HtmlParser reader = new HtmlParser();
062            reader.setXmlPolicy(XmlViolationPolicy.ALLOW);
063            reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
064            reader.setMappingLangToXmlLang(false);
065            return reader;
066        }
067    
068        /**
069         * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
070         * to the StatementSink sink. Uses IRI resolver.
071         *
072         * @param sink
073         * @param format
074         * @return
075         * @throws SAXException
076         */
077        public static XMLReader createReaderForFormat(StatementSink sink,
078                Format format, Setting... settings) throws SAXException {
079            return createReaderForFormat(sink, format, new IRIResolver(), settings);
080        }
081    
082        /**
083         * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
084         * to the StatementSink sink.
085         *
086         * @param sink
087         * @param format
088         * @param resolver
089         * @return
090         * @throws SAXException
091         */
092        public static XMLReader createReaderForFormat(StatementSink sink,
093                Format format, Resolver resolver, Setting... settings) throws SAXException {
094            XMLReader reader = getReader(format);
095            boolean is11 = false;
096            for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true;
097            URIExtractor extractor = (is11) ?
098                new URIExtractor11(resolver) : new URIExtractor10(resolver);
099            ProfileCollector profileCollector = (is11) ?
100                new SimpleProfileCollector() : ProfileCollector.EMPTY_COLLECTOR ;
101            Parser parser = getParser(format, sink, extractor, profileCollector);
102            for (Setting setting: settings) parser.enable(setting);
103            reader.setContentHandler(parser);
104            return reader;
105        }
106    
107        private static XMLReader getReader(Format format) throws SAXException {
108            switch (format) {
109                case XHTML:
110                    return ParserFactory.createNonvalidatingReader();
111                default:
112                    return ParserFactory.createHTML5Reader();
113            }
114        }
115    
116        private static Parser getParser(Format format, StatementSink sink,
117                URIExtractor extractor, ProfileCollector profileCollector) {
118            return getParser(format, sink, XMLOutputFactory.newInstance(), 
119                    XMLEventFactory.newInstance(), extractor, profileCollector);
120        }
121    
122        private static Parser getParser(Format format, StatementSink sink,
123                XMLOutputFactory outputFactory, XMLEventFactory eventFactory,
124                URIExtractor extractor, ProfileCollector profileCollector) {
125            switch (format) {
126                case XHTML:
127                    return new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
128                default:
129                    Parser p = new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
130                    p.enable(Setting.ManualNamespaces);
131                    return p;
132            }
133        }
134    }
135    
136    /*
137     * (c) Copyright 2009 University of Bristol
138     * All rights reserved.
139     *
140     * Redistribution and use in source and binary forms, with or without
141     * modification, are permitted provided that the following conditions
142     * are met:
143     * 1. Redistributions of source code must retain the above copyright
144     *    notice, this list of conditions and the following disclaimer.
145     * 2. Redistributions in binary form must reproduce the above copyright
146     *    notice, this list of conditions and the following disclaimer in the
147     *    documentation and/or other materials provided with the distribution.
148     * 3. The name of the author may not be used to endorse or promote products
149     *    derived from this software without specific prior written permission.
150     *
151     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
152     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
153     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
154     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
155     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
156     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
157     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
158     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
159     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
160     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
161     */