001 /*
002 * (c) Copyright 2009 University of Bristol
003 * All rights reserved.
004 * [See end of file]
005 */
006 package net.rootdev.javardfa;
007
008 import net.rootdev.javardfa.uri.URIExtractor10;
009 import net.rootdev.javardfa.uri.URIExtractor;
010 import net.rootdev.javardfa.uri.URIExtractor11;
011 import net.rootdev.javardfa.uri.IRIResolver;
012 import javax.xml.stream.XMLEventFactory;
013 import javax.xml.stream.XMLOutputFactory;
014 import nu.validator.htmlparser.common.XmlViolationPolicy;
015 import nu.validator.htmlparser.sax.HtmlParser;
016 import org.xml.sax.SAXException;
017 import org.xml.sax.XMLReader;
018 import org.xml.sax.helpers.XMLReaderFactory;
019
020 /**
021 * I use these in a few places. stuck here for simplicity
022 *
023 * @author pldms
024 */
025 public class ParserFactory {
026
027 public enum Format {
028
029 HTML, XHTML;
030
031 public static Format lookup(String format) {
032 if ("xhtml".equalsIgnoreCase(format)) {
033 return XHTML;
034 }
035 if ("html".equalsIgnoreCase(format)) {
036 return HTML;
037 }
038 return null;
039 }
040 }
041
042 /**
043 *
044 * @return An XMLReader with validation turned off
045 * @throws SAXException
046 */
047 public static XMLReader createNonvalidatingReader() throws SAXException {
048 XMLReader reader = XMLReaderFactory.createXMLReader();
049 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
050 try {
051 reader.setFeature("http://www.xml.org/sax/features/validation", false);
052 } catch (Exception e) {} // continue whether this is recognised
053 return reader;
054 }
055
056 /**
057 *
058 * @return An HTML 5 XMLReader set up to by fairly forgiving.
059 */
060 public static XMLReader createHTML5Reader() {
061 HtmlParser reader = new HtmlParser();
062 reader.setXmlPolicy(XmlViolationPolicy.ALLOW);
063 reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW);
064 reader.setMappingLangToXmlLang(false);
065 return reader;
066 }
067
068 /**
069 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
070 * to the StatementSink sink. Uses IRI resolver.
071 *
072 * @param sink
073 * @param format
074 * @return
075 * @throws SAXException
076 */
077 public static XMLReader createReaderForFormat(StatementSink sink,
078 Format format, Setting... settings) throws SAXException {
079 return createReaderForFormat(sink, format, new IRIResolver(), settings);
080 }
081
082 /**
083 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed
084 * to the StatementSink sink.
085 *
086 * @param sink
087 * @param format
088 * @param resolver
089 * @return
090 * @throws SAXException
091 */
092 public static XMLReader createReaderForFormat(StatementSink sink,
093 Format format, Resolver resolver, Setting... settings) throws SAXException {
094 XMLReader reader = getReader(format);
095 boolean is11 = false;
096 for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true;
097 URIExtractor extractor = (is11) ?
098 new URIExtractor11(resolver) : new URIExtractor10(resolver);
099 ProfileCollector profileCollector = (is11) ?
100 new SimpleProfileCollector() : ProfileCollector.EMPTY_COLLECTOR ;
101 Parser parser = getParser(format, sink, extractor, profileCollector);
102 for (Setting setting: settings) parser.enable(setting);
103 reader.setContentHandler(parser);
104 return reader;
105 }
106
107 private static XMLReader getReader(Format format) throws SAXException {
108 switch (format) {
109 case XHTML:
110 return ParserFactory.createNonvalidatingReader();
111 default:
112 return ParserFactory.createHTML5Reader();
113 }
114 }
115
116 private static Parser getParser(Format format, StatementSink sink,
117 URIExtractor extractor, ProfileCollector profileCollector) {
118 return getParser(format, sink, XMLOutputFactory.newInstance(),
119 XMLEventFactory.newInstance(), extractor, profileCollector);
120 }
121
122 private static Parser getParser(Format format, StatementSink sink,
123 XMLOutputFactory outputFactory, XMLEventFactory eventFactory,
124 URIExtractor extractor, ProfileCollector profileCollector) {
125 switch (format) {
126 case XHTML:
127 return new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
128 default:
129 Parser p = new Parser(sink, outputFactory, eventFactory, extractor, profileCollector);
130 p.enable(Setting.ManualNamespaces);
131 return p;
132 }
133 }
134 }
135
136 /*
137 * (c) Copyright 2009 University of Bristol
138 * All rights reserved.
139 *
140 * Redistribution and use in source and binary forms, with or without
141 * modification, are permitted provided that the following conditions
142 * are met:
143 * 1. Redistributions of source code must retain the above copyright
144 * notice, this list of conditions and the following disclaimer.
145 * 2. Redistributions in binary form must reproduce the above copyright
146 * notice, this list of conditions and the following disclaimer in the
147 * documentation and/or other materials provided with the distribution.
148 * 3. The name of the author may not be used to endorse or promote products
149 * derived from this software without specific prior written permission.
150 *
151 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
152 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
153 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
154 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
155 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
156 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
157 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
158 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
159 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
160 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
161 */