001 /* 002 * (c) Copyright 2009 University of Bristol 003 * All rights reserved. 004 * [See end of file] 005 */ 006 package net.rootdev.javardfa; 007 008 import net.rootdev.javardfa.uri.URIExtractor10; 009 import net.rootdev.javardfa.uri.URIExtractor; 010 import net.rootdev.javardfa.uri.URIExtractor11; 011 import net.rootdev.javardfa.uri.IRIResolver; 012 import javax.xml.stream.XMLEventFactory; 013 import javax.xml.stream.XMLOutputFactory; 014 import nu.validator.htmlparser.common.XmlViolationPolicy; 015 import nu.validator.htmlparser.sax.HtmlParser; 016 import org.xml.sax.SAXException; 017 import org.xml.sax.XMLReader; 018 import org.xml.sax.helpers.XMLReaderFactory; 019 020 /** 021 * I use these in a few places. stuck here for simplicity 022 * 023 * @author pldms 024 */ 025 public class ParserFactory { 026 027 public enum Format { 028 029 HTML, XHTML; 030 031 public static Format lookup(String format) { 032 if ("xhtml".equalsIgnoreCase(format)) { 033 return XHTML; 034 } 035 if ("html".equalsIgnoreCase(format)) { 036 return HTML; 037 } 038 return null; 039 } 040 } 041 042 /** 043 * 044 * @return An XMLReader with validation turned off 045 * @throws SAXException 046 */ 047 public static XMLReader createNonvalidatingReader() throws SAXException { 048 XMLReader reader = XMLReaderFactory.createXMLReader(); 049 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 050 try { 051 reader.setFeature("http://www.xml.org/sax/features/validation", false); 052 } catch (Exception e) {} // continue whether this is recognised 053 return reader; 054 } 055 056 /** 057 * 058 * @return An HTML 5 XMLReader set up to by fairly forgiving. 059 */ 060 public static XMLReader createHTML5Reader() { 061 HtmlParser reader = new HtmlParser(); 062 reader.setXmlPolicy(XmlViolationPolicy.ALLOW); 063 reader.setXmlnsPolicy(XmlViolationPolicy.ALLOW); 064 reader.setMappingLangToXmlLang(false); 065 return reader; 066 } 067 068 /** 069 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed 070 * to the StatementSink sink. Uses IRI resolver. 071 * 072 * @param sink 073 * @param format 074 * @return 075 * @throws SAXException 076 */ 077 public static XMLReader createReaderForFormat(StatementSink sink, 078 Format format, Setting... settings) throws SAXException { 079 return createReaderForFormat(sink, format, new IRIResolver(), settings); 080 } 081 082 /** 083 * Makes an XMLReader appropriate to the format, with an rdfa parser plumbed 084 * to the StatementSink sink. 085 * 086 * @param sink 087 * @param format 088 * @param resolver 089 * @return 090 * @throws SAXException 091 */ 092 public static XMLReader createReaderForFormat(StatementSink sink, 093 Format format, Resolver resolver, Setting... settings) throws SAXException { 094 XMLReader reader = getReader(format); 095 boolean is11 = false; 096 for (Setting setting: settings) if (setting == Setting.OnePointOne) is11 = true; 097 URIExtractor extractor = (is11) ? 098 new URIExtractor11(resolver) : new URIExtractor10(resolver); 099 ProfileCollector profileCollector = (is11) ? 100 new SimpleProfileCollector() : ProfileCollector.EMPTY_COLLECTOR ; 101 Parser parser = getParser(format, sink, extractor, profileCollector); 102 for (Setting setting: settings) parser.enable(setting); 103 reader.setContentHandler(parser); 104 return reader; 105 } 106 107 private static XMLReader getReader(Format format) throws SAXException { 108 switch (format) { 109 case XHTML: 110 return ParserFactory.createNonvalidatingReader(); 111 default: 112 return ParserFactory.createHTML5Reader(); 113 } 114 } 115 116 private static Parser getParser(Format format, StatementSink sink, 117 URIExtractor extractor, ProfileCollector profileCollector) { 118 return getParser(format, sink, XMLOutputFactory.newInstance(), 119 XMLEventFactory.newInstance(), extractor, profileCollector); 120 } 121 122 private static Parser getParser(Format format, StatementSink sink, 123 XMLOutputFactory outputFactory, XMLEventFactory eventFactory, 124 URIExtractor extractor, ProfileCollector profileCollector) { 125 switch (format) { 126 case XHTML: 127 return new Parser(sink, outputFactory, eventFactory, extractor, profileCollector); 128 default: 129 Parser p = new Parser(sink, outputFactory, eventFactory, extractor, profileCollector); 130 p.enable(Setting.ManualNamespaces); 131 return p; 132 } 133 } 134 } 135 136 /* 137 * (c) Copyright 2009 University of Bristol 138 * All rights reserved. 139 * 140 * Redistribution and use in source and binary forms, with or without 141 * modification, are permitted provided that the following conditions 142 * are met: 143 * 1. Redistributions of source code must retain the above copyright 144 * notice, this list of conditions and the following disclaimer. 145 * 2. Redistributions in binary form must reproduce the above copyright 146 * notice, this list of conditions and the following disclaimer in the 147 * documentation and/or other materials provided with the distribution. 148 * 3. The name of the author may not be used to endorse or promote products 149 * derived from this software without specific prior written permission. 150 * 151 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 152 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 153 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 154 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 155 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 156 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 157 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 158 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 159 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 160 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 161 */