/*
    WikiContent2HTML - Servlet to export a JSPWiki page into a clean HTML file.
    Copyright (C) 2009 Kyriakos Galatis (kakaouskia@gmail.com)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package com.kyriakosgalatis.jspwiki;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;

import com.ecyrd.jspwiki.WikiContext;
import com.ecyrd.jspwiki.WikiEngine;
import com.ecyrd.jspwiki.WikiPage;

/**
 * Servlet to convert a JSPWiki HTML page into a clean HTML page.
 *
 * <p>The purpose of this servlet is to enable clean HTML output of a JSPWiki document. This
 * resolves wrong formatting of tables and other HTML content, usually entered by copying from
 * external sources or from other plugins.</p>
 *
 * <p>Parameters:</p>
 * <ul>
 *   <li><code>page</code> - (required) name of the wiki page to export.</li>
 *   <li><code>encoding</code> - (optional) character encoding of the output; defaults to the
 *       character encoding of the servlet response.</li>
 *   <li><code>tidy</code> - (optional) <code>YES</code> (the default) to clean up the HTML with
 *       Tidy; any other value sends the page as generated.</li>
 * </ul>
 *
 * <p>Installation:</p>
 * <ol>
 *   <li>Copy <code>wikicontent2html-jar-with-dependencies.jar</code> to
 *       <code>/WEB-INF/lib</code> of your JSPWiki installation.</li>
 *   <li>Enter the following in the <code>web.xml</code> file:
 * <pre>
 * &lt;servlet&gt;
 *     &lt;servlet-name&gt;WikiContent2HTMLServlet&lt;/servlet-name&gt;
 *     &lt;servlet-class&gt;com.kyriakosgalatis.jspwiki.WikiContent2HTMLServlet&lt;/servlet-class&gt;
 * &lt;/servlet&gt;
 *
 * &lt;servlet-mapping&gt;
 *     &lt;servlet-name&gt;WikiContent2HTMLServlet&lt;/servlet-name&gt;
 *     &lt;url-pattern&gt;/wiki.html&lt;/url-pattern&gt;
 * &lt;/servlet-mapping&gt;
 * </pre></li>
 *   <li>Add the call to the servlet somewhere on your page;
 *       <code>templates/default/Favorites.jsp</code> is my preferred place, as it gives an
 *       option to the More... menu. For example (adapt the page name expression to your
 *       template):
 * <pre>
 * &lt;li&gt;&lt;a href="wiki.html?page=PAGE_NAME"&gt;View as HTML&lt;/a&gt;&lt;/li&gt;
 * </pre></li>
 *   <li>Create the file <code>default.properties</code> in the <code>/templates/</code> folder
 *       of the JSPWiki installation (if it does not already exist) and add the following
 *       content:
 * <pre>
 * actions.viewhtml=View Page HTML
 * actions.viewhtml.title=View HTML
 * </pre></li>
 * </ol>
 *
 * @author Kyriakos Galatis
 * @version 1.0
 */
public class WikiContent2HTMLServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    /** Logging mechanism. */
    protected Logger log = Logger.getLogger(this.getClass().getName());

    /** Wiki engine obtained from the servlet configuration in {@link #init(ServletConfig)}. */
    protected WikiEngine eng_wikiEngine;

    /** Opening markup of the generated document, up to and including the title start tag. */
    protected static final String HTML_HEADER = "<html><head><title>";

    /** Closing markup of the generated document. */
    protected static final String HTML_TRAILER = "</body></html>";

    /** Value assumed for the <code>tidy</code> parameter when the request does not supply it. */
    protected static final String DEFAULT_TIDY_OPTION = "YES";

    /** Markup that closes the title and the head and opens the body. */
    private static final String HTML_TITLE_END = "</title></head><body>";

    /** Value of the <code>tidy</code> parameter that enables the cleanup step. */
    private static final String TIDY_ENABLED = "YES";

    /**
     * Servlet initialization: stores the wiki engine bound to the servlet configuration.
     *
     * @param config the servlet configuration supplied by the container
     * @throws ServletException if the superclass initialization fails
     * @see javax.servlet.Servlet#init(javax.servlet.ServletConfig)
     */
    @Override
    public void init(final ServletConfig config) throws ServletException {
        super.init(config);
        eng_wikiEngine = WikiEngine.getInstance(config);
    } // End of init() method

    /**
     * Renders the requested wiki page as a stand-alone HTML document and writes it to the
     * response.
     *
     * <p>Reads the request parameters <code>page</code>, <code>encoding</code> and
     * <code>tidy</code>. A missing page name or an unsupported encoding is answered with
     * HTTP 400. If the access check on the wiki context fails, nothing further is written by
     * this method.</p>
     *
     * @param request  the HTTP request
     * @param response the HTTP response that receives the HTML document
     * @throws IOException      if writing the response fails
     * @throws ServletException if the cleanup step fails
     * @see javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest,
     *      javax.servlet.http.HttpServletResponse)
     */
    @Override
    public void doGet(final HttpServletRequest request, HttpServletResponse response)
            throws IOException, ServletException {

        // Get the name of the page; a missing name is a client error, not a server failure.
        String pageName = request.getParameter("page");
        if (pageName == null || pageName.trim().length() == 0) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Invalid parameter \"page\".");
            return;
        }

        // Get the Wiki Context and let it perform the access check.
        WikiContext context =
                new WikiContext(eng_wikiEngine, request, new WikiPage(eng_wikiEngine, pageName));
        if (!context.hasAccess(response, true)) {
            return;
        }

        // Get encoding option, default to the character encoding of the response.
        String encoding = request.getParameter("encoding");
        if (encoding == null || encoding.trim().length() == 0) {
            encoding = response.getCharacterEncoding();
        }
        encoding = encoding.trim();
        // Validate before use: the name ends up in a response header and in getBytes().
        if (!isSupportedEncoding(encoding)) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                    "Invalid parameter \"encoding\".");
            return;
        }

        // Get Tidy option.
        String tidyOption = request.getParameter("tidy");
        if (tidyOption == null || tidyOption.trim().length() == 0) {
            tidyOption = DEFAULT_TIDY_OPTION;
        }

        // Generate a formal HTML page. The page name comes from the request, so it is escaped
        // before being placed in the title.
        String formalHTMLPage = HTML_HEADER + escapeHtml(pageName) + HTML_TITLE_END
                + eng_wikiEngine.getHTML(pageName) + HTML_TRAILER;
        log.debug("Successfully generated formal HTML code for page " + pageName);

        // Convert context-relative link targets (attached images etc.) to absolute URLs.
        formalHTMLPage =
                absolutizeLinks(formalHTMLPage, request.getContextPath(), getBaseURL(request));
        log.debug("Successfully converted paths to absolute");

        // Cleanup HTML code.
        if (TIDY_ENABLED.equalsIgnoreCase(tidyOption.trim())) {
            try {
                formalHTMLPage = cleanupHTML(formalHTMLPage, encoding);
                log.debug("HTML cleaned up.");
            } catch (Exception e) {
                log.error("Error while cleaning up HTML.", e);
                throw new ServletException(e);
            }
        }

        // Encode once so that the announced length matches the bytes actually sent.
        byte[] payload = formalHTMLPage.getBytes(encoding);

        // Prepare the response; announce the charset the payload was encoded with.
        response.setHeader("Cache-control", "private");
        response.setHeader("Pragma", "private");
        response.setContentType("text/html; charset=" + encoding);
        response.setContentLength(payload.length);

        // Send HTML content to browser.
        response.getOutputStream().write(payload);
        response.getOutputStream().flush();
    } // end of doGet method

    /**
     * Cleans up an HTML document with Tidy and returns the result serialized as XML text.
     *
     * @param formalHTML the complete HTML document to clean up
     * @param encoding   name of the character encoding used to hand the document to Tidy and
     *                   declared in the serialized result
     * @return the cleaned-up document as a string
     * @throws IOException          if the encoding is not supported or Tidy yields no document
     * @throws TransformerException if the cleaned-up document cannot be serialized
     */
    private String cleanupHTML(final String formalHTML, final String encoding)
            throws IOException, TransformerException {

        // Encode with the same charset Tidy is told to read; the platform default may differ.
        InputStream inStream = new ByteArrayInputStream(formalHTML.getBytes(encoding));

        Tidy tidy = new Tidy();
        tidy.setXmlOut(true);
        // Encoding is handled by Tidy.
        tidy.setInputEncoding(encoding);
        tidy.setOutputEncoding(encoding);

        Document xmlDocument = tidy.parseDOM(inStream, null);
        if (xmlDocument == null) {
            // Defensive: fail here with a clear message instead of further down the chain.
            throw new IOException("Tidy did not produce a document.");
        }
        return convertDocument(xmlDocument, encoding);
    } // end of cleanupHTML method

    /**
     * Builds the absolute URL of the web application root, ending with a slash.
     *
     * @param request the current request
     * @return scheme, server name, port and context path, followed by <code>/</code>
     */
    private static String getBaseURL(HttpServletRequest request) {
        return request.getScheme() + "://" + request.getServerName() + ":"
                + request.getServerPort() + request.getContextPath() + "/";
    } // end of getBaseURL method

    /**
     * Rewrites context-relative link targets to absolute URLs.
     *
     * <p>Only quoted <code>href</code>, <code>src</code>, <code>action</code> and
     * <code>background</code> attribute values that start with the context path are touched.
     * A blanket text replacement would also hit closing tags when the application runs in the
     * root context (context path is empty) and would prefix URLs that are already absolute.</p>
     *
     * @param html        the markup to process
     * @param contextPath the context path of the web application, possibly empty
     * @param baseUrl     the absolute URL that replaces <code>contextPath + "/"</code>
     * @return the markup with rewritten link targets
     */
    private static String absolutizeLinks(String html, String contextPath, String baseUrl) {
        // The lookahead keeps protocol-relative URLs ("//host/...") untouched.
        Pattern linkStart = Pattern.compile(
                "(\\b(?:href|src|action|background)\\s*=\\s*[\"'])"
                        + Pattern.quote(contextPath + "/") + "(?!/)",
                Pattern.CASE_INSENSITIVE);
        return linkStart.matcher(html).replaceAll("$1" + Matcher.quoteReplacement(baseUrl));
    } // end of absolutizeLinks method

    /**
     * Tells whether the given charset name is legal and supported by this JVM.
     *
     * @param name the charset name to check
     * @return <code>true</code> if the charset can be used, <code>false</code> otherwise
     */
    private static boolean isSupportedEncoding(String name) {
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            // Thrown for syntactically illegal (or null) charset names.
            return false;
        }
    } // end of isSupportedEncoding method

    /**
     * Escapes the characters that have a special meaning in HTML text and attribute values.
     *
     * @param text the raw text
     * @return the text with <code>&amp;</code>, <code>&lt;</code>, <code>&gt;</code>, double and
     *         single quotes replaced by character references
     */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    } // end of escapeHtml method

    /**
     * Converts a DOM document to a string.
     *
     * @param doc      an XML document, as it is returned by
     *                 <code>org.w3c.tidy.Tidy.parseDOM(InputStream, OutputStream)</code>
     * @param encoding name of the encoding to declare in the serialized document
     * @return the serialized document
     * @throws TransformerException if the document cannot be serialized
     */
    private String convertDocument(Document doc, String encoding) throws TransformerException {
        // Serialize to characters directly so that no platform-default decoding is involved.
        StringWriter writer = new StringWriter();
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.ENCODING, encoding);
        transformer.transform(new DOMSource(doc), new StreamResult(writer));
        return writer.toString();
    } // end of convertDocument method

} // End of WikiContent2HTMLServlet class