/*
WikiContent2HTML - Servlet to export a JSPWiki page into a clean HTML file.
Copyright (C) 2009 Kyriakos Galatis (kakaouskia@gmail.com)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.kyriakosgalatis.jspwiki;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.Charset;
import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
import com.ecyrd.jspwiki.WikiContext;
import com.ecyrd.jspwiki.WikiEngine;
import com.ecyrd.jspwiki.WikiPage;
/**
* Servlet to convert a JSPWiki HTML page into a clean HTML page.
*
*
* The purpose of this servlet is to enable clean HTML output of a JSPWiki document. This resolves wrong formatting
* of tables and other HTML content, usually entered by copying from external sources
* or from other plugins.
*
* Parameters:
*
* - page - The name of the page. Will be used as the HTML title.
* (Required)
* - tidy - Define the use of the HTML Tidy class for cleanup of the HTML code.
* Should be used for correct display; set to no only if you have problems with the encoding.
* Default value: yes. (Optional)
* - encoding - ID of the encoding to be used, like UTF-8.
* If not set, the character encoding of the servlet response is used.
* (Optional)
*
*
* Installation:
* 1: Copy wikicontent2html-jar-with-dependencies.jar
* to /WEB-INF/lib of your JSPWiki installation
*
* 2: Enter the following in the web.xml file:
*
*
* <pre>{@code
* <servlet>
*   <servlet-name>WikiContent2HTMLServlet</servlet-name>
*   <servlet-class>com.kyriakosgalatis.jspwiki.WikiContent2HTMLServlet</servlet-class>
* </servlet>
*
* <servlet-mapping>
*   <servlet-name>WikiContent2HTMLServlet</servlet-name>
*   <url-pattern>/wiki.html</url-pattern>
* </servlet-mapping>
* }</pre>
*
*
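* 3: Add the call to the servlet somewhere on your page,
* templates/default/Favorites.jsp is my preferred place, as it gives an option to the More... menu.
* The markup of the original example was lost; a link of roughly the following form is meant
* (adjust the page-name expression to your template):
*
* <pre>{@code
* <a href="wiki.html?page=<wiki:Variable var='pagename' />&tidy=yes">View as HTML</a>
* }</pre>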
*
*
* 4: Create the file default.properties in the /templates/ folder of the JSPWiki installation
* (if it does not already exist) and add the following content:
*
* actions.viewhtml=View Page HTML
* actions.viewhtml.title=View HTML
*
*
*
* @author Kyriakos Galatis
* @version 1.0
*/
public class WikiContent2HTMLServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
protected Logger log = Logger.getLogger(this.getClass().getName()); //Logging mechanism
protected WikiEngine eng_wikiEngine;
protected static final String HTML_HEADER = "";
protected static final String HTML_TRAILER = "";
protected static final String DEFAULT_TIDY_OPTION = "YES";
/*
* Servlet initialization / Configuration
*
* @see javax.servlet.Servlet#init(javax.servlet.ServletConfig)
*
*/
public void init(final ServletConfig config) throws ServletException {
super.init(config);
eng_wikiEngine = WikiEngine.getInstance(config);
} // End of init() method
/**
* Get formal HTML.
* Use a byte stream to the response to send it to the browser.
*
* @see javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest,
* javax.servlet.http.HttpServletResponse)
*/
public void doGet(final HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException {
// Get the name of the page
String pageName = request.getParameter("page");
if ((pageName == null) || ("".equals(pageName.toString()))) {
throw new IllegalArgumentException("Invalid parameter \"page\".");
}
// Get the Wiki Context
WikiContext context = new WikiContext(eng_wikiEngine, request, new WikiPage(eng_wikiEngine, pageName));
if (!context.hasAccess(response, true)) {
return;
}
// Get encoding options, default to machine character set
String encoding = request.getParameter("encoding");
if(encoding == null || ("".equals(encoding.trim()))) {
encoding = response.getCharacterEncoding();
}
encoding = encoding.trim();
// Get Tidy options
String tidyOption = request.getParameter("tidy");
if ((tidyOption == null) || ("".equals(tidyOption.trim()))) {
tidyOption = DEFAULT_TIDY_OPTION;
}
// Generate a formal HTML page
String formalHTMLPage = HTML_HEADER + pageName + "" + eng_wikiEngine.getHTML(pageName) + HTML_TRAILER;
log.debug("Successfully generated formal HTML code for page " + pageName);
// Convert attached image paths to absolute
String relativePattern = request.getContextPath() + "/";
String absolutePattern = getBaseURL(request);
formalHTMLPage = formalHTMLPage.replace(relativePattern,absolutePattern);
log.debug("Successfully connerted paths to absolute");
//System.out.println("HTML after replace: " + htmlPage);
// Cleanup HTML code
if (tidyOption.toUpperCase().equals("YES")) {
try {
formalHTMLPage = cleanupHTML(formalHTMLPage, encoding);
log.debug("HTML cleaned up.");
} catch (Throwable t) {
log.error("Throwable caught while cleaning up html.", t);
throw new ServletException(t);
}
} // end if
// Send formal HTML to browser
// Setup the output buffer and write the HTML
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
outStream.write(formalHTMLPage.getBytes(encoding));
// Prepare the response
response.setHeader("Cache-control", "private");
response.setHeader("Pragma", "private");
response.setContentType("text/html");
response.setContentLength(outStream.size());
// Send HTML content to Browser
response.getOutputStream().write(outStream.toByteArray());
response.getOutputStream().flush();
} // end of content2HTML method
/**
* Cleanup HTML into XML. Transform that XML into a PDF byte stream. Write
* the byte stream to the response to send it to the browser.
*
* @param formalHTML
*
* The complete HTML document for cleanup.
*
* @throws Exception
*
* If something goes wrong while cleaning up.
*
* @return htmlDocument
*
* The cleaned up XML
*
*
*/
private String cleanupHTML(final String formalHTML, final String encoding) {
// Document should be declared outside try / catch block
Document xmlDocument = null;
try {
// Convert HTML to bytes.
InputStream inStream = new ByteArrayInputStream(formalHTML.getBytes());
Tidy tidy = new Tidy();
tidy.setXmlOut(true);
// Encoding is handled by Tidy
tidy.setInputEncoding(encoding);
tidy.setOutputEncoding(encoding);
xmlDocument = tidy.parseDOM(inStream,null);
} catch (Exception e) {
log.error("Something went wrong while cleaning up HTML with Tidy. Message: " + e.getMessage());
log.error("Something went wrong while cleaning up HTML with Tidy. Stack Trace: " + e.getStackTrace());
}
return convertDocument(xmlDocument).toString();
} // end of cleanupHTML method
private static String getBaseURL(HttpServletRequest request) {
return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getContextPath() + "/";
} // end of getBaseURL method
/**
* Convert Document to String
*
*
* @param Document - An XML Document, as it is returned by
* org.w3c.tidy.Tidy.parseDOM(InputStream arg0, OutputStream arg1)
*
*
*/
private StringBuilder convertDocument(Document doc) {
StringBuilder stringBuilder = null;
try {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(doc), new StreamResult(stream));
stringBuilder = new StringBuilder(stream.toString());
} catch (Exception e) {
log.error("Cannot convert Document to String. Message: " + e.getMessage());
log.error("Cannot convert Document to String. Stack Trace: " + e.getStackTrace());
}
return stringBuilder;
} // end of convertDocument method
} //End of WikiContent2HTMLServlet class