/* * RCSDirFileProvider.java * @author Zhenlei Cai * Created on February 3, 2003, 4:06 PM */ package com.ecyrd.jspwiki.providers; import java.io.*; import java.util.TreeSet; import java.util.Iterator; import java.util.Properties; import java.util.Collection; import java.util.ArrayList; import java.util.List; import java.util.Date; import com.ecyrd.jspwiki.*; import org.apache.log4j.Category; import java.text.SimpleDateFormat; import java.text.ParseException; import org.apache.oro.text.*; import org.apache.oro.text.regex.*; /** * * @author zcai */ public class RCSDirFileProvider implements WikiPageProvider { private String m_pageDirectory = "/tmp/"; protected String m_encoding; /** * Name of the property that defines where page directories are. */ public static final String PROP_PAGEDIR = "jspwiki.fileSystemProvider.pageDir"; /** * All files should have this extension to be recognized as JSPWiki files. * We default to .txt, because that is probably easiest for Windows users, * and guarantees correct handling. */ public static final String FILE_EXT = ".txt"; private static final String DEFAULT_ENCODING = "ISO-8859-1"; private String m_checkinCommand = "ci -q -m\"author=%u\" -l -t-none %s"; private String m_checkoutCommand = "co -l %s"; private String m_logCommand = "rlog -zLT -r %s"; private String m_fullLogCommand = "rlog -zLT %s"; private String m_checkoutVersionCommand = "co -p -r1.%v %s"; private static final Category log = Category.getInstance(RCSDirFileProvider.class); public static final String PROP_CHECKIN = "jspwiki.rcsFileProvider.checkinCommand"; public static final String PROP_CHECKOUT = "jspwiki.rcsFileProvider.checkoutCommand"; public static final String PROP_LOG = "jspwiki.rcsFileProvider.logCommand"; public static final String PROP_FULLLOG = "jspwiki.rcsFileProvider.fullLogCommand"; public static final String PROP_CHECKOUTVERSION = "jspwiki.rcsFileProvider.checkoutVersionCommand"; private static final String PATTERN_DATE = "^date:\\s*(.*\\d);"; private static final String PATTERN_AUTHOR = "^\"?author=([\\w\\.\\s\\+\\.\\%]*)\"?"; private static final String PATTERN_REVISION = "^revision \\d+\\.(\\d+)"; private static final String RCSFMT_DATE = "yyyy-MM-dd HH:mm:ss"; private static final String RCSFMT_DATE_UTC = "yyyy/MM/dd HH:mm:ss"; // Date format parsers, placed here to save on object creation private SimpleDateFormat m_rcsdatefmt = new SimpleDateFormat( RCSFMT_DATE ); private SimpleDateFormat m_rcsdatefmt_utc = new SimpleDateFormat( RCSFMT_DATE_UTC ); public void initialize( Properties props ) throws NoRequiredPropertyException, IOException { log.debug("Initing RCSDirFileSystemProvider"); m_pageDirectory = WikiEngine.getRequiredProperty( props, PROP_PAGEDIR ); File f = new File(m_pageDirectory); if( !f.exists() ) { throw new FileNotFoundException("Page directory does not exist: "+m_pageDirectory); } else if( !f.isDirectory() ) { throw new IOException("Page directory is not a directory: "+m_pageDirectory); } m_encoding = props.getProperty( WikiEngine.PROP_ENCODING, DEFAULT_ENCODING ); log.info("wikiPageFiles are read from : "+m_pageDirectory); m_checkinCommand = props.getProperty( PROP_CHECKIN, m_checkinCommand ); m_checkoutCommand = props.getProperty( PROP_CHECKOUT, m_checkoutCommand ); m_logCommand = props.getProperty( PROP_LOG, m_logCommand ); m_fullLogCommand = props.getProperty( PROP_FULLLOG, m_fullLogCommand ); m_checkoutVersionCommand = props.getProperty( PROP_CHECKOUTVERSION, m_checkoutVersionCommand ); File rcsdir = new File( getPageDirectory(), "RCS" ); if( !rcsdir.exists() ) rcsdir.mkdirs(); log.debug("checkin="+m_checkinCommand); log.debug("checkout="+m_checkoutCommand); log.debug("log="+m_logCommand); log.debug("fulllog="+m_fullLogCommand); log.debug("checkoutversion="+m_checkoutVersionCommand); } String getPageDirectory() { return m_pageDirectory; } /** * This makes sure that the queried page name * is still readable by the file system. */ protected String mangleName( String pagename ) { // FIXME: Horrible kludge, very slow, etc. if( "UTF-8".equalsIgnoreCase( m_encoding ) ) return TextUtil.urlEncodeUTF8( pagename ); return java.net.URLEncoder.encode( pagename ); } /** * This makes the reverse of mangleName */ protected String unmangleName( String filename ) { // FIXME: Horrible kludge, very slow, etc. if( "UTF-8".equalsIgnoreCase( m_encoding ) ) return TextUtil.urlDecodeUTF8( filename ); return java.net.URLDecoder.decode( filename ); } public boolean pageExists( String page ) { File pagefile = getDiskFileForPage( page ); return pagefile.exists(); } /** * Read the text directly from the correct file. */ private String getPageText( String page ) { String result = null; InputStream in = null; File pagedata = getDiskFileForPage( page ); if( pagedata.exists() ) { if( pagedata.canRead() ) { try { in = new FileInputStream( pagedata ); result = FileUtil.readContents( in, m_encoding ); } catch( IOException e ) { log.error("Failed to read", e); } finally { try { if( in != null ) in.close(); } catch( Exception e ) { log.fatal("Closing failed",e); } } } else { log.warn("Failed to read page '"+page+"' from '"+pagedata.getAbsolutePath()+"', possibly a permissions problem"); } } else { // This is okay. log.info("New page '"+page+"'"); } return result; } public String getPageText( String page, int version ) throws ProviderException { String result = null; // Let parent handle latest fetches, since the FileSystemProvider // can do the file reading just as well. if( version == WikiPageProvider.LATEST_VERSION ) return getPageText( page); log.debug("Fetching specific version "+version+" of page "+page); try { String cmd = m_checkoutVersionCommand; File file = getDiskFileForPage( page ); cmd = TextUtil.replaceString( cmd, "%s", file.getAbsolutePath()); cmd = TextUtil.replaceString( cmd, "%v", Integer.toString(version ) ); log.debug("Command = '"+cmd+"'"); Process process = Runtime.getRuntime().exec( cmd, null, new File(getPageDirectory()) ); result = FileUtil.readContents( process.getInputStream(), m_encoding ); process.waitFor(); int exitVal = process.exitValue(); log.debug("Done, returned = "+exitVal); // // If fetching failed, assume that this is because of the user // has just migrated from FileSystemProvider, and check // if he's getting version 1. // if( exitVal != 0 && version == 1 ) { result = getPageText( page, version ); } } catch( Exception e ) { log.error("RCS checkout failed",e); } return result; } private Collection getAllPagesUnderDir(File dir, String wikiGroup) throws ProviderException { ArrayList set = new ArrayList(); String[] files = dir.list(); for (int i = 0; i < files.length; i++) { File file = new File (dir, files[i]); if (file.isDirectory()) { String group = files[i]; if (!wikiGroup.equals("")) group = wikiGroup + WikiEngine.WIKI_PAGE_NAME_DELIMITER + group; Collection subpages = getAllPagesUnderDir(file, group); set.addAll(subpages); } } // .txts in this dir File[] wikiPageFiles = dir.listFiles( new WikiFileFilter() ); if( wikiPageFiles == null ) { log.error("wikiPageFiles directory does not exist!"); throw new InternalWikiException("Page directory does not exist"); } for( int i = 0; i < wikiPageFiles.length; i++ ) { String pageFileName = wikiPageFiles[i].getName(); int cutpoint = pageFileName.lastIndexOf( FILE_EXT ); String pageFileBaseName = unmangleName(pageFileName.substring(0,cutpoint)) ; String pageName = WikiEngine.WIKI_PAGE_NAME_DELIMITER + pageFileBaseName; if (!wikiGroup.equals("")) pageName = WikiEngine.WIKI_PAGE_NAME_DELIMITER + wikiGroup + WikiEngine.WIKI_PAGE_NAME_DELIMITER + pageFileBaseName; WikiPage page = getPageInfo(pageName, WikiPageProvider.LATEST_VERSION ); if( page == null ) { // This should not really happen. // FIXME: Should we throw an exception here? log.error("Page "+pageFileName+" was found in directory listing, but could not be located individually."); continue; } set.add( page ); } return set; } public Collection getAllPages() throws ProviderException { log.debug("Getting all pages..."); File wikipagedir = new File( m_pageDirectory ); return getAllPagesUnderDir(wikipagedir, ""); } public Collection getAllChangedSince( Date date ) { return new ArrayList(); // FIXME } public int getPageCount() { try { return this.getAllPages().size(); } catch (ProviderException e) { e.printStackTrace(); return 0; } } public Collection findPages( QueryItem[] query ) { try { return getPagesMatchQuery(query, getAllPages()); } catch (ProviderException e) { e.printStackTrace(); return null; } } /** * Given a list of pages, return those pages that match a search */ public Collection getPagesMatchQuery ( QueryItem[] query , Collection pageCandidates) { TreeSet res = new TreeSet( new SearchResultComparator() ); Iterator pageIt = pageCandidates.iterator(); nextfile: while (pageIt.hasNext()) { String pageName = ((WikiPage) pageIt.next()).getName(); File file = this.getDiskFileForPage(pageName); String line = null; // log.debug("Searching page "+wikiPageFiles[i].getPath() ); String filename = file.getName(); int cutpoint = filename.lastIndexOf( FILE_EXT ); String wikiname = pageName; wikiname = unmangleName( wikiname ); try { FileInputStream input = new FileInputStream( file); String pagetext = FileUtil.readContents( input, m_encoding ); int scores[] = new int[ query.length ]; BufferedReader in = new BufferedReader( new StringReader(pagetext) ); while( (line = in.readLine()) != null ) { line = line.toLowerCase(); for( int j = 0; j < query.length; j++ ) { int index = -1; while( (index = line.indexOf( query[j].word, index+1 )) != -1 ) { // log.debug(" Match found for "+query[j].word ); if( query[j].type != QueryItem.FORBIDDEN ) { scores[j]++; // Mark, found this word n times } else { // Found something that was forbidden. continue nextfile; } } } } // // Check that we have all required words. // int totalscore = 0; for( int j = 0; j < scores.length; j++ ) { // Give five points for each occurrence // of the word in the wiki name. if( wikiname.toLowerCase().indexOf( query[j].word ) != -1 && query[j].type != QueryItem.FORBIDDEN ) scores[j] += 5; // Filter out pages if the search word is marked 'required' // but they have no score. if( query[j].type == QueryItem.REQUIRED && scores[j] == 0 ) continue nextfile; // // Count the total score for this page. // totalscore += scores[j]; } if( totalscore > 0 ) { res.add( new SearchResultImpl(wikiname,totalscore) ); } } catch( IOException e ) { log.error( "Failed to read", e ); } } return res; } // FIXME: Put the rcs date formats into properties as well. public List getVersionHistory( String page ) { PatternMatcher matcher = new Perl5Matcher(); PatternCompiler compiler = new Perl5Compiler(); PatternMatcherInput input; log.debug("Getting RCS version history"); ArrayList list = new ArrayList(); File file = getDiskFileForPage( page ); try { Pattern revpattern = compiler.compile( PATTERN_REVISION ); Pattern datepattern = compiler.compile( PATTERN_DATE ); // This complicated pattern is required, since on Linux RCS adds // quotation marks, but on Windows, it does not. Pattern userpattern = compiler.compile( PATTERN_AUTHOR ); String cmd = TextUtil.replaceString( m_fullLogCommand, "%s", file.getAbsolutePath() ); Process process = Runtime.getRuntime().exec( cmd, null, new File(getPageDirectory()) ); // FIXME: Should this use encoding as well? BufferedReader stdout = new BufferedReader( new InputStreamReader(process.getInputStream()) ); String line; WikiPage info = null; while( (line = stdout.readLine()) != null ) { if( matcher.contains( line, revpattern ) ) { info = new WikiPage( page ); MatchResult result = matcher.getMatch(); int vernum = Integer.parseInt( result.group(1) ); info.setVersion( vernum ); list.add( info ); } if( matcher.contains( line, datepattern ) ) { MatchResult result = matcher.getMatch(); Date d = parseDate( result.group(1) ); info.setLastModified( d ); } if( matcher.contains( line, userpattern ) ) { MatchResult result = matcher.getMatch(); info.setAuthor( TextUtil.urlDecodeUTF8(result.group(1)) ); } } process.waitFor(); } catch( Exception e ) { log.error( "RCS log failed", e ); } return list; } public String getProviderInfo() { return ""; } public void deleteVersion( String pageName, int version ) { // FIXME. } public void deletePage( String pageName ) { // FIXME: } public class WikiFileFilter implements FilenameFilter { public boolean accept( File dir, String name ) { return name.endsWith( FILE_EXT ); } } /** * Searches return this class. */ public class SearchResultImpl implements SearchResult { int m_score; WikiPage m_page; public SearchResultImpl( String name, int score ) { m_page = new WikiPage( name ); m_score = score; } public WikiPage getPage() { return m_page; } public int getScore() { return m_score; } } /** * Based on work from Wolfgang Irler (01/2003) * checks the dir for a hierarchically named page (like Admin\SickLeave) * SIDE EFFECT: if dir does not exist, creates it * returns true if the dir is missing and it is created */ private boolean pageDirectoryMissing (String page ) { String pageDir = WikiEngine.getPageWikiGroupName(page); String pageBaseName = WikiEngine.getPageBaseName(page); if (pageDir != null) { pageDir = pageDir.replace(WikiEngine.WIKI_PAGE_NAME_DELIMITER, '/'); File dir = new File( m_pageDirectory + "/" + pageDir ); log.info( "++++++++RCSDirFileSystemProvider: new dir = " + dir ); if ( !dir.exists()) { dir.mkdirs(); // THIS IS THE SIDE EFFECT!! } File rcsdir = new File(dir, "RCS" ); if( !rcsdir.exists() ) rcsdir.mkdirs(); return dir.exists(); } return false; } /** * Given Wiki page name, find the file on disk for it */ protected File getDiskFileForPage( String page ) { String pageGroup = WikiEngine.getPageWikiGroupName(page); String pageBaseName = WikiEngine.getPageBaseName(page); if (pageGroup != null) { String pageFileDir = ""; java.util.StringTokenizer parser = new java.util.StringTokenizer(pageGroup, "" + WikiEngine.WIKI_PAGE_NAME_DELIMITER); while (parser.hasMoreTokens()) { pageFileDir += mangleName(parser.nextToken()); pageFileDir += WikiEngine.WIKI_PAGE_NAME_DELIMITER; } // convert the page group delimiter to file path separator '/' pageFileDir = pageFileDir.replace(WikiEngine.WIKI_PAGE_NAME_DELIMITER, '/'); return new File( m_pageDirectory + "/" + pageFileDir , mangleName(pageBaseName) + FILE_EXT ); } else return new File( m_pageDirectory, mangleName(page)+FILE_EXT ); // original return } /** * Write the .txt file, but no the .txt,v in RCS yet */ private void storePageTextLatestVersion(WikiPage page, String text) { if ( pageDirectoryMissing( page.getName() ) ){ log.info(" dir for " +page+ " created"); } File file = getDiskFileForPage( page.getName() ); try { PrintWriter out = new PrintWriter(new OutputStreamWriter( new FileOutputStream( file ), m_encoding )); out.print( text ); out.close(); } catch( IOException e ) { log.error( "Saving failed : to file " + file.getAbsolutePath(), e); } } /** * Puts the page into RCS and makes sure there is a fresh copy in * the directory as well. */ public void putPageText( WikiPage page, String text ) { String pagename = page.getName(); // Writes it in the dir. storePageTextLatestVersion (page, text); log.debug( "Checking in text..." ); File file = getDiskFileForPage( page.getName() ); try { String cmd = m_checkinCommand; String author = page.getAuthor(); if( author == null ) author = "unknown"; cmd = TextUtil.replaceString( cmd, "%s", file.getAbsolutePath()); cmd = TextUtil.replaceString( cmd, "%u", TextUtil.urlEncodeUTF8(author) ); log.debug("Command = '"+cmd+"'"); Process process = Runtime.getRuntime().exec( cmd, null, new File(getPageDirectory()) ); process.waitFor(); log.debug("Done, returned = "+process.exitValue()); } catch( Exception e ) { log.error("RCS checkin failed",e); } } // NB: This is a very slow method. public WikiPage getPageInfo( String page, int version ) throws ProviderException { PatternMatcher matcher = new Perl5Matcher(); PatternCompiler compiler = new Perl5Compiler(); PatternMatcherInput input; File file = getDiskFileForPage( page ); if( !file.exists() ) { return null; } WikiPage info = new WikiPage( page ); try { String cmd = m_fullLogCommand; cmd = TextUtil.replaceString( cmd, "%s", file.getAbsolutePath() ); Process process = Runtime.getRuntime().exec( cmd, null, new File(this.getPageDirectory()) ); // FIXME: Should this use encoding as well? BufferedReader stdout = new BufferedReader( new InputStreamReader(process.getInputStream() ) ); String line; Pattern headpattern = compiler.compile( PATTERN_REVISION ); // This complicated pattern is required, since on Linux RCS adds // quotation marks, but on Windows, it does not. Pattern userpattern = compiler.compile( PATTERN_AUTHOR ); Pattern datepattern = compiler.compile( PATTERN_DATE ); boolean found = false; while( (line = stdout.readLine()) != null ) { if( matcher.contains( line, headpattern ) ) { MatchResult result = matcher.getMatch(); int vernum = Integer.parseInt( result.group(1) ); if( vernum == version || version == WikiPageProvider.LATEST_VERSION ) { info.setVersion( vernum ); found = true; } } else if( matcher.contains( line, datepattern ) && found ) { MatchResult result = matcher.getMatch(); Date d = parseDate( result.group(1) ); if( d != null ) { info.setLastModified( d ); } else { log.info("WikiPage "+info.getName()+ " has null modification date for version "+ version); } } else if( matcher.contains( line, userpattern ) && found ) { MatchResult result = matcher.getMatch(); info.setAuthor( TextUtil.urlDecodeUTF8(result.group(1)) ); } else if( found && line.startsWith("----") ) { // End of line sign from RCS break; } } // // Especially with certain versions of RCS on Windows, // process.waitFor() hangs unless you read all of the // standard output. So we make sure it's all emptied. // while( (line = stdout.readLine()) != null ) { } process.waitFor(); } catch( Exception e ) { // This also occurs when 'info' was null. log.warn("Failed to read RCS info",e); } return info; } /** * util method to parse a date string in Local and UTC formats */ private Date parseDate( String str ) { Date d = null; try { d = m_rcsdatefmt.parse( str ); return d; } catch ( ParseException pe ) { } try { d = m_rcsdatefmt_utc.parse( str ); return d; } catch ( ParseException pe ) { } return d; } }