Source for gnu.javax.swing.text.html.parser.support.textPreProcessor

   1: /* textPreProcessor.java --
   2:    Copyright (C) 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package gnu.javax.swing.text.html.parser.support;
  40: 
  41: import gnu.javax.swing.text.html.parser.support.low.Constants;
  42: 
  43: /**
  44:  * Pre - processes text in text parts of the html document.
  45:  *
  46:  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
  47:  */
  48: public class textPreProcessor
  49: {
  50:   /**
  51:    * Pre - process non-preformatted text. \t, \r and \n mutate into spaces, then
  52:    * multiple spaces mutate into single one, all whitespace around tags is
  53:    * consumed. The content of the passed buffer is destroyed.
  54:    *
  55:    * @param a_text A text to pre-process.
  56:    */
  57:   public char[] preprocess(StringBuffer a_text)
  58:   {
  59:     if (a_text.length() == 0)
  60:       return null;
  61: 
  62:     char[] text = toCharArray(a_text);
  63: 
  64:     int a = 0;
  65:     int b = text.length - 1;
  66: 
  67:     // Remove leading/trailing whitespace, leaving at most one character
  68:     int len = text.length;
  69:     while (a + 1 < len && Constants.bWHITESPACE.get(text[a])
  70:            && Constants.bWHITESPACE.get(text[a + 1]))
  71:       a++;
  72: 
  73:     while (b > a && Constants.bWHITESPACE.get(text[b])
  74:                && Constants.bWHITESPACE.get(text[b - 1]))
  75:       b--;
  76: 
  77:     a_text.setLength(0);
  78: 
  79:     boolean spacesWere = false;
  80:     boolean spaceNow;
  81:     char c;
  82: 
  83:     chars: for (int i = a; i <= b; i++)
  84:       {
  85:         c = text[i];
  86:         spaceNow = Constants.bWHITESPACE.get(c);
  87:         if (spacesWere && spaceNow)
  88:           continue chars;
  89:         if (spaceNow)
  90:           a_text.append(' ');
  91:         else
  92:           a_text.append(c);
  93:         spacesWere = spaceNow;
  94:       }
  95: 
  96:     if (a_text.length() == text.length)
  97:       {
  98:         a_text.getChars(0, a_text.length(), text, 0);
  99:         return text;
 100:       }
 101:     else
 102:       return toCharArray(a_text);
 103:   }
 104: 
 105:   /**
 106:    * Pre - process pre-formatted text.
 107:    * Heading/closing spaces and tabs preserved.
 108:    * ONE  bounding \r, \n or \r\n is removed.
 109:    * \r or \r\n mutate into \n. Tabs are
 110:    * preserved.
 111:    * The content of the passed buffer is destroyed.
 112:    * @param a_text
 113:    * @return
 114:    */
 115:   public char[] preprocessPreformatted(StringBuffer a_text)
 116:   {
 117:     if (a_text.length() == 0)
 118:       return null;
 119: 
 120:     char[] text = toCharArray(a_text);
 121: 
 122:     int a = 0;
 123:     int n = text.length - 1;
 124:     int b = n;
 125: 
 126:     if (text [ 0 ] == '\n')
 127:       a++;
 128:     else
 129:       {
 130:         if (text [ 0 ] == '\r')
 131:           {
 132:             a++;
 133:             if (text.length > 1 && text [ 1 ] == '\n')
 134:               a++;
 135:           }
 136:       }
 137: 
 138:     if (text [ n ] == '\r')
 139:       b--;
 140:     else
 141:       {
 142:         if (text [ n ] == '\n')
 143:           {
 144:             b--;
 145:             if (n > 0 && text [ n - 1 ] == '\r')
 146:               b--;
 147:           }
 148:       }
 149: 
 150:     a_text.setLength(0);
 151: 
 152:     if (a > b)
 153:       return null;
 154: 
 155:     char c;
 156: 
 157:     for (int i = a; i <= b; i++)
 158:       {
 159:         c = text [ i ];
 160:         if (c == '\r')
 161:           {
 162:             if (i == b || text [ i + 1 ] != '\n')
 163:               a_text.append('\n');
 164:           }
 165:         else
 166:           a_text.append(c);
 167:       }
 168: 
 169:     if (a_text.length() == text.length)
 170:       {
 171:         a_text.getChars(0, a_text.length(), text, 0);
 172:         return text;
 173:       }
 174:     else
 175:       return toCharArray(a_text);
 176:   }
 177: 
 178:   /**
 179:    * Return array of chars, present in the given buffer.
 180:    * @param a_text The buffer
 181:    * @return
 182:    */
 183:   private static char[] toCharArray(StringBuffer a_text)
 184:   {
 185:     char[] text = new char[ a_text.length() ];
 186:     a_text.getChars(0, text.length, text, 0);
 187:     return text;
 188:   }
 189: }