Frames | No Frames |
1: /* textPreProcessor.java -- 2: Copyright (C) 2005 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: 39: package gnu.javax.swing.text.html.parser.support; 40: 41: import gnu.javax.swing.text.html.parser.support.low.Constants; 42: 43: /** 44: * Pre - processes text in text parts of the html document. 45: * 46: * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 47: */ 48: public class textPreProcessor 49: { 50: /** 51: * Pre - process non-preformatted text. \t, \r and \n mutate into spaces, then 52: * multiple spaces mutate into single one, all whitespace around tags is 53: * consumed. The content of the passed buffer is destroyed. 54: * 55: * @param a_text A text to pre-process. 56: */ 57: public char[] preprocess(StringBuffer a_text) 58: { 59: if (a_text.length() == 0) 60: return null; 61: 62: char[] text = toCharArray(a_text); 63: 64: int a = 0; 65: int b = text.length - 1; 66: 67: // Remove leading/trailing whitespace, leaving at most one character 68: int len = text.length; 69: while (a + 1 < len && Constants.bWHITESPACE.get(text[a]) 70: && Constants.bWHITESPACE.get(text[a + 1])) 71: a++; 72: 73: while (b > a && Constants.bWHITESPACE.get(text[b]) 74: && Constants.bWHITESPACE.get(text[b - 1])) 75: b--; 76: 77: a_text.setLength(0); 78: 79: boolean spacesWere = false; 80: boolean spaceNow; 81: char c; 82: 83: chars: for (int i = a; i <= b; i++) 84: { 85: c = text[i]; 86: spaceNow = Constants.bWHITESPACE.get(c); 87: if (spacesWere && spaceNow) 88: continue chars; 89: if (spaceNow) 90: a_text.append(' '); 91: else 92: a_text.append(c); 93: spacesWere = spaceNow; 94: } 95: 96: if (a_text.length() == text.length) 97: { 98: a_text.getChars(0, a_text.length(), text, 0); 99: return text; 100: } 101: else 102: return toCharArray(a_text); 103: } 104: 105: /** 106: * Pre - process pre-formatted text. 107: * Heading/closing spaces and tabs preserved. 108: * ONE bounding \r, \n or \r\n is removed. 109: * \r or \r\n mutate into \n. Tabs are 110: * preserved. 111: * The content of the passed buffer is destroyed. 112: * @param a_text 113: * @return 114: */ 115: public char[] preprocessPreformatted(StringBuffer a_text) 116: { 117: if (a_text.length() == 0) 118: return null; 119: 120: char[] text = toCharArray(a_text); 121: 122: int a = 0; 123: int n = text.length - 1; 124: int b = n; 125: 126: if (text [ 0 ] == '\n') 127: a++; 128: else 129: { 130: if (text [ 0 ] == '\r') 131: { 132: a++; 133: if (text.length > 1 && text [ 1 ] == '\n') 134: a++; 135: } 136: } 137: 138: if (text [ n ] == '\r') 139: b--; 140: else 141: { 142: if (text [ n ] == '\n') 143: { 144: b--; 145: if (n > 0 && text [ n - 1 ] == '\r') 146: b--; 147: } 148: } 149: 150: a_text.setLength(0); 151: 152: if (a > b) 153: return null; 154: 155: char c; 156: 157: for (int i = a; i <= b; i++) 158: { 159: c = text [ i ]; 160: if (c == '\r') 161: { 162: if (i == b || text [ i + 1 ] != '\n') 163: a_text.append('\n'); 164: } 165: else 166: a_text.append(c); 167: } 168: 169: if (a_text.length() == text.length) 170: { 171: a_text.getChars(0, a_text.length(), text, 0); 172: return text; 173: } 174: else 175: return toCharArray(a_text); 176: } 177: 178: /** 179: * Return array of chars, present in the given buffer. 180: * @param a_text The buffer 181: * @return 182: */ 183: private static char[] toCharArray(StringBuffer a_text) 184: { 185: char[] text = new char[ a_text.length() ]; 186: a_text.getChars(0, text.length, text, 0); 187: return text; 188: } 189: }