Frames | No Frames |
1: /* BreakIterator.java -- Breaks text into elements 2: Copyright (C) 1998, 1999, 2001, 2004, 2005, 2007, 2012 3: Free Software Foundation, Inc. 4: 5: This file is part of GNU Classpath. 6: 7: GNU Classpath is free software; you can redistribute it and/or modify 8: it under the terms of the GNU General Public License as published by 9: the Free Software Foundation; either version 2, or (at your option) 10: any later version. 11: 12: GNU Classpath is distributed in the hope that it will be useful, but 13: WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: General Public License for more details. 16: 17: You should have received a copy of the GNU General Public License 18: along with GNU Classpath; see the file COPYING. If not, write to the 19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20: 02110-1301 USA. 21: 22: Linking this library statically or dynamically with other modules is 23: making a combined work based on this library. Thus, the terms and 24: conditions of the GNU General Public License cover the whole 25: combination. 26: 27: As a special exception, the copyright holders of this library give you 28: permission to link this library with independent modules to produce an 29: executable, regardless of the license terms of these independent 30: modules, and to copy and distribute the resulting executable under 31: terms of your choice, provided that you also meet, for each linked 32: independent module, the terms and conditions of the license of that 33: module. An independent module is a module which is not derived from 34: or based on this library. If you modify this library, you may extend 35: this exception to your version of the library, but you are not 36: obligated to do so. If you do not wish to do so, delete this 37: exception statement from your version. */ 38: 39: 40: package java.text; 41: 42: import gnu.java.locale.LocaleHelper; 43: 44: import gnu.java.text.CharacterBreakIterator; 45: import gnu.java.text.LineBreakIterator; 46: import gnu.java.text.SentenceBreakIterator; 47: import gnu.java.text.WordBreakIterator; 48: 49: import java.text.spi.BreakIteratorProvider; 50: 51: import java.util.Locale; 52: import java.util.MissingResourceException; 53: import java.util.ResourceBundle; 54: import java.util.ServiceLoader; 55: 56: /** 57: * This class iterates over text elements such as words, lines, sentences, 58: * and characters. It can only iterate over one of these text elements at 59: * a time. An instance of this class configured for the desired iteration 60: * type is created by calling one of the static factory methods, not 61: * by directly calling a constructor. 62: * 63: * The standard iterators created by the factory methods in this 64: * class will be valid upon creation. That is, their methods will 65: * not cause exceptions if called before you call setText(). 66: * 67: * @author Tom Tromey (tromey@cygnus.com) 68: * @author Aaron M. Renn (arenn@urbanophile.com) 69: * @date March 19, 1999 70: */ 71: /* Written using "Java Class Libraries", 2nd edition, plus online 72: * API docs for JDK 1.2 beta from http://www.javasoft.com. 73: * Status: Believed complete and correct to 1.1. 74: */ 75: public abstract class BreakIterator implements Cloneable 76: { 77: /** 78: * This value is returned by the <code>next()</code> and 79: * <code>previous</code> in order to indicate that the end of the 80: * text has been reached. 81: */ 82: // The value was discovered by writing a test program. 83: public static final int DONE = -1; 84: 85: /** 86: * This method initializes a new instance of <code>BreakIterator</code>. 87: * This protected constructor is available to subclasses as a default 88: * no-arg superclass constructor. 89: */ 90: protected BreakIterator () 91: { 92: } 93: 94: /** 95: * Create a clone of this object. 96: */ 97: public Object clone () 98: { 99: try 100: { 101: return super.clone(); 102: } 103: catch (CloneNotSupportedException e) 104: { 105: return null; 106: } 107: } 108: 109: /** 110: * This method returns the index of the current text element boundary. 111: * 112: * @return The current text boundary. 113: */ 114: public abstract int current (); 115: 116: /** 117: * This method returns the first text element boundary in the text being 118: * iterated over. 119: * 120: * @return The first text boundary. 121: */ 122: public abstract int first (); 123: 124: /** 125: * This methdod returns the offset of the text element boundary following 126: * the specified offset. 127: * 128: * @param pos The text index from which to find the next text boundary. 129: * 130: * @return The next text boundary following the specified index. 131: */ 132: public abstract int following (int pos); 133: 134: /** 135: * This method returns a list of locales for which instances of 136: * <code>BreakIterator</code> are available. 137: * 138: * @return A list of available locales 139: */ 140: public static synchronized Locale[] getAvailableLocales () 141: { 142: Locale[] l = new Locale[1]; 143: l[0] = Locale.US; 144: return l; 145: } 146: 147: private static BreakIterator getInstance (String type, Locale loc) 148: { 149: String className; 150: try 151: { 152: ResourceBundle res 153: = ResourceBundle.getBundle("gnu.java.locale.LocaleInformation", 154: loc, ClassLoader.getSystemClassLoader()); 155: className = res.getString(type); 156: } 157: catch (MissingResourceException x) 158: { 159: return null; 160: } 161: try 162: { 163: Class<?> k = Class.forName(className); 164: return (BreakIterator) k.newInstance(); 165: } 166: catch (ClassNotFoundException x1) 167: { 168: return null; 169: } 170: catch (InstantiationException x2) 171: { 172: return null; 173: } 174: catch (IllegalAccessException x3) 175: { 176: return null; 177: } 178: } 179: 180: /** 181: * This method returns an instance of <code>BreakIterator</code> that will 182: * iterate over characters as defined in the default locale. 183: * 184: * @return A <code>BreakIterator</code> instance for the default locale. 185: */ 186: public static BreakIterator getCharacterInstance () 187: { 188: return getCharacterInstance (Locale.getDefault()); 189: } 190: 191: /** 192: * This method returns an instance of <code>BreakIterator</code> that will 193: * iterate over characters as defined in the specified locale. 194: * 195: * @param locale The desired locale. 196: * 197: * @return A <code>BreakIterator</code> instance for the specified locale. 198: */ 199: public static BreakIterator getCharacterInstance (Locale locale) 200: { 201: BreakIterator r = getInstance("CharacterIterator", locale); 202: if (r != null) 203: return r; 204: for (BreakIteratorProvider p : 205: ServiceLoader.load(BreakIteratorProvider.class)) 206: { 207: for (Locale loc : p.getAvailableLocales()) 208: { 209: if (loc.equals(locale)) 210: { 211: BreakIterator bi = p.getCharacterInstance(locale); 212: if (bi != null) 213: return bi; 214: break; 215: } 216: } 217: } 218: if (locale.equals(Locale.ROOT)) 219: return new CharacterBreakIterator(); 220: return getCharacterInstance(LocaleHelper.getFallbackLocale(locale)); 221: } 222: 223: /** 224: * This method returns an instance of <code>BreakIterator</code> that will 225: * iterate over line breaks as defined in the default locale. 226: * 227: * @return A <code>BreakIterator</code> instance for the default locale. 228: */ 229: public static BreakIterator getLineInstance () 230: { 231: return getLineInstance (Locale.getDefault()); 232: } 233: 234: /** 235: * This method returns an instance of <code>BreakIterator</code> that will 236: * iterate over line breaks as defined in the specified locale. 237: * 238: * @param locale The desired locale. 239: * 240: * @return A <code>BreakIterator</code> instance for the default locale. 241: */ 242: public static BreakIterator getLineInstance (Locale locale) 243: { 244: BreakIterator r = getInstance ("LineIterator", locale); 245: if (r != null) 246: return r; 247: for (BreakIteratorProvider p : 248: ServiceLoader.load(BreakIteratorProvider.class)) 249: { 250: for (Locale loc : p.getAvailableLocales()) 251: { 252: if (loc.equals(locale)) 253: { 254: BreakIterator bi = p.getLineInstance(locale); 255: if (bi != null) 256: return bi; 257: break; 258: } 259: } 260: } 261: if (locale.equals(Locale.ROOT)) 262: return new LineBreakIterator(); 263: return getLineInstance(LocaleHelper.getFallbackLocale(locale)); 264: } 265: 266: /** 267: * This method returns an instance of <code>BreakIterator</code> that will 268: * iterate over sentences as defined in the default locale. 269: * 270: * @return A <code>BreakIterator</code> instance for the default locale. 271: */ 272: public static BreakIterator getSentenceInstance () 273: { 274: return getSentenceInstance (Locale.getDefault()); 275: } 276: 277: /** 278: * This method returns an instance of <code>BreakIterator</code> that will 279: * iterate over sentences as defined in the specified locale. 280: * 281: * @param locale The desired locale. 282: * 283: * @return A <code>BreakIterator</code> instance for the default locale. 284: */ 285: public static BreakIterator getSentenceInstance (Locale locale) 286: { 287: BreakIterator r = getInstance ("SentenceIterator", locale); 288: if (r != null) 289: return r; 290: for (BreakIteratorProvider p : 291: ServiceLoader.load(BreakIteratorProvider.class)) 292: { 293: for (Locale loc : p.getAvailableLocales()) 294: { 295: if (loc.equals(locale)) 296: { 297: BreakIterator bi = p.getSentenceInstance(locale); 298: if (bi != null) 299: return bi; 300: break; 301: } 302: } 303: } 304: if (locale.equals(Locale.ROOT)) 305: return new SentenceBreakIterator(); 306: return getSentenceInstance(LocaleHelper.getFallbackLocale(locale)); 307: } 308: 309: /** 310: * This method returns the text this object is iterating over as a 311: * <code>CharacterIterator</code>. 312: * 313: * @return The text being iterated over. 314: */ 315: public abstract CharacterIterator getText (); 316: 317: /** 318: * This method returns an instance of <code>BreakIterator</code> that will 319: * iterate over words as defined in the default locale. 320: * 321: * @return A <code>BreakIterator</code> instance for the default locale. 322: */ 323: public static BreakIterator getWordInstance () 324: { 325: return getWordInstance (Locale.getDefault()); 326: } 327: 328: /** 329: * This method returns an instance of <code>BreakIterator</code> that will 330: * iterate over words as defined in the specified locale. 331: * 332: * @param locale The desired locale. 333: * 334: * @return A <code>BreakIterator</code> instance for the default locale. 335: */ 336: public static BreakIterator getWordInstance (Locale locale) 337: { 338: BreakIterator r = getInstance ("WordIterator", locale); 339: if (r != null) 340: return r; 341: for (BreakIteratorProvider p : 342: ServiceLoader.load(BreakIteratorProvider.class)) 343: { 344: for (Locale loc : p.getAvailableLocales()) 345: { 346: if (loc.equals(locale)) 347: { 348: BreakIterator bi = p.getWordInstance(locale); 349: if (bi != null) 350: return bi; 351: break; 352: } 353: } 354: } 355: if (locale.equals(Locale.ROOT)) 356: return new WordBreakIterator(); 357: return getWordInstance(LocaleHelper.getFallbackLocale(locale)); 358: } 359: 360: /** 361: * This method tests whether or not the specified position is a text 362: * element boundary. 363: * 364: * @param pos The text position to test. 365: * 366: * @return <code>true</code> if the position is a boundary, 367: * <code>false</code> otherwise. 368: */ 369: public boolean isBoundary (int pos) 370: { 371: if (pos == 0) 372: return true; 373: return following (pos - 1) == pos; 374: } 375: 376: /** 377: * This method returns the last text element boundary in the text being 378: * iterated over. 379: * 380: * @return The last text boundary. 381: */ 382: public abstract int last (); 383: 384: /** 385: * This method returns the text element boundary following the current 386: * text position. 387: * 388: * @return The next text boundary. 389: */ 390: public abstract int next (); 391: 392: /** 393: * This method returns the n'th text element boundary following the current 394: * text position. 395: * 396: * @param n The number of text element boundaries to skip. 397: * 398: * @return The next text boundary. 399: */ 400: public abstract int next (int n); 401: 402: /** 403: * This methdod returns the offset of the text element boundary preceding 404: * the specified offset. 405: * 406: * @param pos The text index from which to find the preceding text boundary. 407: * 408: * @returns The next text boundary preceding the specified index. 409: */ 410: public int preceding (int pos) 411: { 412: if (following (pos) == DONE) 413: last (); 414: while (previous () >= pos) 415: ; 416: return current (); 417: } 418: 419: /** 420: * This method returns the text element boundary preceding the current 421: * text position. 422: * 423: * @return The previous text boundary. 424: */ 425: public abstract int previous (); 426: 427: /** 428: * This method sets the text string to iterate over. 429: * 430: * @param newText The <code>String</code> to iterate over. 431: */ 432: public void setText (String newText) 433: { 434: setText (new StringCharacterIterator (newText)); 435: } 436: 437: /** 438: * This method sets the text to iterate over from the specified 439: * <code>CharacterIterator</code>. 440: * 441: * @param newText The desired <code>CharacterIterator</code>. 442: */ 443: public abstract void setText (CharacterIterator newText); 444: }