Source for java.text.BreakIterator

   1: /* BreakIterator.java -- Breaks text into elements
   2:    Copyright (C) 1998, 1999, 2001, 2004, 2005, 2007, 2012
   3:    Free Software Foundation, Inc.
   4: 
   5: This file is part of GNU Classpath.
   6: 
   7: GNU Classpath is free software; you can redistribute it and/or modify
   8: it under the terms of the GNU General Public License as published by
   9: the Free Software Foundation; either version 2, or (at your option)
  10: any later version.
  11: 
  12: GNU Classpath is distributed in the hope that it will be useful, but
  13: WITHOUT ANY WARRANTY; without even the implied warranty of
  14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15: General Public License for more details.
  16: 
  17: You should have received a copy of the GNU General Public License
  18: along with GNU Classpath; see the file COPYING.  If not, write to the
  19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20: 02110-1301 USA.
  21: 
  22: Linking this library statically or dynamically with other modules is
  23: making a combined work based on this library.  Thus, the terms and
  24: conditions of the GNU General Public License cover the whole
  25: combination.
  26: 
  27: As a special exception, the copyright holders of this library give you
  28: permission to link this library with independent modules to produce an
  29: executable, regardless of the license terms of these independent
  30: modules, and to copy and distribute the resulting executable under
  31: terms of your choice, provided that you also meet, for each linked
  32: independent module, the terms and conditions of the license of that
  33: module.  An independent module is a module which is not derived from
  34: or based on this library.  If you modify this library, you may extend
  35: this exception to your version of the library, but you are not
  36: obligated to do so.  If you do not wish to do so, delete this
  37: exception statement from your version. */
  38: 
  39: 
  40: package java.text;
  41: 
  42: import gnu.java.locale.LocaleHelper;
  43: 
  44: import gnu.java.text.CharacterBreakIterator;
  45: import gnu.java.text.LineBreakIterator;
  46: import gnu.java.text.SentenceBreakIterator;
  47: import gnu.java.text.WordBreakIterator;
  48: 
  49: import java.text.spi.BreakIteratorProvider;
  50: 
  51: import java.util.Locale;
  52: import java.util.MissingResourceException;
  53: import java.util.ResourceBundle;
  54: import java.util.ServiceLoader;
  55: 
  56: /**
  57:  * This class iterates over text elements such as words, lines, sentences,
  58:  * and characters.  It can only iterate over one of these text elements at
  59:  * a time.  An instance of this class configured for the desired iteration
  60:  * type is created by calling one of the static factory methods, not
  61:  * by directly calling a constructor.
  62:  *
  63:  * The standard iterators created by the factory methods in this
  64:  * class will be valid upon creation.  That is, their methods will
  65:  * not cause exceptions if called before you call setText().
  66:  *
  67:  * @author Tom Tromey (tromey@cygnus.com)
  68:  * @author Aaron M. Renn (arenn@urbanophile.com)
  69:  * @date March 19, 1999
  70:  */
  71: /* Written using "Java Class Libraries", 2nd edition, plus online
  72:  * API docs for JDK 1.2 beta from http://www.javasoft.com.
  73:  * Status:  Believed complete and correct to 1.1.
  74:  */
  75: public abstract class BreakIterator implements Cloneable
  76: {
  77:   /**
  78:    * This value is returned by the <code>next()</code> and
  79:    * <code>previous</code> in order to indicate that the end of the
  80:    * text has been reached.
  81:    */
  82:   // The value was discovered by writing a test program.
  83:   public static final int DONE = -1;
  84: 
  85:   /**
  86:    * This method initializes a new instance of <code>BreakIterator</code>.
  87:    * This protected constructor is available to subclasses as a default
  88:    * no-arg superclass constructor.
  89:    */
  90:   protected BreakIterator ()
  91:   {
  92:   }
  93: 
  94:   /**
  95:    * Create a clone of this object.
  96:    */
  97:   public Object clone ()
  98:   {
  99:     try
 100:       {
 101:         return super.clone();
 102:       }
 103:     catch (CloneNotSupportedException e)
 104:       {
 105:         return null;
 106:       }
 107:   }
 108: 
 109:   /**
 110:    * This method returns the index of the current text element boundary.
 111:    *
 112:    * @return The current text boundary.
 113:    */
 114:   public abstract int current ();
 115: 
 116:   /**
 117:    * This method returns the first text element boundary in the text being
 118:    * iterated over.
 119:    *
 120:    * @return The first text boundary.
 121:    */
 122:   public abstract int first ();
 123: 
 124:   /**
 125:    * This methdod returns the offset of the text element boundary following
 126:    * the specified offset.
 127:    *
 128:    * @param pos The text index from which to find the next text boundary.
 129:    *
 130:    * @return The next text boundary following the specified index.
 131:    */
 132:   public abstract int following (int pos);
 133: 
 134:   /**
 135:    * This method returns a list of locales for which instances of
 136:    * <code>BreakIterator</code> are available.
 137:    *
 138:    * @return A list of available locales
 139:    */
 140:   public static synchronized Locale[] getAvailableLocales ()
 141:   {
 142:     Locale[] l = new Locale[1];
 143:     l[0] = Locale.US;
 144:     return l;
 145:   }
 146: 
 147:   private static BreakIterator getInstance (String type, Locale loc)
 148:   {
 149:     String className;
 150:     try
 151:       {
 152:         ResourceBundle res
 153:           = ResourceBundle.getBundle("gnu.java.locale.LocaleInformation",
 154:                                      loc, ClassLoader.getSystemClassLoader());
 155:         className = res.getString(type);
 156:       }
 157:     catch (MissingResourceException x)
 158:       {
 159:         return null;
 160:       }
 161:     try
 162:       {
 163:         Class<?> k = Class.forName(className);
 164:         return (BreakIterator) k.newInstance();
 165:       }
 166:     catch (ClassNotFoundException x1)
 167:       {
 168:         return null;
 169:       }
 170:     catch (InstantiationException x2)
 171:       {
 172:         return null;
 173:       }
 174:     catch (IllegalAccessException x3)
 175:       {
 176:         return null;
 177:       }
 178:   }
 179: 
 180:   /**
 181:    * This method returns an instance of <code>BreakIterator</code> that will
 182:    * iterate over characters as defined in the default locale.
 183:    *
 184:    * @return A <code>BreakIterator</code> instance for the default locale.
 185:    */
 186:   public static BreakIterator getCharacterInstance ()
 187:   {
 188:     return getCharacterInstance (Locale.getDefault());
 189:   }
 190: 
 191:   /**
 192:    * This method returns an instance of <code>BreakIterator</code> that will
 193:    * iterate over characters as defined in the specified locale.
 194:    *
 195:    * @param locale The desired locale.
 196:    *
 197:    * @return A <code>BreakIterator</code> instance for the specified locale.
 198:    */
 199:   public static BreakIterator getCharacterInstance (Locale locale)
 200:   {
 201:     BreakIterator r = getInstance("CharacterIterator", locale);
 202:     if (r != null)
 203:       return r;
 204:     for (BreakIteratorProvider p :
 205:            ServiceLoader.load(BreakIteratorProvider.class))
 206:       {
 207:         for (Locale loc : p.getAvailableLocales())
 208:           {
 209:             if (loc.equals(locale))
 210:               {
 211:                 BreakIterator bi = p.getCharacterInstance(locale);
 212:                 if (bi != null)
 213:                   return bi;
 214:                 break;
 215:               }
 216:           }
 217:       }
 218:     if (locale.equals(Locale.ROOT))
 219:       return new CharacterBreakIterator();
 220:     return getCharacterInstance(LocaleHelper.getFallbackLocale(locale));
 221:   }
 222: 
 223:   /**
 224:    * This method returns an instance of <code>BreakIterator</code> that will
 225:    * iterate over line breaks as defined in the default locale.
 226:    *
 227:    * @return A <code>BreakIterator</code> instance for the default locale.
 228:    */
 229:   public static BreakIterator getLineInstance ()
 230:   {
 231:     return getLineInstance (Locale.getDefault());
 232:   }
 233: 
 234:   /**
 235:    * This method returns an instance of <code>BreakIterator</code> that will
 236:    * iterate over line breaks as defined in the specified locale.
 237:    *
 238:    * @param locale The desired locale.
 239:    *
 240:    * @return A <code>BreakIterator</code> instance for the default locale.
 241:    */
 242:   public static BreakIterator getLineInstance (Locale locale)
 243:   {
 244:     BreakIterator r = getInstance ("LineIterator", locale);
 245:     if (r != null)
 246:       return r;
 247:     for (BreakIteratorProvider p :
 248:            ServiceLoader.load(BreakIteratorProvider.class))
 249:       {
 250:         for (Locale loc : p.getAvailableLocales())
 251:           {
 252:             if (loc.equals(locale))
 253:               {
 254:                 BreakIterator bi = p.getLineInstance(locale);
 255:                 if (bi != null)
 256:                   return bi;
 257:                 break;
 258:               }
 259:           }
 260:       }
 261:     if (locale.equals(Locale.ROOT))
 262:       return new LineBreakIterator();
 263:     return getLineInstance(LocaleHelper.getFallbackLocale(locale));
 264:   }
 265: 
 266:   /**
 267:    * This method returns an instance of <code>BreakIterator</code> that will
 268:    * iterate over sentences as defined in the default locale.
 269:    *
 270:    * @return A <code>BreakIterator</code> instance for the default locale.
 271:    */
 272:   public static BreakIterator getSentenceInstance ()
 273:   {
 274:     return getSentenceInstance (Locale.getDefault());
 275:   }
 276: 
 277:   /**
 278:    * This method returns an instance of <code>BreakIterator</code> that will
 279:    * iterate over sentences as defined in the specified locale.
 280:    *
 281:    * @param locale The desired locale.
 282:    *
 283:    * @return A <code>BreakIterator</code> instance for the default locale.
 284:    */
 285:   public static BreakIterator getSentenceInstance (Locale locale)
 286:   {
 287:     BreakIterator r = getInstance ("SentenceIterator", locale);
 288:     if (r != null)
 289:       return r;
 290:     for (BreakIteratorProvider p :
 291:            ServiceLoader.load(BreakIteratorProvider.class))
 292:       {
 293:         for (Locale loc : p.getAvailableLocales())
 294:           {
 295:             if (loc.equals(locale))
 296:               {
 297:                 BreakIterator bi = p.getSentenceInstance(locale);
 298:                 if (bi != null)
 299:                   return bi;
 300:                 break;
 301:               }
 302:           }
 303:       }
 304:     if (locale.equals(Locale.ROOT))
 305:       return new SentenceBreakIterator();
 306:     return getSentenceInstance(LocaleHelper.getFallbackLocale(locale));
 307:   }
 308: 
 309:   /**
 310:    * This method returns the text this object is iterating over as a
 311:    * <code>CharacterIterator</code>.
 312:    *
 313:    * @return The text being iterated over.
 314:    */
 315:   public abstract CharacterIterator getText ();
 316: 
 317:   /**
 318:    * This method returns an instance of <code>BreakIterator</code> that will
 319:    * iterate over words as defined in the default locale.
 320:    *
 321:    * @return A <code>BreakIterator</code> instance for the default locale.
 322:    */
 323:   public static BreakIterator getWordInstance ()
 324:   {
 325:     return getWordInstance (Locale.getDefault());
 326:   }
 327: 
 328:   /**
 329:    * This method returns an instance of <code>BreakIterator</code> that will
 330:    * iterate over words as defined in the specified locale.
 331:    *
 332:    * @param locale The desired locale.
 333:    *
 334:    * @return A <code>BreakIterator</code> instance for the default locale.
 335:    */
 336:   public static BreakIterator getWordInstance (Locale locale)
 337:   {
 338:     BreakIterator r = getInstance ("WordIterator", locale);
 339:     if (r != null)
 340:       return r;
 341:     for (BreakIteratorProvider p :
 342:            ServiceLoader.load(BreakIteratorProvider.class))
 343:       {
 344:         for (Locale loc : p.getAvailableLocales())
 345:           {
 346:             if (loc.equals(locale))
 347:               {
 348:                 BreakIterator bi = p.getWordInstance(locale);
 349:                 if (bi != null)
 350:                   return bi;
 351:                 break;
 352:               }
 353:           }
 354:       }
 355:     if (locale.equals(Locale.ROOT))
 356:       return new WordBreakIterator();
 357:     return getWordInstance(LocaleHelper.getFallbackLocale(locale));
 358:   }
 359: 
 360:   /**
 361:    * This method tests whether or not the specified position is a text
 362:    * element boundary.
 363:    *
 364:    * @param pos The text position to test.
 365:    *
 366:    * @return <code>true</code> if the position is a boundary,
 367:    * <code>false</code> otherwise.
 368:    */
 369:   public boolean isBoundary (int pos)
 370:   {
 371:     if (pos == 0)
 372:       return true;
 373:     return following (pos - 1) == pos;
 374:   }
 375: 
 376:   /**
 377:    * This method returns the last text element boundary in the text being
 378:    * iterated over.
 379:    *
 380:    * @return The last text boundary.
 381:    */
 382:   public abstract int last ();
 383: 
 384:   /**
 385:    * This method returns the text element boundary following the current
 386:    * text position.
 387:    *
 388:    * @return The next text boundary.
 389:    */
 390:   public abstract int next ();
 391: 
 392:   /**
 393:    * This method returns the n'th text element boundary following the current
 394:    * text position.
 395:    *
 396:    * @param n The number of text element boundaries to skip.
 397:    *
 398:    * @return The next text boundary.
 399:    */
 400:   public abstract int next (int n);
 401: 
 402:   /**
 403:    * This methdod returns the offset of the text element boundary preceding
 404:    * the specified offset.
 405:    *
 406:    * @param pos The text index from which to find the preceding text boundary.
 407:    *
 408:    * @returns The next text boundary preceding the specified index.
 409:    */
 410:   public int preceding (int pos)
 411:   {
 412:     if (following (pos) == DONE)
 413:       last ();
 414:     while (previous () >= pos)
 415:       ;
 416:     return current ();
 417:   }
 418: 
 419:   /**
 420:    * This method returns the text element boundary preceding the current
 421:    * text position.
 422:    *
 423:    * @return The previous text boundary.
 424:    */
 425:   public abstract int previous ();
 426: 
 427:   /**
 428:    * This method sets the text string to iterate over.
 429:    *
 430:    * @param newText The <code>String</code> to iterate over.
 431:    */
 432:   public void setText (String newText)
 433:   {
 434:     setText (new StringCharacterIterator (newText));
 435:   }
 436: 
 437:   /**
 438:    * This method sets the text to iterate over from the specified
 439:    * <code>CharacterIterator</code>.
 440:    *
 441:    * @param newText The desired <code>CharacterIterator</code>.
 442:    */
 443:   public abstract void setText (CharacterIterator newText);
 444: }