Source for java.util.regex.Pattern

   1: /* Pattern.java -- Compiled regular expression ready to be applied.
   2:    Copyright (C) 2002, 2004, 2005, 2007, 2010
   3:    Free Software Foundation, Inc.
   4: 
   5: This file is part of GNU Classpath.
   6: 
   7: GNU Classpath is free software; you can redistribute it and/or modify
   8: it under the terms of the GNU General Public License as published by
   9: the Free Software Foundation; either version 2, or (at your option)
  10: any later version.
  11: 
  12: GNU Classpath is distributed in the hope that it will be useful, but
  13: WITHOUT ANY WARRANTY; without even the implied warranty of
  14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15: General Public License for more details.
  16: 
  17: You should have received a copy of the GNU General Public License
  18: along with GNU Classpath; see the file COPYING.  If not, write to the
  19: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20: 02110-1301 USA.
  21: 
  22: Linking this library statically or dynamically with other modules is
  23: making a combined work based on this library.  Thus, the terms and
  24: conditions of the GNU General Public License cover the whole
  25: combination.
  26: 
  27: As a special exception, the copyright holders of this library give you
  28: permission to link this library with independent modules to produce an
  29: executable, regardless of the license terms of these independent
  30: modules, and to copy and distribute the resulting executable under
  31: terms of your choice, provided that you also meet, for each linked
  32: independent module, the terms and conditions of the license of that
  33: module.  An independent module is a module which is not derived from
  34: or based on this library.  If you modify this library, you may extend
  35: this exception to your version of the library, but you are not
  36: obligated to do so.  If you do not wish to do so, delete this
  37: exception statement from your version. */
  38: 
  39: package java.util.regex;
  40: 
  41: import gnu.java.lang.CPStringBuilder;
  42: 
  43: import gnu.java.util.regex.RE;
  44: import gnu.java.util.regex.REException;
  45: import gnu.java.util.regex.RESyntax;
  46: 
  47: import java.io.Serializable;
  48: import java.util.ArrayList;
  49: 
  50: 
  51: /**
  52:  * Compiled regular expression ready to be applied.
  53:  *
  54:  * @since 1.4
  55:  */
  56: public final class Pattern implements Serializable
  57: {
  58:   private static final long serialVersionUID = 5073258162644648461L;
  59: 
  60:   public static final int CANON_EQ = 128;
  61:   public static final int CASE_INSENSITIVE = 2;
  62:   public static final int COMMENTS = 4;
  63:   public static final int DOTALL = 32;
  64:   public static final int MULTILINE = 8;
  65:   public static final int UNICODE_CASE = 64;
  66:   public static final int UNIX_LINES = 1;
  67: 
  68:   private final String regex;
  69:   private final int flags;
  70: 
  71:   private final RE re;
  72: 
  73:   private Pattern (String regex, int flags)
  74:     throws PatternSyntaxException
  75:   {
  76:     this.regex = regex;
  77:     this.flags = flags;
  78: 
  79:     RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
  80:     int gnuFlags = 0;
  81:     gnuFlags |= RE.REG_ICASE_USASCII;
  82:     if ((flags & CASE_INSENSITIVE) != 0)
  83:       gnuFlags |= RE.REG_ICASE;
  84:     if ((flags & MULTILINE) != 0)
  85:       {
  86:         gnuFlags |= RE.REG_MULTILINE;
  87:         syntax = new RESyntax(syntax);
  88:         syntax.setLineSeparator(null);
  89:       }
  90:     if ((flags & DOTALL) != 0)
  91:       gnuFlags |= RE.REG_DOT_NEWLINE;
  92:     if ((flags & UNICODE_CASE) != 0)
  93:       gnuFlags &= ~RE.REG_ICASE_USASCII;
  94:     // not yet supported:
  95:     // if ((flags & CANON_EQ) != 0) gnuFlags =
  96: 
  97:     if ((flags & UNIX_LINES) != 0)
  98:       {
  99:         // Use a syntax set with \n for linefeeds?
 100:         syntax = new RESyntax(syntax);
 101:         syntax.setLineSeparator("\n");
 102:       }
 103: 
 104:     if ((flags & COMMENTS) != 0)
 105:       {
 106:         gnuFlags |= RE.REG_X_COMMENTS;
 107:       }
 108: 
 109:     try
 110:       {
 111:         this.re = new RE(regex, gnuFlags, syntax);
 112:       }
 113:     catch (REException e)
 114:       {
 115:         PatternSyntaxException pse;
 116:         pse = new PatternSyntaxException(e.getMessage(),
 117:                                          regex, e.getPosition());
 118:         pse.initCause(e);
 119:         throw pse;
 120:       }
 121:   }
 122: 
 123:   // package private accessor method
 124:   RE getRE()
 125:   {
 126:     return re;
 127:   }
 128: 
 129:   /**
 130:    * @param regex The regular expression
 131:    *
 132:    * @exception PatternSyntaxException If the expression's syntax is invalid
 133:    */
 134:   public static Pattern compile (String regex)
 135:     throws PatternSyntaxException
 136:   {
 137:     return compile(regex, 0);
 138:   }
 139: 
 140:   /**
 141:    * @param regex The regular expression
 142:    * @param flags The match flags, a bit mask
 143:    *
 144:    * @exception PatternSyntaxException If the expression's syntax is invalid
 145:    * @exception IllegalArgumentException If bit values other than those
 146:    * corresponding to the defined match flags are set in flags
 147:    */
 148:   public static Pattern compile (String regex, int flags)
 149:     throws PatternSyntaxException
 150:   {
 151:     // FIXME: check which flags are really accepted
 152:     if ((flags & ~0xEF) != 0)
 153:       throw new IllegalArgumentException ();
 154: 
 155:     return new Pattern (regex, flags);
 156:   }
 157: 
 158:   public int flags ()
 159:   {
 160:     return this.flags;
 161:   }
 162: 
 163:   /**
 164:    * @param regex The regular expression
 165:    * @param input The character sequence to be matched
 166:    *
 167:    * @exception PatternSyntaxException If the expression's syntax is invalid
 168:    */
 169:   public static boolean matches (String regex, CharSequence input)
 170:   {
 171:     return compile(regex).matcher(input).matches();
 172:   }
 173: 
 174:   /**
 175:    * @param input The character sequence to be matched
 176:    */
 177:   public Matcher matcher (CharSequence input)
 178:   {
 179:     return new Matcher(this, input);
 180:   }
 181: 
 182:   /**
 183:    * @param input The character sequence to be matched
 184:    */
 185:   public String[] split (CharSequence input)
 186:   {
 187:     return split(input, 0);
 188:   }
 189: 
 190:   /**
 191:    * @param input The character sequence to be matched
 192:    * @param limit The result threshold
 193:    */
 194:   public String[] split (CharSequence input, int limit)
 195:   {
 196:     Matcher matcher = new Matcher(this, input);
 197:     ArrayList<String> list = new ArrayList<String>();
 198:     int empties = 0;
 199:     int count = 0;
 200:     int start = 0;
 201:     int end;
 202:     boolean matched = matcher.find();
 203: 
 204:     while (matched && (limit <= 0 || count < limit - 1))
 205:       {
 206:         ++count;
 207:         end = matcher.start();
 208:         if (start == end)
 209:           empties++;
 210:         else
 211:           {
 212:             while (empties > 0)
 213:               {
 214:                 list.add("");
 215:                 empties--;
 216:               }
 217: 
 218:             String text = input.subSequence(start, end).toString();
 219:             list.add(text);
 220:           }
 221:         start = matcher.end();
 222:         matched = matcher.find();
 223:       }
 224: 
 225:     // We matched nothing.
 226:     if (!matched && count == 0)
 227:       return new String[] { input.toString() };
 228: 
 229:     // Is the last token empty?
 230:     boolean emptyLast = (start == input.length());
 231: 
 232:     // Can/Must we add empties or an extra last token at the end?
 233:     if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
 234:       {
 235:         if (limit > list.size())
 236:           {
 237:             int max = limit - list.size();
 238:             empties = (empties > max) ? max : empties;
 239:           }
 240:         while (empties > 0)
 241:           {
 242:             list.add("");
 243:             empties--;
 244:           }
 245:       }
 246: 
 247:     // last token at end
 248:     if (limit != 0 || (limit == 0 && !emptyLast))
 249:       {
 250:         String t = input.subSequence(start, input.length()).toString();
 251:         if ("".equals(t) && limit == 0)
 252:           { /* Don't add. */ }
 253:         else
 254:           list.add(t);
 255:       }
 256: 
 257:     return list.toArray(new String[list.size()]);
 258:   }
 259: 
 260:   public String pattern ()
 261:   {
 262:     return regex;
 263:   }
 264: 
 265:   /**
 266:    * Returns a literal pattern for the specified String.
 267:    *
 268:    * @param String to return a literal pattern for.
 269:    * @return a literal pattern for the specified String.
 270:    * @exception NullPointerException if str is null.
 271:    * @since 1.5
 272:    */
 273:   public static String quote(String str)
 274:   {
 275:     int eInd = str.indexOf("\\E");
 276:     if (eInd < 0)
 277:       {
 278:         // No need to handle backslashes.
 279:         return "\\Q" + str + "\\E";
 280:       }
 281: 
 282:     CPStringBuilder sb = new CPStringBuilder(str.length() + 16);
 283:     sb.append("\\Q"); // start quote
 284: 
 285:     int pos = 0;
 286:     do
 287:       {
 288:         // A backslash is quoted by another backslash;
 289:         // 'E' is not needed to be quoted.
 290:         sb.append(str.substring(pos, eInd))
 291:           .append("\\E" + "\\\\" + "E" + "\\Q");
 292:         pos = eInd + 2;
 293:       } while ((eInd = str.indexOf("\\E", pos)) >= 0);
 294: 
 295:     sb.append(str.substring(pos, str.length()))
 296:       .append("\\E"); // end quote
 297:     return sb.toString();
 298:   }
 299: 
 300:   /**
 301:    * Return the regular expression used to construct this object.
 302:    * @specnote Prior to JDK 1.5 this method had a different behavior
 303:    * @since 1.5
 304:    */
 305:   public String toString()
 306:   {
 307:     return regex;
 308:   }
 309: }