1:
37:
38:
39: package ;
40:
41: import ;
42:
43:
48:
49: public class CharacterBreakIterator extends BaseBreakIterator
50: {
51:
52: private static final int LBase = 0x1100;
53: private static final int VBase = 0x1161;
54: private static final int TBase = 0x11a7;
55: private static final int LCount = 19;
56: private static final int VCount = 21;
57: private static final int TCount = 28;
58:
59:
60: private static final int highSurrogateStart = 0xD800;
61: private static final int highSurrogateEnd = 0xDBFF;
62: private static final int lowSurrogateStart = 0xDC00;
63: private static final int lowSurrogateEnd = 0xDFFF;
64:
65: public Object clone ()
66: {
67: return new CharacterBreakIterator (this);
68: }
69:
70: public CharacterBreakIterator ()
71: {
72: }
73:
74: private CharacterBreakIterator (CharacterBreakIterator other)
75: {
76: iter = (CharacterIterator) other.iter.clone();
77: }
78:
79:
80: private final boolean isL (char c)
81: {
82: return c >= LBase && c <= LBase + LCount;
83: }
84: private final boolean isV (char c)
85: {
86: return c >= VBase && c <= VBase + VCount;
87: }
88: private final boolean isT (char c)
89: {
90: return c >= TBase && c <= TBase + TCount;
91: }
92: private final boolean isLVT (char c)
93: {
94: return isL (c) || isV (c) || isT (c);
95: }
96: private final boolean isHighSurrogate (char c)
97: {
98: return c >= highSurrogateStart && c <= highSurrogateEnd;
99: }
100: private final boolean isLowSurrogate (char c)
101: {
102: return c >= lowSurrogateStart && c <= lowSurrogateEnd;
103: }
104:
105: public int next ()
106: {
107: int end = iter.getEndIndex();
108: if (iter.getIndex() == end)
109: return DONE;
110:
111: char c;
112: for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c)
113: {
114: c = iter.next();
115: if (c == CharacterIterator.DONE)
116: break;
117: int type = Character.getType(c);
118:
119:
120: if (type == Character.PARAGRAPH_SEPARATOR)
121: break;
122:
123:
124: char ahead = iter.next();
125: iter.previous();
126: if (ahead == CharacterIterator.DONE)
127: break;
128: int aheadType = Character.getType(ahead);
129:
130: if (aheadType != Character.NON_SPACING_MARK
131: && ! isLowSurrogate (ahead)
132: && ! isLVT (ahead))
133: break;
134: if (! isLVT (c) && isLVT (ahead))
135: break;
136: if (isL (c) && ! isLVT (ahead)
137: && aheadType != Character.NON_SPACING_MARK)
138: break;
139: if (isV (c) && ! isV (ahead) && !isT (ahead)
140: && aheadType != Character.NON_SPACING_MARK)
141: break;
142: if (isT (c) && ! isT (ahead)
143: && aheadType != Character.NON_SPACING_MARK)
144: break;
145:
146: if (! isHighSurrogate (c) && isLowSurrogate (ahead))
147: break;
148: if (isHighSurrogate (c) && ! isLowSurrogate (ahead))
149: break;
150: if (! isHighSurrogate (prev) && isLowSurrogate (c))
151: break;
152: }
153:
154: return iter.getIndex();
155: }
156:
157: public int previous ()
158: {
159: if (iter.getIndex() == iter.getBeginIndex())
160: return DONE;
161:
162: while (iter.getIndex() >= iter.getBeginIndex())
163: {
164: char c = iter.previous();
165: if (c == CharacterIterator.DONE)
166: break;
167: int type = Character.getType(c);
168:
169: if (type != Character.NON_SPACING_MARK
170: && ! isLowSurrogate (c)
171: && ! isLVT (c))
172: break;
173:
174:
175: char ahead = iter.previous();
176: if (ahead == CharacterIterator.DONE)
177: {
178: iter.next();
179: break;
180: }
181: char ahead2 = iter.previous();
182: iter.next();
183: iter.next();
184: if (ahead2 == CharacterIterator.DONE)
185: break;
186: int aheadType = Character.getType(ahead);
187:
188: if (aheadType == Character.PARAGRAPH_SEPARATOR)
189: break;
190:
191: if (isLVT (c) && ! isLVT (ahead))
192: break;
193: if (! isLVT (c) && type != Character.NON_SPACING_MARK
194: && isL (ahead))
195: break;
196: if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK
197: && isV (ahead))
198: break;
199: if (! isT (c) && type != Character.NON_SPACING_MARK
200: && isT (ahead))
201: break;
202:
203: if (isLowSurrogate (c) && ! isHighSurrogate (ahead))
204: break;
205: if (! isLowSurrogate (c) && isHighSurrogate (ahead))
206: break;
207: if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2))
208: break;
209: }
210:
211: return iter.getIndex();
212: }
213: }