1:
37:
38:
39: package ;
40:
41: import ;
42:
43:
48:
49: public class WordBreakIterator extends BaseBreakIterator
50: {
51: public Object clone ()
52: {
53: return new WordBreakIterator (this);
54: }
55:
56: public WordBreakIterator ()
57: {
58: }
59:
60: private WordBreakIterator (WordBreakIterator other)
61: {
62: iter = (CharacterIterator) other.iter.clone();
63: }
64:
65:
66: private final boolean isHira (char c)
67: {
68: return c >= 0x3040 && c <= 0x309f;
69: }
70: private final boolean isKata (char c)
71: {
72: return c >= 0x30a0 && c <= 0x30ff;
73: }
74: private final boolean isHan (char c)
75: {
76: return c >= 0x4e00 && c <= 0x9fff;
77: }
78:
79: public int next ()
80: {
81: int end = iter.getEndIndex();
82: if (iter.getIndex() == end)
83: return DONE;
84:
85: while (iter.getIndex() < end)
86: {
87: char c = iter.current();
88: if (c == CharacterIterator.DONE)
89: break;
90: int type = Character.getType(c);
91:
92: char n = iter.next();
93: if (n == CharacterIterator.DONE)
94: break;
95:
96:
97: if (type == Character.PARAGRAPH_SEPARATOR
98: || type == Character.LINE_SEPARATOR)
99: break;
100:
101:
102:
103:
104: boolean is_letter = Character.isLetter(c);
105: if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK
106: && Character.isLetter(n))
107: break;
108:
109:
110:
111:
112:
113:
114: if (c != '\''
115: && (type == Character.DASH_PUNCTUATION
116: || type == Character.START_PUNCTUATION
117: || type == Character.END_PUNCTUATION
118: || type == Character.CONNECTOR_PUNCTUATION
119: || type == Character.OTHER_PUNCTUATION
120: || type == Character.MATH_SYMBOL
121: || type == Character.CURRENCY_SYMBOL
122: || type == Character.MODIFIER_SYMBOL
123: || type == Character.OTHER_SYMBOL
124: || type == Character.FORMAT
125: || type == Character.CONTROL))
126: break;
127:
128: boolean is_hira = isHira (c);
129: boolean is_kata = isKata (c);
130: boolean is_han = isHan (c);
131:
132:
133: if (! is_hira && ! is_kata && ! is_han
134: && type != Character.NON_SPACING_MARK
135: && (isHira (n) || isKata (n) || isHan (n)))
136: break;
137:
138: if (is_hira || is_kata || is_han || is_letter)
139: {
140:
141:
142:
143: int save = iter.getIndex();
144:
145: while (n != CharacterIterator.DONE
146: && Character.getType(n) == Character.NON_SPACING_MARK)
147: n = iter.next();
148: if (n == CharacterIterator.DONE)
149: break;
150: if ((is_hira && ! isHira (n))
151: || (is_kata && ! isHira (n) && ! isKata (n))
152: || (is_han && ! isHira (n) && ! isHan (n))
153:
154:
155: || (is_letter && ! Character.isLetter(n) && n != '\''))
156: break;
157: iter.setIndex(save);
158: }
159: }
160:
161: return iter.getIndex();
162: }
163:
164: public int previous ()
165: {
166: int start = iter.getBeginIndex();
167: if (iter.getIndex() == start)
168: return DONE;
169:
170: while (iter.getIndex() >= start)
171: {
172: char c = iter.previous();
173: if (c == CharacterIterator.DONE)
174: break;
175:
176: boolean is_hira = isHira (c);
177: boolean is_kata = isKata (c);
178: boolean is_han = isHan (c);
179: boolean is_letter = Character.isLetter(c);
180:
181: char n = iter.previous();
182: if (n == CharacterIterator.DONE)
183: break;
184: iter.next();
185: int type = Character.getType(n);
186:
187: if (type == Character.PARAGRAPH_SEPARATOR
188: || type == Character.LINE_SEPARATOR)
189: break;
190:
191:
192:
193:
194: if (n != '\'' && ! Character.isLetter(n)
195: && type != Character.NON_SPACING_MARK
196: && is_letter)
197: break;
198:
199:
200:
201:
202:
203:
204: if (n != '\''
205: && (type == Character.DASH_PUNCTUATION
206: || type == Character.START_PUNCTUATION
207: || type == Character.END_PUNCTUATION
208: || type == Character.CONNECTOR_PUNCTUATION
209: || type == Character.OTHER_PUNCTUATION
210: || type == Character.MATH_SYMBOL
211: || type == Character.CURRENCY_SYMBOL
212: || type == Character.MODIFIER_SYMBOL
213: || type == Character.OTHER_SYMBOL
214: || type == Character.FORMAT
215: || type == Character.CONTROL))
216: break;
217:
218:
219: if ((is_hira || is_kata || is_han)
220: && ! isHira (n) && ! isKata (n) && ! isHan (n)
221: && type != Character.NON_SPACING_MARK)
222: break;
223:
224:
225:
226: if (! is_hira || (! is_letter && c != '\''))
227: {
228: int save = iter.getIndex();
229: while (n != CharacterIterator.DONE
230: && Character.getType(n) == Character.NON_SPACING_MARK)
231: n = iter.previous();
232: iter.setIndex(save);
233:
234:
235:
236: if (n == CharacterIterator.DONE)
237: break;
238: if ((isHira (n) && ! is_hira)
239: || (isKata (n) && ! is_hira && ! is_kata)
240: || (isHan (n) && ! is_hira && ! is_han)
241:
242:
243: || (! is_letter && c != '\'' && Character.isLetter(n)))
244: break;
245: }
246: }
247:
248: return iter.getIndex();
249: }
250: }