1:
37:
38:
39: package ;
40:
41: import ;
42:
43:
48:
49: public class SentenceBreakIterator extends BaseBreakIterator
50: {
51: public Object clone ()
52: {
53: return new SentenceBreakIterator (this);
54: }
55:
56: public SentenceBreakIterator ()
57: {
58: }
59:
60: private SentenceBreakIterator (SentenceBreakIterator other)
61: {
62: iter = (CharacterIterator) other.iter.clone();
63: }
64:
65: public int next ()
66: {
67: int end = iter.getEndIndex();
68: if (iter.getIndex() == end)
69: return DONE;
70:
71: while (iter.getIndex() < end)
72: {
73: char c = iter.current();
74: if (c == CharacterIterator.DONE)
75: break;
76: int type = Character.getType(c);
77:
78: char n = iter.next();
79: if (n == CharacterIterator.DONE)
80: break;
81:
82:
83: if (type == Character.PARAGRAPH_SEPARATOR)
84: break;
85:
86: if (c == '!' || c == '?')
87: {
88:
89: while (n != CharacterIterator.DONE
90: && Character.getType(n) == Character.END_PUNCTUATION)
91: n = iter.next();
92:
93: while (n != CharacterIterator.DONE && Character.isWhitespace(n))
94: n = iter.next();
95:
96:
97: break;
98: }
99:
100: if (c == '.')
101: {
102: int save = iter.getIndex();
103:
104: while (n != CharacterIterator.DONE
105: && Character.getType(n) == Character.END_PUNCTUATION)
106: n = iter.next();
107:
108:
109:
110: int spcount = 0;
111: while (n != CharacterIterator.DONE && Character.isWhitespace(n))
112: {
113: n = iter.next();
114: ++spcount;
115: }
116: if (spcount > 0)
117: {
118: int save2 = iter.getIndex();
119:
120: while (n != CharacterIterator.DONE
121: && Character.getType(n) == Character.START_PUNCTUATION)
122: n = iter.next();
123:
124: if (n == CharacterIterator.DONE
125: || ! Character.isLowerCase(n))
126: {
127: iter.setIndex(save2);
128: break;
129: }
130: }
131: iter.setIndex(save);
132: }
133: }
134:
135: return iter.getIndex();
136: }
137:
138: private final int previous_internal ()
139: {
140: int start = iter.getBeginIndex();
141: if (iter.getIndex() == start)
142: return DONE;
143:
144: while (iter.getIndex() >= start)
145: {
146: char c = iter.previous();
147: if (c == CharacterIterator.DONE)
148: break;
149:
150: char n = iter.previous();
151: if (n == CharacterIterator.DONE)
152: break;
153: iter.next();
154: int nt = Character.getType(n);
155:
156: if (! Character.isLowerCase(c)
157: && (nt == Character.START_PUNCTUATION
158: || Character.isWhitespace(n)))
159: {
160: int save = iter.getIndex();
161: int save_nt = nt;
162: char save_n = n;
163:
164: while (n != CharacterIterator.DONE
165: && Character.getType(n) == Character.START_PUNCTUATION)
166: n = iter.previous();
167: if (n == CharacterIterator.DONE)
168: break;
169: if (Character.isWhitespace(n))
170: {
171:
172: int save2 = iter.getIndex();
173: while (n != CharacterIterator.DONE
174: && Character.isWhitespace(n))
175: n = iter.previous();
176:
177: while (n != CharacterIterator.DONE
178: && Character.getType(n) == Character.END_PUNCTUATION)
179: n = iter.previous();
180: if (n == CharacterIterator.DONE || n == '.')
181: {
182:
183: period = iter.getIndex();
184: iter.setIndex(save2);
185: break;
186: }
187: }
188: iter.setIndex(save);
189: nt = save_nt;
190: n = save_n;
191: }
192:
193: if (nt == Character.PARAGRAPH_SEPARATOR)
194: {
195:
196: period = iter.getIndex();
197: break;
198: }
199: else if (Character.isWhitespace(n)
200: || nt == Character.END_PUNCTUATION)
201: {
202: int save = iter.getIndex();
203:
204: while (n != CharacterIterator.DONE
205: && Character.isWhitespace(n))
206: n = iter.previous();
207:
208: while (n != CharacterIterator.DONE
209: && Character.getType(n) == Character.END_PUNCTUATION)
210: n = iter.previous();
211: int here = iter.getIndex();
212: iter.setIndex(save);
213: if (n == CharacterIterator.DONE || n == '!' || n == '?')
214: {
215:
216: period = here;
217: break;
218: }
219: }
220: else if (n == '!' || n == '?')
221: {
222:
223: period = iter.getIndex();
224: break;
225: }
226: }
227:
228: return iter.getIndex();
229: }
230:
231: public int previous ()
232: {
233:
234:
235: int here = iter.getIndex();
236: period = here;
237: int first = previous_internal ();
238: if (here == iter.getEndIndex() || first == DONE)
239: return first;
240: iter.setIndex(period);
241: return previous_internal ();
242: }
243:
244:
245:
246: private int period;
247: }