1:
37:
38:
39: package ;
40:
41: import ;
42:
43:
48: public class Constants
49: {
50:
51:
52:
55: public static final int BEGIN = '<';
56:
57:
60: public static final int END = '>';
61:
62:
65: public static final int EXCLAMATION = '!';
66:
67:
70: public static final int SLASH = '/';
71:
72:
75: public static final int EQ = '=';
76:
77:
80: public static final int AP = '\'';
81:
82:
85: public static final int QUOT = '"';
86:
87:
88:
89:
90:
93: public static final int DOUBLE_DASH = 1000;
94:
95:
98: public static final int STYLE = 1001;
99:
100:
103: public static final int SCRIPT = 1002;
104:
105:
106:
107:
110: public static final int WS = 1003;
111:
112:
115: public static final int ENTITY = 1004;
116:
117:
120: public static final int NUMTOKEN = 1005;
121:
122:
123:
124:
127: public static final pattern COMMENT_OPEN =
128: new pattern(new node[]
129: {
130: new node(BEGIN), new node(WS, true), new node(EXCLAMATION),
131: new node(WS, true), new node(DOUBLE_DASH),
132: }
133: );
134:
135:
138: public static final pattern COMMENT_END =
139: new pattern(new node[]
140: {
141: new node(DOUBLE_DASH), new node(WS, true), new node(END)
142: }
143: );
144:
145:
148: public static final pattern COMMENT_TRIPLEDASH_END =
149: new pattern(new node[]
150: {
151: new node(DOUBLE_DASH), new node(NUMTOKEN), new node(END)
152: }
153: );
154:
155:
158: public static final pattern STYLE_OPEN =
159: new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(STYLE) });
160:
161:
164: public static final pattern SCRIPT_OPEN =
165: new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(SCRIPT) });
166:
167:
170: public static final pattern SGML =
171: new pattern(new node[]
172: {
173: new node(BEGIN), new node(WS, true), new node(EXCLAMATION)
174: }
175: );
176:
177:
180: public static final pattern SCRIPT_CLOSE =
181: new pattern(new node[]
182: {
183: new node(BEGIN), new node(WS, true), new node(SLASH),
184: new node(WS, true), new node(SCRIPT), new node(WS, true),
185: new node(END)
186: }
187: );
188:
189:
192: public static final pattern STYLE_CLOSE =
193: new pattern(new node[]
194: {
195: new node(BEGIN), new node(WS, true), new node(SLASH),
196: new node(WS, true), new node(STYLE), new node(WS, true),
197: new node(END)
198: }
199: );
200:
201:
204: public static final pattern TAG =
205: new pattern(new node[]
206: {
207: new node(BEGIN), new node(WS, true), new node(SLASH, true),
208: new node(WS, true), new node(NUMTOKEN)
209: }
210: );
211:
212:
215: public static final pattern TAG_CLOSE =
216: new pattern(new node[]
217: {
218: new node(BEGIN), new node(WS, true), new node(SLASH),
219: new node(WS, true), new node(NUMTOKEN)
220: }
221: );
222:
223:
224:
225:
228: public static final int OTHER = 1999;
229:
230:
233: static final char ETX = 3;
234:
235:
238: public static final int EOF = ETX;
239:
240:
241:
242:
245: public static final BitSet bSINGLE_CHAR_TOKEN = new BitSet();
246:
247:
250: public static final BitSet bSPECIAL = new BitSet();
251:
252:
255: public static final BitSet bLETTER = new BitSet();
256:
257:
260: public static final BitSet bDIGIT = new BitSet();
261:
262:
265: public static final BitSet bLINEBREAK = new BitSet();
266:
267:
270: public static final BitSet bWHITESPACE = new BitSet();
271:
272:
275: public static final BitSet bQUOTING = new BitSet();
276:
277:
280: public static final BitSet bNAME = new BitSet();
281:
282:
283:
284:
287: public static final int ENTITY_NAMED = 1;
288:
289:
292: public static final int ENTITY_NUMERIC = 2;
293:
294: static
295: {
296: bQUOTING.set(AP);
297: bQUOTING.set(QUOT);
298:
299: bSINGLE_CHAR_TOKEN.set(BEGIN);
300: bSINGLE_CHAR_TOKEN.set(END);
301: bSINGLE_CHAR_TOKEN.set(EXCLAMATION);
302: bSINGLE_CHAR_TOKEN.set(SLASH);
303: bSINGLE_CHAR_TOKEN.set(EQ);
304: bSINGLE_CHAR_TOKEN.set(EOF);
305:
306: bSINGLE_CHAR_TOKEN.or(bQUOTING);
307:
308: bLINEBREAK.set('\r');
309: bLINEBREAK.set('\n');
310:
311: bWHITESPACE.set(' ');
312: bWHITESPACE.set('\t');
313: bWHITESPACE.set(0xC);
314: bWHITESPACE.or(bLINEBREAK);
315:
316: for (char i = '0'; i <= '9'; i++)
317: {
318: bDIGIT.set(i);
319: }
320:
321: for (char i = 'a'; i <= 'z'; i++)
322: {
323: bLETTER.set(i);
324: }
325:
326: for (char i = 'A'; i <= 'Z'; i++)
327: {
328: bLETTER.set(i);
329: }
330:
331: bSPECIAL.set('-');
332: bSPECIAL.set('_');
333: bSPECIAL.set(':');
334: bSPECIAL.set('.');
335:
336: bNAME.or(bLETTER);
337: bNAME.or(bDIGIT);
338: bNAME.or(bSPECIAL);
339: }
340:
341:
350: public Token endMatches(Buffer b)
351: {
352: if (b.length() < 2)
353: return null;
354:
355: int p = b.length() - 2;
356:
357: if (b.length() > 2 && b.charAt(p) == '-' && b.charAt(p - 1) == '-')
358: return new Token(DOUBLE_DASH, "--", b.getLocation(p - 1, p + 1));
359:
360: char last = b.charAt(p);
361:
362: if (bSINGLE_CHAR_TOKEN.get(last))
363: return new Token(last, last, b.getLocation(p, p + 1));
364:
365: char future = b.charAt(p + 1);
366:
367:
368: if (bNAME.get(last) && !bNAME.get(future))
369: {
370:
371: int u = p - 1;
372: while (u >= 0 && bNAME.get(b.charAt(u)))
373: u--;
374: u++;
375:
376: char[] token = new char[ p - u + 1 ];
377:
378:
379: b.getChars(u, p + 1, token, 0);
380:
381:
382: String e = new String(token);
383:
384:
385: if (u > 0 && b.charAt(u - 1) == '&')
386: {
387:
388:
389:
390: return new Token(ENTITY, ENTITY_NAMED, "&" + e,
391: b.getLocation(u - 1, p + 1)
392: );
393: }
394:
395:
396: if (u > 1 && b.charAt(u - 1) == '#' && b.charAt(u - 2) == '&')
397: {
398:
399:
400:
401: return new Token(ENTITY, ENTITY_NUMERIC, "&#" + e,
402: b.getLocation(u - 2, p + 2)
403: );
404: }
405:
406: Location le = b.getLocation(u, p + 1);
407:
408: if (e.equalsIgnoreCase("SCRIPT"))
409: return new Token(SCRIPT, e, le);
410: else if (e.equalsIgnoreCase("STYLE"))
411: return new Token(STYLE, e, le);
412: else
413: return new Token(NUMTOKEN, e, le);
414: }
415:
416:
417: if (bWHITESPACE.get(last) && !bWHITESPACE.get(future))
418: {
419:
420: int u = p - 1;
421: while (u >= 0 && bWHITESPACE.get(b.charAt(u)))
422: u--;
423: u++;
424:
425: char[] token = new char[ p - u + 1 ];
426: b.getChars(u, p + 1, token, 0);
427:
428: return new Token(WS, new String(token), b.getLocation(u, p + 1));
429: }
430:
431: return null;
432: }
433: }