1:
37:
38:
39: package ;
40:
41: import ;
42:
43:
48: public class Constants
49: {
50:
51:
52:
55: public static final int BEGIN = '<';
56:
57:
60: public static final int END = '>';
61:
62:
65: public static final int EXCLAMATION = '!';
66:
67:
70: public static final int SLASH = '/';
71:
72:
75: public static final int EQ = '=';
76:
77:
80: public static final int AP = '\'';
81:
82:
85: public static final int QUOT = '"';
86:
87:
88:
89:
90:
93: public static final int DOUBLE_DASH = 1000;
94:
95:
98: public static final int STYLE = 1001;
99:
100:
103: public static final int SCRIPT = 1002;
104:
105:
106:
107:
110: public static final int WS = 1003;
111:
112:
115: public static final int ENTITY = 1004;
116:
117:
120: public static final int NUMTOKEN = 1005;
121:
122:
123:
124:
127: public static final pattern COMMENT_OPEN =
128: new pattern(new node[]
129: {
130: new node(BEGIN), new node(WS, true), new node(EXCLAMATION),
131: new node(WS, true), new node(DOUBLE_DASH),
132: }
133: );
134:
135:
138: public static final pattern COMMENT_END =
139: new pattern(new node[]
140: {
141: new node(DOUBLE_DASH), new node(WS, true), new node(END)
142: }
143: );
144:
145:
148: public static final pattern COMMENT_TRIPLEDASH_END =
149: new pattern(new node[]
150: {
151: new node(DOUBLE_DASH), new node(NUMTOKEN), new node(END)
152: }
153: );
154:
155:
158: public static final pattern STYLE_OPEN =
159: new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(STYLE) });
160:
161:
164: public static final pattern SCRIPT_OPEN =
165: new pattern(new node[] { new node(BEGIN), new node(WS, true), new node(SCRIPT) });
166:
167:
170: public static final pattern SGML =
171: new pattern(new node[]
172: {
173: new node(BEGIN), new node(WS, true), new node(EXCLAMATION)
174: }
175: );
176:
177:
180: public static final pattern SCRIPT_CLOSE =
181: new pattern(new node[]
182: {
183: new node(BEGIN), new node(WS, true), new node(SLASH),
184: new node(WS, true), new node(SCRIPT), new node(WS, true),
185: new node(END)
186: }
187: );
188:
189:
192: public static final pattern STYLE_CLOSE =
193: new pattern(new node[]
194: {
195: new node(BEGIN), new node(WS, true), new node(SLASH),
196: new node(WS, true), new node(STYLE), new node(WS, true),
197: new node(END)
198: }
199: );
200:
201:
204: public static final pattern TAG =
205: new pattern(new node[]
206: {
207: new node(BEGIN), new node(WS, true), new node(SLASH, true),
208: new node(WS, true), new node(NUMTOKEN)
209: }
210: );
211:
212:
213:
214:
217: public static final int OTHER = 1999;
218:
219:
222: static final char ETX = 3;
223:
224:
227: public static final int EOF = ETX;
228:
229:
230:
231:
234: public static final BitSet bSINGLE_CHAR_TOKEN = new BitSet();
235:
236:
239: public static final BitSet bSPECIAL = new BitSet();
240:
241:
244: public static final BitSet bLETTER = new BitSet();
245:
246:
249: public static final BitSet bDIGIT = new BitSet();
250:
251:
254: public static final BitSet bLINEBREAK = new BitSet();
255:
256:
259: public static final BitSet bWHITESPACE = new BitSet();
260:
261:
264: public static final BitSet bQUOTING = new BitSet();
265:
266:
269: public static final BitSet bNAME = new BitSet();
270:
271:
272:
273:
276: public static final int ENTITY_NAMED = 1;
277:
278:
281: public static final int ENTITY_NUMERIC = 2;
282:
283: static
284: {
285: bQUOTING.set(AP);
286: bQUOTING.set(QUOT);
287:
288: bSINGLE_CHAR_TOKEN.set(BEGIN);
289: bSINGLE_CHAR_TOKEN.set(END);
290: bSINGLE_CHAR_TOKEN.set(EXCLAMATION);
291: bSINGLE_CHAR_TOKEN.set(SLASH);
292: bSINGLE_CHAR_TOKEN.set(EQ);
293: bSINGLE_CHAR_TOKEN.set(EOF);
294:
295: bSINGLE_CHAR_TOKEN.or(bQUOTING);
296:
297: bLINEBREAK.set('\r');
298: bLINEBREAK.set('\n');
299:
300: bWHITESPACE.set(' ');
301: bWHITESPACE.set('\t');
302: bWHITESPACE.set(0xC);
303: bWHITESPACE.or(bLINEBREAK);
304:
305: for (char i = '0'; i <= '9'; i++)
306: {
307: bDIGIT.set(i);
308: }
309:
310: for (char i = 'a'; i <= 'z'; i++)
311: {
312: bLETTER.set(i);
313: }
314:
315: for (char i = 'A'; i <= 'Z'; i++)
316: {
317: bLETTER.set(i);
318: }
319:
320: bSPECIAL.set('-');
321: bSPECIAL.set('_');
322: bSPECIAL.set(':');
323: bSPECIAL.set('.');
324:
325: bNAME.or(bLETTER);
326: bNAME.or(bDIGIT);
327: bNAME.or(bSPECIAL);
328: }
329:
330:
339: public Token endMatches(Buffer b)
340: {
341: if (b.length() < 2)
342: return null;
343:
344: int p = b.length() - 2;
345:
346: if (b.length() > 2 && b.charAt(p) == '-' && b.charAt(p - 1) == '-')
347: return new Token(DOUBLE_DASH, "--", b.getLocation(p - 1, p + 1));
348:
349: char last = b.charAt(p);
350:
351: if (bSINGLE_CHAR_TOKEN.get(last))
352: return new Token(last, last, b.getLocation(p, p + 1));
353:
354: char future = b.charAt(p + 1);
355:
356:
357: if (bNAME.get(last) && !bNAME.get(future))
358: {
359:
360: int u = p - 1;
361: while (u >= 0 && bNAME.get(b.charAt(u)))
362: u--;
363: u++;
364:
365: char[] token = new char[ p - u + 1 ];
366:
367:
368: b.getChars(u, p + 1, token, 0);
369:
370:
371: String e = new String(token);
372:
373:
374: if (u > 0 && b.charAt(u - 1) == '&')
375: {
376:
377:
378:
379: return new Token(ENTITY, ENTITY_NAMED, "&" + e,
380: b.getLocation(u - 1, p + 1)
381: );
382: }
383:
384:
385: if (u > 1 && b.charAt(u - 1) == '#' && b.charAt(u - 2) == '&')
386: {
387:
388:
389:
390: return new Token(ENTITY, ENTITY_NUMERIC, "&#" + e,
391: b.getLocation(u - 2, p + 2)
392: );
393: }
394:
395: Location le = b.getLocation(u, p + 1);
396:
397: if (e.equalsIgnoreCase("SCRIPT"))
398: return new Token(SCRIPT, e, le);
399: else if (e.equalsIgnoreCase("STYLE"))
400: return new Token(STYLE, e, le);
401: else
402: return new Token(NUMTOKEN, e, le);
403: }
404:
405:
406: if (bWHITESPACE.get(last) && !bWHITESPACE.get(future))
407: {
408:
409: int u = p - 1;
410: while (u >= 0 && bWHITESPACE.get(b.charAt(u)))
411: u--;
412: u++;
413:
414: char[] token = new char[ p - u + 1 ];
415: b.getChars(u, p + 1, token, 0);
416:
417: return new Token(WS, new String(token), b.getLocation(u, p + 1));
418: }
419:
420: return null;
421: }
422: }