1:
37:
38: package ;
39: import ;
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45:
46:
111:
112: public class RE extends REToken {
113:
114: private static final class IntPair implements Serializable {
115: public int first, second;
116: }
117:
118: private static final class CharUnit implements Serializable {
119: public char ch;
120: public boolean bk;
121: }
122:
123:
124: private static final String VERSION = "1.1.5-dev";
125:
126:
127: private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
128:
129:
130:
131: private REToken firstToken, lastToken;
132:
133:
134:
135: private int numSubs;
136:
137:
138: private int minimumLength;
139: private int maximumLength;
140:
141:
145: public static final int REG_ICASE = 0x02;
146:
147:
153: public static final int REG_DOT_NEWLINE = 0x04;
154:
155:
160: public static final int REG_MULTILINE = 0x08;
161:
162:
189: public static final int REG_NOTBOL = 0x10;
190:
191:
196: public static final int REG_NOTEOL = 0x20;
197:
198:
210: public static final int REG_ANCHORINDEX = 0x40;
211:
212:
219: public static final int REG_NO_INTERPOLATE = 0x80;
220:
221:
226: public static final int REG_TRY_ENTIRE_MATCH = 0x0100;
227:
228:
236: public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
237:
238:
239: public static final String version() {
240: return VERSION;
241: }
242:
243:
244: static final String getLocalizedMessage(String key) {
245: return messages.getString(key);
246: }
247:
248:
/**
 * Compiles {@code pattern} with no compilation flags and the
 * {@code RESyntax.RE_SYNTAX_PERL5} syntax.
 *
 * @param pattern the pattern; handled as described for {@code initialize}
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern) throws REException
{
  this(pattern, 0, RESyntax.RE_SYNTAX_PERL5, 0, 0);
}
261:
262:
/**
 * Compiles {@code pattern} with the given compilation flags and the
 * {@code RESyntax.RE_SYNTAX_PERL5} syntax.
 *
 * @param pattern the pattern; handled as described for {@code initialize}
 * @param cflags bitwise OR of the {@code REG_*} compilation flags
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern, int cflags) throws REException
{
  this(pattern, cflags, RESyntax.RE_SYNTAX_PERL5, 0, 0);
}
276:
277:
/**
 * Compiles {@code pattern} with the given compilation flags and syntax.
 *
 * @param pattern the pattern; handled as described for {@code initialize}
 * @param cflags bitwise OR of the {@code REG_*} compilation flags
 * @param syntax the syntax rules used to interpret the pattern
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern, int cflags, RESyntax syntax) throws REException
{
  this(pattern, cflags, syntax, 0, 0);
}
292:
293:
294: private RE(REToken first, REToken last,int subs, int subIndex, int minLength, int maxLength) {
295: super(subIndex);
296: firstToken = first;
297: lastToken = last;
298: numSubs = subs;
299: minimumLength = minLength;
300: maximumLength = maxLength;
301: addToken(new RETokenEndSub(subIndex));
302: }
303:
/**
 * Compiles a (sub)pattern; used for the public constructors and for
 * parenthesised groups found while compiling.
 *
 * @param patternObj the pattern object
 * @param cflags bitwise OR of the {@code REG_*} compilation flags
 * @param syntax the syntax rules used to interpret the pattern
 * @param myIndex subexpression index passed to the superclass
 * @param nextSub base used when numbering nested subexpressions
 * @throws REException if the pattern cannot be compiled
 */
private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException
{
  super(myIndex);
  initialize(patternObj, cflags, syntax, myIndex, nextSub);
}
308:
309:
/**
 * Creates an uncompiled instance with subexpression index 0; subclasses are
 * expected to call {@code initialize} themselves.
 */
protected RE()
{
  super(0);
}
311:
312:
313: protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
314: char[] pattern;
315: if (patternObj instanceof String) {
316: pattern = ((String) patternObj).toCharArray();
317: } else if (patternObj instanceof char[]) {
318: pattern = (char[]) patternObj;
319: } else if (patternObj instanceof StringBuffer) {
320: pattern = new char [((StringBuffer) patternObj).length()];
321: ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
322: } else {
323: pattern = patternObj.toString().toCharArray();
324: }
325:
326: int pLength = pattern.length;
327:
328: numSubs = 0;
329: Vector branches = null;
330:
331:
332: firstToken = lastToken = null;
333:
334:
335:
336: boolean insens = ((cflags & REG_ICASE) > 0);
337:
338:
339:
340:
341:
342: int index = 0;
343:
344:
345: CharUnit unit = new CharUnit();
346:
347:
348: IntPair minMax = new IntPair();
349:
350:
351: REToken currentToken = null;
352: char ch;
353: boolean quot = false;
354:
355:
356: RESyntax savedSyntax = null;
357: int savedCflags = 0;
358: boolean flagsSaved = false;
359:
360: while (index < pLength) {
361:
362: index = getCharUnit(pattern,index,unit,quot);
363:
364: if (unit.bk)
365: if (unit.ch == 'Q') {
366: quot = true;
367: continue;
368: } else if (unit.ch == 'E') {
369: quot = false;
370: continue;
371: }
372: if (quot)
373: unit.bk = false;
374:
375:
376:
377:
378:
379:
380:
381: if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
382: || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
383: && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
384:
385: addToken(currentToken);
386: RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength, maximumLength);
387: minimumLength = 0;
388: maximumLength = 0;
389: if (branches == null) {
390: branches = new Vector();
391: }
392: branches.addElement(theBranch);
393: firstToken = lastToken = currentToken = null;
394: }
395:
396:
397:
398:
399:
400:
401:
402:
403:
404:
405:
406: else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
407: int newIndex = getMinMax(pattern,index,minMax,syntax);
408: if (newIndex > index) {
409: if (minMax.first > minMax.second)
410: throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
411: if (currentToken == null)
412: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
413: if (currentToken instanceof RETokenRepeated)
414: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
415: if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
416: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
417: index = newIndex;
418: currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);
419: }
420: else {
421: addToken(currentToken);
422: currentToken = new RETokenChar(subIndex,unit.ch,insens);
423: }
424: }
425:
426:
427:
428:
429: else if ((unit.ch == '[') && !(unit.bk || quot)) {
430:
431: ParseCharClassResult result = parseCharClass(
432: subIndex, pattern, index, pLength, cflags, syntax, 0);
433: addToken(currentToken);
434: currentToken = result.token;
435: index = result.index;
436: }
437:
438:
439:
440:
441: else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
442: boolean pure = false;
443: boolean comment = false;
444: boolean lookAhead = false;
445: boolean lookBehind = false;
446: boolean independent = false;
447: boolean negativelh = false;
448: boolean negativelb = false;
449: if ((index+1 < pLength) && (pattern[index] == '?')) {
450: switch (pattern[index+1]) {
451: case '!':
452: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
453: pure = true;
454: negativelh = true;
455: lookAhead = true;
456: index += 2;
457: }
458: break;
459: case '=':
460: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
461: pure = true;
462: lookAhead = true;
463: index += 2;
464: }
465: break;
466: case '<':
467:
468:
469: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
470: index++;
471: switch (pattern[index +1]) {
472: case '!':
473: pure = true;
474: negativelb = true;
475: lookBehind = true;
476: index += 2;
477: break;
478: case '=':
479: pure = true;
480: lookBehind = true;
481: index += 2;
482: }
483: }
484: break;
485: case '>':
486:
487:
488: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
489: pure = true;
490: independent = true;
491: index += 2;
492: }
493: break;
494: case 'i':
495: case 'd':
496: case 'm':
497: case 's':
498:
499:
500: case '-':
501: if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
502:
503: int flagIndex = index + 1;
504: int endFlag = -1;
505: RESyntax newSyntax = new RESyntax(syntax);
506: int newCflags = cflags;
507: boolean negate = false;
508: while (flagIndex < pLength && endFlag < 0) {
509: switch(pattern[flagIndex]) {
510: case 'i':
511: if (negate)
512: newCflags &= ~REG_ICASE;
513: else
514: newCflags |= REG_ICASE;
515: flagIndex++;
516: break;
517: case 'd':
518: if (negate)
519: newSyntax.setLineSeparator(RESyntax.DEFAULT_LINE_SEPARATOR);
520: else
521: newSyntax.setLineSeparator("\n");
522: flagIndex++;
523: break;
524: case 'm':
525: if (negate)
526: newCflags &= ~REG_MULTILINE;
527: else
528: newCflags |= REG_MULTILINE;
529: flagIndex++;
530: break;
531: case 's':
532: if (negate)
533: newCflags &= ~REG_DOT_NEWLINE;
534: else
535: newCflags |= REG_DOT_NEWLINE;
536: flagIndex++;
537: break;
538:
539:
540: case '-':
541: negate = true;
542: flagIndex++;
543: break;
544: case ':':
545: case ')':
546: endFlag = pattern[flagIndex];
547: break;
548: default:
549: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
550: }
551: }
552: if (endFlag == ')') {
553: syntax = newSyntax;
554: cflags = newCflags;
555: insens = ((cflags & REG_ICASE) > 0);
556:
557: comment = true;
558: index = flagIndex - 1;
559: break;
560: }
561: if (endFlag == ':') {
562: savedSyntax = syntax;
563: savedCflags = cflags;
564: flagsSaved = true;
565: syntax = newSyntax;
566: cflags = newCflags;
567: insens = ((cflags & REG_ICASE) > 0);
568: index = flagIndex -1;
569:
570: }
571: else {
572: throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
573: }
574: case ':':
575: if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
576: pure = true;
577: index += 2;
578: }
579: break;
580: case '#':
581: if (syntax.get(RESyntax.RE_COMMENTS)) {
582: comment = true;
583: }
584: break;
585: default:
586: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
587: }
588: }
589:
590: if (index >= pLength) {
591: throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
592: }
593:
594:
595: int endIndex = index;
596: int nextIndex = index;
597: int nested = 0;
598:
599: while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
600: && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) ) {
601: if ((endIndex = nextIndex) >= pLength)
602: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
603: else if ((unit.ch == '[') && !(unit.bk || quot)) {
604:
605:
606: int listIndex = nextIndex;
607: if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;
608: if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;
609: int listEndIndex = -1;
610: int listNest = 0;
611: while (listIndex < pLength && listEndIndex < 0) {
612: switch(pattern[listIndex++]) {
613: case '\\':
614: listIndex++;
615: break;
616: case '[':
617:
618:
619: listNest++;
620: if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;
621: if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;
622: break;
623: case ']':
624: if (listNest == 0)
625: listEndIndex = listIndex;
626: listNest--;
627: break;
628: }
629: }
630: if (listEndIndex >= 0) {
631: nextIndex = listEndIndex;
632: if ((endIndex = nextIndex) >= pLength)
633: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
634: else
635: continue;
636: }
637: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
638: }
639: else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
640: nested++;
641: else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
642: nested--;
643: }
644:
645:
646:
647:
648: if (comment) index = nextIndex;
649: else {
650:
651: addToken(currentToken);
652: if (!pure) {
653: numSubs++;
654: }
655:
656: int useIndex = (pure || lookAhead || lookBehind || independent) ?
657: 0 : nextSub + numSubs;
658: currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
659: numSubs += ((RE) currentToken).getNumSubs();
660:
661: if (lookAhead) {
662: currentToken = new RETokenLookAhead(currentToken,negativelh);
663: }
664: else if (lookBehind) {
665: currentToken = new RETokenLookBehind(currentToken,negativelb);
666: }
667: else if (independent) {
668: currentToken = new RETokenIndependent(currentToken);
669: }
670:
671: index = nextIndex;
672: if (flagsSaved) {
673: syntax = savedSyntax;
674: cflags = savedCflags;
675: insens = ((cflags & REG_ICASE) > 0);
676: flagsSaved = false;
677: }
678: }
679: }
680:
681:
682:
683:
684: else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
685: throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
686: }
687:
688:
689:
690:
691: else if ((unit.ch == '^') && !(unit.bk || quot)) {
692: addToken(currentToken);
693: currentToken = null;
694: addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
695: }
696:
697:
698:
699:
700: else if ((unit.ch == '$') && !(unit.bk || quot)) {
701: addToken(currentToken);
702: currentToken = null;
703: addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
704: }
705:
706:
707:
708:
709: else if ((unit.ch == '.') && !(unit.bk || quot)) {
710: addToken(currentToken);
711: currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
712: }
713:
714:
715:
716:
717:
718:
719:
720: else if ((unit.ch == '*') && !(unit.bk || quot)) {
721: if (currentToken == null)
722: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
723: if (currentToken instanceof RETokenRepeated)
724: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
725: if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
726: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
727: currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
728: }
729:
730:
731:
732:
733:
734:
735:
736:
737: else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
738: if (currentToken == null)
739: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
740:
741:
742: if (currentToken instanceof RETokenRepeated) {
743: RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
744: if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
745: tokenRep.makePossessive();
746: else
747: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
748:
749: }
750: else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
751: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
752: else
753: currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
754: }
755:
756:
757:
758:
759:
760:
761: else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
762: if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
763:
764:
765: if (currentToken instanceof RETokenRepeated) {
766: RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
767: if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
768: tokenRep.makeStingy();
769: else
770: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
771: }
772: else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
773: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
774: else
775: currentToken = setRepeated(currentToken,0,1,index);
776: }
777:
778:
779:
780:
781: else if (unit.bk && (unit.ch == '0') && syntax.get(RESyntax.RE_OCTAL_CHAR)) {
782: CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
783: if (ce == null)
784: throw new REException("invalid octal character", REException.REG_ESCAPE, index);
785: index = index - 2 + ce.len;
786: addToken(currentToken);
787: currentToken = new RETokenChar(subIndex,ce.ch,insens);
788: }
789:
790:
791:
792:
793:
794:
795:
796:
797:
798:
799:
800:
801:
802: else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
803: addToken(currentToken);
804: int numBegin = index - 1;
805: int numEnd = pLength;
806: for (int i = index; i < pLength; i++) {
807: if (! Character.isDigit(pattern[i])) {
808: numEnd = i;
809: break;
810: }
811: }
812: int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);
813:
814: currentToken = new RETokenBackRef(subIndex,num,insens);
815: index = numEnd;
816: }
817:
818:
819:
820:
821: else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
822: addToken(currentToken);
823: currentToken = new RETokenStart(subIndex,null);
824: }
825:
826:
827:
828:
829: else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
830: addToken(currentToken);
831: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
832: }
833:
834:
835:
836: else if (unit.bk && (unit.ch == '<')) {
837: addToken(currentToken);
838: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
839: }
840:
841:
842:
843: else if (unit.bk && (unit.ch == '>')) {
844: addToken(currentToken);
845: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
846: }
847:
848:
849:
850:
851: else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
852: addToken(currentToken);
853: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
854: }
855:
856:
857:
858:
859:
860: else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
861: addToken(currentToken);
862: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
863: }
864:
865:
866:
867:
868: else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
869: addToken(currentToken);
870: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
871: }
872:
873:
874:
875:
876: else if (unit.bk && (unit.ch == 'n')) {
877: addToken(currentToken);
878: currentToken = new RETokenChar(subIndex,'\n',false);
879: }
880:
881:
882:
883:
884: else if (unit.bk && (unit.ch == 'r')) {
885: addToken(currentToken);
886: currentToken = new RETokenChar(subIndex,'\r',false);
887: }
888:
889:
890:
891:
892: else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
893: addToken(currentToken);
894: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
895: }
896:
897:
898:
899:
900: else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
901: addToken(currentToken);
902: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
903: }
904:
905:
906:
907:
908: else if (unit.bk && (unit.ch == 't')) {
909: addToken(currentToken);
910: currentToken = new RETokenChar(subIndex,'\t',false);
911: }
912:
913:
914:
915:
916: else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
917: addToken(currentToken);
918: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
919: }
920:
921:
922:
923:
924: else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
925: addToken(currentToken);
926: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
927: }
928:
929:
930:
931:
932: else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
933: addToken(currentToken);
934: currentToken = new RETokenEnd(subIndex,null);
935: }
936:
937:
938:
939:
940: else if ((unit.bk && (unit.ch == 'x') && syntax.get(RESyntax.RE_HEX_CHAR)) ||
941: (unit.bk && (unit.ch == 'u') && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
942: CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
943: if (ce == null)
944: throw new REException("invalid hex character", REException.REG_ESCAPE, index);
945: index = index - 2 + ce.len;
946: addToken(currentToken);
947: currentToken = new RETokenChar(subIndex,ce.ch,insens);
948: }
949:
950:
951:
952:
953: else if ((unit.bk && (unit.ch == 'p') && syntax.get(RESyntax.RE_NAMED_PROPERTY)) ||
954: (unit.bk && (unit.ch == 'P') && syntax.get(RESyntax.RE_NAMED_PROPERTY))) {
955: NamedProperty np = getNamedProperty(pattern, index - 2, pLength);
956: if (np == null)
957: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
958: index = index - 2 + np.len;
959: addToken(currentToken);
960: currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
961: }
962:
963:
964:
965:
966: else {
967: addToken(currentToken);
968: currentToken = new RETokenChar(subIndex,unit.ch,insens);
969: }
970: }
971:
972:
973: addToken(currentToken);
974:
975: if (branches != null) {
976: branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength, maximumLength));
977: branches.trimToSize();
978: minimumLength = 0;
979: maximumLength = 0;
980: firstToken = lastToken = null;
981: addToken(new RETokenOneOf(subIndex,branches,false));
982: }
983: else addToken(new RETokenEndSub(subIndex));
984:
985: }
986:
987: private static class ParseCharClassResult {
988: RETokenOneOf token;
989: int index;
990: boolean returnAtAndOperator = false;
991: }
992:
993:
1003: private static ParseCharClassResult parseCharClass(int subIndex,
1004: char[] pattern, int index,
1005: int pLength, int cflags, RESyntax syntax, int pflags)
1006: throws REException {
1007:
1008: boolean insens = ((cflags & REG_ICASE) > 0);
1009: Vector options = new Vector();
1010: Vector addition = new Vector();
1011: boolean additionAndAppeared = false;
1012: final int RETURN_AT_AND = 0x01;
1013: boolean returnAtAndOperator = ((pflags & RETURN_AT_AND) != 0);
1014: boolean negative = false;
1015: char ch;
1016:
1017: char lastChar = 0;
1018: boolean lastCharIsSet = false;
1019: if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
1020:
1021:
1022: if ((ch = pattern[index]) == '^') {
1023: negative = true;
1024: if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1025: ch = pattern[index];
1026: }
1027:
1028:
1029: if (ch == ']') {
1030: lastChar = ch; lastCharIsSet = true;
1031: if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1032: }
1033:
1034: while ((ch = pattern[index++]) != ']') {
1035: if ((ch == '-') && (lastCharIsSet)) {
1036: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1037: if ((ch = pattern[index]) == ']') {
1038: options.addElement(new RETokenChar(subIndex,lastChar,insens));
1039: lastChar = '-';
1040: } else {
1041: if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
1042: CharExpression ce = getCharExpression(pattern, index, pLength, syntax);
1043: if (ce == null)
1044: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1045: ch = ce.ch;
1046: index = index + ce.len - 1;
1047: }
1048: options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
1049: lastChar = 0; lastCharIsSet = false;
1050: index++;
1051: }
1052: } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
1053: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1054: int posixID = -1;
1055: boolean negate = false;
1056: char asciiEsc = 0;
1057: boolean asciiEscIsSet = false;
1058: NamedProperty np = null;
1059: if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
1060: switch (pattern[index]) {
1061: case 'D':
1062: negate = true;
1063: case 'd':
1064: posixID = RETokenPOSIX.DIGIT;
1065: break;
1066: case 'S':
1067: negate = true;
1068: case 's':
1069: posixID = RETokenPOSIX.SPACE;
1070: break;
1071: case 'W':
1072: negate = true;
1073: case 'w':
1074: posixID = RETokenPOSIX.ALNUM;
1075: break;
1076: }
1077: }
1078: if (("pP".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_NAMED_PROPERTY)) {
1079: np = getNamedProperty(pattern, index - 1, pLength);
1080: if (np == null)
1081: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1082: index = index - 1 + np.len - 1;
1083: }
1084: else {
1085: CharExpression ce = getCharExpression(pattern, index - 1, pLength, syntax);
1086: if (ce == null)
1087: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1088: asciiEsc = ce.ch; asciiEscIsSet = true;
1089: index = index - 1 + ce.len - 1;
1090: }
1091: if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
1092:
1093: if (posixID != -1) {
1094: options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
1095: } else if (np != null) {
1096: options.addElement(getRETokenNamedProperty(subIndex,np,insens,index));
1097: } else if (asciiEscIsSet) {
1098: lastChar = asciiEsc; lastCharIsSet = true;
1099: } else {
1100: lastChar = pattern[index]; lastCharIsSet = true;
1101: }
1102: ++index;
1103: } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
1104: StringBuffer posixSet = new StringBuffer();
1105: index = getPosixSet(pattern,index+1,posixSet);
1106: int posixId = RETokenPOSIX.intValue(posixSet.toString());
1107: if (posixId != -1)
1108: options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
1109: } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {
1110: ParseCharClassResult result = parseCharClass(
1111: subIndex, pattern, index, pLength, cflags, syntax, 0);
1112: addition.addElement(result.token);
1113: addition.addElement("|");
1114: index = result.index;
1115: } else if ((ch == '&') &&
1116: (syntax.get(RESyntax.RE_NESTED_CHARCLASS)) &&
1117: (index < pLength) && (pattern[index] == '&')) {
1118: if (returnAtAndOperator) {
1119: ParseCharClassResult result = new ParseCharClassResult();
1120: options.trimToSize();
1121: if (additionAndAppeared) addition.addElement("&");
1122: if (addition.size() == 0) addition = null;
1123: result.token = new RETokenOneOf(subIndex,
1124: options, addition, negative);
1125: result.index = index - 1;
1126: result.returnAtAndOperator = true;
1127: return result;
1128: }
1129:
1130:
1131:
1132:
1133:
1134:
1135:
1136: if (additionAndAppeared) addition.addElement("&");
1137: addition.addElement(Boolean.FALSE);
1138: additionAndAppeared = true;
1139:
1140:
1141:
1142:
1143:
1144:
1145:
1146:
1147: if ((index + 1 < pLength) && (pattern[index + 1] != '[')) {
1148: ParseCharClassResult result = parseCharClass(
1149: subIndex, pattern, index+1, pLength, cflags, syntax,
1150: RETURN_AT_AND);
1151: addition.addElement(result.token);
1152: addition.addElement("|");
1153:
1154:
1155:
1156:
1157: index = (result.returnAtAndOperator ?
1158: result.index: result.index - 1);
1159: }
1160: } else {
1161: if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
1162: lastChar = ch; lastCharIsSet = true;
1163: }
1164: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1165: }
1166:
1167:
1168: if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
1169:
1170: ParseCharClassResult result = new ParseCharClassResult();
1171:
1172: options.trimToSize();
1173: if (additionAndAppeared) addition.addElement("&");
1174: if (addition.size() == 0) addition = null;
1175: result.token = new RETokenOneOf(subIndex,options, addition, negative);
1176: result.index = index;
1177: return result;
1178: }
1179:
1180: private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
1181: unit.ch = input[index++];
1182: unit.bk = (unit.ch == '\\'
1183: && (!quot || index >= input.length || input[index] == 'E'));
1184: if (unit.bk)
1185: if (index < input.length)
1186: unit.ch = input[index++];
1187: else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
1188: return index;
1189: }
1190:
1191: private static int parseInt(char[] input, int pos, int len, int radix) {
1192: int ret = 0;
1193: for (int i = pos; i < pos + len; i++) {
1194: ret = ret * radix + Character.digit(input[i], radix);
1195: }
1196: return ret;
1197: }
1198:
1199:
1206: private static class CharExpression {
1207:
1208: char ch;
1209:
1210: String expr;
1211:
1212: int len;
1213: public String toString() { return expr; }
1214: }
1215:
1216: private static CharExpression getCharExpression(char[] input, int pos, int lim,
1217: RESyntax syntax) {
1218: CharExpression ce = new CharExpression();
1219: char c = input[pos];
1220: if (c == '\\') {
1221: if (pos + 1 >= lim) return null;
1222: c = input[pos + 1];
1223: switch(c) {
1224: case 't':
1225: ce.ch = '\t';
1226: ce.len = 2;
1227: break;
1228: case 'n':
1229: ce.ch = '\n';
1230: ce.len = 2;
1231: break;
1232: case 'r':
1233: ce.ch = '\r';
1234: ce.len = 2;
1235: break;
1236: case 'x':
1237: case 'u':
1238: if ((c == 'x' && syntax.get(RESyntax.RE_HEX_CHAR)) ||
1239: (c == 'u' && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
1240: int l = 0;
1241: int expectedLength = (c == 'x' ? 2 : 4);
1242: for (int i = pos + 2; i < pos + 2 + expectedLength; i++) {
1243: if (i >= lim) break;
1244: if (!((input[i] >= '0' && input[i] <= '9') ||
1245: (input[i] >= 'A' && input[i] <= 'F') ||
1246: (input[i] >= 'a' && input[i] <= 'f')))
1247: break;
1248: l++;
1249: }
1250: if (l != expectedLength) return null;
1251: ce.ch = (char)(parseInt(input, pos + 2, l, 16));
1252: ce.len = l + 2;
1253: }
1254: else {
1255: ce.ch = c;
1256: ce.len = 2;
1257: }
1258: break;
1259: case '0':
1260: if (syntax.get(RESyntax.RE_OCTAL_CHAR)) {
1261: int l = 0;
1262: for (int i = pos + 2; i < pos + 2 + 3; i++) {
1263: if (i >= lim) break;
1264: if (input[i] < '0' || input[i] > '7') break;
1265: l++;
1266: }
1267: if (l == 3 && input[pos + 2] > '3') l--;
1268: if (l <= 0) return null;
1269: ce.ch = (char)(parseInt(input, pos + 2, l, 8));
1270: ce.len = l + 2;
1271: }
1272: else {
1273: ce.ch = c;
1274: ce.len = 2;
1275: }
1276: break;
1277: default:
1278: ce.ch = c;
1279: ce.len = 2;
1280: break;
1281: }
1282: }
1283: else {
1284: ce.ch = input[pos];
1285: ce.len = 1;
1286: }
1287: ce.expr = new String(input, pos, ce.len);
1288: return ce;
1289: }
1290:
1291:
1299: private static class NamedProperty {
1300:
1301: String name;
1302:
1303: boolean negate;
1304:
1305: int len;
1306: }
1307:
1308: private static NamedProperty getNamedProperty(char[] input, int pos, int lim) {
1309: NamedProperty np = new NamedProperty();
1310: char c = input[pos];
1311: if (c == '\\') {
1312: if (++pos >= lim) return null;
1313: c = input[pos++];
1314: switch(c) {
1315: case 'p':
1316: np.negate = false;
1317: break;
1318: case 'P':
1319: np.negate = true;
1320: break;
1321: default:
1322: return null;
1323: }
1324: c = input[pos++];
1325: if (c == '{') {
1326: int p = -1;
1327: for (int i = pos; i < lim; i++) {
1328: if (input[i] == '}') {
1329: p = i;
1330: break;
1331: }
1332: }
1333: if (p < 0) return null;
1334: int len = p - pos;
1335: np.name = new String(input, pos, len);
1336: np.len = len + 4;
1337: }
1338: else {
1339: np.name = new String(input, pos - 1, 1);
1340: np.len = 3;
1341: }
1342: return np;
1343: }
1344: else return null;
1345: }
1346:
1347: private static RETokenNamedProperty getRETokenNamedProperty(
1348: int subIndex, NamedProperty np, boolean insens, int index)
1349: throws REException {
1350: try {
1351: return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
1352: }
1353: catch (REException e) {
1354: REException ree;
1355: ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
1356: ree.initCause(e);
1357: throw ree;
1358: }
1359: }
1360:
1361:
1366: public boolean isMatch(Object input) {
1367: return isMatch(input,0,0);
1368: }
1369:
1370:
1377: public boolean isMatch(Object input,int index) {
1378: return isMatch(input,index,0);
1379: }
1380:
1381:
1382:
1390: public boolean isMatch(Object input,int index,int eflags) {
1391: return isMatchImpl(makeCharIndexed(input,index),index,eflags);
1392: }
1393:
1394: private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
1395: if (firstToken == null)
1396: return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
1397: REMatch m = new REMatch(numSubs, index, eflags);
1398: if (firstToken.match(input, m)) {
1399: while (m != null) {
1400: if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
1401: return true;
1402: }
1403: m = m.next;
1404: }
1405: }
1406: return false;
1407: }
1408:
1409:
1414: public int getNumSubs() {
1415: return numSubs;
1416: }
1417:
1418:
1419: void setUncle(REToken uncle) {
1420: if (lastToken != null) {
1421: lastToken.setUncle(uncle);
1422: } else super.setUncle(uncle);
1423: }
1424:
1425:
1426:
1427: boolean chain(REToken next) {
1428: super.chain(next);
1429: setUncle(next);
1430: return true;
1431: }
1432:
1433:
1437: public int getMinimumLength() {
1438: return minimumLength;
1439: }
1440:
1441: public int getMaximumLength() {
1442: return maximumLength;
1443: }
1444:
1445:
1454: public REMatch[] getAllMatches(Object input) {
1455: return getAllMatches(input,0,0);
1456: }
1457:
1458:
1469: public REMatch[] getAllMatches(Object input, int index) {
1470: return getAllMatches(input,index,0);
1471: }
1472:
1473:
1486: public REMatch[] getAllMatches(Object input, int index, int eflags) {
1487: return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
1488: }
1489:
1490:
1491: private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
1492: Vector all = new Vector();
1493: REMatch m = null;
1494: while ((m = getMatchImpl(input,index,eflags,null)) != null) {
1495: all.addElement(m);
1496: index = m.getEndIndex();
1497: if (m.end[0] == 0) {
1498: index++;
1499: input.move(1);
1500: } else {
1501: input.move(m.end[0]);
1502: }
1503: if (!input.isValid()) break;
1504: }
1505: REMatch[] mset = new REMatch[all.size()];
1506: all.copyInto(mset);
1507: return mset;
1508: }
1509:
1510:
1511: boolean match(CharIndexed input, REMatch mymatch) {
1512: if (firstToken == null) {
1513: return next(input, mymatch);
1514: }
1515:
1516:
1517: mymatch.start[subIndex] = mymatch.index;
1518:
1519: return firstToken.match(input, mymatch);
1520: }
1521:
1522:
1529: public REMatch getMatch(Object input) {
1530: return getMatch(input,0,0);
1531: }
1532:
1533:
1542: public REMatch getMatch(Object input, int index) {
1543: return getMatch(input,index,0);
1544: }
1545:
1546:
1556: public REMatch getMatch(Object input, int index, int eflags) {
1557: return getMatch(input,index,eflags,null);
1558: }
1559:
1560:
1573: public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
1574: return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1575: }
1576:
/**
 * Scans forward from the current position of {@code input}, one character
 * at a time, and returns the first match found.
 *
 * @param input  the wrapped input; its position is advanced by the scan
 * @param anchor anchor value for the initial REMatch; incremented for every
 *               position that is skipped
 * @param eflags execution flags; REG_TRY_ENTIRE_MATCH is interpreted here,
 *               all flags are handed to the REMatch
 * @param buffer when non-null, receives each character that is skipped
 *               while scanning
 * @return the finished match, or null when no position matches
 */
REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
  boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0);
  // With REG_TRY_ENTIRE_MATCH the scan runs on a clone that has an
  // RETokenEnd chained after it, so this instance itself is not modified.
  // NOTE(review): presumably RETokenEnd forces the match to reach the end
  // of the input -- confirm against RETokenEnd.
  RE re = (tryEntireMatch ? (RE) this.clone() : this);
  if (tryEntireMatch) {
    re.chain(new RETokenEnd(0, null));
  }

  REMatch mymatch = new REMatch(numSubs, anchor, eflags);
  do {
    // Only attempt a match when at least minimumLength characters remain
    // at the current position.
    if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
      if (re.match(input, mymatch)) {
        // The first successful match is taken as the result.
        REMatch best = mymatch;
        best.end[0] = best.index;
        best.finish(input);
        return best;
      }
    }
    // No match here: reset the match state for the next anchor position.
    mymatch.clear(++anchor);

    // Record the character being skipped, if the caller asked for it.
    if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
      buffer.append(input.charAt(0));
    }
  } while (input.move(1));

  // The input could not be advanced any further. An expression with a
  // minimum length of zero gets one more attempt at this final position.
  // NOTE(review): this attempt calls match() on this instance rather than
  // on re, so the RETokenEnd chained for REG_TRY_ENTIRE_MATCH is not part
  // of it -- confirm that this is intended.
  if (minimumLength == 0) {
    if (match(input, mymatch)) {
      mymatch.finish(input);
      return mymatch;
    }
  }

  return null;
}
1625:
1626:
1633: public REMatchEnumeration getMatchEnumeration(Object input) {
1634: return getMatchEnumeration(input,0,0);
1635: }
1636:
1637:
1638:
1647: public REMatchEnumeration getMatchEnumeration(Object input, int index) {
1648: return getMatchEnumeration(input,index,0);
1649: }
1650:
1651:
1661: public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
1662: return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1663: }
1664:
1665:
1666:
1674: public String substitute(Object input,String replace) {
1675: return substitute(input,replace,0,0);
1676: }
1677:
1678:
1691: public String substitute(Object input,String replace,int index) {
1692: return substitute(input,replace,index,0);
1693: }
1694:
1695:
1708: public String substitute(Object input,String replace,int index,int eflags) {
1709: return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1710: }
1711:
1712: private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1713: StringBuffer buffer = new StringBuffer();
1714: REMatch m = getMatchImpl(input,index,eflags,buffer);
1715: if (m==null) return buffer.toString();
1716: buffer.append(getReplacement(replace, m, eflags));
1717: if (input.move(m.end[0])) {
1718: do {
1719: buffer.append(input.charAt(0));
1720: } while (input.move(1));
1721: }
1722: return buffer.toString();
1723: }
1724:
1725:
1734: public String substituteAll(Object input,String replace) {
1735: return substituteAll(input,replace,0,0);
1736: }
1737:
1738:
1752: public String substituteAll(Object input,String replace,int index) {
1753: return substituteAll(input,replace,index,0);
1754: }
1755:
1756:
1769: public String substituteAll(Object input,String replace,int index,int eflags) {
1770: return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1771: }
1772:
1773: private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1774: StringBuffer buffer = new StringBuffer();
1775: REMatch m;
1776: while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1777: buffer.append(getReplacement(replace, m, eflags));
1778: index = m.getEndIndex();
1779: if (m.end[0] == 0) {
1780: char ch = input.charAt(0);
1781: if (ch != CharIndexed.OUT_OF_BOUNDS)
1782: buffer.append(ch);
1783: input.move(1);
1784: } else {
1785: input.move(m.end[0]);
1786: }
1787:
1788: if (!input.isValid()) break;
1789: }
1790: return buffer.toString();
1791: }
1792:
1793: public static String getReplacement(String replace, REMatch m, int eflags) {
1794: if ((eflags & REG_NO_INTERPOLATE) > 0)
1795: return replace;
1796: else {
1797: if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) {
1798: StringBuffer sb = new StringBuffer();
1799: int l = replace.length();
1800: for (int i = 0; i < l; i++) {
1801: char c = replace.charAt(i);
1802: switch(c) {
1803: case '\\':
1804: i++;
1805:
1806: sb.append(replace.charAt(i));
1807: break;
1808: case '$':
1809: int i1 = i + 1;
1810: while (i1 < replace.length() &&
1811: Character.isDigit(replace.charAt(i1))) i1++;
1812: sb.append(m.substituteInto(replace.substring(i, i1)));
1813: i = i1 - 1;
1814: break;
1815: default:
1816: sb.append(c);
1817: }
1818: }
1819: return sb.toString();
1820: }
1821: else
1822: return m.substituteInto(replace);
1823: }
1824: }
1825:
1826:
1827: private void addToken(REToken next) {
1828: if (next == null) return;
1829: minimumLength += next.getMinimumLength();
1830: int nmax = next.getMaximumLength();
1831: if (nmax < Integer.MAX_VALUE && maximumLength < Integer.MAX_VALUE)
1832: maximumLength += nmax;
1833: else
1834: maximumLength = Integer.MAX_VALUE;
1835:
1836: if (firstToken == null) {
1837: lastToken = firstToken = next;
1838: } else {
1839:
1840:
1841: if (lastToken.chain(next)) {
1842: lastToken = next;
1843: }
1844: }
1845: }
1846:
1847: private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1848: if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1849: return new RETokenRepeated(current.subIndex,current,min,max);
1850: }
1851:
1852: private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
1853:
1854:
1855: int i;
1856: for (i=index; i<(pattern.length-1); i++) {
1857: if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1858: return i+2;
1859: buf.append(pattern[i]);
1860: }
1861: return index;
1862: }
1863:
/**
 * Parses the body of an interval expression such as {@code n}}, {@code n,}}
 * or {@code n,m}} starting at {@code index} and stores the bounds in
 * {@code minMax}.
 *
 * When the syntax does not have RE_NO_BK_BRACES set, a malformed interval
 * raises an REException; otherwise the method gives up quietly and returns
 * the index it was called with, leaving {@code minMax} untouched.
 *
 * @param input  the pattern characters
 * @param index  position of the first character of the interval body
 * @param minMax receives the lower bound in {@code first} and the upper
 *               bound in {@code second}
 * @param syntax the syntax in effect; only RE_NO_BK_BRACES is consulted here
 * @return the index delivered by the last getCharUnit call on success, or
 *         the original {@code index} when nothing was parsed
 * @throws REException with code REG_EBRACE for a malformed interval when
 *         the syntax requires one
 */
private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
  // mustMatch: errors are reported by throwing rather than by returning
  // startIndex.
  boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
  int startIndex = index;
  if (index == input.length) {
    if (mustMatch)
      throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
    else
      return startIndex;
  }

  int min,max=0;
  CharUnit unit = new CharUnit();
  StringBuffer buf = new StringBuffer();

  // Collect the digits of the lower bound. The loop leaves the first
  // non-digit unit (or the last unit read at end of input) in 'unit'.
  // NOTE(review): getCharUnit is defined outside this view; it is used here
  // as "fill unit.ch / unit.bk and return the next index" -- confirm.
  do {
    index = getCharUnit(input,index,unit,false);
    if (Character.isDigit(unit.ch))
      buf.append(unit.ch);
  } while ((index != input.length) && Character.isDigit(unit.ch));

  // A lower bound is mandatory.
  if (buf.length() == 0) {
    if (mustMatch)
      throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
    else
      return startIndex;
  }

  // NOTE(review): Integer.parseInt throws NumberFormatException when the
  // digit run exceeds the int range; nothing here converts that into an
  // REException.
  min = Integer.parseInt(buf.toString());

  // A closing brace counts only when its backslash state fits the syntax:
  // unescaped under RE_NO_BK_BRACES, escaped otherwise.
  if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
    max = min;
  else if (index == input.length)
    if (mustMatch)
      throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
    else
      return startIndex;
  else if ((unit.ch == ',') && !unit.bk) {
    buf = new StringBuffer();

    // Collect the digits of the optional upper bound.
    while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
      buf.append(unit.ch);

    // The unit that ended the digit run must be the closing brace.
    if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
      if (mustMatch)
        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
      else
        return startIndex;

    // No upper bound given: Integer.MAX_VALUE is stored as the maximum.
    if (buf.length() == 0) max = Integer.MAX_VALUE;
    else max = Integer.parseInt(buf.toString());
  } else
    if (mustMatch)
      throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
    else
      return startIndex;

  // Both bounds are known; no min <= max check is made here.
  minMax.first = min;
  minMax.second = max;

  return index;
}
1933:
1934:
1938: public String toString() {
1939: StringBuffer sb = new StringBuffer();
1940: dump(sb);
1941: return sb.toString();
1942: }
1943:
1944: void dump(StringBuffer os) {
1945: os.append('(');
1946: if (subIndex == 0)
1947: os.append("?:");
1948: if (firstToken != null)
1949: firstToken.dumpAll(os);
1950: os.append(')');
1951: }
1952:
1953:
1954: private static CharIndexed makeCharIndexed(Object input, int index) {
1955:
1956:
1957: if (input instanceof String)
1958: return new CharIndexedString((String) input,index);
1959: else if (input instanceof char[])
1960: return new CharIndexedCharArray((char[]) input,index);
1961: else if (input instanceof StringBuffer)
1962: return new CharIndexedStringBuffer((StringBuffer) input,index);
1963: else if (input instanceof InputStream)
1964: return new CharIndexedInputStream((InputStream) input,index);
1965: else if (input instanceof CharIndexed)
1966: return (CharIndexed) input;
1967: else
1968: return new CharIndexedString(input.toString(), index);
1969: }
1970: }