1:
37:
38: package ;
39: import ;
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46:
47:
117:
118: public class RE extends REToken {
119:
120: private static final class IntPair implements Serializable {
121: public int first, second;
122: }
123:
124: private static final class CharUnit implements Serializable {
125: public char ch;
126: public boolean bk;
127: }
128:
129:
// Version string returned by version().
private static final String VERSION = "1.1.5-dev";

// Message bundle; loaded on first use by getLocalizedMessage().
private static ResourceBundle messages;

// Base name handed to getBundle() when the message bundle is loaded.
private static final String bundle = "gnu/java/util/regex/MessagesBundle";

// Token chain endpoints; assigned by the branch constructor and reset by
// initialize().  Presumably the head and tail of the compiled token list
// maintained by addToken() -- confirm against addToken().
private REToken firstToken, lastToken;

// Count of subexpressions; initialize() increments it for every group that
// is not "pure" and adds the counts of nested expressions.
private int numSubs;

// Length bounds; initialize() hands them to each branch RE and then zeroes
// them.  NOTE(review): presumably the minimum/maximum match length tracked
// by addToken() -- confirm.
private int minimumLength;
private int maximumLength;

// Compilation flag: initialize() creates case-insensitive tokens when set.
public static final int REG_ICASE = 0x02;

// Compilation flag: when set, initialize() creates '.' tokens with their
// newline option enabled (same effect as the RE_DOT_NEWLINE syntax bit).
public static final int REG_DOT_NEWLINE = 0x04;

// Compilation flag: when set, initialize() builds '^' and '$' tokens using
// the syntax's line separator instead of a null separator.
public static final int REG_MULTILINE = 0x08;

// NOTE(review): not used in the visible part of the file; by its name an
// execution flag meaning "input start is not a beginning of line" -- confirm.
public static final int REG_NOTBOL = 0x10;

// NOTE(review): not used in the visible part of the file; by its name an
// execution flag meaning "input end is not an end of line" -- confirm.
public static final int REG_NOTEOL = 0x20;

// NOTE(review): not used in the visible part of the file; by its name it
// anchors matching relative to the supplied index -- confirm.
public static final int REG_ANCHORINDEX = 0x40;

// NOTE(review): not used in the visible part of the file; by its name it
// disables interpolation in substitution text -- confirm.
public static final int REG_NO_INTERPOLATE = 0x80;

// NOTE(review): not used in the visible part of the file; by its name it
// requests that a match of the entire input be attempted -- confirm.
public static final int REG_TRY_ENTIRE_MATCH = 0x0100;

// NOTE(review): not used in the visible part of the file; by its name it
// enables backslash escapes in replacement text -- confirm.
public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;

// Compilation flag: when set, initialize() skips unescaped whitespace and
// '#' comments in the pattern.
public static final int REG_X_COMMENTS = 0x0400;

// Compilation flag: when set, initialize() and parseCharClass() clear
// unicodeAware on the tokens they create.
public static final int REG_ICASE_USASCII = 0x0800;

// NOTE(review): not used in the visible part of the file; by its name it
// pins the starting position of a match -- confirm.
public static final int REG_FIX_STARTING_POSITION = 0x1000;
265:
266:
267: public static final String version() {
268: return VERSION;
269: }
270:
271:
272: static final String getLocalizedMessage(String key) {
273: if (messages == null)
274: messages = PropertyResourceBundle.getBundle(bundle, Locale.getDefault());
275: return messages.getString(key);
276: }
277:
278:
/**
 * Compiles a pattern with no compilation flags and the
 * {@code RESyntax.RE_SYNTAX_PERL5} syntax.
 *
 * @param pattern the pattern; a String, char[] or StringBuffer is read
 *        directly, any other object is converted with toString()
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern) throws REException {
  this(pattern, 0, RESyntax.RE_SYNTAX_PERL5, 0, 0);
}
291:
292:
/**
 * Compiles a pattern with the given compilation flags and the
 * {@code RESyntax.RE_SYNTAX_PERL5} syntax.
 *
 * @param pattern the pattern; a String, char[] or StringBuffer is read
 *        directly, any other object is converted with toString()
 * @param cflags bitwise OR of the {@code REG_*} compilation flags
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern, int cflags) throws REException {
  this(pattern, cflags, RESyntax.RE_SYNTAX_PERL5, 0, 0);
}
306:
307:
/**
 * Compiles a pattern with the given compilation flags and syntax.
 *
 * @param pattern the pattern; a String, char[] or StringBuffer is read
 *        directly, any other object is converted with toString()
 * @param cflags bitwise OR of the {@code REG_*} compilation flags
 * @param syntax the syntax rules used to interpret the pattern
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
  this(pattern, cflags, syntax, 0, 0);
}
322:
323:
/**
 * Wraps an already built token chain; initialize() uses this to turn the
 * tokens collected so far into one branch of an alternation.
 *
 * @param first first token of the chain
 * @param last last token of the chain
 * @param subs number of subexpressions recorded for the chain
 * @param subIndex subexpression index passed to the superclass
 * @param minLength value stored as the minimum length
 * @param maxLength value stored as the maximum length
 */
private RE(REToken first, REToken last,int subs, int subIndex, int minLength, int maxLength) {
  super(subIndex);
  numSubs = subs;
  minimumLength = minLength;
  maximumLength = maxLength;
  firstToken = first;
  lastToken = last;
  // Must come last: the end-of-subexpression marker is appended to the
  // chain that was just installed.
  addToken(new RETokenEndSub(subIndex));
}
333:
/**
 * Compiles a (sub)pattern.  All public constructors and the recursive
 * compilation of parenthesized groups funnel through here.
 *
 * @param patternObj the pattern text
 * @param cflags bitwise OR of the {@code REG_*} compilation flags
 * @param syntax the syntax rules used to interpret the pattern
 * @param myIndex subexpression index passed to the superclass
 * @param nextSub offset added to subexpression numbers assigned inside
 * @throws REException if the pattern cannot be compiled
 */
private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
  super(myIndex);
  initialize(patternObj, cflags, syntax, myIndex, nextSub);
}
338:
339:
/**
 * Creates an uncompiled instance with subexpression index 0; no pattern is
 * compiled until {@code initialize} is called.
 */
protected RE() {
  super(0);
}
341:
342:
343: protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
344: char[] pattern;
345: if (patternObj instanceof String) {
346: pattern = ((String) patternObj).toCharArray();
347: } else if (patternObj instanceof char[]) {
348: pattern = (char[]) patternObj;
349: } else if (patternObj instanceof StringBuffer) {
350: pattern = new char [((StringBuffer) patternObj).length()];
351: ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
352: } else {
353: pattern = patternObj.toString().toCharArray();
354: }
355:
356: int pLength = pattern.length;
357:
358: numSubs = 0;
359: Vector branches = null;
360:
361:
362: firstToken = lastToken = null;
363:
364:
365:
366: boolean insens = ((cflags & REG_ICASE) > 0);
367: boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
368:
369:
370:
371:
372:
373: int index = 0;
374:
375:
376: CharUnit unit = new CharUnit();
377:
378:
379: IntPair minMax = new IntPair();
380:
381:
382: REToken currentToken = null;
383: char ch;
384: boolean quot = false;
385:
386:
387: RESyntax savedSyntax = null;
388: int savedCflags = 0;
389: boolean flagsSaved = false;
390:
391: while (index < pLength) {
392:
393: index = getCharUnit(pattern,index,unit,quot);
394:
395: if (unit.bk)
396: if (unit.ch == 'Q') {
397: quot = true;
398: continue;
399: } else if (unit.ch == 'E') {
400: quot = false;
401: continue;
402: }
403: if (quot)
404: unit.bk = false;
405:
406: if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) {
407: if (Character.isWhitespace(unit.ch)) {
408: continue;
409: }
410: if (unit.ch == '#') {
411: for (int i = index; i < pLength; i++) {
412: if (pattern[i] == '\n') {
413: index = i + 1;
414: continue;
415: }
416: else if (pattern[i] == '\r') {
417: if (i + 1 < pLength && pattern[i + 1] == '\n') {
418: index = i + 2;
419: }
420: else {
421: index = i + 1;
422: }
423: continue;
424: }
425: }
426: index = pLength;
427: continue;
428: }
429: }
430:
431:
432:
433:
434:
435:
436:
437: if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
438: || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
439: && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
440:
441: addToken(currentToken);
442: RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength, maximumLength);
443: minimumLength = 0;
444: maximumLength = 0;
445: if (branches == null) {
446: branches = new Vector();
447: }
448: branches.addElement(theBranch);
449: firstToken = lastToken = currentToken = null;
450: }
451:
452:
453:
454:
455:
456:
457:
458:
459:
460:
461:
462: else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
463: int newIndex = getMinMax(pattern,index,minMax,syntax);
464: if (newIndex > index) {
465: if (minMax.first > minMax.second)
466: throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
467: if (currentToken == null)
468: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
469: if (currentToken instanceof RETokenRepeated)
470: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
471: if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
472: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
473: index = newIndex;
474: currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);
475: }
476: else {
477: addToken(currentToken);
478: currentToken = new RETokenChar(subIndex,unit.ch,insens);
479: if (insensUSASCII) currentToken.unicodeAware = false;
480: }
481: }
482:
483:
484:
485:
486: else if ((unit.ch == '[') && !(unit.bk || quot)) {
487:
488: ParseCharClassResult result = parseCharClass(
489: subIndex, pattern, index, pLength, cflags, syntax, 0);
490: addToken(currentToken);
491: currentToken = result.token;
492: index = result.index;
493: }
494:
495:
496:
497:
498: else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
499: boolean pure = false;
500: boolean comment = false;
501: boolean lookAhead = false;
502: boolean lookBehind = false;
503: boolean independent = false;
504: boolean negativelh = false;
505: boolean negativelb = false;
506: if ((index+1 < pLength) && (pattern[index] == '?')) {
507: switch (pattern[index+1]) {
508: case '!':
509: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
510: pure = true;
511: negativelh = true;
512: lookAhead = true;
513: index += 2;
514: }
515: break;
516: case '=':
517: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
518: pure = true;
519: lookAhead = true;
520: index += 2;
521: }
522: break;
523: case '<':
524:
525:
526: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
527: index++;
528: switch (pattern[index +1]) {
529: case '!':
530: pure = true;
531: negativelb = true;
532: lookBehind = true;
533: index += 2;
534: break;
535: case '=':
536: pure = true;
537: lookBehind = true;
538: index += 2;
539: }
540: }
541: break;
542: case '>':
543:
544:
545: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
546: pure = true;
547: independent = true;
548: index += 2;
549: }
550: break;
551: case 'i':
552: case 'd':
553: case 'm':
554: case 's':
555: case 'u':
556: case 'x':
557: case '-':
558: if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
559:
560: int flagIndex = index + 1;
561: int endFlag = -1;
562: RESyntax newSyntax = new RESyntax(syntax);
563: int newCflags = cflags;
564: boolean negate = false;
565: while (flagIndex < pLength && endFlag < 0) {
566: switch(pattern[flagIndex]) {
567: case 'i':
568: if (negate)
569: newCflags &= ~REG_ICASE;
570: else
571: newCflags |= REG_ICASE;
572: flagIndex++;
573: break;
574: case 'd':
575: if (negate)
576: newSyntax.setLineSeparator(RESyntax.DEFAULT_LINE_SEPARATOR);
577: else
578: newSyntax.setLineSeparator("\n");
579: flagIndex++;
580: break;
581: case 'm':
582: if (negate)
583: newCflags &= ~REG_MULTILINE;
584: else
585: newCflags |= REG_MULTILINE;
586: flagIndex++;
587: break;
588: case 's':
589: if (negate)
590: newCflags &= ~REG_DOT_NEWLINE;
591: else
592: newCflags |= REG_DOT_NEWLINE;
593: flagIndex++;
594: break;
595: case 'u':
596: if (negate)
597: newCflags |= REG_ICASE_USASCII;
598: else
599: newCflags &= ~REG_ICASE_USASCII;
600: flagIndex++;
601: break;
602: case 'x':
603: if (negate)
604: newCflags &= ~REG_X_COMMENTS;
605: else
606: newCflags |= REG_X_COMMENTS;
607: flagIndex++;
608: break;
609: case '-':
610: negate = true;
611: flagIndex++;
612: break;
613: case ':':
614: case ')':
615: endFlag = pattern[flagIndex];
616: break;
617: default:
618: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
619: }
620: }
621: if (endFlag == ')') {
622: syntax = newSyntax;
623: cflags = newCflags;
624: insens = ((cflags & REG_ICASE) > 0);
625: insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
626:
627: comment = true;
628: index = flagIndex - 1;
629: break;
630: }
631: if (endFlag == ':') {
632: savedSyntax = syntax;
633: savedCflags = cflags;
634: flagsSaved = true;
635: syntax = newSyntax;
636: cflags = newCflags;
637: insens = ((cflags & REG_ICASE) > 0);
638: insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
639: index = flagIndex -1;
640:
641: }
642: else {
643: throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
644: }
645: case ':':
646: if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
647: pure = true;
648: index += 2;
649: }
650: break;
651: case '#':
652: if (syntax.get(RESyntax.RE_COMMENTS)) {
653: comment = true;
654: }
655: break;
656: default:
657: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
658: }
659: }
660:
661: if (index >= pLength) {
662: throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
663: }
664:
665:
666: int endIndex = index;
667: int nextIndex = index;
668: int nested = 0;
669:
670: while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
671: && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) ) {
672: if ((endIndex = nextIndex) >= pLength)
673: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
674: else if ((unit.ch == '[') && !(unit.bk || quot)) {
675:
676:
677: int listIndex = nextIndex;
678: if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;
679: if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;
680: int listEndIndex = -1;
681: int listNest = 0;
682: while (listIndex < pLength && listEndIndex < 0) {
683: switch(pattern[listIndex++]) {
684: case '\\':
685: listIndex++;
686: break;
687: case '[':
688:
689:
690: listNest++;
691: if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;
692: if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;
693: break;
694: case ']':
695: if (listNest == 0)
696: listEndIndex = listIndex;
697: listNest--;
698: break;
699: }
700: }
701: if (listEndIndex >= 0) {
702: nextIndex = listEndIndex;
703: if ((endIndex = nextIndex) >= pLength)
704: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
705: else
706: continue;
707: }
708: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
709: }
710: else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
711: nested++;
712: else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
713: nested--;
714: }
715:
716:
717:
718:
719: if (comment) index = nextIndex;
720: else {
721:
722: addToken(currentToken);
723: if (!pure) {
724: numSubs++;
725: }
726:
727: int useIndex = (pure || lookAhead || lookBehind || independent) ?
728: 0 : nextSub + numSubs;
729: currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
730: numSubs += ((RE) currentToken).getNumSubs();
731:
732: if (lookAhead) {
733: currentToken = new RETokenLookAhead(currentToken,negativelh);
734: }
735: else if (lookBehind) {
736: currentToken = new RETokenLookBehind(currentToken,negativelb);
737: }
738: else if (independent) {
739: currentToken = new RETokenIndependent(currentToken);
740: }
741:
742: index = nextIndex;
743: if (flagsSaved) {
744: syntax = savedSyntax;
745: cflags = savedCflags;
746: insens = ((cflags & REG_ICASE) > 0);
747: insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
748: flagsSaved = false;
749: }
750: }
751: }
752:
753:
754:
755:
756: else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
757: throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
758: }
759:
760:
761:
762:
763: else if ((unit.ch == '^') && !(unit.bk || quot)) {
764: addToken(currentToken);
765: currentToken = null;
766: RETokenStart token = null;
767: if ((cflags & REG_MULTILINE) > 0) {
768: String sep = syntax.getLineSeparator();
769: if (sep == null) {
770: token = new RETokenStart(subIndex, null, true);
771: }
772: else {
773: token = new RETokenStart(subIndex, sep);
774: }
775: }
776: else {
777: token = new RETokenStart(subIndex, null);
778: }
779: addToken(token);
780: }
781:
782:
783:
784:
785: else if ((unit.ch == '$') && !(unit.bk || quot)) {
786: addToken(currentToken);
787: currentToken = null;
788: RETokenEnd token = null;
789: if ((cflags & REG_MULTILINE) > 0) {
790: String sep = syntax.getLineSeparator();
791: if (sep == null) {
792: token = new RETokenEnd(subIndex, null, true);
793: }
794: else {
795: token = new RETokenEnd(subIndex, sep);
796: }
797: }
798: else {
799: token = new RETokenEnd(subIndex, null);
800: }
801: addToken(token);
802: }
803:
804:
805:
806:
807: else if ((unit.ch == '.') && !(unit.bk || quot)) {
808: addToken(currentToken);
809: currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
810: }
811:
812:
813:
814:
815:
816:
817:
818: else if ((unit.ch == '*') && !(unit.bk || quot)) {
819: if (currentToken == null)
820: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
821: if (currentToken instanceof RETokenRepeated)
822: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
823: if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
824: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
825: currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
826: }
827:
828:
829:
830:
831:
832:
833:
834:
835: else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
836: if (currentToken == null)
837: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
838:
839:
840: if (currentToken instanceof RETokenRepeated) {
841: RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
842: if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
843: tokenRep.makePossessive();
844: else
845: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
846:
847: }
848: else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
849: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
850: else
851: currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
852: }
853:
854:
855:
856:
857:
858:
859: else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
860: if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
861:
862:
863: if (currentToken instanceof RETokenRepeated) {
864: RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
865: if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
866: tokenRep.makeStingy();
867: else
868: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
869: }
870: else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
871: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
872: else
873: currentToken = setRepeated(currentToken,0,1,index);
874: }
875:
876:
877:
878:
879: else if (unit.bk && (unit.ch == '0') && syntax.get(RESyntax.RE_OCTAL_CHAR)) {
880: CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
881: if (ce == null)
882: throw new REException("invalid octal character", REException.REG_ESCAPE, index);
883: index = index - 2 + ce.len;
884: addToken(currentToken);
885: currentToken = new RETokenChar(subIndex,ce.ch,insens);
886: if (insensUSASCII) currentToken.unicodeAware = false;
887: }
888:
889:
890:
891:
892:
893:
894:
895:
896:
897:
898:
899:
900:
901: else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
902: addToken(currentToken);
903: int numBegin = index - 1;
904: int numEnd = pLength;
905: for (int i = index; i < pLength; i++) {
906: if (! Character.isDigit(pattern[i])) {
907: numEnd = i;
908: break;
909: }
910: }
911: int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);
912:
913: currentToken = new RETokenBackRef(subIndex,num,insens);
914: if (insensUSASCII) currentToken.unicodeAware = false;
915: index = numEnd;
916: }
917:
918:
919:
920:
921: else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
922: addToken(currentToken);
923: currentToken = new RETokenStart(subIndex,null);
924: }
925:
926:
927:
928:
929: else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
930: addToken(currentToken);
931: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
932: }
933:
934:
935:
936: else if (unit.bk && (unit.ch == '<')) {
937: addToken(currentToken);
938: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
939: }
940:
941:
942:
943: else if (unit.bk && (unit.ch == '>')) {
944: addToken(currentToken);
945: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
946: }
947:
948:
949:
950:
951: else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
952: addToken(currentToken);
953: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
954: }
955:
956:
957:
958:
959:
960: else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
961: addToken(currentToken);
962: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
963: if (insensUSASCII) currentToken.unicodeAware = false;
964: }
965:
966:
967:
968:
969: else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
970: addToken(currentToken);
971: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
972: if (insensUSASCII) currentToken.unicodeAware = false;
973: }
974:
975:
976:
977:
978: else if (unit.bk && (unit.ch == 'n')) {
979: addToken(currentToken);
980: currentToken = new RETokenChar(subIndex,'\n',false);
981: }
982:
983:
984:
985:
986: else if (unit.bk && (unit.ch == 'r')) {
987: addToken(currentToken);
988: currentToken = new RETokenChar(subIndex,'\r',false);
989: }
990:
991:
992:
993:
994: else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
995: addToken(currentToken);
996: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
997: if (insensUSASCII) currentToken.unicodeAware = false;
998: }
999:
1000:
1001:
1002:
1003: else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
1004: addToken(currentToken);
1005: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
1006: if (insensUSASCII) currentToken.unicodeAware = false;
1007: }
1008:
1009:
1010:
1011:
1012: else if (unit.bk && (unit.ch == 't')) {
1013: addToken(currentToken);
1014: currentToken = new RETokenChar(subIndex,'\t',false);
1015: }
1016:
1017:
1018:
1019:
1020: else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
1021: addToken(currentToken);
1022: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
1023: if (insensUSASCII) currentToken.unicodeAware = false;
1024: }
1025:
1026:
1027:
1028:
1029: else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
1030: addToken(currentToken);
1031: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
1032: if (insensUSASCII) currentToken.unicodeAware = false;
1033: }
1034:
1035:
1036:
1037:
1038:
1039:
1040:
1041:
1042:
1043: else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') &&
1044: syntax.get(RESyntax.RE_STRING_ANCHORS)) {
1045: addToken(currentToken);
1046: currentToken = new RETokenEnd(subIndex,null);
1047: }
1048:
1049:
1050:
1051:
1052: else if ((unit.bk && (unit.ch == 'x') && syntax.get(RESyntax.RE_HEX_CHAR)) ||
1053: (unit.bk && (unit.ch == 'u') && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
1054: CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
1055: if (ce == null)
1056: throw new REException("invalid hex character", REException.REG_ESCAPE, index);
1057: index = index - 2 + ce.len;
1058: addToken(currentToken);
1059: currentToken = new RETokenChar(subIndex,ce.ch,insens);
1060: if (insensUSASCII) currentToken.unicodeAware = false;
1061: }
1062:
1063:
1064:
1065:
1066: else if ((unit.bk && (unit.ch == 'p') && syntax.get(RESyntax.RE_NAMED_PROPERTY)) ||
1067: (unit.bk && (unit.ch == 'P') && syntax.get(RESyntax.RE_NAMED_PROPERTY))) {
1068: NamedProperty np = getNamedProperty(pattern, index - 2, pLength);
1069: if (np == null)
1070: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1071: index = index - 2 + np.len;
1072: addToken(currentToken);
1073: currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
1074: if (insensUSASCII) currentToken.unicodeAware = false;
1075: }
1076:
1077:
1078:
1079:
1080: else if (unit.bk && (unit.ch == 'G') &&
1081: syntax.get(RESyntax.RE_STRING_ANCHORS)) {
1082: addToken(currentToken);
1083: currentToken = new RETokenEndOfPreviousMatch(subIndex);
1084: }
1085:
1086:
1087:
1088:
1089: else {
1090: addToken(currentToken);
1091: currentToken = new RETokenChar(subIndex,unit.ch,insens);
1092: if (insensUSASCII) currentToken.unicodeAware = false;
1093: }
1094: }
1095:
1096:
1097: addToken(currentToken);
1098:
1099: if (branches != null) {
1100: branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength, maximumLength));
1101: branches.trimToSize();
1102: minimumLength = 0;
1103: maximumLength = 0;
1104: firstToken = lastToken = null;
1105: addToken(new RETokenOneOf(subIndex,branches,false));
1106: }
1107: else addToken(new RETokenEndSub(subIndex));
1108:
1109: }
1110:
1111: private static class ParseCharClassResult {
1112: RETokenOneOf token;
1113: int index;
1114: boolean returnAtAndOperator = false;
1115: }
1116:
1117:
1127: private static ParseCharClassResult parseCharClass(int subIndex,
1128: char[] pattern, int index,
1129: int pLength, int cflags, RESyntax syntax, int pflags)
1130: throws REException {
1131:
1132: boolean insens = ((cflags & REG_ICASE) > 0);
1133: boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
1134: Vector options = new Vector();
1135: Vector addition = new Vector();
1136: boolean additionAndAppeared = false;
1137: final int RETURN_AT_AND = 0x01;
1138: boolean returnAtAndOperator = ((pflags & RETURN_AT_AND) != 0);
1139: boolean negative = false;
1140: char ch;
1141:
1142: char lastChar = 0;
1143: boolean lastCharIsSet = false;
1144: if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
1145:
1146:
1147: if ((ch = pattern[index]) == '^') {
1148: negative = true;
1149: if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1150: ch = pattern[index];
1151: }
1152:
1153:
1154: if (ch == ']') {
1155: lastChar = ch; lastCharIsSet = true;
1156: if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1157: }
1158:
1159: while ((ch = pattern[index++]) != ']') {
1160: if ((ch == '-') && (lastCharIsSet)) {
1161: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1162: if ((ch = pattern[index]) == ']') {
1163: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1164: if (insensUSASCII) t.unicodeAware = false;
1165: options.addElement(t);
1166: lastChar = '-';
1167: } else {
1168: if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
1169: CharExpression ce = getCharExpression(pattern, index, pLength, syntax);
1170: if (ce == null)
1171: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1172: ch = ce.ch;
1173: index = index + ce.len - 1;
1174: }
1175: RETokenRange t = new RETokenRange(subIndex,lastChar,ch,insens);
1176: if (insensUSASCII) t.unicodeAware = false;
1177: options.addElement(t);
1178: lastChar = 0; lastCharIsSet = false;
1179: index++;
1180: }
1181: } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
1182: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1183: int posixID = -1;
1184: boolean negate = false;
1185: char asciiEsc = 0;
1186: boolean asciiEscIsSet = false;
1187: NamedProperty np = null;
1188: if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
1189: switch (pattern[index]) {
1190: case 'D':
1191: negate = true;
1192: case 'd':
1193: posixID = RETokenPOSIX.DIGIT;
1194: break;
1195: case 'S':
1196: negate = true;
1197: case 's':
1198: posixID = RETokenPOSIX.SPACE;
1199: break;
1200: case 'W':
1201: negate = true;
1202: case 'w':
1203: posixID = RETokenPOSIX.ALNUM;
1204: break;
1205: }
1206: }
1207: if (("pP".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_NAMED_PROPERTY)) {
1208: np = getNamedProperty(pattern, index - 1, pLength);
1209: if (np == null)
1210: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1211: index = index - 1 + np.len - 1;
1212: }
1213: else {
1214: CharExpression ce = getCharExpression(pattern, index - 1, pLength, syntax);
1215: if (ce == null)
1216: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1217: asciiEsc = ce.ch; asciiEscIsSet = true;
1218: index = index - 1 + ce.len - 1;
1219: }
1220: if (lastCharIsSet) {
1221: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1222: if (insensUSASCII) t.unicodeAware = false;
1223: options.addElement(t);
1224: }
1225:
1226: if (posixID != -1) {
1227: RETokenPOSIX t = new RETokenPOSIX(subIndex,posixID,insens,negate);
1228: if (insensUSASCII) t.unicodeAware = false;
1229: options.addElement(t);
1230: } else if (np != null) {
1231: RETokenNamedProperty t = getRETokenNamedProperty(subIndex,np,insens,index);
1232: if (insensUSASCII) t.unicodeAware = false;
1233: options.addElement(t);
1234: } else if (asciiEscIsSet) {
1235: lastChar = asciiEsc; lastCharIsSet = true;
1236: } else {
1237: lastChar = pattern[index]; lastCharIsSet = true;
1238: }
1239: ++index;
1240: } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
1241: StringBuffer posixSet = new StringBuffer();
1242: index = getPosixSet(pattern,index+1,posixSet);
1243: int posixId = RETokenPOSIX.intValue(posixSet.toString());
1244: if (posixId != -1) {
1245: RETokenPOSIX t = new RETokenPOSIX(subIndex,posixId,insens,false);
1246: if (insensUSASCII) t.unicodeAware = false;
1247: options.addElement(t);
1248: }
1249: } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {
1250: ParseCharClassResult result = parseCharClass(
1251: subIndex, pattern, index, pLength, cflags, syntax, 0);
1252: addition.addElement(result.token);
1253: addition.addElement("|");
1254: index = result.index;
1255: } else if ((ch == '&') &&
1256: (syntax.get(RESyntax.RE_NESTED_CHARCLASS)) &&
1257: (index < pLength) && (pattern[index] == '&')) {
1258: if (returnAtAndOperator) {
1259: ParseCharClassResult result = new ParseCharClassResult();
1260: options.trimToSize();
1261: if (additionAndAppeared) addition.addElement("&");
1262: if (addition.size() == 0) addition = null;
1263: result.token = new RETokenOneOf(subIndex,
1264: options, addition, negative);
1265: result.index = index - 1;
1266: result.returnAtAndOperator = true;
1267: return result;
1268: }
1269:
1270:
1271:
1272:
1273:
1274:
1275:
1276: if (additionAndAppeared) addition.addElement("&");
1277: addition.addElement(Boolean.FALSE);
1278: additionAndAppeared = true;
1279:
1280:
1281:
1282:
1283:
1284:
1285:
1286:
1287: if ((index + 1 < pLength) && (pattern[index + 1] != '[')) {
1288: ParseCharClassResult result = parseCharClass(
1289: subIndex, pattern, index+1, pLength, cflags, syntax,
1290: RETURN_AT_AND);
1291: addition.addElement(result.token);
1292: addition.addElement("|");
1293:
1294:
1295:
1296:
1297: index = (result.returnAtAndOperator ?
1298: result.index: result.index - 1);
1299: }
1300: } else {
1301: if (lastCharIsSet) {
1302: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1303: if (insensUSASCII) t.unicodeAware = false;
1304: options.addElement(t);
1305: }
1306: lastChar = ch; lastCharIsSet = true;
1307: }
1308: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1309: }
1310:
1311:
1312: if (lastCharIsSet) {
1313: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1314: if (insensUSASCII) t.unicodeAware = false;
1315: options.addElement(t);
1316: }
1317:
1318: ParseCharClassResult result = new ParseCharClassResult();
1319:
1320: options.trimToSize();
1321: if (additionAndAppeared) addition.addElement("&");
1322: if (addition.size() == 0) addition = null;
1323: result.token = new RETokenOneOf(subIndex,options, addition, negative);
1324: result.index = index;
1325: return result;
1326: }
1327:
1328: private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
1329: unit.ch = input[index++];
1330: unit.bk = (unit.ch == '\\'
1331: && (!quot || index >= input.length || input[index] == 'E'));
1332: if (unit.bk)
1333: if (index < input.length)
1334: unit.ch = input[index++];
1335: else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
1336: return index;
1337: }
1338:
1339: private static int parseInt(char[] input, int pos, int len, int radix) {
1340: int ret = 0;
1341: for (int i = pos; i < pos + len; i++) {
1342: ret = ret * radix + Character.digit(input[i], radix);
1343: }
1344: return ret;
1345: }
1346:
1347:
/**
 * Result holder for getCharExpression: one character as written in a
 * pattern, either literally or as a backslash escape.
 */
private static class CharExpression {
  /** The character value the expression stands for. */
  char ch;
  /** The pattern text that was parsed (as filled in by getCharExpression). */
  String expr;
  /** Number of pattern characters the expression occupies. */
  int len;
  /** Returns the stored pattern text. */
  public String toString() { return expr; }
}
1363:
1364: private static CharExpression getCharExpression(char[] input, int pos, int lim,
1365: RESyntax syntax) {
1366: CharExpression ce = new CharExpression();
1367: char c = input[pos];
1368: if (c == '\\') {
1369: if (pos + 1 >= lim) return null;
1370: c = input[pos + 1];
1371: switch(c) {
1372: case 't':
1373: ce.ch = '\t';
1374: ce.len = 2;
1375: break;
1376: case 'n':
1377: ce.ch = '\n';
1378: ce.len = 2;
1379: break;
1380: case 'r':
1381: ce.ch = '\r';
1382: ce.len = 2;
1383: break;
1384: case 'x':
1385: case 'u':
1386: if ((c == 'x' && syntax.get(RESyntax.RE_HEX_CHAR)) ||
1387: (c == 'u' && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
1388: int l = 0;
1389: int expectedLength = (c == 'x' ? 2 : 4);
1390: for (int i = pos + 2; i < pos + 2 + expectedLength; i++) {
1391: if (i >= lim) break;
1392: if (!((input[i] >= '0' && input[i] <= '9') ||
1393: (input[i] >= 'A' && input[i] <= 'F') ||
1394: (input[i] >= 'a' && input[i] <= 'f')))
1395: break;
1396: l++;
1397: }
1398: if (l != expectedLength) return null;
1399: ce.ch = (char)(parseInt(input, pos + 2, l, 16));
1400: ce.len = l + 2;
1401: }
1402: else {
1403: ce.ch = c;
1404: ce.len = 2;
1405: }
1406: break;
1407: case '0':
1408: if (syntax.get(RESyntax.RE_OCTAL_CHAR)) {
1409: int l = 0;
1410: for (int i = pos + 2; i < pos + 2 + 3; i++) {
1411: if (i >= lim) break;
1412: if (input[i] < '0' || input[i] > '7') break;
1413: l++;
1414: }
1415: if (l == 3 && input[pos + 2] > '3') l--;
1416: if (l <= 0) return null;
1417: ce.ch = (char)(parseInt(input, pos + 2, l, 8));
1418: ce.len = l + 2;
1419: }
1420: else {
1421: ce.ch = c;
1422: ce.len = 2;
1423: }
1424: break;
1425: default:
1426: ce.ch = c;
1427: ce.len = 2;
1428: break;
1429: }
1430: }
1431: else {
1432: ce.ch = input[pos];
1433: ce.len = 1;
1434: }
1435: ce.expr = new String(input, pos, ce.len);
1436: return ce;
1437: }
1438:
1439:
/**
 * Result holder for getNamedProperty: a {@code \p} or {@code \P} property
 * reference found in a pattern.
 */
private static class NamedProperty {
  /** The property name: the text between the braces, or the single letter. */
  String name;
  /** True when the reference was written with an upper-case {@code P}. */
  boolean negate;
  /** Number of pattern characters the whole reference occupies. */
  int len;
}
1455:
1456: private static NamedProperty getNamedProperty(char[] input, int pos, int lim) {
1457: NamedProperty np = new NamedProperty();
1458: char c = input[pos];
1459: if (c == '\\') {
1460: if (++pos >= lim) return null;
1461: c = input[pos++];
1462: switch(c) {
1463: case 'p':
1464: np.negate = false;
1465: break;
1466: case 'P':
1467: np.negate = true;
1468: break;
1469: default:
1470: return null;
1471: }
1472: c = input[pos++];
1473: if (c == '{') {
1474: int p = -1;
1475: for (int i = pos; i < lim; i++) {
1476: if (input[i] == '}') {
1477: p = i;
1478: break;
1479: }
1480: }
1481: if (p < 0) return null;
1482: int len = p - pos;
1483: np.name = new String(input, pos, len);
1484: np.len = len + 4;
1485: }
1486: else {
1487: np.name = new String(input, pos - 1, 1);
1488: np.len = 3;
1489: }
1490: return np;
1491: }
1492: else return null;
1493: }
1494:
1495: private static RETokenNamedProperty getRETokenNamedProperty(
1496: int subIndex, NamedProperty np, boolean insens, int index)
1497: throws REException {
1498: try {
1499: return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
1500: }
1501: catch (REException e) {
1502: REException ree;
1503: ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
1504: ree.initCause(e);
1505: throw ree;
1506: }
1507: }
1508:
1509:
/**
 * Checks the input against this regular expression from index 0 with no
 * execution flags. Shorthand for {@code isMatch(input, 0, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @return the result of the three-argument isMatch
 */
public boolean isMatch(Object input) {
  return isMatch(input,0,0);
}
1517:
1518:
/**
 * Checks the input against this regular expression from the given index
 * with no execution flags. Shorthand for {@code isMatch(input, index, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @param index the index handed on as the starting position
 * @return the result of the three-argument isMatch
 */
public boolean isMatch(Object input,int index) {
  return isMatch(input,index,0);
}
1528:
1529:
1530:
/**
 * Checks the input against this regular expression. The input is wrapped
 * by makeCharIndexed and the check itself is done by isMatchImpl, which
 * only reports true when the match reaches the end of the input.
 *
 * @param input  the input text; any type accepted by makeCharIndexed
 * @param index  the index handed on as the starting position
 * @param eflags execution flags, passed through unchanged
 * @return true if isMatchImpl reports a match
 */
public boolean isMatch(Object input,int index,int eflags) {
  return isMatchImpl(makeCharIndexed(input,index),index,eflags);
}
1541:
1542: private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
1543: if (firstToken == null)
1544: return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
1545: REMatch m = new REMatch(numSubs, index, eflags);
1546: if (firstToken.match(input, m)) {
1547: if (m != null) {
1548: if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
1549: return true;
1550: }
1551: }
1552: }
1553: return false;
1554: }
1555:
1556:
/**
 * Returns the value of the {@code numSubs} field, which is also the
 * subexpression count used when match objects are created.
 *
 * @return the stored subexpression count
 */
public int getNumSubs() {
  return numSubs;
}
1564:
1565:
1566: void setUncle(REToken uncle) {
1567: if (lastToken != null) {
1568: lastToken.setUncle(uncle);
1569: } else super.setUncle(uncle);
1570: }
1571:
1572:
1573:
/**
 * Chains {@code next} after this expression via the superclass and also
 * installs it as the uncle (see setUncle).
 *
 * @param next the token that follows this expression
 * @return always true
 */
boolean chain(REToken next) {
  super.chain(next);
  setUncle(next);
  return true;
}
1579:
1580:
/**
 * Returns the minimum length recorded for this expression, i.e. the sum of
 * the minimum lengths of the tokens added through addToken.
 *
 * @return the stored minimum length
 */
public int getMinimumLength() {
  return minimumLength;
}
1587:
/**
 * Returns the maximum length recorded for this expression. addToken stores
 * {@code Integer.MAX_VALUE} here once any added token reports that value.
 *
 * @return the stored maximum length
 */
public int getMaximumLength() {
  return maximumLength;
}
1591:
1592:
/**
 * Returns all matches found in the input, starting at index 0 with no
 * execution flags. Shorthand for {@code getAllMatches(input, 0, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @return the matches, possibly an empty array
 */
public REMatch[] getAllMatches(Object input) {
  return getAllMatches(input,0,0);
}
1604:
1605:
/**
 * Returns all matches found in the input, starting at the given index with
 * no execution flags. Shorthand for {@code getAllMatches(input, index, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @param index the index handed on as the starting position
 * @return the matches, possibly an empty array
 */
public REMatch[] getAllMatches(Object input, int index) {
  return getAllMatches(input,index,0);
}
1619:
1620:
/**
 * Returns all matches found in the input. The input is wrapped by
 * makeCharIndexed and the search is done by getAllMatchesImpl.
 *
 * @param input  the input text; any type accepted by makeCharIndexed
 * @param index  the index handed on as the starting position
 * @param eflags execution flags, passed through unchanged
 * @return the matches, possibly an empty array
 */
public REMatch[] getAllMatches(Object input, int index, int eflags) {
  return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
}
1636:
1637:
1638: private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
1639: Vector all = new Vector();
1640: REMatch m = null;
1641: while ((m = getMatchImpl(input,index,eflags,null)) != null) {
1642: all.addElement(m);
1643: index = m.getEndIndex();
1644: if (m.end[0] == 0) {
1645: index++;
1646: input.move(1);
1647: } else {
1648: input.move(m.end[0]);
1649: }
1650: if (!input.isValid()) break;
1651: }
1652: REMatch[] mset = new REMatch[all.size()];
1653: all.copyInto(mset);
1654: return mset;
1655: }
1656:
1657:
1658: boolean match(CharIndexed input, REMatch mymatch) {
1659: input.setHitEnd(mymatch);
1660: if (firstToken == null) {
1661: return next(input, mymatch);
1662: }
1663:
1664:
1665: mymatch.start1[subIndex] = mymatch.index;
1666:
1667: return firstToken.match(input, mymatch);
1668: }
1669:
1670: REMatch findMatch(CharIndexed input, REMatch mymatch) {
1671: if (mymatch.backtrackStack == null)
1672: mymatch.backtrackStack = new BacktrackStack();
1673: boolean b = match(input, mymatch);
1674: if (b) {
1675: return mymatch;
1676: }
1677: return null;
1678: }
1679:
1680:
/**
 * Returns the first match found in the input, searching from index 0 with
 * no execution flags. Shorthand for {@code getMatch(input, 0, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @return the match, or null if none is found
 */
public REMatch getMatch(Object input) {
  return getMatch(input,0,0);
}
1690:
1691:
/**
 * Returns the first match found in the input, searching from the given
 * index with no execution flags. Shorthand for
 * {@code getMatch(input, index, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @param index the index handed on as the starting position
 * @return the match, or null if none is found
 */
public REMatch getMatch(Object input, int index) {
  return getMatch(input,index,0);
}
1703:
1704:
/**
 * Returns the first match found in the input. Shorthand for
 * {@code getMatch(input, index, eflags, null)}, i.e. without a buffer for
 * the skipped text.
 *
 * @param input  the input text; any type accepted by makeCharIndexed
 * @param index  the index handed on as the starting position
 * @param eflags execution flags, passed through unchanged
 * @return the match, or null if none is found
 */
public REMatch getMatch(Object input, int index, int eflags) {
  return getMatch(input,index,eflags,null);
}
1717:
1718:
/**
 * Returns the first match found in the input. The input is wrapped by
 * makeCharIndexed and the search is done by getMatchImpl, which appends
 * the characters it skips over to {@code buffer} when one is given.
 *
 * @param input  the input text; any type accepted by makeCharIndexed
 * @param index  the index handed on as the starting position
 * @param eflags execution flags, passed through unchanged
 * @param buffer receives the skipped characters; may be null
 * @return the match, or null if none is found
 */
public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
  return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
}
1734:
1735: REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
1736: boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0);
1737: boolean doMove = ((eflags & REG_FIX_STARTING_POSITION) == 0);
1738: RE re = (tryEntireMatch ? (RE) this.clone() : this);
1739: if (tryEntireMatch) {
1740: RETokenEnd reEnd = new RETokenEnd(0, null);
1741: reEnd.setFake(true);
1742: re.chain(reEnd);
1743: }
1744:
1745: REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1746: do {
1747:
1754: if (re.match(input, mymatch)) {
1755: REMatch best = mymatch;
1756:
1757:
1758:
1759:
1760:
1761:
1769: best.end[0] = best.index;
1770: best.finish(input);
1771: input.setLastMatch(best);
1772: return best;
1773: }
1774:
1777: mymatch.clear(++anchor);
1778:
1779: if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1780: buffer.append(input.charAt(0));
1781: }
1782:
1783:
1784: } while (doMove && input.move1(1));
1785:
1786:
1787: if (minimumLength == 0) {
1788: if (match(input, mymatch)) {
1789: mymatch.finish(input);
1790: return mymatch;
1791: }
1792: }
1793:
1794: return null;
1795: }
1796:
1797:
/**
 * Returns an enumeration over the input starting at index 0 with no
 * execution flags. Shorthand for {@code getMatchEnumeration(input, 0, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @return a new REMatchEnumeration
 */
public REMatchEnumeration getMatchEnumeration(Object input) {
  return getMatchEnumeration(input,0,0);
}
1807:
1808:
1809:
/**
 * Returns an enumeration over the input starting at the given index with
 * no execution flags. Shorthand for
 * {@code getMatchEnumeration(input, index, 0)}.
 *
 * @param input the input text; any type accepted by makeCharIndexed
 * @param index the index handed on as the starting position
 * @return a new REMatchEnumeration
 */
public REMatchEnumeration getMatchEnumeration(Object input, int index) {
  return getMatchEnumeration(input,index,0);
}
1821:
1822:
/**
 * Creates an REMatchEnumeration for this expression over the input, which
 * is first wrapped by makeCharIndexed.
 *
 * @param input  the input text; any type accepted by makeCharIndexed
 * @param index  the index handed on as the starting position
 * @param eflags execution flags, passed through unchanged
 * @return a new REMatchEnumeration
 */
public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
  return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
}
1835:
1836:
1837:
/**
 * Replaces the first match in the input, searching from index 0 with no
 * execution flags. Shorthand for {@code substitute(input, replace, 0, 0)}.
 *
 * @param input   the input text; any type accepted by makeCharIndexed
 * @param replace the replacement text
 * @return the resulting text
 */
public String substitute(Object input,String replace) {
  return substitute(input,replace,0,0);
}
1848:
1849:
/**
 * Replaces the first match in the input, searching from the given index
 * with no execution flags. Shorthand for
 * {@code substitute(input, replace, index, 0)}.
 *
 * @param input   the input text; any type accepted by makeCharIndexed
 * @param replace the replacement text
 * @param index   the index handed on as the starting position
 * @return the resulting text
 */
public String substitute(Object input,String replace,int index) {
  return substitute(input,replace,index,0);
}
1865:
1866:
/**
 * Replaces the first match in the input. The input is wrapped by
 * makeCharIndexed and the work is done by substituteImpl.
 *
 * @param input   the input text; any type accepted by makeCharIndexed
 * @param replace the replacement text
 * @param index   the index handed on as the starting position
 * @param eflags  execution flags, passed through unchanged
 * @return the resulting text
 */
public String substitute(Object input,String replace,int index,int eflags) {
  return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
}
1882:
1883: private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1884: StringBuffer buffer = new StringBuffer();
1885: REMatch m = getMatchImpl(input,index,eflags,buffer);
1886: if (m==null) return buffer.toString();
1887: buffer.append(getReplacement(replace, m, eflags));
1888: if (input.move(m.end[0])) {
1889: do {
1890: buffer.append(input.charAt(0));
1891: } while (input.move(1));
1892: }
1893: return buffer.toString();
1894: }
1895:
1896:
/**
 * Replaces every match in the input, searching from index 0 with no
 * execution flags. Shorthand for
 * {@code substituteAll(input, replace, 0, 0)}.
 *
 * @param input   the input text; any type accepted by makeCharIndexed
 * @param replace the replacement text
 * @return the resulting text
 */
public String substituteAll(Object input,String replace) {
  return substituteAll(input,replace,0,0);
}
1908:
1909:
/**
 * Replaces every match in the input, searching from the given index with
 * no execution flags. Shorthand for
 * {@code substituteAll(input, replace, index, 0)}.
 *
 * @param input   the input text; any type accepted by makeCharIndexed
 * @param replace the replacement text
 * @param index   the index handed on as the starting position
 * @return the resulting text
 */
public String substituteAll(Object input,String replace,int index) {
  return substituteAll(input,replace,index,0);
}
1926:
1927:
/**
 * Replaces every match in the input. The input is wrapped by
 * makeCharIndexed and the work is done by substituteAllImpl.
 *
 * @param input   the input text; any type accepted by makeCharIndexed
 * @param replace the replacement text
 * @param index   the index handed on as the starting position
 * @param eflags  execution flags, passed through unchanged
 * @return the resulting text
 */
public String substituteAll(Object input,String replace,int index,int eflags) {
  return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
}
1943:
1944: private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1945: StringBuffer buffer = new StringBuffer();
1946: REMatch m;
1947: while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1948: buffer.append(getReplacement(replace, m, eflags));
1949: index = m.getEndIndex();
1950: if (m.end[0] == 0) {
1951: char ch = input.charAt(0);
1952: if (ch != CharIndexed.OUT_OF_BOUNDS)
1953: buffer.append(ch);
1954: input.move(1);
1955: } else {
1956: input.move(m.end[0]);
1957: }
1958:
1959: if (!input.isValid()) break;
1960: }
1961: return buffer.toString();
1962: }
1963:
1964: public static String getReplacement(String replace, REMatch m, int eflags) {
1965: if ((eflags & REG_NO_INTERPOLATE) > 0)
1966: return replace;
1967: else {
1968: if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) {
1969: StringBuffer sb = new StringBuffer();
1970: int l = replace.length();
1971: for (int i = 0; i < l; i++) {
1972: char c = replace.charAt(i);
1973: switch(c) {
1974: case '\\':
1975: i++;
1976:
1977: sb.append(replace.charAt(i));
1978: break;
1979: case '$':
1980: int i1 = i + 1;
1981: while (i1 < replace.length() &&
1982: Character.isDigit(replace.charAt(i1))) i1++;
1983: sb.append(m.substituteInto(replace.substring(i, i1)));
1984: i = i1 - 1;
1985: break;
1986: default:
1987: sb.append(c);
1988: }
1989: }
1990: return sb.toString();
1991: }
1992: else
1993: return m.substituteInto(replace);
1994: }
1995: }
1996:
1997:
1998: private void addToken(REToken next) {
1999: if (next == null) return;
2000: minimumLength += next.getMinimumLength();
2001: int nmax = next.getMaximumLength();
2002: if (nmax < Integer.MAX_VALUE && maximumLength < Integer.MAX_VALUE)
2003: maximumLength += nmax;
2004: else
2005: maximumLength = Integer.MAX_VALUE;
2006:
2007: if (firstToken == null) {
2008: lastToken = firstToken = next;
2009: } else {
2010:
2011:
2012: if (lastToken.chain(next)) {
2013: lastToken = next;
2014: }
2015: }
2016: }
2017:
2018: private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
2019: if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
2020: return new RETokenRepeated(current.subIndex,current,min,max);
2021: }
2022:
2023: private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
2024:
2025:
2026: int i;
2027: for (i=index; i<(pattern.length-1); i++) {
2028: if ((pattern[i] == ':') && (pattern[i+1] == ']'))
2029: return i+2;
2030: buf.append(pattern[i]);
2031: }
2032: return index;
2033: }
2034:
2035: private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
2036:
2037:
2038: boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
2039: int startIndex = index;
2040: if (index == input.length) {
2041: if (mustMatch)
2042: throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
2043: else
2044: return startIndex;
2045: }
2046:
2047: int min,max=0;
2048: CharUnit unit = new CharUnit();
2049: StringBuffer buf = new StringBuffer();
2050:
2051:
2052: do {
2053: index = getCharUnit(input,index,unit,false);
2054: if (Character.isDigit(unit.ch))
2055: buf.append(unit.ch);
2056: } while ((index != input.length) && Character.isDigit(unit.ch));
2057:
2058:
2059: if (buf.length() == 0) {
2060: if (mustMatch)
2061: throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
2062: else
2063: return startIndex;
2064: }
2065:
2066: min = Integer.parseInt(buf.toString());
2067:
2068: if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
2069: max = min;
2070: else if (index == input.length)
2071: if (mustMatch)
2072: throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
2073: else
2074: return startIndex;
2075: else if ((unit.ch == ',') && !unit.bk) {
2076: buf = new StringBuffer();
2077:
2078: while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
2079: buf.append(unit.ch);
2080:
2081: if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
2082: if (mustMatch)
2083: throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
2084: else
2085: return startIndex;
2086:
2087:
2088: if (buf.length() == 0) max = Integer.MAX_VALUE;
2089: else max = Integer.parseInt(buf.toString());
2090: } else
2091: if (mustMatch)
2092: throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
2093: else
2094: return startIndex;
2095:
2096:
2097:
2098: minMax.first = min;
2099: minMax.second = max;
2100:
2101:
2102: return index;
2103: }
2104:
2105:
/**
 * Returns the text that dump writes for this expression.
 *
 * @return the dumped representation
 */
public String toString() {
  StringBuffer sb = new StringBuffer();
  dump(sb);
  return sb.toString();
}
2114:
2115: void dump(StringBuffer os) {
2116: os.append("(?#startRE subIndex=" + subIndex + ")");
2117: if (subIndex == 0)
2118: os.append("?:");
2119: if (firstToken != null)
2120: firstToken.dumpAll(os);
2121: if (subIndex == 0)
2122: os.append(")");
2123: os.append("(?#endRE subIndex=" + subIndex + ")");
2124: }
2125:
2126:
2127:
2128:
2129: public static CharIndexed makeCharIndexed(Object input, int index) {
2130:
2131:
2132:
2133:
2134:
2135:
2136: if (input instanceof CharIndexed) {
2137: CharIndexed ci = (CharIndexed) input;
2138: ci.setAnchor(index);
2139: return ci;
2140: }
2141: else if (input instanceof CharSequence)
2142: return new CharIndexedCharSequence((CharSequence) input,index);
2143: else if (input instanceof String)
2144: return new CharIndexedString((String) input,index);
2145: else if (input instanceof char[])
2146: return new CharIndexedCharArray((char[]) input,index);
2147: else if (input instanceof StringBuffer)
2148: return new CharIndexedStringBuffer((StringBuffer) input,index);
2149: else if (input instanceof InputStream)
2150: return new CharIndexedInputStream((InputStream) input,index);
2151: else
2152: return new CharIndexedString(input.toString(), index);
2153: }
2154: }