hxt-regex-xmlschema-9.0.0: A regular expression library for W3C XML Schema regular expressionsSource codeContentsIndex
Text.Regex.XMLSchema.String.Regex
Portabilityportable
Stabilitystable
MaintainerUwe Schmidt <uwe@fh-wedel.de>
Description

W3C XML Schema Regular Expression Matcher

Grammar can be found under http://www.w3.org/TR/xmlschema11-2/#regexs

Synopsis
type Regex = GenRegex String
data GenRegex l
mkZero :: String -> GenRegex l
mkUnit :: GenRegex l
mkSym :: CharSet -> GenRegex l
mkSym1 :: Char -> GenRegex l
mkSymRng :: Char -> Char -> GenRegex l
mkWord :: [Char] -> GenRegex l
mkDot :: GenRegex l
mkStar :: Eq l => GenRegex l -> GenRegex l
mkAll :: Eq l => GenRegex l
mkAlt :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
mkElse :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
mkSeq :: GenRegex l -> GenRegex l -> GenRegex l
mkSeqs :: [GenRegex l] -> GenRegex l
mkRep :: Eq l => Int -> GenRegex l -> GenRegex l
mkRng :: Int -> Int -> GenRegex l -> GenRegex l
mkOpt :: GenRegex l -> GenRegex l
mkDiff :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
mkIsect :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
mkExor :: Eq l => GenRegex l -> GenRegex l -> GenRegex l
mkInterleave :: GenRegex l -> GenRegex l -> GenRegex l
mkCompl :: Eq l => GenRegex l -> GenRegex l
mkBr :: l -> GenRegex l -> GenRegex l
isZero :: GenRegex l -> Bool
errRegex :: GenRegex l -> String
nullable :: GenRegex l -> Bool
nullable' :: GenRegex l -> Nullable l
delta1 :: Eq l => GenRegex l -> Char -> GenRegex l
delta :: Eq l => GenRegex l -> String -> GenRegex l
firstChars :: GenRegex l -> CharSet
matchWithRegex :: Eq l => GenRegex l -> String -> Bool
matchWithRegex' :: Eq l => GenRegex l -> String -> Maybe [(Label l, String)]
splitWithRegex :: Eq l => GenRegex l -> String -> Maybe ([(Label l, String)], String)
splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String)
splitWithRegexCS :: Eq l => GenRegex l -> CharSet -> String -> Maybe ([(Label l, String)], String)
splitWithRegexCS' :: Eq l => GenRegex l -> CharSet -> String -> Maybe (GenRegex l, String)
Documentation
type Regex = GenRegex StringSource
data GenRegex l Source
mkZero :: String -> GenRegex lSource
construct the r.e. for the empty set. An (error-) message may be attached
mkUnit :: GenRegex lSource
construct the r.e. for the set containing the empty word
mkSym :: CharSet -> GenRegex lSource
construct the r.e. for a set of chars
mkSym1 :: Char -> GenRegex lSource
construct an r.e. for a single char set
mkSymRng :: Char -> Char -> GenRegex lSource
construct an r.e. for an interval of chars
mkWord :: [Char] -> GenRegex lSource
mkSym generalized for strings
mkDot :: GenRegex lSource
construct an r.e. for the set of all Unicode chars
mkStar :: Eq l => GenRegex l -> GenRegex lSource
construct r.e. for r*
mkAll :: Eq l => GenRegex lSource
construct an r.e. for the set of all Unicode words
mkAlt :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource
construct the r.e. for r1|r2
mkElse :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

construct the r.e. for r1{|}r2 (r1 orElse r2).

This represents the same r.e. as r1|r2, but when collecting the results of subexpressions in (...) and r1 succeeds, the subexpressions of r2 are discarded, so r1 matches are prioritized

example

 splitSubex "({1}x)|({2}.)"   "x" = ([("1","x"),("2","x")], "")

 splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")
mkSeq :: GenRegex l -> GenRegex l -> GenRegex lSource
Construct the sequence r.e. r1.r2
mkSeqs :: [GenRegex l] -> GenRegex lSource
mkSeq extended to lists
mkRep :: Eq l => Int -> GenRegex l -> GenRegex lSource
Construct repetition r{i,}
mkRng :: Int -> Int -> GenRegex l -> GenRegex lSource
Construct range r{i,j}
mkOpt :: GenRegex l -> GenRegex lSource
Construct option r?
mkDiff :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

Construct difference r.e.: r1 {\} r2

example

 match "[a-z]+{\\}bush" "obama"     = True
 match "[a-z]+{\\}bush" "clinton"   = True
 match "[a-z]+{\\}bush" "bush"      = False     -- not important any more
mkIsect :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

Construct r.e. for intersection: r1 {&} r2

example

 match ".*a.*{&}.*b.*" "-a-b-"  = True
 match ".*a.*{&}.*b.*" "-b-a-"  = True
 match ".*a.*{&}.*b.*" "-a-a-"  = False
 match ".*a.*{&}.*b.*" "---b-"  = False
mkExor :: Eq l => GenRegex l -> GenRegex l -> GenRegex lSource

Construct r.e. for exclusive or: r1 {^} r2

example

 match "[a-c]+{^}[c-d]+" "abc"  = True
 match "[a-c]+{^}[c-d]+" "acdc" = False
 match "[a-c]+{^}[c-d]+" "ccc"  = False
 match "[a-c]+{^}[c-d]+" "cdc"  = True
mkInterleave :: GenRegex l -> GenRegex l -> GenRegex lSource
mkCompl :: Eq l => GenRegex l -> GenRegex lSource
Construct the Complement of an r.e.: whole set of words - r
mkBr :: l -> GenRegex l -> GenRegex lSource
Construct a labeled subexpression: ({label}r)
isZero :: GenRegex l -> BoolSource
errRegex :: GenRegex l -> StringSource
nullable :: GenRegex l -> BoolSource
nullable' :: GenRegex l -> Nullable lSource
delta1 :: Eq l => GenRegex l -> Char -> GenRegex lSource
delta :: Eq l => GenRegex l -> String -> GenRegex lSource
firstChars :: GenRegex l -> CharSetSource

FIRST for regular expressions

This is only an approximation: the real set of chars may be smaller when the expression contains intersection, set difference or exor operators.

matchWithRegex :: Eq l => GenRegex l -> String -> BoolSource
matchWithRegex' :: Eq l => GenRegex l -> String -> Maybe [(Label l, String)]Source
splitWithRegex :: Eq l => GenRegex l -> String -> Maybe ([(Label l, String)], String)Source
This function wraps the whole regex in a subexpression before starting the parse. This is done to get access to the whole parsed string. Therefore we need one special label: this label is the Nothing value, while all explicit labels are Just labels.
splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String)Source
The main scanner function
splitWithRegexCS :: Eq l => GenRegex l -> CharSet -> String -> Maybe ([(Label l, String)], String)Source
splitWithRegexCS' :: Eq l => GenRegex l -> CharSet -> String -> Maybe (GenRegex l, String)Source

speedup version for splitWithRegex'

This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split

Produced by Haddock version 2.6.1