|
- const char *test_array[] =
- {
- "# regular expression test set",
- "# Lines are at least three fields, separated by one or more tabs. \"\" stands",
- "# for an empty field. First field is an RE. Second field is flags. If",
- "# C flag given, regcomp() is expected to fail, and the third field is the",
- "# error name (minus the leading REG_).",
- "#",
- "# Otherwise it is expected to succeed, and the third field is the string to",
- "# try matching it against. If there is no fourth field, the match is",
- "# expected to fail. If there is a fourth field, it is the substring that",
- "# the RE is expected to match. If there is a fifth field, it is a comma-",
- "# separated list of what the subexpressions should match, with - indicating",
- "# no match for that one. In both the fourth and fifth fields, a (sub)field",
- "# starting with @ indicates that the (sub)expression is expected to match",
- "# a null string followed by the stuff after the @; this provides a way to",
- "# test where null strings match. The character `N' in REs and strings",
- "# is newline, `S' is space, `T' is tab, `Z' is NUL.",
- "#",
- "# The full list of flags:",
- "# - placeholder, does nothing",
- "# b RE is a BRE, not an ERE",
- "# & try it as both an ERE and a BRE",
- "# C regcomp() error expected, third field is error name",
- "# i REG_ICASE",
- "# m (\"mundane\") REG_NOSPEC",
- "# s REG_NOSUB (not really testable)",
- "# n REG_NEWLINE",
- "# ^ REG_NOTBOL",
- "# $ REG_NOTEOL",
- "# # REG_STARTEND (see below)",
- "# p REG_PEND",
- "#",
- "# For REG_STARTEND, the start/end offsets are those of the substring",
- "# enclosed in ().",
- "",
- "# basics",
- "a & a a",
- "abc & abc abc",
- "abc|de - abc abc",
- "a|b|c - abc a",
- "",
- "# parentheses and perversions thereof",
- "a(b)c - abc abc",
- "a\\(b\\)c b abc abc",
- "a( C EPAREN",
- "a( b a( a(",
- "a\\( - a( a(",
- "a\\( bC EPAREN",
- "a\\(b bC EPAREN",
- "a(b C EPAREN",
- "a(b b a(b a(b",
- "# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)",
- "a) - a) a)",
- ") - ) )",
- "# end gagging (in a just world, those *should* give EPAREN)",
- "a) b a) a)",
- "a\\) bC EPAREN",
- "\\) bC EPAREN",
- "a()b - ab ab",
- "a\\(\\)b b ab ab",
- "",
- "# anchoring and REG_NEWLINE",
- "^abc$ & abc abc",
- "a^b - a^b",
- "a^b b a^b a^b",
- "a$b - a$b",
- "a$b b a$b a$b",
- "^ & abc @abc",
- "$ & abc @",
- "^$ & \"\" @",
- "$^ - \"\" @",
- "\\($\\)\\(^\\) b \"\" @",
- "# stop retching, those are legitimate (although disgusting)",
- "^^ - \"\" @",
- "$$ - \"\" @",
- "b$ & abNc",
- "b$ &n abNc b",
- "^b$ & aNbNc",
- "^b$ &n aNbNc b",
- "^$ &n aNNb @Nb",
- "^$ n abc",
- "^$ n abcN @",
- "$^ n aNNb @Nb",
- "\\($\\)\\(^\\) bn aNNb @Nb",
- "^^ n^ aNNb @Nb",
- "$$ n aNNb @NN",
- "^a ^ a",
- "a$ $ a",
- "^a ^n aNb",
- "^b ^n aNb b",
- "a$ $n bNa",
- "b$ $n bNa b",
- "a*(^b$)c* - b b",
- "a*\\(^b$\\)c* b b b",
- "",
- "# certain syntax errors and non-errors",
- "| C EMPTY",
- "| b | |",
- "* C BADRPT",
- "* b * *",
- "+ C BADRPT",
- "? C BADRPT",
- "\"\" &C EMPTY",
- "() - abc @abc",
- "\\(\\) b abc @abc",
- "a||b C EMPTY",
- "|ab C EMPTY",
- "ab| C EMPTY",
- "(|a)b C EMPTY",
- "(a|)b C EMPTY",
- "(*a) C BADRPT",
- "(+a) C BADRPT",
- "(?a) C BADRPT",
- "({1}a) C BADRPT",
- "\\(\\{1\\}a\\) bC BADRPT",
- "(a|*b) C BADRPT",
- "(a|+b) C BADRPT",
- "(a|?b) C BADRPT",
- "(a|{1}b) C BADRPT",
- "^* C BADRPT",
- "^* b * *",
- "^+ C BADRPT",
- "^? C BADRPT",
- "^{1} C BADRPT",
- "^\\{1\\} bC BADRPT",
- "",
- "# metacharacters, backslashes",
- "a.c & abc abc",
- "a[bc]d & abd abd",
- "a\\*c & a*c a*c",
- "a\\\\b & a\\b a\\b",
- "a\\\\\\*b & a\\*b a\\*b",
- "a\\bc & abc abc",
- "a\\ &C EESCAPE",
- "a\\\\bc & a\\bc a\\bc",
- "\\{ bC BADRPT",
- "a\\[b & a[b a[b",
- "a[b &C EBRACK",
- "# trailing $ is a peculiar special case for the BRE code",
- "a$ & a a",
- "a$ & a$",
- "a\\$ & a",
- "a\\$ & a$ a$",
- "a\\\\$ & a",
- "a\\\\$ & a$",
- "a\\\\$ & a\\$",
- "a\\\\$ & a\\ a\\",
- "",
- "# back references, ugh",
- "a\\(b\\)\\2c bC ESUBREG",
- "a\\(b\\1\\)c bC ESUBREG",
- "a\\(b*\\)c\\1d b abbcbbd abbcbbd bb",
- "a\\(b*\\)c\\1d b abbcbd",
- "a\\(b*\\)c\\1d b abbcbbbd",
- "^\\(.\\)\\1 b abc",
- "a\\([bc]\\)\\1d b abcdabbd abbd b",
- "a\\(\\([bc]\\)\\2\\)*d b abbccd abbccd",
- "a\\(\\([bc]\\)\\2\\)*d b abbcbd",
- "# actually, this next one probably ought to fail, but the spec is unclear",
- "a\\(\\(b\\)*\\2\\)*d b abbbd abbbd",
- "# here is a case that no NFA implementation does right",
- "\\(ab*\\)[ab]*\\1 b ababaaa ababaaa a",
- "# check out normal matching in the presence of back refs",
- "\\(a\\)\\1bcd b aabcd aabcd",
- "\\(a\\)\\1bc*d b aabcd aabcd",
- "\\(a\\)\\1bc*d b aabd aabd",
- "\\(a\\)\\1bc*d b aabcccd aabcccd",
- "\\(a\\)\\1bc*[ce]d b aabcccd aabcccd",
- "^\\(a\\)\\1b\\(c\\)*cd$ b aabcccd aabcccd",
- "",
- "# ordinary repetitions",
- "ab*c & abc abc",
- "ab+c - abc abc",
- "ab?c - abc abc",
- "a\\(*\\)b b a*b a*b",
- "a\\(**\\)b b ab ab",
- "a\\(***\\)b bC BADRPT",
- "*a b *a *a",
- "**a b a a",
- "***a bC BADRPT",
- "",
- "# the dreaded bounded repetitions",
- "{ & { {",
- "{abc & {abc {abc",
- "{1 C BADRPT",
- "{1} C BADRPT",
- "a{b & a{b a{b",
- "a{1}b - ab ab",
- "a\\{1\\}b b ab ab",
- "a{1,}b - ab ab",
- "a\\{1,\\}b b ab ab",
- "a{1,2}b - aab aab",
- "a\\{1,2\\}b b aab aab",
- "a{1 C EBRACE",
- "a\\{1 bC EBRACE",
- "a{1a C EBRACE",
- "a\\{1a bC EBRACE",
- "a{1a} C BADBR",
- "a\\{1a\\} bC BADBR",
- "a{,2} - a{,2} a{,2}",
- "a\\{,2\\} bC BADBR",
- "a{,} - a{,} a{,}",
- "a\\{,\\} bC BADBR",
- "a{1,x} C BADBR",
- "a\\{1,x\\} bC BADBR",
- "a{1,x C EBRACE",
- "a\\{1,x bC EBRACE",
- "a{300} C BADBR",
- "a\\{300\\} bC BADBR",
- "a{1,0} C BADBR",
- "a\\{1,0\\} bC BADBR",
- "ab{0,0}c - abcac ac",
- "ab\\{0,0\\}c b abcac ac",
- "ab{0,1}c - abcac abc",
- "ab\\{0,1\\}c b abcac abc",
- "ab{0,3}c - abbcac abbc",
- "ab\\{0,3\\}c b abbcac abbc",
- "ab{1,1}c - acabc abc",
- "ab\\{1,1\\}c b acabc abc",
- "ab{1,3}c - acabc abc",
- "ab\\{1,3\\}c b acabc abc",
- "ab{2,2}c - abcabbc abbc",
- "ab\\{2,2\\}c b abcabbc abbc",
- "ab{2,4}c - abcabbc abbc",
- "ab\\{2,4\\}c b abcabbc abbc",
- "((a{1,10}){1,10}){1,10} - a a a,a",
- "",
- "# multiple repetitions",
- "a** &C BADRPT",
- "a++ C BADRPT",
- "a?? C BADRPT",
- "a*+ C BADRPT",
- "a*? C BADRPT",
- "a+* C BADRPT",
- "a+? C BADRPT",
- "a?* C BADRPT",
- "a?+ C BADRPT",
- "a{1}{1} C BADRPT",
- "a*{1} C BADRPT",
- "a+{1} C BADRPT",
- "a?{1} C BADRPT",
- "a{1}* C BADRPT",
- "a{1}+ C BADRPT",
- "a{1}? C BADRPT",
- "a*{b} - a{b} a{b}",
- "a\\{1\\}\\{1\\} bC BADRPT",
- "a*\\{1\\} bC BADRPT",
- "a\\{1\\}* bC BADRPT",
- "",
- "# brackets, and numerous perversions thereof",
- "a[b]c & abc abc",
- "a[ab]c & abc abc",
- "a[^ab]c & adc adc",
- "a[]b]c & a]c a]c",
- "a[[b]c & a[c a[c",
- "a[-b]c & a-c a-c",
- "a[^]b]c & adc adc",
- "a[^-b]c & adc adc",
- "a[b-]c & a-c a-c",
- "a[b &C EBRACK",
- "a[] &C EBRACK",
- "a[1-3]c & a2c a2c",
- "a[3-1]c &C ERANGE",
- "a[1-3-5]c &C ERANGE",
- "a[[.-.]--]c & a-c a-c",
- "a[1- &C ERANGE",
- "a[[. &C EBRACK",
- "a[[.x &C EBRACK",
- "a[[.x. &C EBRACK",
- "a[[.x.] &C EBRACK",
- "a[[.x.]] & ax ax",
- "a[[.x,.]] &C ECOLLATE",
- "a[[.one.]]b & a1b a1b",
- "a[[.notdef.]]b &C ECOLLATE",
- "a[[.].]]b & a]b a]b",
- "a[[:notdef:]]c &C ECTYPE",
- "a[[: &C EBRACK",
- "a[[:alpha &C EBRACK",
- "a[[:alpha:] &C EBRACK",
- "a[[:alpha,:] &C ECTYPE",
- "a[[:]:]]b &C ECTYPE",
- "a[[:-:]]b &C ECTYPE",
- "a[[:alph:]] &C ECTYPE",
- "a[[:alphabet:]] &C ECTYPE",
- "[[:alnum:]]+ - -%@a5X- a5X",
- "[[:alpha:]]+ - -%@aX0- aX",
- "# ctype_latin1 does not tag '\\t' as blank, so we will match only two spaces.",
- "# See: Bug #55427 REGEXP does not recognize '\\t' as [:blank:]",
- "#[[:blank:]]+ - aSSTb SST",
- "[[:blank:]]+ - aSSTb SS",
- "[[:cntrl:]]+ - aNTb NT",
- "[[:digit:]]+ - a019b 019",
- "[[:graph:]]+ - Sa%bS a%b",
- "[[:lower:]]+ - AabC ab",
- "[[:print:]]+ - NaSbN aSb",
- "[[:punct:]]+ - S%-&T %-&",
- "[[:space:]]+ - aSNTb SNT",
- "[[:upper:]]+ - aBCd BC",
- "[[:xdigit:]]+ - p0f3Cq 0f3C",
- "a[[=b=]]c & abc abc",
- "a[[= &C EBRACK",
- "a[[=b &C EBRACK",
- "a[[=b= &C EBRACK",
- "a[[=b=] &C EBRACK",
- "a[[=b,=]] &C ECOLLATE",
- "a[[=one=]]b & a1b a1b",
- "",
- "# complexities",
- "a(((b)))c - abc abc",
- "a(b|(c))d - abd abd",
- "a(b*|c)d - abbd abbd",
- "# just gotta have one DFA-buster, of course",
- "a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab",
- "# and an inline expansion in case somebody gets tricky",
- "a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab",
- "# and in case somebody just slips in an NFA...",
- "a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights",
- "# fish for anomalies as the number of states passes 32",
- "12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789",
- "123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890",
- "1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901",
- "12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012",
- "123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123",
- "# and one really big one, beyond any plausible word width",
- "1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890",
- "# fish for problems as brackets go past 8",
- "[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm",
- "[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo",
- "[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq",
- "[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq",
- "",
- "# subtleties of matching",
- "abc & xabcy abc",
- "a\\(b\\)?c\\1d b acd",
- "aBc i Abc Abc",
- "a[Bc]*d i abBCcd abBCcd",
- "0[[:upper:]]1 &i 0a1 0a1",
- "0[[:lower:]]1 &i 0A1 0A1",
- "a[^b]c &i abc",
- "a[^b]c &i aBc",
- "a[^b]c &i adc adc",
- "[a]b[c] - abc abc",
- "[a]b[a] - aba aba",
- "[abc]b[abc] - abc abc",
- "[abc]b[abd] - abd abd",
- "a(b?c)+d - accd accd",
- "(wee|week)(knights|night) - weeknights weeknights",
- "(we|wee|week|frob)(knights|night|day) - weeknights weeknights",
- "a[bc]d - xyzaaabcaababdacd abd",
- "a[ab]c - aaabc abc",
- "abc s abc abc",
- "a* & b @b",
- "",
- "# Let's have some fun -- try to match a C comment.",
- "# first the obvious, which looks okay at first glance...",
- "/\\*.*\\*/ - /*x*/ /*x*/",
- "# but...",
- "/\\*.*\\*/ - /*x*/y/*z*/ /*x*/y/*z*/",
- "# okay, we must not match */ inside; try to do that...",
- "/\\*([^*]|\\*[^/])*\\*/ - /*x*/ /*x*/",
- "/\\*([^*]|\\*[^/])*\\*/ - /*x*/y/*z*/ /*x*/",
- "# but...",
- "/\\*([^*]|\\*[^/])*\\*/ - /*x**/y/*z*/ /*x**/y/*z*/",
- "# and a still fancier version, which does it right (I think)...",
- "/\\*([^*]|\\*+[^*/])*\\*+/ - /*x*/ /*x*/",
- "/\\*([^*]|\\*+[^*/])*\\*+/ - /*x*/y/*z*/ /*x*/",
- "/\\*([^*]|\\*+[^*/])*\\*+/ - /*x**/y/*z*/ /*x**/",
- "/\\*([^*]|\\*+[^*/])*\\*+/ - /*x****/y/*z*/ /*x****/",
- "/\\*([^*]|\\*+[^*/])*\\*+/ - /*x**x*/y/*z*/ /*x**x*/",
- "/\\*([^*]|\\*+[^*/])*\\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/",
- "",
- "# subexpressions",
- "a(b)(c)d - abcd abcd b,c",
- "a(((b)))c - abc abc b,b,b",
- "a(b|(c))d - abd abd b,-",
- "a(b*|c|e)d - abbd abbd bb",
- "a(b*|c|e)d - acd acd c",
- "a(b*|c|e)d - ad ad @d",
- "a(b?)c - abc abc b",
- "a(b?)c - ac ac @c",
- "a(b+)c - abc abc b",
- "a(b+)c - abbbc abbbc bbb",
- "a(b*)c - ac ac @c",
- "(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de",
- "# the regression tester only asks for 9 subexpressions",
- "a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j",
- "a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k",
- "a([bc]?)c - abc abc b",
- "a([bc]?)c - ac ac @c",
- "a([bc]+)c - abc abc b",
- "a([bc]+)c - abcc abcc bc",
- "a([bc]+)bc - abcbc abcbc bc",
- "a(bb+|b)b - abb abb b",
- "a(bbb+|bb+|b)b - abb abb b",
- "a(bbb+|bb+|b)b - abbb abbb bb",
- "a(bbb+|bb+|b)bb - abbb abbb b",
- "(.*).* - abcdef abcdef abcdef",
- "(a*)* - bc @b @b",
- "",
- "# do we get the right subexpression when it is used more than once?",
- "a(b|c)*d - ad ad -",
- "a(b|c)*d - abcd abcd c",
- "a(b|c)+d - abd abd b",
- "a(b|c)+d - abcd abcd c",
- "a(b|c?)+d - ad ad @d",
- "a(b|c?)+d - abcd abcd @d",
- "a(b|c){0,0}d - ad ad -",
- "a(b|c){0,1}d - ad ad -",
- "a(b|c){0,1}d - abd abd b",
- "a(b|c){0,2}d - ad ad -",
- "a(b|c){0,2}d - abcd abcd c",
- "a(b|c){0,}d - ad ad -",
- "a(b|c){0,}d - abcd abcd c",
- "a(b|c){1,1}d - abd abd b",
- "a(b|c){1,1}d - acd acd c",
- "a(b|c){1,2}d - abd abd b",
- "a(b|c){1,2}d - abcd abcd c",
- "a(b|c){1,}d - abd abd b",
- "a(b|c){1,}d - abcd abcd c",
- "a(b|c){2,2}d - acbd acbd b",
- "a(b|c){2,2}d - abcd abcd c",
- "a(b|c){2,4}d - abcd abcd c",
- "a(b|c){2,4}d - abcbd abcbd b",
- "a(b|c){2,4}d - abcbcd abcbcd c",
- "a(b|c){2,}d - abcd abcd c",
- "a(b|c){2,}d - abcbd abcbd b",
- "a(b+|((c)*))+d - abd abd @d,@d,-",
- "a(b+|((c)*))+d - abcd abcd @d,@d,-",
- "",
- "# check out the STARTEND option",
- "[abc] &# a(b)c b",
- "[abc] &# a(d)c",
- "[abc] &# a(bc)d b",
- "[abc] &# a(dc)d c",
- ". &# a()c",
- "b.*c &# b(bc)c bc",
- "b.* &# b(bc)c bc",
- ".*c &# b(bc)c bc",
- "",
- "# plain strings, with the NOSPEC flag",
- "abc m abc abc",
- "abc m xabcy abc",
- "abc m xyz",
- "a*b m aba*b a*b",
- "a*b m ab",
- "\"\" mC EMPTY",
- "",
- "# cases involving NULs",
- "aZb & a a",
- "aZb &p a",
- "aZb &p# (aZb) aZb",
- "aZ*b &p# (ab) ab",
- "a.b &# (aZb) aZb",
- "a.* &# (aZb)c aZb",
- "",
- "# word boundaries (ick)",
- "[[:<:]]a & a a",
- "[[:<:]]a & ba",
- "[[:<:]]a & -a a",
- "a[[:>:]] & a a",
- "a[[:>:]] & ab",
- "a[[:>:]] & a- a",
- "[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc",
- "[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc",
- "[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc",
- "[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc",
- "[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_",
- "[[:<:]]a_b[[:>:]] & x_a_b",
- "",
- "# past problems, and suspected problems",
- "(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1",
- "abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop",
- "abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv",
- "(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11",
- "CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11",
- "Char \\([a-z0-9_]*\\)\\[.* b Char xyz[k Char xyz[k xyz",
- "a?b - ab ab",
- "-\\{0,1\\}[0-9]*$ b -5 -5",
- "",
- NULL
- };
|