// "pattern-1" assertions against $re, which is built above this excerpt.
// The `//`-prefixed Perl lines are the original Regexp::Assemble test code,
// kept as a reference for what this PHP port is supposed to mirror.
// NOTE(review): `source` and `matched` are read as properties here; the Perl
// skip message below refers to matched() as a method — confirm the PHP class
// exposes them this way.
ok(!$re->source, 're pattern-1 foolish-\\d+ source');
//    SKIP: {
//        skip( "matched() is not implemented in this version of perl ($])", 1 ) if $PERL_VERSION_TOO_LOW;
ok(!$re->matched, 're pattern-1 foolish-\\d+ 5');
//    }
//    if ($] < 5.009005) {
//        ok( do {use re 'eval'; 'cat' !~ /$re/}, 're pattern-1 cat <5.10' );
//        ok( do {use re 'eval'; 'foolish-808' =~ /$re/}, 're pattern-1 foolish-808 <5.10' );
//    }
//    else {
// $re is concatenated into a '/'-delimited pattern string, so it is converted
// to a string once per assertion.
unlike('cat', '/' . $re . '/', 're pattern-1 cat 5.10');
// NOTE(review): the Perl reference above asserts that 'foolish-808' DOES match
// (=~), while this line asserts that it does not (unlike). Confirm whether the
// inversion is a deliberate adaptation of the port or a porting mistake.
unlike('foolish-808', '/' . $re . '/', 're pattern-1 foolish-808 5.10');
//    }
// "pattern-2": a fresh assembler created with 'track' => 1 and loaded with two
// anchored patterns through chained add() calls.
$re = new Regexp_Assemble(['track' => 1]);
$re->add('^a-\\d+$')->add('^a-\\d+-\\d+$');
// NOTE(review): $str is not read by the assertions visible below; the call may
// still matter if as_string() changes the state of $re — confirm before removing.
$str = $re->as_string();
//    SKIP: {
//        skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 2 ) if $PERL_VERSION_5_005;
// match() is expected to be falsy for a non-matching subject and truthy for a
// matching one.
ok(!$re->match('foo'), 'match pattern-2 foo');
ok($re->match('a-22-44'), 'match pattern-2 a-22-44');
//    }
//    SKIP: {
//           skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 1 ) if $PERL_VERSION_5_005;
// Here match() is compared against the text of the second pattern added above,
// i.e. the assertion expects the originating pattern string as the return value.
is($re->match('a-22-55555'), '^a-\\d+-\\d+$', 're pattern-2 a-22-55555');
//    }
//    SKIP: {
//        skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 1 ) if $PERL_VERSION_5_005;
ok($re->match('a-000'), 're pattern-2 a-000 match');
//    }
//    SKIP: {
//        skip( "matched() is not implemented in this version of perl ($])", 1 ) if $PERL_VERSION_TOO_LOW;
        // Make $ptr printable for the test label: double every backslash and
        // turn a newline into the two characters "\n".
        // The replacement argument of preg_replace() is a plain string, not a
        // delimited pattern, so it must not be wrapped in '/'. The previous
        // replacements ('/\\\\\\\\/' and '/\\\\n/') injected literal slashes
        // into the label, unlike the Perl reference s/\n/\\n/.
        // Inside the replacement "\\" stands for one literal backslash, hence
        // four backslashes (eight in PHP source) to emit two.
        $ptr = preg_replace('/\\\\/', '\\\\\\\\', $ptr);
        //            $ptr =~ s/\n/\\n/;
        $ptr = preg_replace('/\\n/', '\\\\n', $ptr);
        //            my $bug_success = ($s =~ /\n/) ? 0 : 1;
        // $bug_success / $bug_fail are only consumed by the assertion that is
        // currently commented out below; they are kept so it can be re-enabled.
        $bug_success = preg_match("/\n/", $s) ? 0 : 1;
        $bug_fail = 1 - $bug_success;
        // On hold (kept for reference):
        //            is( preg_match("/$re/" , $s ) ? $bug_success : $bug_fail, $ok,
        //                "Folded meta pairs behave as list for \\$meta ($ptr,ok=$ok/$bug_success/$bug_fail)"
        //	            );
        // Active check: whether $s matches the unfolded pattern $re_fold,
        // normalised to 1/0, must equal the expected flag $ok.
        is(preg_match("/{$re_fold}/", $s) ? 1 : 0, $ok, "Unfolded meta pairs behave as list for \\{$meta} ({$ptr},ok={$ok})");
    }
}
// Checks for an assembler created with 'unroll_plus' => 1.
// Each case is [patterns passed to add(), expected as_string() output, label].
// The same $u instance is reused for every case, in the order listed.
$u = new Regexp_Assemble(['unroll_plus' => 1]);
$unroll_cases = [
    [["a+b", 'ac'], 'a(?:a*b|c)', 'unroll plus a+b ac'],
    [["\\LA+B", "ac"], 'a(?:a*b|c)', 'unroll plus \\LA+B ac'],
    [["\\Ua+?b", "AC"], 'A(?:A*?B|C)', 'unroll plus \\Ua+?b AC'],
    [['\\d+d', '\\de', '\\w+?x', '\\wy'], '(?:\\w(?:\\w*?x|y)|\\d(?:\\d*d|e))', 'unroll plus \\d and \\w'],
];
foreach ($unroll_cases as $unroll_case) {
    list($unroll_patterns, $unroll_expected, $unroll_label) = $unroll_case;
    // Same as calling $u->add(p1, p2, ...) with the patterns as separate arguments.
    call_user_func_array([$u, 'add'], $unroll_patterns);
    $str = $u->as_string();
    is($str, $unroll_expected, $unroll_label);
}
//
// PHP cannot handle multibyte encodings other than UTF-8, so the following
// case from the Perl suite stays disabled.
//    $u->add( '\\xab+f', '\\xabg', '\\xcd+?h', '\\xcdi');
//    $str = $u->as_string();
//    is( $str, "(?:\xcd(?:\xcd*?h|i)|\xab(?:\xab*f|g))", 'unroll plus meta x' );
Exemplo n.º 3
0
     $list[] = $_;
 }
 // Empty input: fill $out with a placeholder word list and empty results,
 // pass it through hh() (presumably output escaping — confirm) and render the page.
 if (count($list) <= 0) {
     $out['wordlist'] = "内容がないよう\nここに改行区切りで単語を入れよう。\n";
     $out['regexp'] = '';
     $out['code'] = '';
     $out['bad'] = false;
     $out = hh($out);
     include "index.tpl";
     // Stop here. Without this the script fell through into the normal
     // pipeline below: it assembled and logged a regex for an empty list and
     // overwrote the already-processed $out after the page had been rendered.
     // NOTE(review): assumes index.tpl does not terminate the script itself.
     return;
 }
 // Build the regular expression: every word is quoted with _perl_quotemeta()
 // before being added to the assembler.
 $assemble = new Regexp_Assemble();
 foreach ($list as $_) {
     $assemble->add(_perl_quotemeta($_));
 }
 $out['regexp'] = $assemble->as_string();
 // Sanity check: every input word must match the generated expression when it
 // is anchored at both ends (UTF-8 mode).
 $bad = false;
 foreach ($list as $_) {
     if (!preg_match('/^' . $out['regexp'] . '$/u', $_)) {
         $bad = true;
         // One failure settles the verdict; no need to test the remaining words.
         break;
     }
 }
 // Log input words, generated expression and verdict as one tab-separated line.
 // Note: $bad is a boolean, so it interpolates as "1" when true and as an
 // empty string when false.
 mylog("入力単語:" . join("\t", $list) . "\t出力正規表現:" . $out['regexp'] . "\t判定:{$bad}\r\n");
 // Start the PHP sample code shown to the user; the strings below are emitted
 // verbatim (including their embedded comments).
 $out['code'] = 'require_once(\'Regexp_Assemble.php\');  //正規表現自動生成モジュール' . "\n";
 $out['code'] .= "\n";
 $out['code'] .= "\n";
 $out['code'] .= '$assemble = new Regexp_Assemble();      //オブジェクトを作って・・・' . "\n";
 $out['code'] .= "\n";
 $out['code'] .= '//単語を add で追加するだけ。 ' . "\n";
 foreach ($list as $_) {
Exemplo n.º 4
0
$_ = $fixed;

is( Regexp::Assemble->new->as_string(), $Regexp::Assemble::Always_Fail, 'empty' );
*/
// Table-driven checks of insert() followed by as_string().
// Each case is [expected as_string() output, token list, token list, ...];
// every token list is handed to insert() as one call, in the order listed.
$insert_cases = [
    ['(?:)?', ['']],
    ['d', ['d']],
    ['dot', ['d', 'o', 't']],
    ['[dot]', ['d'], ['o'], ['t']],
    ['d?', ['d'], ['']],
    ['da', ['d', 'a']],
    ['da?', ['d', 'a'], ['d']],
    ['(?:da)?', ['d', 'a'], ['']],
    ['[ad]?', ['d'], [''], ['a']],
    ['(?:do|a)?', ['d', 'o'], [''], ['a']],
    ['.', ['x'], ['.']],
    ['.', ['\\033'], ['.']],
    ['.', ['\\d'], ['\\s'], ['.']],
    ['.', ['\\d'], ['\\D']],
    ['.', ['\\s'], ['\\S']],
    ['.', ['\\w'], ['\\W']],
    ['.', ['\\w'], ['\\W'], ["\t"]],
    ['\\d', ['\\d'], ['5']],
    ['\\d', ['\\d'], [5], [7], [0]],
    ['\\d?', ['\\d'], ['5'], ['']],
    ['\\s', ['\\s'], [' ']],
    ['\\s?', ['\\s'], ['']],
    ['[\\dx]', ['\\d'], [5], [7], [0], ['x']],
    ['[\\d\\s]', ['\\d'], ['\\s'], [5], [7], [0], [' ']],
    ['[.p]', ['\\.'], ['p']],
    ['\\w', ['\\w'], [5], [1], [0], ['a'], ['_']],
    ['[*\\d]?', ['\\d'], [''], ['\\*']],
    ['[\\d^]?', ['\\d'], [''], ['\\^']],
    ['a[?@]z', ['a', '@', 'z'], ['a', "\\?", 'z']],
    ['\\+', ['\\+']],
    ['\\+', [quotemeta('+')]],
    ['[*+]', ['\\+'], ['\\*']],
    ['[*+]', [quotemeta('+')], [quotemeta('*')]],
    ['[-0z]', ['-'], ['0'], ['z']],
    ['[-.z]', ['-'], ['\\.'], ['z']],
    ['[-*+]', ['\\+'], ['-'], ['\\*']],
    ['[-.]', ['\\.'], ['-']],
    ['(?:[0z]|^)', ['^'], ['0'], ['z']],
    ['(?:[-0z]|^)', ['^'], ['0'], ['-'], ['z']],
    ['(?:[-\\w]|^)', ['^'], ['0'], ['-'], ['z'], ['\\w']],
    ['(?:[-0]|$)', ['$'], ['0'], ['-']],
    ['(?:[-0]|$|^)', ['$'], ['0'], ['-'], ['^']],
    ['\\d', [0], [1], [2], [3], [4], [5], [6], [7], [8], [9]],
    ['[\\dx]', [0], [1], [2], [3], [4], [5], [6], [7], [8], [9], ['x']],
    ['(?:b[ey])?', ['b', 'e'], [''], ['b', 'y']],
    ['(?:be|do)?', ['b', 'e'], [''], ['d', 'o']],
    ['(?:b[ey]|a)?', ['b', 'e'], [''], ['b', 'y'], ['a']],
    ['da[by]', ['d', 'a', 'b'], ['d', 'a', 'y']],
    ['da(?:ily|b)', ['d', 'a', 'b'], ['d', 'a', 'i', 'l', 'y']],
    ['(?:night|day)', ['n', 'i', 'g', 'h', 't'], ['d', 'a', 'y']],
    ['da(?:(?:il)?y|b)', ['d', 'a', 'b'], ['d', 'a', 'y'], ['d', 'a', 'i', 'l', 'y']],
    ['dab(?:ble)?', ['d', 'a', 'b'], ['d', 'a', 'b', 'b', 'l', 'e']],
    ['d(?:o(?:ne?)?)?', ['d'], ['d', 'o'], ['d', 'o', 'n'], ['d', 'o', 'n', 'e']],
    ['(?:d(?:o(?:ne?)?)?)?', ['d'], ['d', 'o'], ['d', 'o', 'n'], ['d', 'o', 'n', 'e'], ['']],
    ['d(?:o[begnt]|u[bd])', ['d', 'o', 'b'], ['d', 'o', 'e'], ['d', 'o', 'g'], ['d', 'o', 'n'], ['d', 'o', 't'], ['d', 'u', 'b'], ['d', 'u', 'd']],
    ['da(?:m[ep]|r[kt])', ['d', 'a', 'm', 'p'], ['d', 'a', 'm', 'e'], ['d', 'a', 'r', 't'], ['d', 'a', 'r', 'k']],
];
foreach ($insert_cases as $case) {
    // The first element is the expected pattern; the rest are the token lists.
    $expected = array_shift($case);
    $assembler = new Regexp_Assemble();
    foreach ($case as $tokens) {
        $assembler->insert($tokens);
    }
    // Label: each token list glued into a word, words separated by ") (".
    $words = array_map(function ($tokens) {
        return implode('', $tokens);
    }, $case);
    $label = implode(') (', $words);
    is($assembler->as_string(), $expected, "insert ({$label})");
}
//my {$xism} = ($] < 5.013) ? '-xism' : '^:';
//$xism = '-xism:';
foreach ([["(?{$xism}(?:^|m)a)", '^a', 'ma'], ["(?{$xism}(?:[mw]|^)a)", '^a', 'ma', 'wa'], ["(?{$xism}(?:^|\\^)a)", '^a', '\\^a'], ["(?{$xism}(?:^|0)a)", '^a', '0a'], ["(?{$xism}(?:[m^]|^)a)", '^a', 'ma', '\\^a'], ["(?{$xism}(?:ma|^)a)", '^a', 'maa'], ["(?{$xism}a.+)", 'a.+'], ["(?{$xism}b?)", '[b]?'], ["(?{$xism}\\.)", '[.]'], ["(?{$xism}\\.+)", '[.]+'], ["(?{$xism}\\.+)", '[\\.]+'], ["(?{$xism}\\^+)", '[\\^]+'], ["(?{$xism}%)", '[%]'], ["(?{$xism}%)", '[\\%]'], ["(?{$xism}!)", '[!]'], ["(?{$xism}!)", '[\\!]'], ["(?{$xism}@)", '[@]'], ["(?{$xism}@)", '[\\@]'], ["(?{$xism}a|[bc])", 'a|[bc]'], ["(?{$xism}ad?|[bc])", 'ad?|[bc]'], ["(?{$xism}" . "b(?:\$|e))", 'b$', 'be'], ["(?{$xism}" . "b(?:[ae]|\$))", 'b$', 'be', 'ba'], ["(?{$xism}" . "b(?:\$|\\\$))", 'b$', 'b\\$'], ["(?{$xism}(?:^a[bc]|de))", '^ab', '^ac', 'de'], ["(?i:/)", '/', ['flags' => 'i']], ["(?i:(?:^a[bc]|de))", '^ab', '^ac', 'de', ['flags' => 'i']], ["(?im:(?:^a[bc]|de))", '^ab', '^ac', 'de', ['flags' => 'im']], ["(?{$xism}a(?:%[de]|=[bc]))", quotemeta('a%d'), quotemeta('a=b'), quotemeta('a%e'), quotemeta('a=c')], ["(?{$xism}\\^[,:])", quotemeta('^:'), quotemeta('^,')], ["(?{$xism}a[-*=])", quotemeta('a='), quotemeta('a*'), quotemeta('a-')], ["(?{$xism}l(?:im)?it)", 'lit', 'limit'], ["(?{$xism}a(?:(?:g[qr]|h)w|[de]n|m)z)", 'amz', 'adnz', 'aenz', 'agrwz', 'agqwz', 'ahwz'], ["(?{$xism}a(?:(?:e(?:[gh]u|ft)|dkt|f)w|(?:(?:ij|g)m|hn)v)z)", 'adktwz', 'aeftwz', 'aeguwz', 'aehuwz', 'afwz', 'agmvz', 'ahnvz', 'aijmvz'], ["(?{$xism}b(?:d(?:kt?|i)|ckt?)x)", 'bcktx', 'bckx', 'bdix', 'bdktx', 'bdkx'], ["(?{$xism}d(?:[ln]dr?t|x))", 'dldrt', 'dndrt', 'dldt', 'dndt', 'dx'], ["(?{$xism}d(?:[ln][dp]t|x))", 'dldt', 'dndt', 'dlpt', 'dnpt', 'dx'], ["(?{$xism}d(?:[ln][dp][mr]t|x))", 'dldrt', 'dndrt', 'dldmt', 'dndmt', 'dlprt', 'dnprt', 'dlpmt', 'dnpmt', 'dx'], ["(?{$xism}" . 
"(?:\\(scan|\\*mens|\\[mail))", '\\*mens', '\\(scan', '\\[mail'], ["(?{$xism}a\\[b\\[c)", '\\Qa[b[c'], ["(?{$xism}a\\]b\\]c)", '\\Qa]b]c'], ["(?{$xism}a\\(b\\(c)", '\\Qa(b(c'], ["(?{$xism}a\\)b\\)c)", '\\Qa)b)c'], ["(?{$xism}a[(+[]b)", '\\Qa(b', '\\Qa[b', '\\Qa+b'], ["(?{$xism}a[-+^]b)", '\\Qa^b', '\\Qa-b', '\\Qa+b'], ["(?{$xism}car(?:rot)?)", 'car', 'carrot', ['lookahead' => 1]], ["(?{$xism}car[dpt]?)", 'car', 'cart', 'card', 'carp', ['lookahead' => 1]], ["(?{$xism}[bc]a[nr]e)", 'bane', 'bare', 'cane', 'care', ['lookahead' => 1]], ["(?{$xism}(?=[ru])(?:ref)?use)", 'refuse', 'use', ['lookahead' => 1]], ["(?{$xism}(?=[bcd])(?:bird|cat|dog))", 'bird', 'cat', 'dog', ['lookahead' => 1]], ["(?{$xism}sea(?=[hs])(?:horse|son))", 'seahorse', 'season', ['lookahead' => 1]], ["(?{$xism}car(?:(?=[dr])(?:rot|d))?)", 'car', 'card', 'carrot', ['lookahead' => 1]], ["(?{$xism}(?:(?:[hl]o|s?t|ch)o|[bf]a)ked)", 'looked', 'choked', 'hooked', 'stoked', 'toked', 'baked', 'faked'], ["(?{$xism}(?=[frt])(?:trans|re|f)action)", 'faction', 'reaction', 'transaction', ['lookahead' => 1]], ["(?{$xism}c(?=[ao])(?:or(?=[np])(?:pse|n)|ar(?=[de])(?:et|d)))", 'card', 'caret', 'corn', 'corpse', ['lookahead' => 1]], ["(?{$xism}car(?:(?=[dipt])(?:[dpt]|i(?=[no])(?:ng|on)))?)", 'car', 'cart', 'card', 'carp', 'carion', 'caring', ['lookahead' => 1]], ["(?{$xism}(?=[dfrst])(?:(?=[frt])(?:trans|re|f)a|(?=[ds])(?:dir|s)e)ction)", 'faction', 'reaction', 'transaction', 'direction', 'section', ['lookahead' => 1]], ["(?{$xism}car(?=[eir])(?:e(?=[flst])(?:(?=[ls])(?:le)?ss|ful|t)|i(?=[no])(?:ng|on)|r(?=[iy])(?:ied|y)))", 'caret', 'caress', 'careful', 'careless', 'caring', 'carion', 'carry', 'carried', ['lookahead' => 1]], ["(?{$xism}(?=[uv])(?:u(?=[nr])(?:n(?=[iprs])(?:(?=[ip])(?:(?:p[or]|impr))?i|(?:sea)?|rea)|r)|v(?=[ei])(?:en(?=[it])(?:trime|i)|i))son)", 'unimprison', 'unison', 'unpoison', 'unprison', 'unreason', 'unseason', 'unson', 'urson', 'venison', 'ventrimeson', 'vison', ['lookahead' => 1]], 
["(?{$xism}(?:a?bc?)?d)", 'abcd', 'abd', 'bcd', 'bd', 'd'], ["(?{$xism}(?:a?bc?|c)d)", 'abcd', 'abd', 'bcd', 'bd', 'cd'], ["(?{$xism}(?:(?:a?bc?)?d|c))", 'abcd', 'abd', 'bcd', 'bd', 'c', 'd'], ["(?{$xism}(?:(?:a?bc?)?d|cd?))", 'abcd', 'abd', 'bcd', 'bd', 'c', 'cd', 'd'], ["(?{$xism}(?:(?:ab?|b)c?)?d)", 'abcd', 'abd', 'acd', 'ad', 'bcd', 'bd', 'd'], ["(?{$xism}(?:(?:ab)?cd?)?e)", 'abcde', 'abce', 'cde', 'ce', 'e'], ["(?{$xism}(?:(?:(?:ab?|b)c?)?d|c))", 'abcd', 'abd', 'acd', 'ad', 'bcd', 'bd', 'c', 'd'], ["(?{$xism}(?:(?:(?:ab?|b)c?)?d|cd?))", 'abcd', 'abd', 'acd', 'ad', 'bcd', 'bd', 'c', 'cd', 'd'], ["(?{$xism}" . "^(?:b?cd?|ab)\$)", '^ab$', '^bc$', '^bcd$', '^c$', '^cd$'], ["(?{$xism}" . "^(?:(?:ab?c|cd?)e?|e)\$)", '^abc$', '^abce$', '^ac$', '^ace$', '^c$', '^cd$', '^cde$', '^ce$', '^e$'], ["(?{$xism}" . "^(?:abc|bcd)e?\$)", '^abc$', '^abce$', '^bcd$', '^bcde$'], ["(?{$xism}" . "^(?:abcdef|bcdefg)h?\$)", '^abcdef$', '^abcdefh$', '^bcdefg$', '^bcdefgh$'], ["(?{$xism}" . "^(?:bcdefg|abcd)h?\$)", '^abcd$', '^abcdh$', '^bcdefg$', '^bcdefgh$'], ["(?{$xism}" . "^(?:abcdef|bcd)h?\$)", '^abcdef$', '^abcdefh$', '^bcd$', '^bcdh$'], ["(?{$xism}" . "^(?:a(?:bcd|cd?)e?|e)\$)", '^abcd$', '^abcde$', '^ac$', '^acd$', '^acde$', '^ace$', '^e$'], ["(?{$xism}" . "^(?:bcd|cd?)e?\$)", '^bcd$', '^bcde$', '^c$', '^cd$', '^cde$', '^ce$'], ["(?{$xism}" . "^(?:abc|bc?)(?:de)?\$)", '^abc$', '^abcde$', '^b$', '^bc$', '^bcde$', '^bde$'], ["(?{$xism}" . "^(?:b(?:cd)?|abd)e?\$)", '^abd$', '^abde$', '^b$', '^bcd$', '^bcde$', '^be$'], ["(?{$xism}" . "^(?:ad?|bcd)e?\$)", '^a$', '^ad$', '^ade$', '^ae$', '^bcd$', '^bcde$'], ["(?{$xism}" . "^(?:a(?:bcd|cd?)e?|de)\$)", '^abcd$', '^abcde$', '^ac$', '^acd$', '^acde$', '^ace$', '^de$'], ["(?{$xism}" . "^(?:a(?:bcde)?|bc?d?e)\$)", '^a$', '^abcde$', '^bcde$', '^bce$', '^bde$', '^be$'], ["(?{$xism}" . "^(?:a(?:b[cd]?)?|bd?e?f)\$)", '^a$', '^ab$', '^abc$', '^abd$', '^bdef$', '^bdf$', '^bef$', '^bf$'], ["(?{$xism}" . 
"^(?:a(?:bc?|dd)?|bd?e?f)\$)", '^a$', '^ab$', '^abc$', '^add$', '^bdef$', '^bdf$', '^bef$', '^bf$'], ["(?{$xism}" . "^(?:a(?:bc?|de)?|bc?d?f)\$)", '^a$', '^ab$', '^abc$', '^ade$', '^bcdf$', '^bcf$', '^bdf$', '^bf$'], ["(?{$xism}" . "^(?:a(?:bc?|de)?|cd?e?f)\$)", '^a$', '^ab$', '^abc$', '^ade$', '^cdef$', '^cdf$', '^cef$', '^cf$'], ["(?{$xism}" . "^(?:a(?:bc?|e)?|bc?de?f)\$)", '^a$', '^ab$', '^abc$', '^ae$', '^bcdef$', '^bcdf$', '^bdef$', '^bdf$'], ["(?{$xism}" . "^(?:a(?:bc?|e)?|b(?:cd)?e?f)\$)", '^a$', '^ab$', '^abc$', '^ae$', '^bcdef$', '^bcdf$', '^bef$', '^bf$'], ["(?{$xism}" . "^(?:b(?:cde?|d?e)f|a(?:bc?|e)?)\$)", '^a$', '^ab$', '^abc$', '^ae$', '^bcdef$', '^bcdf$', '^bdef$', '^bef$'], ["(?{$xism}\\b(?:c[de]|ab)\\b)", 'ab', 'cd', 'ce', ['anchor_word' => 1]], ["(?{$xism}\\b(?:c[de]|ab))", 'ab', 'cd', 'ce', ['anchor_word_begin' => 1]], ["(?{$xism}" . "^(?:c[de]|ab)\$)", 'ab', 'cd', 'ce', ['anchor_line' => 1]], ["(?{$xism}(?:c[de]|ab))", 'ab', 'cd', 'ce', ['anchor_line' => 0]], ["(?{$xism}" . "(?:c[de]|ab)\$)", 'ab', 'cd', 'ce', ['anchor_line_end' => 1]], ["(?{$xism}\\A(?:c[de]|ab)\\Z)", 'ab', 'cd', 'ce', ['anchor_string' => 1]], ["(?{$xism}(?:c[de]|ab))", 'ab', 'cd', 'ce', ['anchor_string' => 0]], ["(?{$xism}x[[:punct:]][yz])", 'x[[:punct:]]y', 'x[[:punct:]]z']] as $test) {
    $result = array_shift($test);
    $param = is_array($test[count($test) - 1]) ? array_pop($test) : [];
    $r = new Regexp_Assemble($param);
    $r->add($test);
    if (!isset($param['flags'])) {
        //        $r->__flags = '-xism';
    }
    $args = '(' . join(') (', $test) . ')';
    if (count($param)) {
        $args .= '{';
        foreach ($param as $key => $value) {