//}
//{
// Install a custom lexer pattern on the assembler: a parenthesised group
// (optionally containing one nested group) is lexed as a single token,
// anything else is lexed one character at a time.
$r = new Regexp_Assemble();
$r->__lex = '\\([^(]*(?:\\([^)]*\\))?[^)]*\\)|.';
// Each case resets the assembler, adds one pattern and checks the resulting
// token path.
$r->reset()->add('ab(cd)ef');
is_deeply($r->__path, ['a', 'b', '(cd)', 'e', 'f'], 'ab(cd)ef (with parenthetical lexer)');
$r->reset()->add('ab(cd(ef)gh)ij');
is_deeply($r->__path, ['a', 'b', '(cd(ef)gh)', 'i', 'j'], 'ab(cd(ef)gh)ij (with parenthetical lexer)');
$r->reset()->add('ab((ef)gh)ij');
is_deeply($r->__path, ['a', 'b', '((ef)gh)', 'i', 'j'], 'ab((ef)gh)ij (with parenthetical lexer)');
$r->reset()->add('ab(cd(ef))ij');
is_deeply($r->__path, ['a', 'b', '(cd(ef))', 'i', 'j'], 'ab(cd(ef))ij (with parenthetical lexer)');
$r->reset()->add('ab((ef))ij');
is_deeply($r->__path, ['a', 'b', '((ef))', 'i', 'j'], 'ab((ef))ij (with parenthetical lexer)');
// A lexer can also be passed through the constructor options. The expected
// path shows the text following \Q coming back quoted ('+' becomes '\+').
$r = new Regexp_Assemble(['lex' => '\\d']);
is_deeply($r->add('0\\Q0C,+')->__path, ['0', '0', 'C', ',', '\\+'], '0\\Q0C,+ with \\d lexer');
//{
// A single inserted token list is expected to become the path unchanged.
$ra = new Regexp_Assemble();
$ra->insert(['d', 'a', 'b']);
is_deeply($ra->__path, ['d', 'a', 'b'], '/dab/');
//}
//{
// Two token lists sharing the prefix '0' are expected to produce that prefix
// followed by a node keyed by the diverging tokens.
$ra = new Regexp_Assemble();
$ra->insert(['0', '1']);
$ra->insert(['0', '2']);
is_deeply($ra->__path, ['0', ['1' => ['1'], '2' => ['2']]], '/01/ /02/');
//}
//{
// NOTE(review): no assertion follows these two inserts in the visible text;
// the remainder of this case appears to be missing — confirm against the
// complete script.
$ra = new Regexp_Assemble();
$ra->insert(['0']);
$ra->insert(['0', '1']);
//           skip( "match()/matched() return undef in this version of perl ($])", 8 ) if $PERL_VERSION_5_005;
// NOTE(review): $re is not assigned anywhere in the visible text above; it is
// presumably built in a part of the script that is not shown — confirm.
// Per the assertions below, match() reports whether the target matched and
// matched() yields the text of the pattern responsible ('^cat', '^candle$')
// rather than the target itself.
ok(!$re->match('foo'), 'not match pattern-6 foo');
$target = 'cat';
ok($re->match($target), "match pattern-6 {$target}");
is($re->matched, '^cat', "match pattern-6 {$target} re");
// An upper-case target is expected to match '^cat' as well, so $re presumably
// matches case-insensitively — TODO confirm against its construction.
$target = 'CATFOOD';
ok($re->match($target), "match pattern-6 {$target}");
is($re->matched, '^cat', "match pattern-6 {$target} re");
$target = 'candle';
ok($re->match($target), "match pattern-6 {$target}");
is($re->matched, '^candle$', "match pattern-6 {$target} re");
// Expected not to match any pattern.
$target = 'Candlestick';
ok(!$re->match($target), "match pattern-6 {$target}");
//    }
// Assembler created with the 'track' option and loaded with four anchored
// patterns that contain zero, one, two and three capture groups.
$re = new Regexp_Assemble(['track' => 1]);
$re->add('^ab-(\\d+)-(\\d+)')->add('^ac-(\\d+)')->add('^nothing')->add('^ad-((\\d+)-(\\d+))');
//    SKIP: {
//        skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 15 ) if $PERL_VERSION_5_005;
// Before any match has been attempted capture() is expected to be falsy.
ok(!$re->capture(), 'match p7 no prior capture');
// After each successful match, capture() is expected to return the values of
// the capture groups of the pattern that matched.
ok($re->match('nothing captured'), 'match p7-1');
is(count($re->capture()), 0, 'match p7-1 no capture');
ok($re->match('ac-417 captured'), 'match p7-2');
$capture = $re->capture();
is(count($capture), 1, 'match p7-2 capture');
is($capture[0], 417, "match p7-2 value 0 ok");
ok($re->match('ab-21-17 captured'), 'match p7-3');
$capture = $re->capture();
is(count($capture), 2, 'match p7-3 capture');
is($capture[0], 21, "match p7-3 value 0 ok");
is($capture[1], 17, "match p7-3 value 1 ok");
// NOTE(review): the capture checks for this last match are not present in the
// visible text.
ok($re->match('ad-808-245 captured'), 'match p7-4');
// Each case below splits every word into one-character tokens with
// str_split(), inserts the token lists, runs _reduce() and compares the
// resulting path structure with the expected nested array.
// NOTE(review): for this first case $list is assigned outside the visible
// text — confirm its contents against the complete script.
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', ['' => 0, 'h' => ['h', ['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'], join(' ', $list));
$list = ['tough', 'though', 'trough', 'through', 'thorough'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', ['' => 0, 'h' => ['h', ['o' => [['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'], join(' ', $list));
$list = ['tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', 'i', 't', ['' => 0, 'a' => ['a', 'n', ['a' => ['a', 't', 'e'], 'i' => ['i', ['a' => ['a'], 't' => ['t', 'e']]], 'o' => ['o']]], 'i' => ['i', ['' => 0, 'a' => [['e' => ['e'], 'a' => ['a']], 'n']]], 't' => [['' => 0, 't' => ['t', 'i']], 'e']]], join(' ', $list));
// Same check, but the words go through add() (the lexer) instead of being
// pre-split and passed to insert().
$ra = new Regexp_Assemble();
$ra->add('dasin');
$ra->add('dosin');
$ra->add('dastin');
$ra->add('dostin');
$ra->_reduce();
is_deeply($ra->__path, ['d', ['a' => ['a'], 'o' => ['o']], 's', ['' => 0, 't' => ['t']], 'i', 'n'], 'dasin/dosin/dastin/dosting');
/*
is( $_, $fixed, '$_ has not been altered' );
*/
// Marker printed once the script has run to the end.
echo "===OK===\n";
Exemplo n.º 4
0
<?php

require_once "Regexp_Assemble.php";
// Word groups that are assembled over and over below. Every group gets its
// own fresh assembler.
$wordGroups = [
    ['神岸あかり', '赤座あかり', '黒座あかり'],
    ['スティーブ・ジョブズ', 'スティーブ・ウォズアニック'],
    ['お兄ちゃま', 'あにぃ', 'お兄様', 'おにいたま', '兄上様', 'にいさま', 'アニキ', '兄くん', '兄君さま', '兄チャマ', '兄や'],
];

// Repeat the whole assembly 10000 times. The generated pattern is stored in
// $str and simply overwritten by the next group.
for ($i = 0; $i < 10000; $i++) {
    foreach ($wordGroups as $words) {
        $reg = new Regexp_Assemble();
        foreach ($words as $word) {
            $reg->add($word);
        }
        $str = $reg->re();
    }
}
/**
 * Asserts, via like(), that each string in $test matches the anchored
 * assembly of $patt — once with a default assembler and once with an
 * assembler that has lookahead(1) switched on.
 *
 * @param mixed $tag  label interpolated into every assertion message
 * @param mixed $patt value handed to add() on both assemblers
 * @param mixed $test collection of strings to check (iterated with foreach)
 */
function match_list($tag, $patt, $test)
{
    $plain = new Regexp_Assemble();
    $plain->add($patt);

    $ahead = new Regexp_Assemble();
    $ahead->lookahead(1)->add($patt);

    foreach ($test as $subject) {
        // Interpolating an assembler converts it to its string form; this is
        // done afresh for every subject, exactly once per assertion.
        like($subject, "/^{$plain}\$/", "re {$tag}: {$subject}");
        like($subject, "/^{$ahead}\$/", "rela {$tag}: {$subject}");
    }
}
Exemplo n.º 6
0
         continue;
     }
     $list[] = $_;
 }
 // Nothing usable was submitted: fill the template variables with a
 // placeholder word list and empty results, then render the template.
 // NOTE(review): nothing in the visible code stops execution after the
 // include, so control falls through to the assembly below with an empty
 // $list — confirm whether an exit/return is intended here.
 if (count($list) <= 0) {
     $out['wordlist'] = "内容がないよう\nここに改行区切りで単語を入れよう。\n";
     $out['regexp'] = '';
     $out['code'] = '';
     $out['bad'] = false;
     $out = hh($out);
     include "index.tpl";
 }
 // Build the regular expression: each word is passed through
 // _perl_quotemeta() before being added to the assembler.
 $assemble = new Regexp_Assemble();
 foreach ($list as $_) {
     $assemble->add(_perl_quotemeta($_));
 }
 $out['regexp'] = $assemble->as_string();
 // Run a match test just to be safe: every input word must be matched in
 // full by the generated expression, otherwise $bad is set.
 $bad = false;
 foreach ($list as $_) {
     if (!preg_match('/^' . $out['regexp'] . '$/u', $_)) {
         $bad = true;
     }
 }
 // Log the input words, the generated expression and the verdict. $bad is a
 // boolean, so it is interpolated as "1" when true and as "" when false.
 mylog("入力単語:" . join("\t", $list) . "\t出力正規表現:" . $out['regexp'] . "\t判定:{$bad}\r\n");
 // Start composing the sample PHP source stored in $out['code'].
 $out['code'] = 'require_once(\'Regexp_Assemble.php\');  //正規表現自動生成モジュール' . "\n";
 $out['code'] .= "\n";
 $out['code'] .= "\n";
 $out['code'] .= '$assemble = new Regexp_Assemble();      //オブジェクトを作って・・・' . "\n";
 $out['code'] .= "\n";
        // Make $ptr printable for the assertion message: a backslash becomes
        // two backslashes and a newline becomes the two characters `\n`.
        // The earlier preg_replace() calls wrote the replacement as a
        // '/.../'-delimited string, so literal slashes ended up in the text.
        // Backslashes are handled first so the one introduced for a newline
        // is not doubled. As before, every occurrence is replaced.
        //            $ptr =~ s/\\/\\\\/;
        $ptr = str_replace('\\', '\\\\', $ptr);
        //            $ptr =~ s/\n/\\n/;
        $ptr = str_replace("\n", '\\n', $ptr);
        //            my $bug_success = ($s =~ /\n/) ? 0 : 1;
        $bug_success = preg_match("/\n/", $s) ? 0 : 1;
        $bug_fail = 1 - $bug_success;
        ////保留
        //            is( preg_match("/$re/" , $s ) ? $bug_success : $bug_fail, $ok,
        //                "Folded meta pairs behave as list for \\$meta ($ptr,ok=$ok/$bug_success/$bug_fail)"
        //	            );
        is(preg_match("/{$re_fold}/", $s) ? 1 : 0, $ok, "Unfolded meta pairs behave as list for \\{$meta} ({$ptr},ok={$ok})");
    }
}
// Assembler created with the 'unroll_plus' option. Per the expected strings,
// a pattern such as `a+b` is assembled as `aa*b`, which lets it share its
// leading atom with `ac`.
// Each expected string reflects only the patterns added since the previous
// as_string() call.
$u = new Regexp_Assemble(['unroll_plus' => 1]);
$u->add("a+b", 'ac');
$str = $u->as_string();
is($str, 'a(?:a*b|c)', 'unroll plus a+b ac');
// \L prefix: the expected output is the lower-cased form.
$u->add("\\LA+B", "ac");
$str = $u->as_string();
is($str, 'a(?:a*b|c)', 'unroll plus \\LA+B ac');
// \U prefix with a non-greedy plus: the expected output is upper-cased and
// keeps the non-greedy quantifier (`A*?`).
$u->add("\\Ua+?b", "AC");
$str = $u->as_string();
is($str, 'A(?:A*?B|C)', 'unroll plus \\Ua+?b AC');
// The same unrolling is expected for the \d and \w character-class escapes.
$u->add('\\d+d', '\\de', '\\w+?x', '\\wy');
$str = $u->as_string();
is($str, '(?:\\w(?:\\w*?x|y)|\\d(?:\\d*d|e))', 'unroll plus \\d and \\w');
//
// PHP cannot handle multibyte encodings other than UTF-8.
//    $u->add( '\\xab+f', '\\xabg', '\\xcd+?h', '\\xcdi');
//    $str = $u->as_string();
// NOTE(review): $rt and $context are assigned outside the visible text.
// _unrev_path(): the expected results are the input paths in reverse order;
// applying it twice is expected to give back the original path.
is_deeply($rt->_unrev_path([0, 1], $context), [1, 0], 'path(0,1)');
is_deeply($rt->_unrev_path(['ab', 'cd', 'ef'], $context), ['ef', 'cd', 'ab'], 'path(ab,cd,ef)');
is_deeply($rt->_unrev_path($rt->_unrev_path([['ab', 'cd', 'ef']], $context), $context), [['ab', 'cd', 'ef']], 'path(ab,cd,ef) back');
is_deeply($rt->_unrev_path(['ab', 'cd', 'ef', '\\d+', '\\D', 'ghi', 'jkl', 'mno'], $context), ['mno', 'jkl', 'ghi', '\\D', '\\d+', 'ef', 'cd', 'ab'], 'path(ab cd...)');
is_deeply($rt->_unrev_path($rt->_unrev_path(['ab', 'cd', 'ef', '\\d+', '\\D', 'ghi', 'jkl', 'mno'], $context), $context), ['ab', 'cd', 'ef', '\\d+', '\\D', 'ghi', 'jkl', 'mno'], 'path(ab cd...) back');
// _unrev_node(): every branch of the node is expected to come back reversed
// and re-keyed by its new first token; the '' => 0 entry is expected to stay
// as it is.
is_deeply($rt->_unrev_node([0 => [0, 1]], $context), [1 => [1, 0]], 'node(0)');
is_deeply($rt->_unrev_node([0 => [0, 1], 2 => [2, 0]], $context), [1 => [1, 0], 0 => [0, 2]], 'node(0,2)');
is_deeply($rt->_unrev_node(['' => 0, 'a' => ['a', 'b']], $context), ['' => 0, 'b' => ['b', 'a']], 'node(*,a,b)');
is_deeply($rt->_unrev_node(['' => 0, 'a' => ['a', 'b'], 'b' => ['b', 'c', 'd', 'e', 'f', 'g']], $context), ['' => 0, 'b' => ['b', 'a'], 'g' => ['g', 'f', 'e', 'd', 'c', 'b']], 'node(*a,b2)');
is_deeply($rt->_unrev_path([['x' => ['x', '0'], '' => 0]], $context), [['0' => ['0', 'x'], '' => 0]], 'node(* 0)');
is_deeply($rt->_unrev_node(['ab' => ['ab', 'bc'], 'bc' => ['bc', 'cd', 'de', 'ef', 'fg', 'gh'], 'ef' => ['ef', 'gh', 'ij']], $context), ['bc' => ['bc', 'ab'], 'gh' => ['gh', 'fg', 'ef', 'de', 'cd', 'bc'], 'ij' => ['ij', 'gh', 'ef']], 'node(ab,bc,ef)');
// Nested cases: nodes inside paths inside nodes are expected to be reversed
// at every level.
is_deeply($rt->_unrev_node(['' => 0, 'b' => [[['b' => ['b'], 'b?' => [['' => 0, 'b' => ['b']], 'a']]], ['' => 0, 'c' => ['c']]]], $context), ['' => 0, 'c' => [['' => 0, 'c' => ['c']], [['a' => ['a', ['' => 0, 'b' => ['b']]], 'b' => ['b']]]]], 'node of (?:(?:ab?|b)c?)?');
is_deeply($rt->_unrev_path(['a', 'b', ['c' => ['c', 'd', 'e'], 'f' => ['f', 'g', 'h'], 'i' => ['i', 'j', ['k' => ['k', 'l', 'm'], 'n' => ['n', 'o', 'p']], 'x']]], $context), [['e' => ['e', 'd', 'c'], 'h' => ['h', 'g', 'f'], 'x' => ['x', ['m' => ['m', 'l', 'k'], 'p' => ['p', 'o', 'n']], 'j', 'i']], 'b', 'a'], 'path(node(path))');
//{
// _lookahead() is called on the first element of each reduced path; its
// array keys are compared, as a set, with the expected leading characters of
// the added words.
// NOTE(review): $rt is assigned outside the visible text. Also, the
// array_keys() result is wrapped in a further array literal before it is
// passed to eq_set(); whether that nesting is intended depends on eq_set(),
// which is not visible here — confirm.
$r = new Regexp_Assemble();
$ra = $r->add('refused')->add('fused')->add('used');
$ra->_reduce();
ok(eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['f', 'r']), '_lookahead refused/fused/used');
$ra->reset()->add('refused')->add('reamused')->add('fused')->add('amused')->add('used')->_reduce();
ok(eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['a', 'f', 'r']), '_lookahead reamused/refused/amused/fused/used');
$ra->reset()->add('reran')->add('ran')->_reduce();
ok(eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['r']), '_lookahead reran/ran');
$ra->reset()->add('cruised')->add('bruised')->add('hosed')->add('gazed')->add('used')->_reduce();
ok(eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['b', 'c', 'g', 'h', 'u']), '_lookahead cruised/bruised/hosed/gazed/used');
//}
// _dump() rendering checks. Per the expected strings: a list is written as
// [..], a keyed node as {k=>v ..}, NULL as *, a space as ' ' in quotes and a
// control character (chr(7)) as a \x07 escape.
// NOTE(review): $rt is assigned outside the visible text.
is($rt->_dump([1, 0, NULL]), '[1 0 *]', 'dump undef');
is($rt->_dump([1, 0, ' ']), "[1 0 ' ']", 'dump space');
is($rt->_dump(['a' => ['a', 'b'], 'b' => ['b']]), '{a=>[a b] b=>[b]}', 'dump node');
is($rt->_dump(['a', chr(7), 'b']), '[a \\x07 b]', 'dump pretty');
// On hold
//is( $r->insert(' ')->insert(' ')->dump(),
Exemplo n.º 9
0
  )
  |m
  (?:
    inatur
    |ers
  )
  |
  (?:
    ligh
    |jec
  )
  t
  |e
  (?:
    rwe
    |d
  )
  |athwe
  |press
  |vil
)
ed', 'indent de.*ed');
// Additional cases: assemble whole word lists through add() and compare the
// generated pattern string with the expected one.
$r = new Regexp_Assemble();
is($r->add('unimped', 'unimpeded', 'unimpelled')->as_string(), 'unimpe(?:(?:de)?|lle)d', 'unimped unimpeded unimpelled');
$r = new Regexp_Assemble();
is($r->add('tiao', 'tie', 'tien', 'tin', 'tine', 'tinea', 'tinean', 'tineine', 'tininess', 'tinnet', 'tinniness', 'tinosa', 'tinstone', 'tint', 'tinta', 'tintie', 'tintiness', 'tintist', 'tisane', 'tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie')->as_string(), 'ti(?:n(?:t(?:i(?:ness|st|e)|a)?|e(?:an?|ine)?|n(?:iness|et)|iness|stone|osa)?|t(?:an(?:i(?:te|a)|ate|o)|i(?:[ae]n)?|(?:ti)?e)?|sane|en?|ao)', 'tiao tie ....  titien tittie');
/*
is( $_, $fixed, '$_ has not been altered' );
*/
// Marker printed once the script has run to the end.
echo "===OK===\n";