// Pattern-6 checks against $re, which is built before this section.
// The assertions expect match() to be falsy for 'foo' and 'Candlestick' and
// truthy for 'cat', 'CATFOOD' and 'candle'; after each successful match,
// matched() is expected to return the source pattern that matched.
//
// The Perl original wrapped these in a SKIP block that is not ported:
//    SKIP: {
//           skip( "match()/matched() return undef in this version of perl ($])", 8 ) if $PERL_VERSION_5_005;
ok(!$re->match('foo'), 'not match pattern-6 foo');
$target = 'cat';
ok($re->match($target), "match pattern-6 {$target}");
is($re->matched, '^cat', "match pattern-6 {$target} re");
$target = 'CATFOOD';
ok($re->match($target), "match pattern-6 {$target}");
is($re->matched, '^cat', "match pattern-6 {$target} re");
$target = 'candle';
ok($re->match($target), "match pattern-6 {$target}");
is($re->matched, '^candle$', "match pattern-6 {$target} re");
$target = 'Candlestick';
ok(!$re->match($target), "match pattern-6 {$target}");
//    }  (end of the unported Perl SKIP block)

// p7 checks: an assembler created with 'track' => 1 and four anchored
// patterns, three of which contain capture groups.
$re = new Regexp_Assemble(['track' => 1]);
$re->add('^ab-(\\d+)-(\\d+)')->add('^ac-(\\d+)')->add('^nothing')->add('^ad-((\\d+)-(\\d+))');
// The Perl original skipped the following on perl 5.005 (not ported):
//    SKIP: {
//        skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 15 ) if $PERL_VERSION_5_005;
// Before any match, capture() is expected to be falsy.
ok(!$re->capture(), 'match p7 no prior capture');
// '^nothing' has no groups, so zero captures are expected.
ok($re->match('nothing captured'), 'match p7-1');
is(count($re->capture()), 0, 'match p7-1 no capture');
// '^ac-(\d+)' has one group.
ok($re->match('ac-417 captured'), 'match p7-2');
$capture = $re->capture();
is(count($capture), 1, 'match p7-2 capture');
is($capture[0], 417, "match p7-2 value 0 ok");
// '^ab-(\d+)-(\d+)' has two groups, expected in left-to-right order.
ok($re->match('ab-21-17 captured'), 'match p7-3');
$capture = $re->capture();
is(count($capture), 2, 'match p7-3 capture');
is($capture[0], 21, "match p7-3 value 0 ok");
is($capture[1], 17, "match p7-3 value 1 ok");
 /**
  * Re-insert a set of paths into a scratch assembler and split the resulting
  * path into its leading plain tokens and the remaining tail.
  *
  * PHP port of the Perl routine of the same name.
  *
  * @param array $path List of paths to insert into the scratch assembler.
  * @param array $ctx  Context array; the keys 'depth' (indent width used for
  *                    debug output) and 'debug' (debug flag/level) are read.
  *
  * @return array Two-element array array($common, $tail). $common collects the
  *               leading non-array entries of the re-assembled path. $tail is
  *               the remaining path when more than one entry is left, the
  *               single remaining entry when exactly one is left, and null
  *               when nothing is left.
  */
 function _do_reduce($path, $ctx)
 {
     $indent = str_repeat(' ', $ctx['depth']);
     $debug = $ctx['debug'];
     // Scratch assembler (Perl: Regexp::Assemble->new(chomp=>0)) that shares
     // our debug setting.
     $ra = new Regexp_Assemble(array('chomp' => 0));
     $ra->__debug = $debug;
     if ($debug) {
         echo "# {$indent}| do " . $this->_dump($path) . "\n";
     }
     // When nodes come into the picture, we have to be careful about how we
     // insert the paths into the assembly: paths with nodes first, then
     // closest node to front, then shortest path. Controlling the order in
     // which paths containing nodes get inserted allows a couple of
     // assumptions that simplify the code in _insert_node.
     //
     // Perl original of the comparator:
     //     scalar(grep {ref($_) eq 'HASH'} @$a) <=> scalar(grep {ref($_) eq 'HASH'} @$b)
     //         || _node_offset($b) <=> _node_offset($a)
     //         || scalar @$a <=> scalar @$b
     //
     // PHP 5.4+ binds $this inside closures implicitly, but this code is
     // meant to run on PHP 5.3 as well, so the instance is captured by hand.
     $self = $this;
     $_temp_path = $this->_perl_sort(function ($a, $b) use($self) {
         // 1) Number of array (node) entries in each path, ascending.
         $scalar_count_a = count($self->_perl_grep(function ($_) {
             return is_array($_);
         }, $a));
         $scalar_count_b = count($self->_perl_grep(function ($_) {
             return is_array($_);
         }, $b));
         if ($scalar_count_a > $scalar_count_b) {
             return 1;
         }
         if ($scalar_count_a < $scalar_count_b) {
             return -1;
         }
         // 2) Node offset, compared with the operands swapped ($b against $a).
         $temp_b = $self->_node_offset($b);
         $temp_a = $self->_node_offset($a);
         if ($temp_b > $temp_a) {
             return 1;
         }
         if ($temp_b < $temp_a) {
             return -1;
         }
         // 3) Path length, ascending.
         $temp_a = count($a);
         $temp_b = count($b);
         if ($temp_a > $temp_b) {
             return 1;
         }
         if ($temp_a < $temp_b) {
             return -1;
         }
         // 4) Tie-breaker added by the port: judging from observed behaviour
         //    the Perl result looks lexicographic, so compare the (JSON
         //    encoded, when array) operands as strings instead of returning 0.
         return strcmp(is_array($a) ? json_encode($a) : $a, is_array($b) ? json_encode($b) : $b);
     }, $path);
     foreach ($_temp_path as $_) {
         $ra->_insertr($_);
     }
     // From here on $path is the re-assembled path (Perl: $ra->_path).
     $path = $ra->__path;
     $common = array();
     // Perl: push @$common, shift @$path while( ref($path->[0]) ne 'HASH' );
     // NOTE(review): the port author was unsure this loop is a faithful
     // translation; the count() guard stops it once $path runs empty.
     while (count($path) > 0 && !is_array($path[0])) {
         $_temp_node = array_shift($path);
         if (is_array($_temp_node)) {
             // Defensive: not expected to trigger while $path is a zero-based
             // list, because the loop condition already rejected arrays.
             $common = array_merge($common, $_temp_node);
         } else {
             $common[] = $_temp_node;
         }
     }
     // Perl: my $tail = scalar( @$path ) > 1 ? [@$path] : $path->[0];
     // The loop above may consume every entry; guard the index so an empty
     // $path yields null without raising an undefined-offset notice/warning.
     if (count($path) > 1) {
         $tail = $path;
     } else {
         $tail = isset($path[0]) ? $path[0] : null;
     }
     if ($debug) {
         echo "# {$indent}| _do_reduce common=" . $this->_dump($common) . " tail=" . $this->_dump($tail) . "\n";
     }
     return array($common, $tail);
 }
/**
 * Insert the five entries of $path into a fresh assembler in every possible
 * order and assert that the resulting internal path always equals $target.
 *
 * @param mixed $target Expected value of the assembler's __path after all
 *                      five inserts.
 * @param array $path   Five token lists, indexed 0..4; each is passed to
 *                      insert() and joined into the test description.
 */
function permute($target, $path)
{
    foreach (range(0, 4) as $x1) {
        foreach (range(0, 4) as $x2) {
            if ($x1 == $x2) {
                continue;
            }
            foreach (range(0, 4) as $x3) {
                // Skip indices that are already used by an outer loop.
                if (in_array($x3, array($x1, $x2))) {
                    continue;
                }
                foreach (range(0, 4) as $x4) {
                    if (in_array($x4, array($x1, $x2, $x3))) {
                        continue;
                    }
                    foreach (range(0, 4) as $x5) {
                        if (in_array($x5, array($x1, $x2, $x3, $x4))) {
                            continue;
                        }
                        $order = array($x1, $x2, $x3, $x4, $x5);
                        $ra = new Regexp_Assemble();
                        // Chain the inserts on each call's return value.
                        $cursor = $ra;
                        foreach ($order as $idx) {
                            $cursor = $cursor->insert($path[$idx]);
                        }
                        // Read the path before building the description,
                        // which calls dump()/_dump() on the assembler.
                        $actual = $ra->__path;
                        $labels = array();
                        foreach ($order as $idx) {
                            $labels[] = join('', $path[$idx]);
                        }
                        $message = 'join: /' . join('/ /', $labels) . '/\\n';
                        $message .= $ra->dump();
                        $message .= ' versus ' . $ra->_dump($target) . "\n";
                        is_deeply($actual, $target, $message);
                    }
                }
            }
        }
    }
}
// _reduce() structure checks. Each case splits the words into single
// characters with str_split(), inserts them, calls _reduce() and compares the
// internal __path with a literal expected structure. The test description is
// the space-joined word list.

// Case for the $list assigned before this section.
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', ['' => 0, 'h' => ['h', ['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'], join(' ', $list));
// Five words that start with 't' and end with 'ough'.
$list = ['tough', 'though', 'trough', 'through', 'thorough'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', ['' => 0, 'h' => ['h', ['o' => [['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'], join(' ', $list));
// Ten words that share the prefix 'tit'.
$list = ['tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', 'i', 't', ['' => 0, 'a' => ['a', 'n', ['a' => ['a', 't', 'e'], 'i' => ['i', ['a' => ['a'], 't' => ['t', 'e']]], 'o' => ['o']]], 'i' => ['i', ['' => 0, 'a' => [['e' => ['e'], 'a' => ['a']], 'n']]], 't' => [['' => 0, 't' => ['t', 'i']], 'e']]], join(' ', $list));
// Same kind of check, but the words are fed through add() instead of
// insert(str_split(...)).
$ra = new Regexp_Assemble();
$ra->add('dasin');
$ra->add('dosin');
$ra->add('dastin');
$ra->add('dostin');
$ra->_reduce();
is_deeply($ra->__path, ['d', ['a' => ['a'], 'o' => ['o']], 's', ['' => 0, 't' => ['t']], 'i', 'n'], 'dasin/dosin/dastin/dosting');
/*
Final check from the Perl original, not ported:
is( $_, $fixed, '$_ has not been altered' );
*/
// Marker printed once the script has run to the end.
echo "===OK===\n";
<?php

require_once "Regexp_Assemble.php";
// Benchmark: 10000 rounds; each round assembles three independent word
// groups in a fresh Regexp_Assemble and renders each one with re().
$word_groups = array(
    array('神岸あかり', '赤座あかり', '黒座あかり'),
    array('スティーブ・ジョブズ', 'スティーブ・ウォズアニック'),
    array(
        'お兄ちゃま',
        'あにぃ',
        'お兄様',
        'おにいたま',
        '兄上様',
        'にいさま',
        'アニキ',
        '兄くん',
        '兄君さま',
        '兄チャマ',
        '兄や',
    ),
);
for ($i = 0; $i < 10000; ++$i) {
    foreach ($word_groups as $words) {
        $reg = new Regexp_Assemble();
        foreach ($words as $word) {
            $reg->add($word);
        }
        // The result is only stored, never inspected.
        $str = $reg->re();
    }
}
/**
 * Assert that every string in $test is matched, anchored at both ends, by the
 * pattern $patt assembled with and without lookahead enabled.
 *
 * @param string $tag  Label included in each test description.
 * @param mixed  $patt Pattern handed to add() on both assemblers.
 * @param array  $test Strings that are expected to match.
 */
function match_list($tag, $patt, $test)
{
    // Plain assembler.
    $plain = new Regexp_Assemble();
    $plain->add($patt);

    // Same pattern with lookahead(1) switched on.
    $ahead = new Regexp_Assemble();
    $ahead->lookahead(1)->add($patt);

    foreach ($test as $candidate) {
        // The assemblers are converted to strings on every iteration, just
        // before each like() call.
        $plainRegex = '/^' . $plain . '$/';
        like($candidate, $plainRegex, 're ' . $tag . ': ' . $candidate);

        $aheadRegex = '/^' . $ahead . '$/';
        like($candidate, $aheadRegex, 'rela ' . $tag . ': ' . $candidate);
    }
}
Example #7
0
     $_ = rtrim($_);
     if ($_ === '') {
         continue;
     }
     $list[] = $_;
 }
 // Nothing to assemble: fill the output with placeholder text and render the
 // template.
 // NOTE(review): execution continues past this block after the include; if the
 // page is meant to stop here, an exit/return appears to be missing - confirm
 // against the surrounding code, which is not visible from this section.
 if (count($list) <= 0) {
     $out['wordlist'] = "内容がないよう\nここに改行区切りで単語を入れよう。\n";
     $out['regexp'] = '';
     $out['code'] = '';
     $out['bad'] = false;
     $out = hh($out);
     include "index.tpl";
 }
 // Build the regular expression: every word is quoted with _perl_quotemeta()
 // before it is added.
 $assemble = new Regexp_Assemble();
 foreach ($list as $_) {
     $assemble->add(_perl_quotemeta($_));
 }
 $out['regexp'] = $assemble->as_string();
 // Sanity check, just in case: every input word must match the generated
 // expression when anchored at both ends (UTF-8 mode).
 $bad = false;
 foreach ($list as $_) {
     if (!preg_match('/^' . $out['regexp'] . '$/u', $_)) {
         $bad = true;
     }
 }
 // Log the input words, the generated expression and the check result.
 // $bad is interpolated as a string, so false is logged as an empty string.
 mylog("入力単語:" . join("\t", $list) . "\t出力正規表現:" . $out['regexp'] . "\t判定:{$bad}\r\n");
 // Start of the sample code snippet stored in $out['code'].
 $out['code'] = 'require_once(\'Regexp_Assemble.php\');  //正規表現自動生成モジュール' . "\n";
 $out['code'] .= "\n";
 $out['code'] .= "\n";
        $ptr = $s;
        //            $ptr =~ s/\\/\\\\/;
        $ptr = preg_replace('/\\\\/', '/\\\\\\\\/', $ptr);
        //            $ptr =~ s/\n/\\n/;
        $ptr = preg_replace('/\\n/', '/\\\\n/', $ptr);
        //            my $bug_success = ($s =~ /\n/) ? 0 : 1;
        $bug_success = preg_match("/\n/", $s) ? 0 : 1;
        $bug_fail = 1 - $bug_success;
        ////保留
        //            is( preg_match("/$re/" , $s ) ? $bug_success : $bug_fail, $ok,
        //                "Folded meta pairs behave as list for \\$meta ($ptr,ok=$ok/$bug_success/$bug_fail)"
        //	            );
        is(preg_match("/{$re_fold}/", $s) ? 1 : 0, $ok, "Unfolded meta pairs behave as list for \\{$meta} ({$ptr},ok={$ok})");
    }
}
// unroll_plus checks. The same assembler $u is reused for every case.
// Each expected string contains only the patterns added since the previous
// as_string() call - presumably as_string() resets the assembler; confirm
// against Regexp_Assemble.
$u = new Regexp_Assemble(['unroll_plus' => 1]);
$u->add("a+b", 'ac');
$str = $u->as_string();
is($str, 'a(?:a*b|c)', 'unroll plus a+b ac');
// \L prefix: the expected output is lower case.
$u->add("\\LA+B", "ac");
$str = $u->as_string();
is($str, 'a(?:a*b|c)', 'unroll plus \\LA+B ac');
// \U prefix with a non-greedy plus: the expected output is upper case.
$u->add("\\Ua+?b", "AC");
$str = $u->as_string();
is($str, 'A(?:A*?B|C)', 'unroll plus \\Ua+?b AC');
// Character-class escapes \d and \w, greedy and non-greedy.
$u->add('\\d+d', '\\de', '\\w+?x', '\\wy');
$str = $u->as_string();
is($str, '(?:\\w(?:\\w*?x|y)|\\d(?:\\d*d|e))', 'unroll plus \\d and \\w');
//
// PHP cannot handle multibyte encodings other than UTF-8, so the following
// case from the Perl original is left disabled:
//    $u->add( '\\xab+f', '\\xabg', '\\xcd+?h', '\\xcdi');
/**
 * Assert that _lex() returns its input as a single token.
 *
 * @param string $a Pattern expected to lex into exactly one token.
 * @param mixed  $b Source line identifier, used only in the test description.
 */
function lcmp($a, $b)
{
    $assembler = new Regexp_Assemble();
    $tokens = $assembler->_lex($a);
    $expected = array($a);
    $description = '_lex ' . $a . ' source line ' . $b;
    is_deeply($tokens, $expected, $description);
}
  )
  |m
  (?:
    inatur
    |ers
  )
  |
  (?:
    ligh
    |jec
  )
  t
  |e
  (?:
    rwe
    |d
  )
  |athwe
  |press
  |vil
)
ed', 'indent de.*ed');
// Additional cases: the assembled string for whole word lists is compared
// with a literal expected pattern.
$r = new Regexp_Assemble();
is($r->add('unimped', 'unimpeded', 'unimpelled')->as_string(), 'unimpe(?:(?:de)?|lle)d', 'unimped unimpeded unimpelled');
// 29 words that all start with 'ti'.
$r = new Regexp_Assemble();
is($r->add('tiao', 'tie', 'tien', 'tin', 'tine', 'tinea', 'tinean', 'tineine', 'tininess', 'tinnet', 'tinniness', 'tinosa', 'tinstone', 'tint', 'tinta', 'tintie', 'tintiness', 'tintist', 'tisane', 'tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie')->as_string(), 'ti(?:n(?:t(?:i(?:ness|st|e)|a)?|e(?:an?|ine)?|n(?:iness|et)|iness|stone|osa)?|t(?:an(?:i(?:te|a)|ate|o)|i(?:[ae]n)?|(?:ti)?e)?|sane|en?|ao)', 'tiao tie ....  titien tittie');
/*
Final check from the Perl original, not ported:
is( $_, $fixed, '$_ has not been altered' );
*/
// Marker printed once the script has run to the end.
echo "===OK===\n";