// SKIP: { // skip( "match()/matched() return undef in this version of perl ($])", 8 ) if $PERL_VERSION_5_005; ok(!$re->match('foo'), 'not match pattern-6 foo'); $target = 'cat'; ok($re->match($target), "match pattern-6 {$target}"); is($re->matched, '^cat', "match pattern-6 {$target} re"); $target = 'CATFOOD'; ok($re->match($target), "match pattern-6 {$target}"); is($re->matched, '^cat', "match pattern-6 {$target} re"); $target = 'candle'; ok($re->match($target), "match pattern-6 {$target}"); is($re->matched, '^candle$', "match pattern-6 {$target} re"); $target = 'Candlestick'; ok(!$re->match($target), "match pattern-6 {$target}"); // } $re = new Regexp_Assemble(['track' => 1]); $re->add('^ab-(\\d+)-(\\d+)')->add('^ac-(\\d+)')->add('^nothing')->add('^ad-((\\d+)-(\\d+))'); // SKIP: { // skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 15 ) if $PERL_VERSION_5_005; ok(!$re->capture(), 'match p7 no prior capture'); ok($re->match('nothing captured'), 'match p7-1'); is(count($re->capture()), 0, 'match p7-1 no capture'); ok($re->match('ac-417 captured'), 'match p7-2'); $capture = $re->capture(); is(count($capture), 1, 'match p7-2 capture'); is($capture[0], 417, "match p7-2 value 0 ok"); ok($re->match('ab-21-17 captured'), 'match p7-3'); $capture = $re->capture(); is(count($capture), 2, 'match p7-3 capture'); is($capture[0], 21, "match p7-3 value 0 ok"); is($capture[1], 17, "match p7-3 value 1 ok");
/**
 * Re-assemble a set of paths in a fresh Regexp_Assemble instance and split
 * the resulting path into its leading run of plain (non-array) elements and
 * whatever follows.
 *
 * PHP port of the Perl routine of the same name in Regexp::Assemble.
 *
 * @param array $path List of paths; each one is inserted into the temporary
 *                    assembly with _insertr().
 * @param array $ctx  Reduction context. Only the 'depth' entry (used for
 *                    debug indentation) and the 'debug' entry are read here.
 *
 * @return array Two-element array array($common, $tail):
 *               - $common: the leading non-array elements of the assembled
 *                 path, in order;
 *               - $tail: the remaining elements when more than one is left,
 *                 the single remaining element when exactly one is left,
 *                 or null when nothing is left.
 */
function _do_reduce($path, $ctx)
{
    $indent = str_repeat(' ', $ctx['depth']);
    $debug = $ctx['debug'];

    // Perl: my $ra = Regexp::Assemble->new(chomp=>0); $ra->debug($debug);
    $ra = new Regexp_Assemble(array('chomp' => 0));
    $ra->__debug = $debug;

    if ($debug) {
        echo "# {$indent}| do " . $this->_dump($path) . "\n";
    }

    // Insertion order matters. The original Perl sorts the paths with
    //   (number of HASH elements in $a) <=> (number of HASH elements in $b)
    //     || _node_offset($b) <=> _node_offset($a)
    //     || scalar @$a <=> scalar @$b
    // so that _insert_node can rely on a predictable order. In this port a
    // node is a PHP array, hence is_array() stands in for ref() eq 'HASH'.
    //
    // From PHP 5.4 onwards $this is bound inside closures automatically, but
    // this code is meant to run on PHP 5.3 as well, so the instance is
    // captured in a local variable and passed in explicitly.
    $self = $this;
    $sorted = $this->_perl_sort(function ($a, $b) use ($self) {
        // 1) Ascending by the number of array (node) elements in the path.
        $nodes_a = count($self->_perl_grep(function ($_) {
            return is_array($_);
        }, $a));
        $nodes_b = count($self->_perl_grep(function ($_) {
            return is_array($_);
        }, $b));
        if ($nodes_a > $nodes_b) {
            return 1;
        }
        if ($nodes_a < $nodes_b) {
            return -1;
        }

        // 2) Descending by _node_offset() (note the swapped operands).
        $offset_b = $self->_node_offset($b);
        $offset_a = $self->_node_offset($a);
        if ($offset_b > $offset_a) {
            return 1;
        }
        if ($offset_b < $offset_a) {
            return -1;
        }

        // 3) Ascending by path length.
        $len_a = count($a);
        $len_b = count($b);
        if ($len_a > $len_b) {
            return 1;
        }
        if ($len_a < $len_b) {
            return -1;
        }

        // 4) Remaining ties: observed behaviour of the Perl version looks
        //    like dictionary order, so compare a string form of each path.
        return strcmp(
            is_array($a) ? json_encode($a) : $a,
            is_array($b) ? json_encode($b) : $b
        );
    }, $path);

    foreach ($sorted as $one_path) {
        $ra->_insertr($one_path);
    }

    $path = $ra->__path;

    // Perl: push @$common, shift @$path while( ref($path->[0]) ne 'HASH' );
    // The count() guard stops the loop once the path is exhausted. The
    // shifted element is never an array (the loop condition guarantees it),
    // so it is always appended as a single element.
    $common = array();
    while (count($path) > 0 && !is_array($path[0])) {
        $common[] = array_shift($path);
    }

    // Perl: my $tail = scalar( @$path ) > 1 ? [@$path] : $path->[0];
    // When the loop above consumed everything there is no element 0; yield
    // null explicitly instead of reading an undefined offset.
    if (count($path) > 1) {
        $tail = $path;
    } else {
        $tail = isset($path[0]) ? $path[0] : null;
    }

    if ($debug) {
        echo "# {$indent}| _do_reduce common=" . $this->_dump($common) . " tail=" . $this->_dump($tail) . "\n";
    }

    return array($common, $tail);
}
/**
 * Build every ordering of the given keys.
 *
 * Orderings are produced in the iteration order of $remaining at each
 * position, so for keys 0..n-1 the result is in lexicographic order. The
 * number of orderings grows factorially with the number of keys.
 *
 * @param array $remaining Keys not yet placed.
 * @param array $prefix    Keys already placed (used by the recursion).
 *
 * @return array List of orderings, each a list of keys. An empty key list
 *               yields a single empty ordering.
 */
function _permute_orders($remaining, $prefix = array())
{
    if (count($remaining) === 0) {
        return array($prefix);
    }

    $orders = array();
    foreach ($remaining as $pos => $key) {
        $rest = $remaining;
        unset($rest[$pos]);
        $orders = array_merge(
            $orders,
            _permute_orders($rest, array_merge($prefix, array($key)))
        );
    }

    return $orders;
}

/**
 * Check that inserting the given paths in every possible order produces the
 * same assembled path.
 *
 * For each ordering a fresh Regexp_Assemble is created, the paths are
 * inserted in that order, and the resulting __path is compared against
 * $target with is_deeply(). Any number of paths is accepted; with five paths
 * the orderings are visited in the same sequence as five nested 0..4 loops.
 *
 * @param mixed $target Expected value of __path after all inserts.
 * @param array $path   List of paths; each entry is an array of tokens that
 *                      is passed to insert() and joined for the test label.
 *
 * @return void
 */
function permute($target, $path)
{
    foreach (_permute_orders(array_keys($path)) as $order) {
        $ra = new Regexp_Assemble();
        $labels = array();
        foreach ($order as $key) {
            $ra->insert($path[$key]);
            $labels[] = join('', $path[$key]);
        }

        // The label used to contain a single-quoted '\n', which printed a
        // literal backslash-n; a real line break is intended before the dump.
        is_deeply(
            $ra->__path,
            $target,
            'join: /' . join('/ /', $labels) . "/\n" . $ra->dump() . ' versus ' . $ra->_dump($target) . "\n"
        );
    }
}
// Reduce the word list prepared by the preceding statements and compare the
// resulting path structure.
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', ['' => 0, 'h' => ['h', ['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'], join(' ', $list));

// Same check for a list of five words.
$list = ['tough', 'though', 'trough', 'through', 'thorough'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', ['' => 0, 'h' => ['h', ['o' => [['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'], join(' ', $list));

// A larger list sharing the "tit" prefix.
$list = ['tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply($ra->__path, ['t', 'i', 't', ['' => 0, 'a' => ['a', 'n', ['a' => ['a', 't', 'e'], 'i' => ['i', ['a' => ['a'], 't' => ['t', 'e']]], 'o' => ['o']]], 'i' => ['i', ['' => 0, 'a' => [['e' => ['e'], 'a' => ['a']], 'n']]], 't' => [['' => 0, 't' => ['t', 'i']], 'e']]], join(' ', $list));

// Words fed through add() instead of insert(). The label now names the four
// words actually added (it previously said "dosting").
$ra = new Regexp_Assemble();
$ra->add('dasin');
$ra->add('dosin');
$ra->add('dastin');
$ra->add('dostin');
$ra->_reduce();
is_deeply($ra->__path, ['d', ['a' => ['a'], 'o' => ['o']], 's', ['' => 0, 't' => ['t']], 'i', 'n'], 'dasin/dosin/dastin/dostin');

/* is( $_, $fixed, '$_ has not been altered' ); */
echo "===OK===\n";
<?php
// Repeatedly assembles three fixed word lists into regular expressions.
// The value returned by re() is stored in $str and not used any further
// here — presumably a timing / stress run; confirm with the author.
require_once "Regexp_Assemble.php";

// Each entry is one word list that is fed, in order, to a fresh assembler.
$wordSets = array(
    array('神岸あかり', '赤座あかり', '黒座あかり'),
    array('スティーブ・ジョブズ', 'スティーブ・ウォズアニック'),
    array(
        'お兄ちゃま',
        'あにぃ',
        'お兄様',
        'おにいたま',
        '兄上様',
        'にいさま',
        'アニキ',
        '兄くん',
        '兄君さま',
        '兄チャマ',
        '兄や',
    ),
);

for ($i = 0; $i < 10000; ++$i) {
    foreach ($wordSets as $words) {
        $reg = new Regexp_Assemble();
        foreach ($words as $word) {
            $reg->add($word);
        }
        $str = $reg->re();
    }
}
/**
 * Assert that every string in $test is matched, anchored at both ends, by
 * the pattern assembled from $patt — once with a default assembler and once
 * with an assembler that has lookahead(1) set.
 *
 * @param string $tag  Label included in each test description.
 * @param mixed  $patt Pattern(s) handed to add() unchanged.
 * @param array  $test Strings expected to match.
 *
 * @return void
 */
function match_list($tag, $patt, $test)
{
    $plain = new Regexp_Assemble();
    $plain->add($patt);

    $withLookahead = new Regexp_Assemble();
    $withLookahead->lookahead(1)->add($patt);

    foreach ($test as $subject) {
        // The assemblers are converted to strings by concatenation on every
        // iteration, exactly as the assertions consume them.
        $plainPattern = '/^' . $plain . '$/';
        like($subject, $plainPattern, "re {$tag}: {$subject}");

        $lookaheadPattern = '/^' . $withLookahead . '$/';
        like($subject, $lookaheadPattern, "rela {$tag}: {$subject}");
    }
}
$_ = rtrim($_); if ($_ === '') { continue; } $list[] = $_; } if (count($list) <= 0) { $out['wordlist'] = "内容がないよう\nここに改行区切りで単語を入れよう。\n"; $out['regexp'] = ''; $out['code'] = ''; $out['bad'] = false; $out = hh($out); include "index.tpl"; } //正規表現生成 $assemble = new Regexp_Assemble(); foreach ($list as $_) { $assemble->add(_perl_quotemeta($_)); } $out['regexp'] = $assemble->as_string(); //念のためマッチテストする $bad = false; foreach ($list as $_) { if (!preg_match('/^' . $out['regexp'] . '$/u', $_)) { $bad = true; } } mylog("入力単語:" . join("\t", $list) . "\t出力正規表現:" . $out['regexp'] . "\t判定:{$bad}\r\n"); $out['code'] = 'require_once(\'Regexp_Assemble.php\'); //正規表現自動生成モジュール' . "\n"; $out['code'] .= "\n"; $out['code'] .= "\n";
$ptr = $s; // $ptr =~ s/\\/\\\\/; $ptr = preg_replace('/\\\\/', '/\\\\\\\\/', $ptr); // $ptr =~ s/\n/\\n/; $ptr = preg_replace('/\\n/', '/\\\\n/', $ptr); // my $bug_success = ($s =~ /\n/) ? 0 : 1; $bug_success = preg_match("/\n/", $s) ? 0 : 1; $bug_fail = 1 - $bug_success; ////保留 // is( preg_match("/$re/" , $s ) ? $bug_success : $bug_fail, $ok, // "Folded meta pairs behave as list for \\$meta ($ptr,ok=$ok/$bug_success/$bug_fail)" // ); is(preg_match("/{$re_fold}/", $s) ? 1 : 0, $ok, "Unfolded meta pairs behave as list for \\{$meta} ({$ptr},ok={$ok})"); } } $u = new Regexp_Assemble(['unroll_plus' => 1]); $u->add("a+b", 'ac'); $str = $u->as_string(); is($str, 'a(?:a*b|c)', 'unroll plus a+b ac'); $u->add("\\LA+B", "ac"); $str = $u->as_string(); is($str, 'a(?:a*b|c)', 'unroll plus \\LA+B ac'); $u->add("\\Ua+?b", "AC"); $str = $u->as_string(); is($str, 'A(?:A*?B|C)', 'unroll plus \\Ua+?b AC'); $u->add('\\d+d', '\\de', '\\w+?x', '\\wy'); $str = $u->as_string(); is($str, '(?:\\w(?:\\w*?x|y)|\\d(?:\\d*d|e))', 'unroll plus \\d and \\w'); // //PHPではUTF-8以外のマルチバイトは扱えません。 // $u->add( '\\xab+f', '\\xabg', '\\xcd+?h', '\\xcdi');
/**
 * Assert that _lex() returns its input as a single token.
 *
 * @param string $a Pattern text passed to _lex(); the expected result is a
 *                  one-element array containing this same value.
 * @param mixed  $b Value shown as the "source line" in the test description.
 *
 * @return void
 */
function lcmp($a, $b)
{
    $lexer = new Regexp_Assemble();
    $tokens = $lexer->_lex($a);
    $expected = array($a);
    $description = "_lex {$a} source line {$b}";

    is_deeply($tokens, $expected, $description);
}
) |m (?: inatur |ers ) | (?: ligh |jec ) t |e (?: rwe |d ) |athwe |press |vil ) ed', 'indent de.*ed'); //追加 $r = new Regexp_Assemble(); is($r->add('unimped', 'unimpeded', 'unimpelled')->as_string(), 'unimpe(?:(?:de)?|lle)d', 'unimped unimpeded unimpelled'); $r = new Regexp_Assemble(); is($r->add('tiao', 'tie', 'tien', 'tin', 'tine', 'tinea', 'tinean', 'tineine', 'tininess', 'tinnet', 'tinniness', 'tinosa', 'tinstone', 'tint', 'tinta', 'tintie', 'tintiness', 'tintist', 'tisane', 'tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie')->as_string(), 'ti(?:n(?:t(?:i(?:ness|st|e)|a)?|e(?:an?|ine)?|n(?:iness|et)|iness|stone|osa)?|t(?:an(?:i(?:te|a)|ate|o)|i(?:[ae]n)?|(?:ti)?e)?|sane|en?|ao)', 'tiao tie .... titien tittie'); /* is( $_, $fixed, '$_ has not been altered' ); */ echo "===OK===\n";