//}
//{
// Custom lexer: a parenthesised group (optionally containing one nested
// group) is matched as a single token; otherwise one character per token.
$r = new Regexp_Assemble();
$r->__lex = '\\([^(]*(?:\\([^)]*\\))?[^)]*\\)|.';

// Input pattern => token path expected in $r->__path after add().
$parenLexerCases = [
    'ab(cd)ef' => ['a', 'b', '(cd)', 'e', 'f'],
    'ab(cd(ef)gh)ij' => ['a', 'b', '(cd(ef)gh)', 'i', 'j'],
    'ab((ef)gh)ij' => ['a', 'b', '((ef)gh)', 'i', 'j'],
    'ab(cd(ef))ij' => ['a', 'b', '(cd(ef))', 'i', 'j'],
    'ab((ef))ij' => ['a', 'b', '((ef))', 'i', 'j'],
];
foreach ($parenLexerCases as $lexInput => $lexPath) {
    $r->reset()->add($lexInput);
    is_deeply($r->__path, $lexPath, "{$lexInput} (with parenthetical lexer)");
}

// Lexer passed through the constructor options instead of the property.
$r = new Regexp_Assemble(['lex' => '\\d']);
is_deeply(
    $r->add('0\\Q0C,+')->__path,
    ['0', '0', 'C', ',', '\\+'],
    '0\\Q0C,+ with \\d lexer'
);

//{
// insert() of a single token list.
$ra = new Regexp_Assemble();
$ra->insert(['d', 'a', 'b']);
is_deeply($ra->__path, ['d', 'a', 'b'], '/dab/');
//}

//{
// Two token lists sharing the first token: the path branches into a node.
$ra = new Regexp_Assemble();
$ra->insert(['0', '1']);
$ra->insert(['0', '2']);
is_deeply($ra->__path, ['0', ['1' => ['1'], '2' => ['2']]], '/01/ /02/');
//}

//{
$ra = new Regexp_Assemble();
$ra->insert(['0']);
$ra->insert(['0', '1']);
// skip( "match()/matched() return undef in this version of perl ($])", 8 ) if $PERL_VERSION_5_005;
ok(!$re->match('foo'), 'not match pattern-6 foo');

// Target string => value expected in $re->matched after a successful match().
$pattern6Hits = [
    'cat' => '^cat',
    'CATFOOD' => '^cat',
    'candle' => '^candle$',
];
foreach ($pattern6Hits as $target => $firedPattern) {
    ok($re->match($target), "match pattern-6 {$target}");
    is($re->matched, $firedPattern, "match pattern-6 {$target} re");
}

$target = 'Candlestick';
ok(!$re->match($target), "match pattern-6 {$target}");
// }

// Tracking assembler: capture() is checked after each successful match().
$re = new Regexp_Assemble(['track' => 1]);
$re->add('^ab-(\\d+)-(\\d+)')
    ->add('^ac-(\\d+)')
    ->add('^nothing')
    ->add('^ad-((\\d+)-(\\d+))');

// SKIP: {
// skip( "/?{...}/ and \\d+ cause a panic in this version of perl ($])", 15 ) if $PERL_VERSION_5_005;
ok(!$re->capture(), 'match p7 no prior capture');

ok($re->match('nothing captured'), 'match p7-1');
is(count($re->capture()), 0, 'match p7-1 no capture');

ok($re->match('ac-417 captured'), 'match p7-2');
$capture = $re->capture();
is(count($capture), 1, 'match p7-2 capture');
is($capture[0], 417, 'match p7-2 value 0 ok');

ok($re->match('ab-21-17 captured'), 'match p7-3');
$capture = $re->capture();
is(count($capture), 2, 'match p7-3 capture');
is($capture[0], 21, 'match p7-3 value 0 ok');
is($capture[1], 17, 'match p7-3 value 1 ok');

ok($re->match('ad-808-245 captured'), 'match p7-4');
// Each case below inserts words one character per token, calls _reduce(),
// and compares the resulting $ra->__path structure with the expected tree.
// The first case uses the $list that was assigned before this point.
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply(
    $ra->__path,
    ['t', ['' => 0, 'h' => ['h', ['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'],
    join(' ', $list)
);

$list = ['tough', 'though', 'trough', 'through', 'thorough'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply(
    $ra->__path,
    ['t', ['' => 0, 'h' => ['h', ['o' => [['' => 0, 'o' => ['o', 'r']]], 'r' => ['r']]], 'r' => ['r']], 'o', 'u', 'g', 'h'],
    join(' ', $list)
);

$list = ['tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie'];
$ra = new Regexp_Assemble();
foreach ($list as $p) {
    $ra->insert(str_split($p));
}
$ra->_reduce();
is_deeply(
    $ra->__path,
    ['t', 'i', 't', ['' => 0, 'a' => ['a', 'n', ['a' => ['a', 't', 'e'], 'i' => ['i', ['a' => ['a'], 't' => ['t', 'e']]], 'o' => ['o']]], 'i' => ['i', ['' => 0, 'a' => [['e' => ['e'], 'a' => ['a']], 'n']]], 't' => [['' => 0, 't' => ['t', 'i']], 'e']]],
    join(' ', $list)
);

// Same check, but feeding whole words through add() instead of insert().
$ra = new Regexp_Assemble();
$ra->add('dasin');
$ra->add('dosin');
$ra->add('dastin');
$ra->add('dostin');
$ra->_reduce();
// Label corrected: the fourth input is 'dostin' (the label used to read 'dosting').
is_deeply(
    $ra->__path,
    ['d', ['a' => ['a'], 'o' => ['o']], 's', ['' => 0, 't' => ['t']], 'i', 'n'],
    'dasin/dosin/dastin/dostin'
);

/*
is( $_, $fixed, '$_ has not been altered' );
*/
echo "===OK===\n";
<?php
// Stress loop: repeatedly builds assemblers from fixed Japanese word groups
// and renders each one with re(). The rendered value is not inspected.
require_once "Regexp_Assemble.php";

// Each inner list is fed, in order, to a fresh Regexp_Assemble instance.
$wordGroups = [
    ['神岸あかり', '赤座あかり', '黒座あかり'],
    ['スティーブ・ジョブズ', 'スティーブ・ウォズアニック'],
    ['お兄ちゃま', 'あにぃ', 'お兄様', 'おにいたま', '兄上様', 'にいさま', 'アニキ', '兄くん', '兄君さま', '兄チャマ', '兄や'],
];

for ($i = 0; $i < 10000; ++$i) {
    foreach ($wordGroups as $words) {
        $reg = new Regexp_Assemble();
        foreach ($words as $word) {
            $reg->add($word);
        }
        $str = $reg->re();
    }
}
/**
 * Assert that every string in $test matches the assembly of $patt,
 * both without and with lookahead(1) enabled.
 *
 * Two assemblers are built from the same $patt. For each test string,
 * like() is called once per assembler with the assembler's string form
 * anchored between '^' and '$'.
 *
 * @param mixed $tag  Label interpolated into each assertion message.
 * @param mixed $patt Value passed unchanged to Regexp_Assemble::add().
 * @param mixed $test Iterable of subject strings to check.
 */
function match_list($tag, $patt, $test)
{
    $plain = new Regexp_Assemble();
    $plain->add($patt);

    $withLookahead = new Regexp_Assemble();
    $withLookahead->lookahead(1)->add($patt);

    foreach ($test as $str) {
        // The assemblers are converted to strings when interpolated.
        like($str, "/^{$plain}\$/", "re {$tag}: {$str}");
        like($str, "/^{$withLookahead}\$/", "rela {$tag}: {$str}");
    }
}
continue;
    }
    $list[] = $_;
}
// The lines above close a loop that starts before this excerpt.

// Empty input: show a hint in the word-list box and render the template.
// NOTE(review): nothing here stops the script after the include, so execution
// continues into the generation code below — confirm that is intended.
if (count($list) < 1) {
    $out['wordlist'] = "内容がないよう\nここに改行区切りで単語を入れよう。\n";
    $out['regexp'] = '';
    $out['code'] = '';
    $out['bad'] = false;
    $out = hh($out);
    include "index.tpl";
}

// Build the regular expression from the quoted words.
$assemble = new Regexp_Assemble();
foreach ($list as $_) {
    $assemble->add(_perl_quotemeta($_));
}
$out['regexp'] = $assemble->as_string();

// Sanity check: every input word must match the generated expression.
$bad = false;
$anchoredRegexp = '/^' . $out['regexp'] . '$/u';
foreach ($list as $_) {
    if (preg_match($anchoredRegexp, $_)) {
        continue;
    }
    $bad = true;
}

$logLine = "入力単語:" . implode("\t", $list) . "\t出力正規表現:" . $out['regexp'] . "\t判定:{$bad}\r\n";
mylog($logLine);

// Start of the sample code shown to the user (it is built up further below).
$out['code'] = 'require_once(\'Regexp_Assemble.php\'); //正規表現自動生成モジュール' . "\n"
    . "\n"
    . "\n"
    . '$assemble = new Regexp_Assemble(); //オブジェクトを作って・・・' . "\n"
    . "\n";
// Make $ptr printable for the assertion label below.
// Perl original: $ptr =~ s/\\/\\\\/;
// The replacement argument is a plain string, not a delimited pattern: the
// earlier '/.../' form inserted literal slashes into the label text.
// (Every occurrence is replaced, as in the earlier PHP code.)
$ptr = preg_replace('/\\\\/', '\\\\\\\\', $ptr);
// Perl original: $ptr =~ s/\n/\\n/;
$ptr = preg_replace('/\\n/', '\\\\n', $ptr);
// Perl original: my $bug_success = ($s =~ /\n/) ? 0 : 1;
$bug_success = preg_match("/\n/", $s) ? 0 : 1;
$bug_fail = 1 - $bug_success;
// On hold (not ported):
// is( preg_match("/$re/" , $s ) ? $bug_success : $bug_fail, $ok,
//     "Folded meta pairs behave as list for \\$meta ($ptr,ok=$ok/$bug_success/$bug_fail)"
// );
is(
    preg_match("/{$re_fold}/", $s) ? 1 : 0,
    $ok,
    "Unfolded meta pairs behave as list for \\{$meta} ({$ptr},ok={$ok})"
);
}
}
// The two braces above close loops that start before this excerpt.

// unroll_plus: as_string() is checked after each add() call.
$u = new Regexp_Assemble(['unroll_plus' => 1]);

$u->add("a+b", 'ac');
$str = $u->as_string();
is($str, 'a(?:a*b|c)', 'unroll plus a+b ac');

$u->add("\\LA+B", "ac");
$str = $u->as_string();
is($str, 'a(?:a*b|c)', 'unroll plus \\LA+B ac');

$u->add("\\Ua+?b", "AC");
$str = $u->as_string();
is($str, 'A(?:A*?B|C)', 'unroll plus \\Ua+?b AC');

$u->add('\\d+d', '\\de', '\\w+?x', '\\wy');
$str = $u->as_string();
is($str, '(?:\\w(?:\\w*?x|y)|\\d(?:\\d*d|e))', 'unroll plus \\d and \\w');

//
// PHP cannot handle multibyte encodings other than UTF-8.
// $u->add( '\\xab+f', '\\xabg', '\\xcd+?h', '\\xcdi');
// $str = $u->as_string();
// _unrev_path(): a flat path comes back with its elements in reverse order.
is_deeply(
    $rt->_unrev_path([0, 1], $context),
    [1, 0],
    'path(0,1)'
);
is_deeply(
    $rt->_unrev_path(['ab', 'cd', 'ef'], $context),
    ['ef', 'cd', 'ab'],
    'path(ab,cd,ef)'
);
// Applying _unrev_path() twice restores the original.
is_deeply(
    $rt->_unrev_path($rt->_unrev_path([['ab', 'cd', 'ef']], $context), $context),
    [['ab', 'cd', 'ef']],
    'path(ab,cd,ef) back'
);
is_deeply(
    $rt->_unrev_path(['ab', 'cd', 'ef', '\\d+', '\\D', 'ghi', 'jkl', 'mno'], $context),
    ['mno', 'jkl', 'ghi', '\\D', '\\d+', 'ef', 'cd', 'ab'],
    'path(ab cd...)'
);
is_deeply(
    $rt->_unrev_path($rt->_unrev_path(['ab', 'cd', 'ef', '\\d+', '\\D', 'ghi', 'jkl', 'mno'], $context), $context),
    ['ab', 'cd', 'ef', '\\d+', '\\D', 'ghi', 'jkl', 'mno'],
    'path(ab cd...) back'
);

// _unrev_node(): each branch is reversed and keyed by its new first element.
is_deeply(
    $rt->_unrev_node([0 => [0, 1]], $context),
    [1 => [1, 0]],
    'node(0)'
);
is_deeply(
    $rt->_unrev_node([0 => [0, 1], 2 => [2, 0]], $context),
    [1 => [1, 0], 0 => [0, 2]],
    'node(0,2)'
);
is_deeply(
    $rt->_unrev_node(['' => 0, 'a' => ['a', 'b']], $context),
    ['' => 0, 'b' => ['b', 'a']],
    'node(*,a,b)'
);
is_deeply(
    $rt->_unrev_node(['' => 0, 'a' => ['a', 'b'], 'b' => ['b', 'c', 'd', 'e', 'f', 'g']], $context),
    ['' => 0, 'b' => ['b', 'a'], 'g' => ['g', 'f', 'e', 'd', 'c', 'b']],
    'node(*a,b2)'
);
is_deeply(
    $rt->_unrev_path([['x' => ['x', '0'], '' => 0]], $context),
    [['0' => ['0', 'x'], '' => 0]],
    'node(* 0)'
);
is_deeply(
    $rt->_unrev_node(['ab' => ['ab', 'bc'], 'bc' => ['bc', 'cd', 'de', 'ef', 'fg', 'gh'], 'ef' => ['ef', 'gh', 'ij']], $context),
    ['bc' => ['bc', 'ab'], 'gh' => ['gh', 'fg', 'ef', 'de', 'cd', 'bc'], 'ij' => ['ij', 'gh', 'ef']],
    'node(ab,bc,ef)'
);
is_deeply(
    $rt->_unrev_node(['' => 0, 'b' => [[['b' => ['b'], 'b?' => [['' => 0, 'b' => ['b']], 'a']]], ['' => 0, 'c' => ['c']]]], $context),
    ['' => 0, 'c' => [['' => 0, 'c' => ['c']], [['a' => ['a', ['' => 0, 'b' => ['b']]], 'b' => ['b']]]]],
    'node of (?:(?:ab?|b)c?)?'
);
is_deeply(
    $rt->_unrev_path(['a', 'b', ['c' => ['c', 'd', 'e'], 'f' => ['f', 'g', 'h'], 'i' => ['i', 'j', ['k' => ['k', 'l', 'm'], 'n' => ['n', 'o', 'p']], 'x']]], $context),
    [['e' => ['e', 'd', 'c'], 'h' => ['h', 'g', 'f'], 'x' => ['x', ['m' => ['m', 'l', 'k'], 'p' => ['p', 'o', 'n']], 'j', 'i']], 'b', 'a'],
    'path(node(path))'
);

//{
// _lookahead() on the first path element after _reduce().
// NOTE(review): array_keys(...) is wrapped in an extra array literal, so
// eq_set() receives a one-element array holding the key list — confirm that
// eq_set() compares this as intended.
$r = new Regexp_Assemble();
$ra = $r->add('refused')->add('fused')->add('used');
$ra->_reduce();
ok(
    eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['f', 'r']),
    '_lookahead refused/fused/used'
);

$ra->reset()->add('refused')->add('reamused')->add('fused')->add('amused')->add('used')->_reduce();
ok(
    eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['a', 'f', 'r']),
    '_lookahead reamused/refused/amused/fused/used'
);

$ra->reset()->add('reran')->add('ran')->_reduce();
ok(
    eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['r']),
    '_lookahead reran/ran'
);

$ra->reset()->add('cruised')->add('bruised')->add('hosed')->add('gazed')->add('used')->_reduce();
ok(
    eq_set([array_keys($rt->_lookahead($ra->__path[0]))], ['b', 'c', 'g', 'h', 'u']),
    '_lookahead cruised/bruised/hosed/gazed/used'
);
//}

// _dump(): textual rendering of paths and nodes.
is($rt->_dump([1, 0, null]), '[1 0 *]', 'dump undef');
is($rt->_dump([1, 0, ' ']), "[1 0 ' ']", 'dump space');
is($rt->_dump(['a' => ['a', 'b'], 'b' => ['b']]), '{a=>[a b] b=>[b]}', 'dump node');
is($rt->_dump(['a', chr(7), 'b']), '[a \\x07 b]', 'dump pretty');

// On hold (not ported):
//is( $r->insert(' ')->insert(' ')->dump(),
) |m (?: inatur |ers ) | (?: ligh |jec ) t |e (?: rwe |d ) |athwe |press |vil ) ed', 'indent de.*ed'); //追加 $r = new Regexp_Assemble(); is($r->add('unimped', 'unimpeded', 'unimpelled')->as_string(), 'unimpe(?:(?:de)?|lle)d', 'unimped unimpeded unimpelled'); $r = new Regexp_Assemble(); is($r->add('tiao', 'tie', 'tien', 'tin', 'tine', 'tinea', 'tinean', 'tineine', 'tininess', 'tinnet', 'tinniness', 'tinosa', 'tinstone', 'tint', 'tinta', 'tintie', 'tintiness', 'tintist', 'tisane', 'tit', 'titanate', 'titania', 'titanite', 'titano', 'tite', 'titi', 'titian', 'titien', 'tittie')->as_string(), 'ti(?:n(?:t(?:i(?:ness|st|e)|a)?|e(?:an?|ine)?|n(?:iness|et)|iness|stone|osa)?|t(?:an(?:i(?:te|a)|ate|o)|i(?:[ae]n)?|(?:ti)?e)?|sane|en?|ao)', 'tiao tie .... titien tittie'); /* is( $_, $fixed, '$_ has not been altered' ); */ echo "===OK===\n";