public function testTokenizingRegistry()
 {
     // Instantiated and discarded, exactly as before.
     // NOTE(review): presumably this only forces the Token class file to be
     // loaded - confirm before removing.
     new t\Token('test');

     // Input: word tokens that still carry punctuation inside them.
     $input = new t\TokenRegistry();
     $input->add(new t\WordToken('Das(ist'))
         ->add(new t\WhitespaceToken(' '))
         ->add(new t\WordToken('ein>Test)'))
         ->add(new t\WhitespaceToken(' '))
         ->add(new t\WordToken('vieleicht'))
         ->add(new t\NonWordToken('/'))
         ->add(new t\WordToken('oder{so!'));

     // Expected: every punctuation character split off into its own
     // NonWordToken, whitespace tokens left untouched.
     $expected = new t\TokenRegistry();
     $expected->add(new t\WordToken('Das'))
         ->add(new t\NonWordToken('('))
         ->add(new t\WordToken('ist'))
         ->add(new t\WhitespaceToken(' '))
         ->add(new t\WordToken('ein'))
         ->add(new t\NonWordToken('>'))
         ->add(new t\WordToken('Test'))
         ->add(new t\NonWordToken(')'))
         ->add(new t\WhitespaceToken(' '))
         ->add(new t\WordToken('vieleicht'))
         ->add(new t\NonWordToken('/'))
         ->add(new t\WordToken('oder'))
         ->add(new t\NonWordToken('{'))
         ->add(new t\WordToken('so'))
         ->add(new t\NonWordToken('!'));

     $tokenizer = new t\PunctuationTokenizer();

     // Assert on the result of a single pass. The tokenizer used to be run
     // twice, with the first result parked in an unused variable, so the
     // assertion only ever saw the outcome of re-tokenizing an already
     // processed registry.
     $this->assertEquals($expected, $tokenizer->run($input));
 }
 /**
  * Runs a NonStandardFilter over a registry holding one WordToken and
  * checks the token's hyphenated content afterwards.
  *
  * The filter is configured with '-' as hyphen. The test also verifies that
  * run() returns the very registry instance it was given.
  *
  * @param mixed $input   Content the WordToken is created with
  * @param mixed $pattern Pattern handed to the token via addPattern()
  * @param mixed $result  Expected value of getHyphenatedContent()
  *
  * @dataProvider filterProvider
  */
 public function testFilter($input, $pattern, $result)
 {
     // Filter under test, configured with a plain '-' hyphen.
     $filter = new NonStandardFilter();
     $options = new Options();
     $options->setHyphen('-');
     $filter->setOptions($options);

     // A registry containing exactly one word token carrying the pattern.
     $token = new t\WordToken($input);
     $token->addPattern($pattern);
     $registry = new t\TokenRegistry();
     $registry->add($token);

     $filtered = $filter->run($registry);

     $this->assertSame($registry, $filtered);
     $this->assertEquals(
         $result,
         $registry->getTokenWithKey(0)->getHyphenatedContent()
     );
 }
 /**
  * Tokenize the given input using whitespace as splitter.
  *
  * A TokenRegistry is processed in place and returned: every contained
  * token that is not a WhitespaceToken is tokenized again and, unless the
  * outcome compares equal to an array holding just that token, replaced by
  * the newly created tokens. Any other input is tokenized directly and the
  * resulting items are collected in a new TokenRegistry.
  *
  * @param string|\Org\Heigl\Hyphenator\Tokenizer\TokenRegistry $input The
  * input to be tokenized
  *
  * @return \Org\Heigl\Hyphenator\Tokenizer\TokenRegistry
  */
 public function run($input)
 {
     if (! $input instanceof TokenRegistry) {
         // Simple string: build a fresh registry from the tokenizer output.
         $items = $this->_tokenize($input);
         $result = new TokenRegistry();
         foreach ($items as $item) {
             $result->add($item);
         }

         return $result;
     }

     // TokenRegistry: refine its tokens in place.
     foreach ($input as $token) {
         if (! $token instanceof WhitespaceToken) {
             $pieces = $this->_tokenize($token->get());
             // Loose comparison on purpose: the pieces are compared by
             // value against the token, not by identity.
             if ($pieces != array($token)) {
                 $input->replace($token, $pieces);
             }
         }
     }

     return $input;
 }
 /**
  * Registers two filters in a FilterRegistry and checks that filter()
  * returns the same TokenRegistry instance that was passed in.
  */
 public function testFiltering()
 {
     $firstFilter = new TestFilter();
     $secondFilter = new Test1Filter();

     // Filters are added in this order: TestFilter, then Test1Filter.
     $filters = new FilterRegistry();
     $filters->add($firstFilter);
     $filters->add($secondFilter);

     $tokens = new t\TokenRegistry();
     $tokens->add(new t\WordToken('test'));

     $filtered = $filters->filter($tokens);

     $this->assertSame($tokens, $filtered);
 }
 /**
  * Checks that the WhitespaceTokenizer splits a string whose words are
  * separated by thin (narrow) non-breaking spaces into alternating word
  * and whitespace tokens.
  */
 public function testTokenizingThinNonBreakingSpaceString()
 {
     // Instantiated and discarded, exactly as before.
     // NOTE(review): presumably this only forces the Token class file to be
     // loaded - confirm before removing.
     new t\Token('test');

     // U+202F NARROW NO-BREAK SPACE, spelled out as its UTF-8 byte sequence.
     // A raw U+202F inside a literal is indistinguishable from a plain space
     // in most editors and is easily normalised away, which would silently
     // turn this into an ordinary-space test.
     $thinSpace = "\xE2\x80\xAF";

     $expected = new t\TokenRegistry();
     $expected->add(new t\WordToken('Some'))
         ->add(new t\WhitespaceToken($thinSpace))
         ->add(new t\WordToken('thin'))
         ->add(new t\WhitespaceToken($thinSpace))
         ->add(new t\WordToken('non'))
         ->add(new t\WhitespaceToken($thinSpace))
         ->add(new t\WordToken('breaking'))
         ->add(new t\WhitespaceToken($thinSpace))
         ->add(new t\WordToken('spaces'));

     // Same five words as the expected tokens, joined by the thin space.
     $text = implode(
         $thinSpace,
         array('Some', 'thin', 'non', 'breaking', 'spaces')
     );

     $tokenizer = new t\WhitespaceTokenizer();

     $this->assertEquals($expected, $tokenizer->run($text));
 }
 /**
  * Exercises TokenRegistry::replace() and inspects the registry's
  * '_registry' attribute after each step.
  */
 public function testReplacement()
 {
     // Instantiated and discarded, exactly as before.
     // NOTE(review): presumably this only forces the Token class file to be
     // loaded - confirm before removing.
     new Token('f');

     $tokenA = new WordToken('a');
     $tokenB = new WordToken('b');
     $tokenC = new WordToken('c');
     $tokenD = new WordToken('d');
     $tokenE = new WordToken('e');

     $registry = new TokenRegistry();
     $initial = array($tokenA, $tokenB, $tokenC);
     foreach ($initial as $token) {
         $registry->add($token);
     }
     $this->assertAttributeEquals($initial, '_registry', $registry);

     // $tokenD was never added, so the contents are expected to be unchanged.
     $registry->replace($tokenD, array());
     $this->assertAttributeEquals($initial, '_registry', $registry);

     // $tokenB is swapped for $tokenD and $tokenE; the string 'foo' in the
     // replacement list is expected not to end up in the registry.
     $registry->replace($tokenB, array($tokenD, 'foo', $tokenE));
     $this->assertAttributeEquals(
         array($tokenA, $tokenD, $tokenE, $tokenC),
         '_registry',
         $registry
     );
 }