/**
  * Simply attempts to extract potential named entities, such as place names or personal names.
  */
 private function extract_potential_named_entities()
 {
     $total = count($this->tokens);
     $pos = 0;
     while ($pos < $total) {
         $current = new Token($this->tokens[$pos]);
         // An uppercase token that is neither a stop word nor an init word
         // is treated as the start of a candidate entity.
         $startsEntity = $current->isUpperCase() && !$current->isStopWord() && !$current->isInitWord();
         if ($startsEntity) {
             $parts = array($current->getText());
             // Keep looking two positions ahead; every uppercase token or
             // prefix/infix found there extends the entity and moves the
             // cursor forward by two.
             while (isset($this->tokens[$pos + 2])) {
                 $lookahead = new Token($this->tokens[$pos + 2]);
                 if (!$lookahead->isUpperCase() && !$lookahead->isPrefixOrInfix()) {
                     break;
                 }
                 $parts[] = $lookahead->getText();
                 $pos += 2;
             }
             $this->named_entities[] = $parts;
         }
         $pos++;
     }
 }
Exemple #2
0
 /**
  * Builds a new CommonToken that copies the text, type, line, token index,
  * char position in line and channel of the given token.
  *
  * The start/stop offsets are copied only when the source is itself a
  * CommonToken; for any other Token implementation they are passed as null.
  * The new token is created without an input stream (null).
  *
  * @param Token $oldToken the token to copy
  * @return CommonToken the newly created copy
  */
 public function FromToken(Token $oldToken)
 {
     $text = $oldToken->getText();
     $type = $oldToken->getType();
     $line = $oldToken->getLine();
     $index = $oldToken->getTokenIndex();
     $charPositionInLine = $oldToken->getCharPositionInLine();
     $channel = $oldToken->getChannel();
     // Initialise explicitly: previously these were left undefined for
     // non-CommonToken sources, which raised an "undefined variable"
     // notice/warning when they were passed to the constructor below.
     $start = null;
     $stop = null;
     if ($oldToken instanceof CommonToken) {
         $start = $oldToken->start;
         $stop = $oldToken->stop;
     }
     $token = new CommonToken(null, $type, $channel, $start, $stop);
     $token->text = $text;
     $token->line = $line;
     $token->index = $index;
     $token->charPositionInLine = $charPositionInLine;
     return $token;
 }
 /**
  * Tracks whether the most recent doc comment is directly followed by an
  * interface, class, function or variable token.
  *
  * State values written here: 1 after a T_DOC_COMMENT, 2 when one of the
  * tokens above arrives while the state is 1, and 0 (with the remembered
  * doc block cleared) when such a token arrives in any other state.
  * T_VARIABLE tokens are only considered while the parameters scanner is
  * not active. All other tokens leave the state untouched.
  *
  * @param Token $token the token being visited
  */
 function accept(Token $token)
 {
     if ($token->isA(T_DOC_COMMENT)) {
         $this->last_doc_block = $token->getText();
         $this->state = 1;
         return;
     }

     $isDocTarget = $token->isA(T_INTERFACE)
         || $token->isA(T_CLASS)
         || $token->isA(T_FUNCTION)
         || ($token->isA(T_VARIABLE) && !$this->parameters_scanner->isActive());
     if (!$isDocTarget) {
         return;
     }

     if ($this->state === 1) {
         $this->state = 2;
         return;
     }

     $this->last_doc_block = null;
     $this->state = 0;
 }
Exemple #4
0
 /**
  * @covers spriebsch\PHPca\Token::__construct
  * @covers spriebsch\PHPca\Token::getText
  */
 public function testGetText()
 {
     // A token built from an id and a text must hand that text back unchanged.
     $expectedText = '<?php';
     $token = new Token(T_OPEN_TAG, $expectedText);
     $this->assertEquals($expectedText, $token->getText());
 }
Exemple #5
0
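 /**
  * Tokenizes PHP source code into a File of Token objects.
  *
  * @param string $fileName    the file name
  * @param string $sourceCode  the source code
  * @return File
  * @throws TokenizerException if a token has no known token id
  */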
 public static function tokenize($fileName, $sourceCode)
 {
     // Single pass over the output of token_get_all(). Every raw token is
     // wrapped in a Token object, annotated with the namespace, class,
     // interface, function and block level that are current at that point,
     // and added to the File that is returned.
     Constants::init();
     // Class tracking: current class name, "T_CLASS seen, name pending",
     // "name seen, opening brace pending", and the block level of the
     // class's opening brace.
     $class = '';
     $classFound = false;
     $waitForClassBegin = false;
     $classCurlyLevel = 0;
     // Interface tracking, same scheme as for classes.
     $interface = '';
     $interfaceFound = false;
     $waitForInterfaceBegin = false;
     $interfaceCurlyLevel = 0;
     // Function tracking, same scheme as for classes.
     $function = '';
     $functionFound = false;
     $waitForFunctionBegin = false;
     $functionCurlyLevel = 0;
     // Namespace tracking: $namespace is the namespace assigned to tokens,
     // $newNamespace accumulates the name of a namespace statement that is
     // currently being read.
     $namespace = '\\';
     $newNamespace = '';
     $namespaceFound = false;
     $namespaceStarted = false;
     // Current curly-brace nesting depth and source position.
     $level = 0;
     $line = 1;
     $column = 1;
     $file = new File($fileName, $sourceCode);
     foreach (token_get_all($sourceCode) as $token) {
         if (is_array($token)) {
             // Array tokens carry their id, text and line number.
             $id = $token[0];
             $text = $token[1];
             $line = $token[2];
         } else {
             // Plain string tokens carry no line number; $line keeps the
             // value tracked so far.
             try {
                 // it's not a PHP token, so we use one we have defined
                 $id = Constants::getTokenId($token);
                 $text = $token;
             } catch (UnkownTokenException $e) {
                 throw new TokenizerException('Unknown token ' . $e->getTokenName() . ' in file ' . $fileName);
             }
             // This exception is not testable, because we _have_ defined all
             // tokens, hopefully. It's just a safeguard to provide a decent
             // error message should we ever encounter an undefined token.
             // @codeCoverageIgnoreEnd
         }
         $tokenObj = new Token($id, $text, $line, $column);
         // Advance the position for the NEXT token.
         if ($tokenObj->hasNewline()) {
             // a newline resets the column count
             $line += $tokenObj->getNewLineCount();
             $column = 1 + $tokenObj->getTrailingWhitespaceCount();
         } else {
             $column += $tokenObj->getLength();
         }
         // We have encountered a T_NAMESPACE token before (this is indicated
         // by $namespaceFound being true), so every T_STRING and
         // T_NS_SEPARATOR token is a part of the namespace name. The parts
         // are collected in $newNamespace, which does not become the current
         // namespace until the next opening curly brace or semicolon has
         // been encountered (see below).
         if ($namespaceFound && $tokenObj->getId() == T_STRING || $namespaceFound && $tokenObj->getId() == T_NS_SEPARATOR) {
             $newNamespace .= $tokenObj->getText();
         }
         // We have encountered a T_CLASS token before (this is indicated
         // by $classFound being true), so the T_STRING contains the class
         // name (there will be T_WHITESPACE between T_CLASS and T_STRING).
         // We remember the class name and set $waitForClassBegin to true so
         // that we can wait for the next opening curly brace.
         if ($classFound && $tokenObj->getId() == T_STRING) {
             $class = $tokenObj->getText();
             $waitForClassBegin = true;
             $classFound = false;
         }
         // We have encountered a T_INTERFACE token before (this is indicated
         // by $interfaceFound being true), so the T_STRING contains the
         // interface name (there will be T_WHITESPACE between T_INTERFACE
         // and T_STRING). We remember the interface name and set
         // $waitForInterfaceBegin to true so that we can wait for the next
         // opening curly brace.
         if ($interfaceFound && $tokenObj->getId() == T_STRING) {
             $interface = $tokenObj->getText();
             $waitForInterfaceBegin = true;
             $interfaceFound = false;
         }
         // We have encountered a T_FUNCTION token before (this is indicated
         // by $functionFound being true), so the T_STRING contains the
         // function name (there will be T_WHITESPACE between T_FUNCTION and
         // T_STRING). We remember the function name and set
         // $waitForFunctionBegin to true so that we can wait for the next
         // opening curly brace (or semicolon, see below).
         // NOTE(review): $functionFound stays true until the next T_STRING,
         // whatever that token is - presumably anonymous functions are
         // affected by this; confirm.
         if ($functionFound && $tokenObj->getId() == T_STRING) {
             $function = $tokenObj->getText();
             $waitForFunctionBegin = true;
             $functionFound = false;
         }
         // T_NAMESPACE token starts a namespace. We set $namespaceFound
         // to true so that we can watch out for the namespace name (see above).
         if ($tokenObj->getId() == T_NAMESPACE) {
             // Reset the current namespace. It is a PHPCa convention to
             // always make namespace statements themselves part of the
             // global namespace.
             $namespace = '\\';
             $namespaceFound = true;
         }
         // If we encounter a T_CLASS token, we have found a class definition.
         // We set $classFound to true so that we can watch out for the class
         // name (see above).
         if ($tokenObj->getId() == T_CLASS) {
             $classFound = true;
         }
         // If we encounter a T_INTERFACE token, we have found an interface definition.
         // We set $interfaceFound to true so that we can watch out for the interface
         // name (see above).
         if ($tokenObj->getId() == T_INTERFACE) {
             $interfaceFound = true;
         }
         // If we encounter a T_FUNCTION token, we have found a function.
         // We set $functionFound to true so that we can watch out for the
         // function name (see above).
         if ($tokenObj->getId() == T_FUNCTION) {
             $functionFound = true;
         }
         // A semicolon can end the namespace declaration. If we encounter
         // a semicolon in $namespaceFound mode, we switch the mode which
         // indicates that the full namespace name has been parsed. The
         // collected name is applied further below, after the semicolon
         // token itself has been given the previous namespace.
         if ($namespaceFound && $tokenObj->getId() == T_SEMICOLON) {
             $namespaceStarted = true;
             $namespaceFound = false;
         }
         $id = $tokenObj->getId();
         // A semicolon that arrives while we are still waiting for a
         // function's opening brace ends the wait; the current level is
         // recorded as the function's level. (Presumably this covers
         // declarations without a body, e.g. abstract or interface
         // methods - confirm.)
         if ($id == T_SEMICOLON) {
             if ($waitForFunctionBegin) {
                 $functionCurlyLevel = $level;
                 $waitForFunctionBegin = false;
             }
         }
         // Opening curly brace opens another block, thus increases the level.
         if ($id == T_OPEN_CURLY || $id == T_CURLY_OPEN || $id == T_DOLLAR_OPEN_CURLY_BRACES) {
             $level++;
             // An opening curly brace can end the namespace declaration.
             // If we encounter one while in $namespaceFound mode, we switch
             // the mode which indicates that the full namespace name has
             // been parsed.
             if ($namespaceFound) {
                 $namespace = $newNamespace;
                 $newNamespace = '';
                 $namespaceFound = false;
             }
             // If we encounter the opening curly brace of a class (this happens
             // when $waitForClassBegin is true), we remember the block level of
             // this brace so that we can end the class when we encounter the
             // matching closing brace.
             if ($waitForClassBegin) {
                 $classCurlyLevel = $level;
                 $waitForClassBegin = false;
             }
             // If we encounter the opening curly brace of an interface (this happens
             // when $waitForInterfaceBegin is true), we remember the block level of
             // this brace so that we can end the interface when we encounter the
             // matching closing brace.
             if ($waitForInterfaceBegin) {
                 $interfaceCurlyLevel = $level;
                 $waitForInterfaceBegin = false;
             }
             // If we encounter the opening curly brace of a function (this happens
             // when $waitForFunctionBegin is true), we remember the block level of
             // this brace so that we can end the function when we encounter the
             // matching closing brace.
             if ($waitForFunctionBegin) {
                 $functionCurlyLevel = $level;
                 $waitForFunctionBegin = false;
             }
         }
         // Since we assemble any new namespace name in $newNamespace,
         // we can safely always set the $namespace.
         $tokenObj->setNamespace($namespace);
         // A namespace statement was just terminated by a semicolon: from
         // the next token on, the collected name is the current namespace.
         if ($namespaceStarted) {
             $namespaceStarted = false;
             $namespace = $newNamespace;
             $newNamespace = '';
         }
         // This also sets the class when we are outside the class,
         // which is harmless because we then just set an empty string.
         // The class name is prefixed with the namespace unless it already
         // starts with a backslash or the current namespace is the global one.
         if (!$waitForClassBegin) {
             if (substr($class, 0, 1) == '\\' || $namespace == '\\') {
                 $classname = $class;
             } else {
                 if ($class != '') {
                     $classname = $namespace . '\\' . $class;
                 } else {
                     $classname = '';
                 }
             }
             $tokenObj->setClass($classname);
         }
         // This also sets the interface when we are outside the interface,
         // which is harmless because we then just set an empty string.
         // The namespace prefix is applied the same way as for classes.
         if (!$waitForInterfaceBegin) {
             if (substr($interface, 0, 1) == '\\' || $namespace == '\\') {
                 $interfaceName = $interface;
             } else {
                 if ($interface != '') {
                     $interfaceName = $namespace . '\\' . $interface;
                 } else {
                     $interfaceName = '';
                 }
             }
             $tokenObj->setInterface($interfaceName);
         }
         // This also sets the function when we are outside the function,
         // which is harmless because we then just set an empty string.
         if (!$waitForFunctionBegin) {
             $tokenObj->setFunction($function);
         }
         $tokenObj->setBlockLevel($level);
         // Closing curly decreases the block level. We do this *after*
         // we have set the block level in the current token, so that
         // the closing curly's level matches the level of its opening brace.
         if ($tokenObj->getId() == T_CLOSE_CURLY) {
             $level--;
             // We get away with not dealing with namespace ends, since
             // non-namespaced code is not allowed when there is at least
             // one namespace in a file. So any namespace either implicitly
             // ends at the end of a file, or another namespace starts,
             // implicitly "ending" the previous namespace.
             // If we are inside a class and the closing brace matches the
             // opening brace of that class, the block/class has ended.
             if ($class != '' && $tokenObj->getBlockLevel() == $classCurlyLevel) {
                 $class = '';
                 $classCurlyLevel = 0;
             }
             // If we are inside an interface and the closing brace matches the
             // opening brace of that interface, the block/interface has ended.
             if ($interface != '' && $tokenObj->getBlockLevel() == $interfaceCurlyLevel) {
                 $interface = '';
                 $interfaceCurlyLevel = 0;
             }
             // If we are inside a function and the closing brace matches the
             // opening brace of that function, the block/function has ended.
             if ($function != '' && $tokenObj->getBlockLevel() == $functionCurlyLevel) {
                 $function = '';
                 $functionCurlyLevel = 0;
             }
         }
         $file->add($tokenObj);
     }
     return $file;
 }
 /**
  * Appends the text of the visited token to the accumulated output.
  *
  * @param Token $token the token being visited
  */
 function accept(Token $token)
 {
     $text = $token->getText();
     $this->output = $this->output . $text;
 }