Blame | Letzte Änderung | Log anzeigen | RSS feed
<?php
/**
 * a generic lexer
 *
 * phpDocumentor :: automatic documentation generator
 *
 * PHP versions 4 and 5
 *
 * Copyright (c) 2000-2007 Joshua Eichorn
 *
 * LICENSE:
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General
 * Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any
 * later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn <jeichorn@phpdoc.org>
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    CVS: $Id: WordParser.inc 246145 2007-11-14 01:37:03Z ashnazg $
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @since      0.1
 * @todo       CS cleanup - change package to PhpDocumentor
 */

/**
 * Retrieves tokens from source code for use by the Parser
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn <jeichorn@phpdoc.org>
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    Release: 1.4.3
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @see        Parser
 * @todo       CS cleanup - change package to PhpDocumentor
 */
class WordParser
{
    /*
    New lines around the world
    Macintosh: \r
    Unix     : \n
    Windows  : \r\n
    */

    /**#@+
     * @access private
     */
    /**
     * List of text that separates tokens, used to retrieve tokens
     * @var array
     */
    var $wordseperators = array();

    /**
     * Position within input of the cursor pointing to the next text to be
     * retrieved as a token
     * @var integer
     */
    var $pos = 0;

    /**
     * Size of the input source code
     * @var integer
     */
    var $size;

    /**
     * Source code
     * @var string
     */
    var $data;

    /**
     * Last known position of each separator, keyed by separator text.
     * Purely an optimization for {@link getWord()}; it may be emptied at
     * any time without changing results.
     * @var array
     */
    var $cache;

    /**
     * Current line number
     * @var integer
     */
    var $linenum = 0;

    /**
     * Position the cursor was at the last time line numbers were counted, used
     * to guarantee that line numbers are incremented
     * @var integer
     */
    var $linenumpos = 0;

    /**
     * Used for {@}source} tag, contains currently parsed function source
     * @var string
     */
    var $source = '';

    /**
     * flag, determines whether tokens are added to {@link $source}
     * @var boolean
     */
    var $getsource = false;

    /**
     * If true, then white space is returned as a part of tokens, otherwise
     * tokens are trimmed
     * @var boolean
     */
    var $returnWhiteSpace = false;
    /**#@-*/

    /**
     * Initialize the WordParser
     *
     * Stores a reference to the input, records its length and resets the
     * cursor, the line counters and the separator cache.
     *
     * @param string &$input source code
     *
     * @return void
     */
    function setup(&$input)
    {
        $this->size       = strlen($input);
        $this->data       = & $input;
        $this->pos        = 0;
        $this->linenum    = 0;
        $this->linenumpos = 0;
        $this->cache      = array();
    }

    /**
     * Retrieve source code for the last function/method
     *
     * Returns everything collected since {@link retrievesource()} was called,
     * then clears the buffer and switches collection off.
     *
     * @return string
     */
    function getSource()
    {
        $source          = $this->source;
        $this->source    = '';
        $this->getsource = false;
        return $source;
    }

    /**
     * Used to tell the WordParser to start retrieving source code
     *
     * @param string $word source code the collected text should start with
     *
     * @return void
     * @access private
     */
    function retrievesource($word = '')
    {
        $this->source    = $word;
        $this->getsource = true;
    }

    /**
     * Retrieve a token from the token list
     *
     * The {@link Parser} class relies upon this method to retrieve the next
     * token.  The {@link $wordseperators} array is a collection of strings
     * that delineate tokens for the current parser state.  $wordseperators
     * is set by the parser with a call to {@link Parser::configWordParser()}
     * every time a new parser state is reached.
     *
     * For example, while parsing the source code for a class, the word
     * <code>var</code> is a token, and <code>global</code> is not,
     * but inside a function, the reverse is true.  The parser state
     * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
     * code delimiters like ; and {}, and comment/DocBlock indicators
     *
     * If the whitespace option has been turned off using
     * {@link setWhitespace()}, then no whitespace is returned with tokens
     *
     * {@internal
     * In the first segment of the function, the code attempts to find the next
     * token.  A cache is used to speed repetitious tasks.  The $tpos variable
     * is used to hold the position of the next token.  $npos is used to
     * hold the end of the token, and so $npos - $tpos will give the length
     * of the token.  This is used to allow tokens that contain whitespace,
     * should that option be desired.
     *
     * {@link $data} is of course the string containing the PHP code to be
     * parsed, and {@link $pos} is the cursor, or current location within the
     * parsed data.
     * }}
     *
     * @return string|false the next token, an empty string if
     *                      $wordseperators is not an array,
     *                      or false if the end of input has been reached
     */
    function getWord()
    {
        // A cursor at OR beyond the end means there is nothing left to read.
        // (A cursor pushed past the end by setPos() must not reach strpos(),
        // which rejects an out-of-range offset.)
        if ($this->pos >= $this->size) {
            return false;
        }

        if (!is_array($this->wordseperators)) {
            // no token separators, tell the parser to choose a new state
            return "";
        }

        // assume, for starting, that the token is from $this->pos to the end
        $npos   = $this->size;
        $seplen = 0;

        foreach ($this->wordseperators as $sep) {
            // An empty separator can never delimit anything; on PHP 8 it would
            // "match" at the cursor and yield an endless run of empty tokens.
            if (strlen((string) $sep) == 0) {
                continue;
            }

            // cache is set if this separator has been tested
            $tpos = isset($this->cache[$sep]) ? $this->cache[$sep] : false;
            if (!is_int($tpos) || $tpos < $this->pos) {
                // find the position of the next token separator
                $tpos = strpos($this->data, $sep, $this->pos);
            }
            if ($tpos === false) {
                continue;
            }

            if ($tpos < $npos) {
                // this separator is closer to the cursor than any seen so
                // far: the token runs from $this->pos up to it
                $npos   = $tpos;
                $seplen = strlen($sep);
            } else {
                // not the closest one - remember where it is for later calls
                $this->cache[$sep] = $tpos;
            }
        }

        $len = $npos - $this->pos;
        if ($len == 0) {
            // a separator sits right at the cursor: it is the token itself
            $len = $seplen;
        }

        $word = substr($this->data, $this->pos, $len);

        // Change random other os newlines to the unix one
        if ($word == "\r" || $word == "\r\n") {
            $word = "\n";
        }

        // only count lines for text that has not been counted before
        // (the cursor may have been moved backwards)
        if ($this->linenumpos <= $this->pos) {
            $this->linenumpos = $this->pos + $len;
            $this->linenum   += count(explode("\n", $word)) - 1;
        }

        if ($this->getsource) {
            $this->source .= $word;
        }
        $this->pos = $this->pos + $len;

        // Things like // comments rely on the newline
        // to find their end so newlines have to be returned;
        // never return worthless white space /t ' '
        if ($this->returnWhiteSpace == false) {
            if (strlen(trim($word)) == 0 && $word != "\n") {
                $word = $this->getWord();
            }
        }
        return $word;
    }

    /**
     * Returns the current pointer position, or 1 character after the end of the word
     *
     * @return int the position
     */
    function getPos()
    {
        return $this->pos;
    }

    /**
     * Unused
     *
     * {@source}
     *
     * @param integer $start starting position
     * @param integer $len   length of block to retrieve
     *
     * @return string the requested block of characters
     */
    function getBlock($start, $len)
    {
        return substr($this->data, $start, $len);
    }

    /**
     * Sets the list of possible separator tokens
     *
     * The array is stored by reference.
     *
     * @param array &$seps array of strings that separate tokens
     *
     * @return void
     * @uses $wordseperators
     */
    function setSeperator(&$seps)
    {
        $this->wordseperators = &$seps;
    }

    /**
     * Set the internal cursor within the source code
     *
     * @param integer $pos the position
     *
     * @return void
     */
    function setPos($pos)
    {
        if ($pos < $this->pos) {
            // Cached separator positions were found while scanning from a
            // later cursor; after moving backwards a nearer occurrence may
            // exist, so the cache can no longer be trusted.
            $this->cache = array();
        }
        $this->pos = $pos;
    }

    /**
     * Backup to the previous token so that it can be retrieved again in a new
     * context.
     *
     * Occasionally, a word will be passed to an event handler that should be
     * handled by another event handler.  This method allows that to happen.
     *
     * When source collection is active, the token is also removed from the
     * end of {@link $source} so that it is not recorded twice when it is
     * read again.
     *
     * @param string $word token to back up to
     *
     * @return void
     */
    function backupPos($word)
    {
        $wordlen = strlen($word);
        if ($this->getsource) {
            // drop the whole token, not just its last character
            $keep         = max(0, strlen($this->source) - $wordlen);
            $this->source = substr($this->source, 0, $keep);
        }
        $this->pos = $this->pos - $wordlen;
    }

    /**
     * set parser to return or strip whitespace
     *
     * @param boolean $val flag to return or strip whitespace
     *
     * @return void
     */
    function setWhitespace($val = false)
    {
        $this->returnWhiteSpace = $val;
    }
}
?>