Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/**
 * a generic lexer
 *
 * phpDocumentor :: automatic documentation generator
 *
 * PHP versions 4 and 5
 *
 * Copyright (c) 2000-2007 Joshua Eichorn
 *
 * LICENSE:
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General
 * Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any
 * later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn <jeichorn@phpdoc.org>
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    CVS: $Id: WordParser.inc 246145 2007-11-14 01:37:03Z ashnazg $
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @since      0.1
 * @todo       CS cleanup - change package to PhpDocumentor
 */
40
 
41
/**
 * Retrieves tokens from source code for use by the Parser
 *
 * @category   ToolsAndUtilities
 * @package    phpDocumentor
 * @subpackage WordParsers
 * @author     Joshua Eichorn <jeichorn@phpdoc.org>
 * @copyright  2000-2007 Joshua Eichorn
 * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @version    Release: 1.4.3
 * @link       http://www.phpdoc.org
 * @link       http://pear.php.net/PhpDocumentor
 * @see        Parser
 * @todo       CS cleanup - change package to PhpDocumentor
 */
56
class WordParser
{
    /*
    New lines around the world
    Macintosh: \r
        Unix : \n
    Windows : \r\n
     */

    /**#@+
     * @access private
     */
    /**
     * List of text that separates tokens, used to retrieve tokens
     * @var array
     */
    var $wordseperators = array();

    /**
     * Position within input of the cursor pointing to the next text to be
     * retrieved as a token
     * @var integer
     */
    var $pos = 0;

    /**
     * Size of the input source code
     * @var integer
     */
    var $size;

    /**
     * Source code
     * @var string
     */
    var $data;

    /**
     * Memoized separator positions, keyed by separator string.
     *
     * Each value is the offset at which that separator was last found by
     * {@link getWord()}; an entry is only trusted while it is at or after
     * {@link $pos}.
     * @var array
     */
    var $cache;

    /**
     * Current line number
     * @var integer
     */
    var $linenum = 0;

    /**
     * Position the cursor was at the last time line numbers were counted, used
     * to guarantee that line numbers are incremented
     * @var integer
     */
    var $linenumpos = 0;

    /**
     * Used for {@}source} tag, contains currently parsed function source
     * @var string
     */
    var $source = '';

    /**
     * flag, determines whether tokens are added to {@link $source}
     * @var boolean
     */
    var $getsource = false;

    /**
     * If true, then white space is returned as a part of tokens, otherwise
     * tokens are trimmed
     * @var boolean
     */
    var $returnWhiteSpace = false;
    /**#@-*/

    /**
     * Initialize the WordParser
     *
     * Binds the input by reference, records its length, and resets the
     * cursor, the line counters and the separator cache.
     *
     * @param string &$input source code
     *
     * @return void
     */
    function setup(&$input)
    {
        $this->size       = strlen($input);
        $this->data       = & $input;
        $this->pos        = 0;
        $this->linenum    = 0;
        $this->linenumpos = 0;
        $this->cache      = array();
    }

    /**
     * Retrieve source code for the last function/method
     *
     * Returns the text accumulated in {@link $source}, then clears the
     * buffer and switches source capturing off.
     *
     * @return string
     */
    function getSource()
    {
        $source          = $this->source;
        $this->source    = '';
        $this->getsource = false;
        return $source;
    }

    /**
     * Used to tell the WordParser to start retrieving source code
     *
     * @param string $word initial content of the captured source
     *
     * @return void
     * @access private
     */
    function retrievesource($word = '')
    {
        $this->source    = $word;
        $this->getsource = true;
    }

    /**
     * Retrieve a token from the token list
     *
     * The {@link Parser} class relies upon this method to retrieve the next
     * token.  The {@link $wordseperators} array is a collection of strings
     * that delineate tokens for the current parser state.  $wordseperators
     * is set by the parser with a call to {@link Parser::configWordParser()}
     * every time a new parser state is reached.
     *
     * For example, while parsing the source code for a class, the word
     * <code>var</code> is a token, and <code>global</code> is not,
     * but inside a function, the reverse is true.  The parser state
     * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
     * code delimiters like ; and {}, and comment/DocBlock indicators
     *
     * If the whitespace option has been turned off using
     * {@link setWhitespace()}, then whitespace-only tokens are skipped;
     * a lone "\n" token is always returned because constructs such as
     * // comments rely on the newline to find their end.
     *
     * {@internal
     * Tokens are read one at a time by {@link _readToken()}; this method
     * loops (rather than recursing) until a token worth returning is found,
     * so arbitrarily long runs of whitespace do not grow the call stack.
     * }}
     *
     * @return string|false the next token, an empty string if
     *                      $wordseperators is not an array,
     *                      or false if the end of input has been reached
     */
    function getWord()
    {
        while (true) {
            $word = $this->_readToken();

            // end of input (false) or "no separators" marker ("")
            if ($word === false || $word === '') {
                return $word;
            }
            if ($this->returnWhiteSpace) {
                return $word;
            }
            // never return worthless white space (\t, ' ', ...), but do
            // return a bare newline
            if ($word === "\n" || trim($word) !== '') {
                return $word;
            }
        }
    }

    /**
     * Read exactly one raw token at the cursor and advance past it
     *
     * $npos holds the offset of the nearest separator at or after the cursor
     * (or the end of input if there is none).  Text before $npos is returned
     * as the token; if a separator sits directly at the cursor, the separator
     * itself is the token.  Line counting and source capturing are updated
     * for every token read, including whitespace that {@link getWord()}
     * later discards.
     *
     * @return string|false the token, "" if $wordseperators is not an array,
     *                      or false at end of input
     * @access private
     */
    function _readToken()
    {
        // ">=" rather than "==": a cursor moved past the end must not be
        // handed to strpos() as an out-of-range offset
        if ($this->pos >= $this->size) {
            return false;
        }

        if (!is_array($this->wordseperators)) {
            // no token separators, tell the parser to choose a new state
            return '';
        }

        // assume, for starting, that the token is from $this->pos to the end
        $npos   = $this->size;
        $seplen = 0;
        foreach ($this->wordseperators as $sep) {
            // an empty separator can never delimit a token; skipping it
            // avoids a zero-length token that would never advance the cursor
            if (strlen((string) $sep) == 0) {
                continue;
            }

            // cache is set if this separator has been tested
            $tpos = isset($this->cache[$sep]) ? $this->cache[$sep] : false;
            if (!is_int($tpos) || $tpos < $this->pos) {
                // find the position of the next token separator
                $tpos = strpos($this->data, $sep, $this->pos);
            }

            if ($tpos === false) {
                continue;
            }

            if ($tpos < $npos) {
                // strictly closer than anything seen so far: the token now
                // runs from $this->pos to this separator
                $npos   = $tpos;
                $seplen = strlen($sep);
            } else {
                // not the nearest one; remember it for later calls
                $this->cache[$sep] = $tpos;
            }
        }

        $len = $npos - $this->pos;
        if ($len == 0) {
            // a separator starts right at the cursor: return the separator
            $len = $seplen;
        }

        $word = substr($this->data, $this->pos, $len);

        // Change other OS newlines to the unix one
        if ($word === "\r" || $word === "\r\n") {
            $word = "\n";
        }

        // only count newlines for text that has not been counted before
        if ($this->linenumpos <= $this->pos) {
            $this->linenumpos = $this->pos + $len;
            $this->linenum   += substr_count($word, "\n");
        }

        if ($this->getsource) {
            $this->source .= $word;
        }
        $this->pos = $this->pos + $len;

        return $word;
    }

    /**
     * Returns the current pointer position, or 1 character after the end of the word
     *
     * @return int the position
     */
    function getPos()
    {
        return $this->pos;
    }

    /**
     * Unused
     *
     * {@source}
     *
     * @param integer $start starting position
     * @param integer $len   length of block to retrieve
     *
     * @return string the requested block of characters
     */
    function getBlock($start, $len)
    {
        return substr($this->data, $start, $len);
    }

    /**
     * Sets the list of possible separator tokens
     *
     * The array is bound by reference, not copied.
     *
     * @param array &$seps array of strings that separate tokens
     *
     * @return void
     * @uses $wordseperators
     */
    function setSeperator(&$seps)
    {
        $this->wordseperators = &$seps;
    }

    /**
     * Set the internal cursor within the source code
     *
     * Moving the cursor backwards discards the separator cache: cached
     * offsets were computed from a later cursor position and could hide
     * separators lying between the new position and the cached one.
     *
     * @param integer $pos the position
     *
     * @return void
     */
    function setPos($pos)
    {
        if ($pos < $this->pos) {
            $this->cache = array();
        }
        $this->pos = $pos;
    }

    /**
     * Backup to the previous token so that it can be retrieved again in a new
     * context.
     *
     * Occasionally, a word will be passed to an event handler that should be
     * handled by another event handler.  This method allows that to happen.
     *
     * When source capturing is active, the token is also removed from the
     * end of {@link $source} so that it is not recorded twice when it is
     * read again.
     *
     * @param string $word token to back up to
     *
     * @return void
     */
    function backupPos($word)
    {
        $wordlen = strlen($word);
        if ($this->getsource) {
            // drop the whole token, not just its last character
            $keep         = strlen($this->source) - $wordlen;
            $this->source = ($keep > 0) ? substr($this->source, 0, $keep) : '';
        }
        $this->pos = $this->pos - $wordlen;
    }

    /**
     * set parser to return or strip whitespace
     *
     * @param boolean $val flag to return or strip whitespace
     *
     * @return void
     */
    function setWhitespace($val = false)
    {
        $this->returnWhiteSpace = $val;
    }
}
365
?>