Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/**
3
 * Tokenizes JS code.
4
 *
5
 * PHP version 5
6
 *
7
 * @category  PHP
8
 * @package   PHP_CodeSniffer
9
 * @author    Greg Sherwood <gsherwood@squiz.net>
10
 * @author    Marc McIntyre <mmcintyre@squiz.net>
11
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
12
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
13
 * @version   CVS: $Id: JS.php 289840 2009-10-22 00:52:57Z squiz $
14
 * @link      http://pear.php.net/package/PHP_CodeSniffer
15
 */
16
 
17
/**
18
 * Tokenizes JS code.
19
 *
20
 * @category  PHP
21
 * @package   PHP_CodeSniffer
22
 * @author    Greg Sherwood <gsherwood@squiz.net>
23
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
24
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
25
 * @version   Release: 1.2.1
26
 * @link      http://pear.php.net/package/PHP_CodeSniffer
27
 */
28
class PHP_CodeSniffer_Tokenizers_JS
29
{
30
 
31
    /**
     * Tokens that are permitted to open a scope, keyed by token code.
     *
     * Every entry describes one kind of scope:
     *  - 'start'  the token that opens the scope,
     *  - 'end'    the token that closes the scope,
     *  - 'strict' whether the token strictly requires an opener,
     *  - 'shared' whether the token can share its scope closer,
     *  - 'with'   the tokens the scope closer can be shared with.
     *
     * CASE is an example of a token that shares a scope closer.
     *
     * @var array
     */
    public $scopeOpeners = array(
        T_IF => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_TRY => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_CATCH => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_ELSE => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_FOR => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_FUNCTION => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_WHILE => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_DO => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_SWITCH => array(
            'start'  => T_OPEN_CURLY_BRACKET,
            'end'    => T_CLOSE_CURLY_BRACKET,
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_CASE => array(
            'start'  => T_COLON,
            'end'    => T_BREAK,
            'strict' => true,
            'shared' => true,
            'with'   => array(
                T_DEFAULT,
                T_CASE,
                T_SWITCH,
            ),
        ),
        T_DEFAULT => array(
            'start'  => T_COLON,
            'end'    => T_BREAK,
            'strict' => true,
            'shared' => true,
            'with'   => array(
                T_CASE,
                T_SWITCH,
            ),
        ),
    );
127
 
128
    /**
     * Tokens that close a scope.
     *
     * These are the distinct 'end' tokens listed in the $scopeOpeners
     * array, kept in a separate list so they do not have to be collected
     * again while a file is being parsed.
     *
     * @var array
     */
    public $endScopeTokens = array(
        T_CLOSE_CURLY_BRACKET,
        T_BREAK,
    );
141
 
142
    /**
     * Special JS tokens and the token type each one maps to.
     *
     * Keys are the literal source text (keywords, brackets, operators and
     * comment delimiters); values are token type names given as strings.
     *
     * @var array
     */
    protected $tokenValues = array(
        // Keywords and literals.
        'function' => 'T_FUNCTION',
        'prototype' => 'T_PROTOTYPE',
        'try' => 'T_TRY',
        'catch' => 'T_CATCH',
        'return' => 'T_RETURN',
        'break' => 'T_BREAK',
        'switch' => 'T_SWITCH',
        'continue' => 'T_CONTINUE',
        'if' => 'T_IF',
        'else' => 'T_ELSE',
        'do' => 'T_DO',
        'while' => 'T_WHILE',
        'for' => 'T_FOR',
        'var' => 'T_VAR',
        'case' => 'T_CASE',
        'default' => 'T_DEFAULT',
        'true' => 'T_TRUE',
        'false' => 'T_FALSE',
        'null' => 'T_NULL',
        'this' => 'T_THIS',

        // Brackets.
        '(' => 'T_OPEN_PARENTHESIS',
        ')' => 'T_CLOSE_PARENTHESIS',
        '{' => 'T_OPEN_CURLY_BRACKET',
        '}' => 'T_CLOSE_CURLY_BRACKET',
        '[' => 'T_OPEN_SQUARE_BRACKET',
        ']' => 'T_CLOSE_SQUARE_BRACKET',

        // Operators and punctuation.
        '?' => 'T_INLINE_THEN',
        '.' => 'T_OBJECT_OPERATOR',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULTIPLY',
        '%' => 'T_MODULUS',
        '/' => 'T_DIVIDE',
        ',' => 'T_COMMA',
        ';' => 'T_SEMICOLON',
        ':' => 'T_COLON',
        '<' => 'T_LESS_THAN',
        '>' => 'T_GREATER_THAN',
        '<=' => 'T_IS_SMALLER_OR_EQUAL',
        '>=' => 'T_IS_GREATER_OR_EQUAL',
        '!' => 'T_BOOLEAN_NOT',
        '!=' => 'T_IS_NOT_EQUAL',
        '!==' => 'T_IS_NOT_IDENTICAL',
        '=' => 'T_EQUAL',
        '==' => 'T_IS_EQUAL',
        '===' => 'T_IS_IDENTICAL',
        '-=' => 'T_MINUS_EQUAL',
        '+=' => 'T_PLUS_EQUAL',
        '*=' => 'T_MUL_EQUAL',
        '/=' => 'T_DIV_EQUAL',
        '++' => 'T_INC',
        '--' => 'T_DEC',

        // Comment delimiters.
        '//' => 'T_COMMENT',
        '/*' => 'T_COMMENT',
        '/**' => 'T_DOC_COMMENT',
        '*/' => 'T_COMMENT',
    );
205
 
206
    /**
     * The characters that delimit a string.
     *
     * @var array
     */
    protected $stringTokens = array(
        '\'',
        '"',
    );
215
 
216
    /**
     * The tokens that start comments, each mapped to the token that ends it.
     *
     * The '//' opener is mapped to null rather than to a closing token.
     *
     * @var array
     */
    protected $commentTokens = array(
        '//' => null,
        '/*' => '*/',
        '/**' => '*/',
    );
226
 
227
 
228
    /**
229
     * Creates an array of tokens when given some JS code.
230
     *
231
     * Starts by using token_get_all() but does a lot of extra processing
232
     * to insert information about the context of the token.
233
     *
234
     * @param string $string  The string to tokenize.
235
     * @param string $eolChar The EOL character to use for splitting strings.
236
     *
237
     * @return array
238
     */
239
    public function tokenizeString($string, $eolChar='\n')
240
    {
241
        $tokenTypes = array_keys($this->tokenValues);
242
 
243
        $maxTokenLength = 0;
244
        foreach ($tokenTypes as $token) {
245
            if (strlen($token) > $maxTokenLength) {
246
                $maxTokenLength = strlen($token);
247
            }
248
        }
249
 
250
        $tokens          = array();
251
        $inString        = '';
252
        $stringChar      = null;
253
        $inComment       = '';
254
        $buffer          = '';
255
        $preStringBuffer = '';
256
        $cleanBuffer     = false;
257
 
258
        $tokens[] = array(
259
                     'code'    => T_OPEN_TAG,
260
                     'type'    => 'T_OPEN_TAG',
261
                     'content' => '',
262
                    );
263
 
264
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
265
            echo "\t*** START TOKENIZING ***".PHP_EOL;
266
        }
267
 
268
        // Convert newlines to single characters for ease of
269
        // processing. We will change them back later.
270
        $string = str_replace($eolChar, "\n", $string);
271
 
272
        $chars    = str_split($string);
273
        $numChars = count($chars);
274
        for ($i = 0; $i < $numChars; $i++) {
275
            $char = $chars[$i];
276
 
277
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
278
                $content = str_replace("\n", '\n', $char);
279
                $bufferContent = str_replace("\n", '\n', $buffer);
280
                if ($inString !== '') {
281
                    echo "\t";
282
                }
283
 
284
                if ($inComment !== '') {
285
                    echo "\t";
286
                }
287
 
288
                echo "Process char $i => $content (buffer: $bufferContent)".PHP_EOL;
289
            }
290
 
291
            if ($inString === '' && $inComment === '' && $buffer !== '') {
292
                // If the buffer only has whitespace and we are about to
293
                // add a character, store the whitespace first.
294
                if (trim($char) !== '' && trim($buffer) === '') {
295
                    $tokens[] = array(
296
                                 'code'    => T_WHITESPACE,
297
                                 'type'    => 'T_WHITESPACE',
298
                                 'content' => str_replace("\n", $eolChar, $buffer),
299
                                );
300
 
301
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
302
                        $content = str_replace("\n", '\n', $buffer);
303
                        echo "=> Added token T_WHITESPACE ($content)".PHP_EOL;
304
                    }
305
 
306
                    $buffer = '';
307
                }
308
 
309
                // If the buffer is not whitespace and we are about to
310
                // add a whitespace character, store the content first.
311
                if ($inString === ''
312
                    && $inComment === ''
313
                    && trim($char) === ''
314
                    && trim($buffer) !== ''
315
                ) {
316
                    $tokens[] = array(
317
                                 'code'    => T_STRING,
318
                                 'type'    => 'T_STRING',
319
                                 'content' => str_replace("\n", $eolChar, $buffer),
320
                                );
321
 
322
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
323
                        $content = str_replace("\n", '\n', $buffer);
324
                        echo "=> Added token T_STRING ($content)".PHP_EOL;
325
                    }
326
 
327
                    $buffer = '';
328
                }
329
            }//end if
330
 
331
            // Process strings.
332
            if ($inComment === '' && in_array($char, $this->stringTokens) === true) {
333
                if ($inString === $char) {
334
                    // This could be the end of the string, but make sure it
335
                    // is not escaped first.
336
                    $escapes = 0;
337
                    for ($x = ($i - 1); $x >= 0; $x--) {
338
                        if ($chars[$x] !== '\\') {
339
                            break;
340
                        }
341
 
342
                        $escapes++;
343
                    }
344
 
345
                    if ($escapes === 0 || ($escapes % 2) === 0) {
346
                        // There is an even number of escape chars,
347
                        // so this is not escaped, it is the end of the string.
348
                        $tokens[] = array(
349
                                     'code'    => T_CONSTANT_ENCAPSED_STRING,
350
                                     'type'    => 'T_CONSTANT_ENCAPSED_STRING',
351
                                     'content' => str_replace("\n", $eolChar, $buffer).$char,
352
                                    );
353
 
354
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
355
                            echo "\t* found end of string *".PHP_EOL;
356
                            $content = str_replace("\n", '\n', $buffer.$char);
357
                            echo "=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
358
                        }
359
 
360
                        $buffer          = '';
361
                        $preStringBuffer = '';
362
                        $inString        = '';
363
                        $stringChar      = null;
364
                        continue;
365
                    }
366
                } else if ($inString === '') {
367
                    $inString        = $char;
368
                    $stringChar      = $i;
369
                    $preStringBuffer = $buffer;
370
 
371
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
372
                        echo "\t* looking for string closer *".PHP_EOL;
373
                    }
374
                }//end if
375
            }//end if
376
 
377
            if ($inString !== '' && $char === "\n") {
378
                // Unless this newline character is escaped, the string did not
379
                // end before the end of the line, which means it probably
380
                // wasn't a string at all (maybe a regex).
381
                if ($chars[($i - 1)] !== '\\') {
382
                    $i               = $stringChar;
383
                    $buffer          = $preStringBuffer;
384
                    $preStringBuffer = '';
385
                    $inString        = '';
386
                    $stringChar      = null;
387
                    $char            = $chars[$i];
388
 
389
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
390
                        echo "\t* found newline before end of string, bailing *".PHP_EOL;
391
                    }
392
                }
393
            }
394
 
395
            $buffer .= $char;
396
 
397
            // We don't look for special tokens inside strings,
398
            // so if we are in a string, we can continue here now
399
            // that the current char is in the buffer.
400
            if ($inString !== '') {
401
                continue;
402
            }
403
 
404
            // Check for known tokens, but ignore tokens found that are not at
405
            // the end of a string, like FOR and this.FORmat.
406
            if (in_array(strtolower($buffer), $tokenTypes) === true
407
                && (preg_match('|[a-zA-z0-9_]|', $char) === 0
408
                || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
409
            ) {
410
                $matchedToken    = false;
411
                $lookAheadLength = ($maxTokenLength - strlen($buffer));
412
 
413
                if ($lookAheadLength > 0) {
414
                    // The buffer contains a token type, but we need
415
                    // to look ahead at the next chars to see if this is
416
                    // actually part of a larger token. For example,
417
                    // FOR and FOREACH.
418
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
419
                        echo "\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
420
                    }
421
 
422
                    $charBuffer = $buffer;
423
                    for ($x = 1; $x <= $lookAheadLength; $x++) {
424
                        if (isset($chars[($i + $x)]) === false) {
425
                            break;
426
                        }
427
 
428
                        $charBuffer .= $chars[($i + $x)];
429
 
430
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
431
                            $content = str_replace("\n", '\n', $charBuffer);
432
                            echo "\t=> Looking ahead $x chars => $content".PHP_EOL;
433
                        }
434
 
435
                        if (in_array(strtolower($charBuffer), $tokenTypes) === true) {
436
                            // We've found something larger that matches
437
                            // so we can ignore this char.
438
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
439
                                $type = $this->tokenValues[strtolower($charBuffer)];
440
                                echo "\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
441
                            }
442
 
443
                            $matchedToken = true;
444
                            break;
445
                        }
446
                    }//end for
447
                }//end if
448
 
449
                if ($matchedToken === false) {
450
                    if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
451
                        echo "\t* look ahead found nothing *".PHP_EOL;
452
                    }
453
 
454
                    // Special case for T_DIVIDE which can actually be
455
                    // the start of a regular expression.
456
                    $foundRegex = false;
457
                    if ($char === '/') {
458
                        $beforeTokens = array(
459
                                         T_EQUAL,
460
                                         T_OPEN_PARENTHESIS,
461
                                         T_RETURN,
462
                                        );
463
 
464
                        $afterTokens = array(
465
                                         ',',
466
                                         ')',
467
                                         ';',
468
                                         ' ',
469
                                         '.',
470
                                        );
471
 
472
                        // Find the last non-whitespace token that was added
473
                        // to the tokens array.
474
                        $numTokens = count($tokens);
475
                        for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
476
                            if (in_array($tokens[$prev]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
477
                                break;
478
                            }
479
                        }
480
 
481
                        if (in_array($tokens[$prev]['code'], $beforeTokens) === true) {
482
                            // This is probably a regular expression,
483
                            // so look for the end of it.
484
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
485
                                $content = str_replace("\n", '\n', $char);
486
                                echo "\t* token possibly starts a regular expression *".PHP_EOL;
487
                            }
488
 
489
                            for ($next = ($i + 1); $next < $numChars; $next++) {
490
                                if ($chars[$next] === '/') {
491
                                    // Just make sure this is not escaped first.
492
                                    if ($chars[($next - 1)] !== '\\') {
493
                                        break;
494
                                    }
495
                                } else {
496
                                    $possiblEolChar = substr($string, $next, strlen($eolChar));
497
                                    if ($possiblEolChar === $eolChar) {
498
                                        // This is the last token on the line and regular
499
                                        // expressions need to be defined on a single line,
500
                                        // so this is not a regular expression.
501
                                        break;
502
                                    }
503
                                }
504
                            }
505
 
506
                            if ($chars[$next] === '/') {
507
                                while (preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0) {
508
                                    // The token directly after the end of the regex can
509
                                    // be modifiers like global and case insensitive
510
                                    // (e.g., /pattern/gi).
511
                                    $next++;
512
                                }
513
 
514
                                $regexEnd = $next;
515
 
516
                                for ($next = ($next + 1); $next < $numChars; $next++) {
517
                                    if ($chars[$next] !== ' ') {
518
                                        break;
519
                                    } else {
520
                                        $possiblEolChar = substr($string, $next, strlen($eolChar));
521
                                        if ($possiblEolChar === $eolChar) {
522
                                            // This is the last token on the line.
523
                                            break;
524
                                        }
525
                                    }
526
                                }
527
 
528
                                if (in_array($chars[$next], $afterTokens) === true) {
529
                                    // This is a regular expression, so join all the
530
                                    // tokens together.
531
                                    $content = '';
532
                                    for ($x = $i; $x <= $regexEnd; $x++) {
533
                                        $content .= $chars[$x];
534
                                    }
535
 
536
                                    $tokens[] = array(
537
                                                 'code'    => T_REGULAR_EXPRESSION,
538
                                                 'type'    => 'T_REGULAR_EXPRESSION',
539
                                                 'content' => $content,
540
                                                );
541
 
542
                                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
543
                                        $content = str_replace("\n", '\n', $content);
544
                                        echo "=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
545
                                    }
546
 
547
                                    $i          = $regexEnd;
548
                                    $foundRegex = true;
549
                                }
550
                            }//end if
551
                        }//end if
552
                    }//end if
553
 
554
                    if ($foundRegex === false) {
555
                        $value    = $this->tokenValues[strtolower($buffer)];
556
                        $tokens[] = array(
557
                                     'code'    => constant($value),
558
                                     'type'    => $value,
559
                                     'content' => $buffer,
560
                                    );
561
 
562
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
563
                            $content = str_replace("\n", '\n', $buffer);
564
                            echo "=> Added token $value ($content)".PHP_EOL;
565
                        }
566
                    }
567
 
568
                    $cleanBuffer = true;
569
                }
570
            } else if (in_array(strtolower($char), $tokenTypes) === true) {
571
                // No matter what token we end up using, we don't
572
                // need the content in the buffer any more because we have
573
                // found a valid token.
574
                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
575
                if ($newContent !== '') {
576
                    $tokens[] = array(
577
                                 'code'    => T_STRING,
578
                                 'type'    => 'T_STRING',
579
                                 'content' => $newContent,
580
                                );
581
 
582
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
583
                        $content = str_replace("\n", '\n', substr($buffer, 0, -1));
584
                        echo "=> Added token T_STRING ($content)".PHP_EOL;
585
                    }
586
                }
587
 
588
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
589
                    echo "\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
590
                }
591
 
592
                // The char is a token type, but we need to look ahead at the
593
                // next chars to see if this is actually part of a larger token.
594
                // For example, = and ===.
595
                $charBuffer   = $char;
596
                $matchedToken = false;
597
                for ($x = 1; $x <= $maxTokenLength; $x++) {
598
                    if (isset($chars[($i + $x)]) === false) {
599
                        break;
600
                    }
601
 
602
                    $charBuffer .= $chars[($i + $x)];
603
 
604
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
605
                        $content = str_replace("\n", '\n', $charBuffer);
606
                        echo "\t=> Looking ahead $x chars => $content".PHP_EOL;
607
                    }
608
 
609
                    if (in_array(strtolower($charBuffer), $tokenTypes) === true) {
610
                        // We've found something larger that matches
611
                        // so we can ignore this char.
612
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
613
                            $type = $this->tokenValues[strtolower($charBuffer)];
614
                            echo "\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
615
                        }
616
 
617
                        $matchedToken = true;
618
                        break;
619
                    }
620
                }//end for
621
 
622
                if ($matchedToken === false) {
623
                    $value    = $this->tokenValues[strtolower($char)];
624
                    $tokens[] = array(
625
                                 'code'    => constant($value),
626
                                 'type'    => $value,
627
                                 'content' => $char,
628
                                );
629
 
630
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
631
                        echo "\t* look ahead found nothing *".PHP_EOL;
632
                        $content = str_replace("\n", '\n', $char);
633
                        echo "=> Added token $value ($content)".PHP_EOL;
634
                    }
635
 
636
                    $cleanBuffer = true;
637
                } else {
638
                    $buffer = $char;
639
                }
640
            }//end if
641
 
642
            // Keep track of content inside comments.
643
            if ($inComment === ''
644
                && array_key_exists($buffer, $this->commentTokens) === true
645
            ) {
646
                // This is not really a comment if the content
647
                // looks like \// (i.e., it is escaped).
648
                if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
649
                    $lastToken   = array_pop($tokens);
650
                    $lastContent = $lastToken['content'];
651
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
652
                        $value   = $this->tokenValues[strtolower($lastContent)];
653
                        $content = str_replace("\n", '\n', $lastContent);
654
                        echo "=> Removed token $value ($content)".PHP_EOL;
655
                    }
656
 
657
                    $lastChars    = str_split($lastContent);
658
                    $lastNumChars = count($lastChars);
659
                    for ($x = 0; $x < $lastNumChars; $x++) {
660
                        $lastChar = $lastChars[$x];
661
                        $value    = $this->tokenValues[strtolower($lastChar)];
662
                        $tokens[] = array(
663
                                     'code'    => constant($value),
664
                                     'type'    => $value,
665
                                     'content' => $lastChar,
666
                                    );
667
 
668
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
669
                            $content = str_replace("\n", '\n', $lastChar);
670
                            echo "=> Added token $value ($content)".PHP_EOL;
671
                        }
672
                    }
673
                } else {
674
                    // We have started a comment.
675
                    $inComment = $buffer;
676
 
677
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
678
                        echo "\t* looking for end of comment *".PHP_EOL;
679
                    }
680
                }
681
            } else if ($inComment !== '') {
682
                if ($this->commentTokens[$inComment] === null) {
683
                    // Comment ends at the next newline.
684
                    if (strpos($buffer, "\n") !== false) {
685
                        $inComment = '';
686
                    }
687
                } else {
688
                    if ($this->commentTokens[$inComment] === $buffer) {
689
                        $inComment = '';
690
                    }
691
                }
692
 
693
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
694
                    if ($inComment === '') {
695
                        echo "\t* found end of comment *".PHP_EOL;
696
                    }
697
                }
698
 
699
                if ($inComment === '' && $cleanBuffer === false) {
700
                    $tokens[] = array(
701
                                 'code'    => T_STRING,
702
                                 'type'    => 'T_STRING',
703
                                 'content' => str_replace("\n", $eolChar, $buffer),
704
                                );
705
 
706
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
707
                        $content = str_replace("\n", '\n', $buffer);
708
                        echo "=> Added token T_STRING ($content)".PHP_EOL;
709
                    }
710
 
711
                    $buffer = '';
712
                }
713
            }//end if
714
 
715
            if ($cleanBuffer === true) {
716
                $buffer      = '';
717
                $cleanBuffer = false;
718
            }
719
        }//end foreach
720
 
721
        // Trim the last newline off the end of the buffer before
722
        // adding its contents to the token stack.
723
        // This is so we don't count the very final newline of a file.
724
        $buffer = substr($buffer, 0, -1);
725
 
726
        if (empty($buffer) === false) {
727
            // Buffer contains whitespace from the end of the file, and not
728
            // just the final newline.
729
            $tokens[] = array(
730
                         'code'    => T_WHITESPACE,
731
                         'type'    => 'T_WHITESPACE',
732
                         'content' => str_replace("\n", $eolChar, $buffer),
733
                        );
734
 
735
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
736
                $content = str_replace($eolChar, '\n', $buffer);
737
                echo "=> Added token T_WHITESPACE ($content)".PHP_EOL;
738
            }
739
        }
740
 
741
        $tokens[] = array(
742
                     'code'    => T_CLOSE_TAG,
743
                     'type'    => 'T_CLOSE_TAG',
744
                     'content' => '',
745
                    );
746
 
747
        /*
748
            Now that we have done some basic tokenizing, we need to
749
            modify the tokens to join some together and split some apart
750
            so they match what the PHP tokenizer does.
751
        */
752
 
753
        $finalTokens = array();
754
        $newStackPtr = 0;
755
        $numTokens   = count($tokens);
756
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
757
            $token = $tokens[$stackPtr];
758
 
759
            /*
760
                Look for comments and join the tokens together.
761
            */
762
 
763
            if (array_key_exists($token['content'], $this->commentTokens) === true) {
764
                $newContent   = '';
765
                $tokenContent = $token['content'];
766
                $endContent   = $this->commentTokens[$tokenContent];
767
                while ($tokenContent !== $endContent) {
768
                    if ($endContent === null
769
                        && strpos($tokenContent, $eolChar) !== false
770
                    ) {
771
                        // A null end token means the comment ends at the end of
772
                        // the line so we look for newlines and split the token.
773
                        $tokens[$stackPtr]['content'] = substr(
774
                            $tokenContent,
775
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
776
                        );
777
 
778
                        $tokenContent = substr(
779
                            $tokenContent,
780
                            0,
781
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
782
                        );
783
 
784
                        // If the substr failed, skip the token as the content
785
                        // will now be blank.
786
                        if ($tokens[$stackPtr]['content'] !== false) {
787
                            $stackPtr--;
788
                        }
789
 
790
                        break;
791
                    }//end if
792
 
793
                    $stackPtr++;
794
                    $newContent  .= $tokenContent;
795
                    if (isset($tokens[$stackPtr]) === false) {
796
                        break;
797
                    }
798
 
799
                    $tokenContent = $tokens[$stackPtr]['content'];
800
                }//end while
801
 
802
                // Save the new content in the current token so
803
                // the code below can chop it up on newlines.
804
                $token['content'] = $newContent.$tokenContent;
805
            }//end if
806
 
807
            /*
808
                If this token has newlines in its content, split each line up
809
                and create a new token for each line. We do this so it's easier
810
                to ascertain where errors occur on a line.
811
                Note that $token['content'] is the token's content.
812
            */
813
 
814
            if (strpos($token['content'], $eolChar) !== false) {
815
                $tokenLines = explode($eolChar, $token['content']);
816
                $numLines   = count($tokenLines);
817
 
818
                for ($i = 0; $i < $numLines; $i++) {
819
                    $newToken['content'] = $tokenLines[$i];
820
                    if ($i === ($numLines - 1)) {
821
                        if ($tokenLines[$i] === '') {
822
                            break;
823
                        }
824
                    } else {
825
                        $newToken['content'] .= $eolChar;
826
                    }
827
 
828
                    $newToken['type']          = $token['type'];
829
                    $newToken['code']          = $token['code'];
830
                    $finalTokens[$newStackPtr] = $newToken;
831
                    $newStackPtr++;
832
                }
833
            } else {
834
                $finalTokens[$newStackPtr] = $token;
835
                $newStackPtr++;
836
            }//end if
837
 
838
            // Convert numbers, including decimals.
839
            if ($token['code'] === T_STRING
840
                || $token['code'] === T_OBJECT_OPERATOR
841
            ) {
842
                $newContent  = '';
843
                $oldStackPtr = $stackPtr;
844
                while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
845
                    $newContent .= $tokens[$stackPtr]['content'];
846
                    $stackPtr++;
847
                }
848
 
849
                if ($newContent !== '' && $newContent !== '.') {
850
                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
851
                    if (ctype_digit($newContent) === true) {
852
                        $finalTokens[($newStackPtr - 1)]['code']
853
                            = constant('T_LNUMBER');
854
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
855
                    } else {
856
                        $finalTokens[($newStackPtr - 1)]['code']
857
                            = constant('T_DNUMBER');
858
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
859
                    }
860
 
861
                    $stackPtr--;
862
                } else {
863
                    $stackPtr = $oldStackPtr;
864
                }
865
            }//end if
866
        }//end for
867
 
868
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
869
            echo "\t*** END TOKENIZING ***".PHP_EOL;
870
        }
871
 
872
        return $finalTokens;
873
 
874
    }//end tokenizeString()
875
 
876
 
877
    /**
     * Performs additional processing after main tokenizing.
     *
     * This additional processing looks for properties, labels and objects:
     *
     * - An opening curly bracket without a scope condition is pushed onto a
     *   stack of "property openers". If it is preceded by "Name =" (where
     *   Name may be a dotted chain such as Foo.Bar.prototype), the first
     *   token of that chain is converted to T_OBJECT and given the bracket
     *   pair as its scope opener and closer.
     * - A closing curly bracket without a scope condition, or one whose
     *   scope condition is a T_OBJECT, pops the stack.
     * - A colon that is not a scope opener and is not preceded by an inline
     *   THEN on the same line marks the T_STRING to its left as a
     *   T_PROPERTY (when inside a property opener) or as a T_LABEL
     *   (otherwise). A property directly followed by a curly bracket is
     *   given that bracket pair as its scope.
     *
     * The token array is modified in place.
     *
     * @param array  &$tokens The array of tokens to process.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

        $numTokens  = count($tokens);
        $classStack = array();

        // Tokens that may appear in the dotted name on the left of "= {".
        // Built once here because it never changes between iterations.
        $wantedTokens = array(
                         T_STRING,
                         T_PROTOTYPE,
                         T_OBJECT_OPERATOR,
                        );

        for ($i = 0; $i < $numTokens; $i++) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $tokens[$i]['type'];
                $content = str_replace($eolChar, '\n', $tokens[$i]['content']);
                echo str_repeat("\t", count($classStack));

                echo "\tProcess token $i: $type => $content".PHP_EOL;
            }

            if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
                && isset($tokens[$i]['scope_condition']) === false
            ) {
                $classStack[] = $i;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t=> Found property opener".PHP_EOL;
                }

                // This could also be an object definition.
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                        // Non-whitespace content.
                        break;
                    }
                }

                // Stop here if the scan ran off the start of the array, the
                // bracket is not being assigned, or the bracket is unmatched
                // (no closer to use as the end of the scope).
                if ($x < 0
                    || $tokens[$x]['code'] !== T_EQUAL
                    || isset($tokens[$i]['bracket_closer']) === false
                ) {
                    continue;
                }

                // Skip back over empty tokens to whatever is being assigned to.
                for ($x--; $x >= 0; $x--) {
                    if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                        break;
                    }
                }

                if ($x < 0
                    || ($tokens[$x]['code'] !== T_STRING
                    && $tokens[$x]['code'] !== T_PROTOTYPE)
                ) {
                    continue;
                }

                // Find the first string in this definition.
                // E.g., WantedString.DontWantThis.prototype
                for ($x--; $x >= 0; $x--) {
                    if (in_array($tokens[$x]['code'], $wantedTokens, true) === false) {
                        break;
                    }
                }

                // Step forward onto the first token of the name. This also
                // covers the case where the name starts at token 0 and the
                // loop above ended with $x at -1.
                $x++;

                $closer = $tokens[$i]['bracket_closer'];
                $tokens[$i]['scope_condition']      = $x;
                $tokens[$i]['scope_closer']         = $closer;
                $tokens[$closer]['scope_condition'] = $x;
                $tokens[$closer]['scope_opener']    = $i;
                $tokens[$x]['scope_opener']         = $i;
                $tokens[$x]['scope_closer']         = $closer;
                $tokens[$x]['code']                 = T_OBJECT;
                $tokens[$x]['type']                 = 'T_OBJECT';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $x converted from T_STRING to T_OBJECT *".PHP_EOL;
                    echo str_repeat("\t", count($classStack));
                    echo "\t* set scope opener ($i) and closer ($closer) for token $x *".PHP_EOL;
                }
            } else if ($tokens[$i]['code'] === T_CLOSE_CURLY_BRACKET
                && (isset($tokens[$i]['scope_condition']) === false
                || $tokens[$tokens[$i]['scope_condition']]['code'] === T_OBJECT)
            ) {
                $opener = array_pop($classStack);

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t=> Found property closer for $opener".PHP_EOL;
                }
            } else if ($tokens[$i]['code'] === T_COLON) {
                // If it is a scope opener, it belongs to a
                // DEFAULT or CASE statement.
                if (isset($tokens[$i]['scope_condition']) === true) {
                    continue;
                }

                // Make sure this is not part of an inline IF statement.
                // Only tokens on the same line as the colon are examined.
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
                        continue 2;
                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                        break;
                    }
                }

                // The string to the left of the colon is either a property or label.
                for ($label = ($i - 1); $label >= 0; $label--) {
                    if (in_array($tokens[$label]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                        break;
                    }
                }

                // $label is -1 when only empty tokens precede the colon.
                if ($label < 0 || $tokens[$label]['code'] !== T_STRING) {
                    continue;
                }

                if (empty($classStack) === false) {
                    $tokens[$label]['code'] = T_PROPERTY;
                    $tokens[$label]['type'] = 'T_PROPERTY';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                    }

                    // If the next token after the colon is a curly brace,
                    // this property is actually an object, so we can give it
                    // an opener and closer.
                    for ($x = ($i + 1); $x < $numTokens; $x++) {
                        if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens, true) === false) {
                            break;
                        }
                    }

                    // $x equals $numTokens when nothing but empty tokens
                    // follow the colon; an unmatched brace has no closer.
                    if ($x < $numTokens
                        && $tokens[$x]['code'] === T_OPEN_CURLY_BRACKET
                        && isset($tokens[$x]['bracket_closer']) === true
                    ) {
                        $closer = $tokens[$x]['bracket_closer'];
                        $tokens[$label]['scope_opener']     = $x;
                        $tokens[$label]['scope_closer']     = $closer;
                        $tokens[$x]['scope_condition']      = $label;
                        $tokens[$x]['scope_closer']         = $closer;
                        $tokens[$closer]['scope_condition'] = $label;
                        $tokens[$closer]['scope_opener']    = $x;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo str_repeat("\t", count($classStack));
                            echo "\t* set scope opener ($x) and closer ($closer) for token $label *".PHP_EOL;
                        }
                    }
                } else {
                    $tokens[$label]['code'] = T_LABEL;
                    $tokens[$label]['type'] = 'T_LABEL';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                    }
                }
            }//end if
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

    }//end processAdditional()
1050
 
1051
 
1052
}//end class
1053
 
1054
?>