Subversion-Projekte lars-tiefland.prado

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/* vim: set expandtab tabstop=4 shiftwidth=4: */
3
//
4
// +----------------------------------------------------------------------+
5
// | PHP Version 4                                                        |
6
// +----------------------------------------------------------------------+
7
// | Copyright (c) 1997-2002 The PHP Group                                |
8
// +----------------------------------------------------------------------+
9
// | This source file is subject to version 2.02 of the PHP license,      |
10
// | that is bundled with this package in the file LICENSE, and is        |
11
// | available through the world-wide-web at                              |
12
// | http://www.php.net/license/3_0.txt.                                  |
13
// | If you did not receive a copy of the PHP license and are unable to   |
14
// | obtain it through the world-wide-web, please send a note to          |
15
// | license@php.net so we can mail you a copy immediately.               |
16
// +----------------------------------------------------------------------+
17
// | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |
18
// | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more  |
19
// | Authors: Many @ Sitepointforums Advanced PHP Forums                  |
20
// +----------------------------------------------------------------------+
21
//
22
// $Id: HTMLSax3.php 1398 2006-09-08 19:31:03Z xue $
23
//
24
/**
25
* Main parser components
26
* @package    System.Security.SafeHtml
27
* @version $Id: HTMLSax3.php 1398 2006-09-08 19:31:03Z xue $
28
*/
29
/**
30
* Required classes
31
*/
32
 
33
require_once(dirname(__FILE__).'/HTMLSax3/States.php');
34
require_once(dirname(__FILE__).'/HTMLSax3/Decorators.php');
35
 
36
/**
 * Base state parser.
 *
 * Owns the text being parsed, the cursor into that text, the user supplied
 * callback objects/method names and the table of state objects that do the
 * actual tokenising. The concrete subclasses supply scanUntilCharacters()
 * and ignoreWhitespace() and fill in the list of known parser options.
 *
 * @package System.Security.SafeHtml
 * @access protected
 * @abstract
 */
class TSax3_StateParser {
    /**
     * Instance of the user front end class that is passed to callbacks.
     * @var TSax3
     */
    public $htmlsax;
    /**
     * User defined object for handling elements.
     * @var object
     */
    public $handler_object_element;
    /**
     * User defined open tag handler method.
     * @var string
     */
    public $handler_method_opening;
    /**
     * User defined close tag handler method.
     * @var string
     */
    public $handler_method_closing;
    /**
     * User defined object for handling data in elements.
     * @var object
     */
    public $handler_object_data;
    /**
     * User defined data handler method.
     * @var string
     */
    public $handler_method_data;
    /**
     * User defined object for handling processing instructions.
     * @var object
     */
    public $handler_object_pi;
    /**
     * User defined processing instruction handler method.
     * @var string
     */
    public $handler_method_pi;
    /**
     * User defined object for handling JSP/ASP tags.
     * @var object
     */
    public $handler_object_jasp;
    /**
     * User defined JSP/ASP handler method.
     * @var string
     */
    public $handler_method_jasp;
    /**
     * User defined object for handling XML escapes.
     * @var object
     */
    public $handler_object_escape;
    /**
     * User defined XML escape handler method.
     * @var string
     */
    public $handler_method_escape;
    /**
     * User defined handler object or NullHandler.
     * @var object
     */
    public $handler_default;
    /**
     * Parser options determining parsing behavior, keyed by option name.
     *
     * Public because TSax3::set_option() (TSax3 is not a subclass of this
     * class) reads and writes it directly.
     * @var array
     */
    public $parser_options = array();
    /**
     * XML document being parsed.
     * NOTE(review): kept protected; confirm that no state class in
     * HTMLSax3/States.php reads it directly.
     * @var string
     */
    protected $rawtext;
    /**
     * Position in the XML document relative to start (0).
     *
     * Public because TSax3::get_current_position() reads it directly.
     * @var int
     */
    public $position;
    /**
     * Length of the XML document as reported by strlen().
     *
     * Public because TSax3::get_length() reads it directly.
     * @var int
     */
    public $length;
    /**
     * State objects, indexed by the TSAX3_STATE_* constants.
     * @var array
     */
    protected $State = array();

    const TSAX3_STATE_STOP = 0;
    const TSAX3_STATE_START = 1;
    const TSAX3_STATE_TAG = 2;
    const TSAX3_STATE_OPENING_TAG = 3;
    const TSAX3_STATE_CLOSING_TAG = 4;
    const TSAX3_STATE_ESCAPE = 6;
    const TSAX3_STATE_JASP = 7;
    const TSAX3_STATE_PI = 8;

    /**
     * Stores the front end instance and creates one object per parser state.
     *
     * @param TSax3 $htmlsax instance of user front end class
     */
    protected function __construct($htmlsax) {
        $this->htmlsax = $htmlsax;
        $this->State[self::TSAX3_STATE_START] = new TSax3_StartingState();

        $this->State[self::TSAX3_STATE_CLOSING_TAG] = new TSax3_ClosingTagState();
        $this->State[self::TSAX3_STATE_TAG] = new TSax3_TagState();
        $this->State[self::TSAX3_STATE_OPENING_TAG] = new TSax3_OpeningTagState();

        $this->State[self::TSAX3_STATE_PI] = new TSax3_PiState();
        $this->State[self::TSAX3_STATE_JASP] = new TSax3_JaspState();
        $this->State[self::TSAX3_STATE_ESCAPE] = new TSax3_EscapeState();
    }

    /**
     * Moves the position back one character.
     *
     * @return void
     */
    public function unscanCharacter() {
        $this->position -= 1;
    }

    /**
     * Moves the position forward one character.
     *
     * @return void
     */
    public function ignoreCharacter() {
        $this->position += 1;
    }

    /**
     * Returns the character under the cursor and advances the cursor, or
     * returns null when the cursor is at or past the end of the document.
     *
     * @return string|null
     */
    public function scanCharacter() {
        if ($this->position < $this->length) {
            // Square brackets: the curly-brace offset syntax was removed in PHP 8.
            return $this->rawtext[$this->position++];
        }
        return null;
    }

    /**
     * Returns the text between the current position and the next occurrence
     * of the supplied string, leaving the cursor on that occurrence. When
     * the string is not found the rest of the document is returned and the
     * cursor is placed at the end.
     *
     * @param string $string string to search until
     * @return string
     */
    public function scanUntilString($string) {
        $start = $this->position;
        // ignoreCharacter() can push the cursor past the end; strpos() must
        // not be called with such an offset (it throws on PHP 8).
        if ($start >= $this->length) {
            $this->position = $this->length;
            return '';
        }
        $found = strpos($this->rawtext, $string, $start);
        $this->position = ($found === false) ? $this->length : $found;
        return substr($this->rawtext, $start, $this->position - $start);
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument.
     * Empty here; the subclasses provide the implementation.
     *
     * @param string $string characters to search until
     * @return string
     * @abstract
     */
    public function scanUntilCharacters($string) {}

    /**
     * Moves the position forward past any whitespace characters.
     * Empty here; the subclasses provide the implementation.
     *
     * @return void
     * @abstract
     */
    public function ignoreWhitespace() {}

    /**
     * Begins the parsing operation: wraps the registered handlers in the
     * decorators selected by the parser options, loads the document and
     * runs the state machine via _parse().
     *
     * Note that the decorators are applied on every call, so calling this
     * method again on the same instance wraps the already wrapped handlers.
     *
     * @param string $data XML document to parse
     * @return void
     */
    public function parse($data) {
        if ($this->isOptionOn('XML_OPTION_TRIM_DATA_NODES')) {
            $this->wrapDataHandler('TSax3_Trim', 'trimData');
        }
        if ($this->isOptionOn('XML_OPTION_CASE_FOLDING')) {
            $folding = new TSax3_CaseFolding(
                $this->handler_object_element,
                $this->handler_method_opening,
                $this->handler_method_closing);
            $this->handler_object_element =& $folding;
            $this->handler_method_opening = 'foldOpen';
            $this->handler_method_closing = 'foldClose';
        }
        if ($this->isOptionOn('XML_OPTION_LINEFEED_BREAK')) {
            $this->wrapDataHandler('TSax3_Linefeed', 'breakData');
        }
        if ($this->isOptionOn('XML_OPTION_TAB_BREAK')) {
            $this->wrapDataHandler('TSax3_Tab', 'breakData');
        }
        if ($this->isOptionOn('XML_OPTION_ENTITIES_UNPARSED')) {
            $this->wrapDataHandler('TSax3_Entities_Unparsed', 'breakData');
        }
        if ($this->isOptionOn('XML_OPTION_ENTITIES_PARSED')) {
            $this->wrapDataHandler('TSax3_Entities_Parsed', 'breakData');
        }
        if ($this->isOptionOn('XML_OPTION_STRIP_ESCAPES')) {
            $stripper = new TSax3_Escape_Stripper(
                $this->handler_object_escape,
                $this->handler_method_escape);
            $this->handler_object_escape =& $stripper;
            $this->handler_method_escape = 'strip';
        }
        $this->rawtext = $data;
        $this->length = strlen($data);
        $this->position = 0;
        $this->_parse();
    }

    /**
     * Runs the state machine: the current state object parses and returns
     * the next state, until a state returns TSAX3_STATE_STOP (compared
     * loosely) or the cursor reaches the end of the document.
     *
     * @param int $state TSAX3_STATE_* constant of the state to start with
     * @return void
     */
    public function _parse($state = self::TSAX3_STATE_START) {
        do {
            $state = $this->State[$state]->parse($this);
        } while ($state != self::TSAX3_STATE_STOP &&
                    $this->position < $this->length);
    }

    /**
     * Tells whether a parser option is known and set to 1.
     *
     * @param string $name option name
     * @return boolean
     */
    private function isOptionOn($name) {
        return isset($this->parser_options[$name])
            && $this->parser_options[$name] == 1;
    }

    /**
     * Wraps the current data handler in a new decorator instance and makes
     * the decorator the data handler.
     *
     * @param string $decoratorClass name of the decorator class to create
     * @param string $method decorator method to use as the data callback
     * @return void
     */
    private function wrapDataHandler($decoratorClass, $method) {
        // A fresh local per wrap: if the decorator keeps its first argument
        // by reference (not visible here), reusing one variable for several
        // decorators would make a later decorator wrap itself.
        $wrapper = new $decoratorClass(
            $this->handler_object_data,
            $this->handler_method_data);
        // Rebind instead of assign: the property may still be a PHP
        // reference to handler_default, and a plain assignment would
        // overwrite that shared value too.
        $this->handler_object_data =& $wrapper;
        $this->handler_method_data = $method;
    }
}
322
 
323
/**
 * Parser for PHP versions below 4.3.0. Scans character by character, which
 * is slower than the strspn()/strcspn() based TSax3_StateParser_Gtet430.
 *
 * @package System.Security.SafeHtml
 * @access protected
 * @see TSax3_StateParser_Gtet430
 */
class TSax3_StateParser_Lt430 extends TSax3_StateParser {
    /**
     * Constructs the parser and registers the available parser options,
     * all switched off.
     *
     * @param TSax3 $htmlsax instance of user front end class
     */
    public function __construct(& $htmlsax) {
        parent::__construct($htmlsax);
        $this->parser_options['XML_OPTION_TRIM_DATA_NODES'] = 0;
        $this->parser_options['XML_OPTION_CASE_FOLDING'] = 0;
        $this->parser_options['XML_OPTION_LINEFEED_BREAK'] = 0;
        $this->parser_options['XML_OPTION_TAB_BREAK'] = 0;
        $this->parser_options['XML_OPTION_ENTITIES_PARSED'] = 0;
        $this->parser_options['XML_OPTION_ENTITIES_UNPARSED'] = 0;
        $this->parser_options['XML_OPTION_STRIP_ESCAPES'] = 0;
    }

    /**
     * Returns the text from the current position up to (not including) the
     * first character that appears in the supplied string, and leaves the
     * cursor on that character (or at the end of the document).
     *
     * @param string $string characters to search until
     * @return string
     */
    public function scanUntilCharacters($string) {
        $startpos = $this->position;
        // Square brackets: the curly-brace offset syntax was removed in PHP 8.
        while ($this->position < $this->length
            && strpos($string, $this->rawtext[$this->position]) === false) {
            $this->position++;
        }
        return substr($this->rawtext, $startpos, $this->position - $startpos);
    }

    /**
     * Moves the position forward past any space, newline, carriage return
     * or tab characters.
     *
     * @return void
     */
    public function ignoreWhitespace() {
        while ($this->position < $this->length
            && strpos(" \n\r\t", $this->rawtext[$this->position]) !== false) {
            $this->position++;
        }
    }

    /**
     * Begins the parsing operation; all work is delegated to the parent.
     *
     * @param string $data XML document to parse
     * @return void
     */
    public function parse($data) {
        parent::parse($data);
    }
}
387
 
388
/**
 * Parser for PHP versions equal to or greater than 4.3.0. Relies on
 * strspn()/strcspn() with an offset instead of the character loop used by
 * TSax3_StateParser_Lt430.
 *
 * @package System.Security.SafeHtml
 * @access protected
 * @see TSax3_StateParser_Lt430
 */
class TSax3_StateParser_Gtet430 extends TSax3_StateParser {
    /**
     * Constructs the parser and registers the available parser options,
     * all switched off.
     *
     * @param TSax3 $htmlsax instance of user front end class
     */
    public function __construct(& $htmlsax) {
        parent::__construct($htmlsax);
        $known = array(
            'XML_OPTION_TRIM_DATA_NODES',
            'XML_OPTION_CASE_FOLDING',
            'XML_OPTION_LINEFEED_BREAK',
            'XML_OPTION_TAB_BREAK',
            'XML_OPTION_ENTITIES_PARSED',
            'XML_OPTION_ENTITIES_UNPARSED',
            'XML_OPTION_STRIP_ESCAPES',
        );
        foreach ($known as $option) {
            $this->parser_options[$option] = 0;
        }
    }

    /**
     * Returns the text from the current position up to (not including) the
     * first character that appears in the supplied string, and advances the
     * cursor by the length of that text.
     *
     * @param string $string characters to search until
     * @return string
     */
    public function scanUntilCharacters($string) {
        $from = $this->position;
        $span = strcspn($this->rawtext, $string, $from);
        $this->position += $span;
        return substr($this->rawtext, $from, $span);
    }

    /**
     * Advances the cursor over any run of space, newline, carriage return
     * or tab characters found at the current position.
     *
     * @return void
     */
    public function ignoreWhitespace() {
        $skipped = strspn($this->rawtext, " \n\r\t", $this->position);
        $this->position += $skipped;
    }

    /**
     * Begins the parsing operation; all work is delegated to the parent.
     *
     * @param string $data XML document to parse
     * @return void
     */
    public function parse($data) {
        parent::parse($data);
    }
}
447
 
448
/**
 * Fallback handler whose single method serves as the callback for every
 * event the user did not register a handler for.
 *
 * @package System.Security.SafeHtml
 * @access protected
 */
class TSax3_NullHandler {
    /**
     * Accepts any call and does nothing.
     *
     * @return void
     */
    public function DoNothing() {
        // Intentionally empty.
    }
}
462
 
463
/**
 * User interface class. All user calls should only be made to this class;
 * it forwards them to a concrete TSax3_StateParser.
 *
 * @package System.Security.SafeHtml
 * @access public
 */
class TSax3 {
    /**
     * Instance of a concrete subclass of TSax3_StateParser.
     * @var TSax3_StateParser
     */
    private $state_parser;

    /**
     * Constructs TSax3, selecting the concrete StateParser subclass by PHP
     * version and installing a TSax3_NullHandler for all callbacks.<br />
     * <b>Example:</b>
     * <pre>
     * $myHandler = new MyHandler();
     * $parser = new TSax3();
     * $parser->set_object($myHandler);
     * $parser->set_option('XML_OPTION_CASE_FOLDING');
     * $parser->set_element_handler('myOpenHandler','myCloseHandler');
     * $parser->set_data_handler('myDataHandler');
     * $parser->parse($xml);
     * </pre>
     * set_object() must be called before the set_*_handler() methods: each
     * of them captures the handler object that is current at that moment.
     */
    public function __construct() {
        if (version_compare(phpversion(), '4.3', 'ge')) {
            $this->state_parser = new TSax3_StateParser_Gtet430($this);
        } else {
            $this->state_parser = new TSax3_StateParser_Lt430($this);
        }
        $nullhandler = new TSax3_NullHandler();
        $this->set_object($nullhandler);
        $this->set_element_handler('DoNothing', 'DoNothing');
        $this->set_data_handler('DoNothing');
        $this->set_pi_handler('DoNothing');
        $this->set_jasp_handler('DoNothing');
        $this->set_escape_handler('DoNothing');
    }

    /**
     * Sets the user defined handler object.
     *
     * The object is stored by value (an object handle), not as a PHP
     * reference to the caller's variable, so reassigning that variable
     * later does not change the registered handler.
     *
     * @param object $object handler object containing SAX callback methods
     * @return mixed true on success, the PEAR error if the argument is not
     *               an object
     */
    public function set_object($object) {
        if (is_object($object)) {
            $this->state_parser->handler_default = $object;
            return true;
        }
        require_once('PEAR.php');
        return PEAR::raiseError('TSax3::set_object requires '.
            'an object instance');
    }

    /**
     * Sets a parser option. By default all options are switched off.<br />
     * <b>Available options:</b>
     * <ul>
     * <li>XML_OPTION_TRIM_DATA_NODES: trim whitespace off the beginning
     * and end of data passed to the data handler</li>
     * <li>XML_OPTION_CASE_FOLDING: wrap the element handlers in the case
     * folding decorator</li>
     * <li>XML_OPTION_LINEFEED_BREAK: linefeeds result in additional data
     * handler calls</li>
     * <li>XML_OPTION_TAB_BREAK: tabs result in additional data handler
     * calls</li>
     * <li>XML_OPTION_ENTITIES_UNPARSED: XML entities are returned as
     * separate data handler calls in unparsed form</li>
     * <li>XML_OPTION_ENTITIES_PARSED: (PHP 4.3.0+ only) XML entities are
     * returned as separate data handler calls and are parsed with
     * PHP's html_entity_decode() function</li>
     * <li>XML_OPTION_STRIP_ESCAPES: strips out the -- -- comment markers
     * or CDATA markup inside an XML escape, if found.</li>
     * </ul>
     * To get HTMLSax to behave in the same way as the native PHP SAX parser,
     * using its default state, you need to switch on
     * XML_OPTION_LINEFEED_BREAK, XML_OPTION_ENTITIES_PARSED and
     * XML_OPTION_CASE_FOLDING.
     *
     * NOTE(review): this reads and writes the state parser's parser_options
     * property directly, so that property has to be accessible from this
     * class.
     *
     * @param string $name name of parser option
     * @param int $value (optional) 1 to switch on, 0 for off
     * @return mixed true on success, the PEAR error if the option is unknown
     */
    public function set_option($name, $value=1) {
        if (array_key_exists($name, $this->state_parser->parser_options)) {
            $this->state_parser->parser_options[$name] = $value;
            return true;
        }
        require_once('PEAR.php');
        return PEAR::raiseError('TSax3::set_option('.$name.') illegal');
    }

    /**
     * Sets the data handler method which deals with the contents of XML
     * elements.<br />
     * The handler method must accept two arguments, the first being an
     * instance of TSax3 and the second being the contents of an
     * XML element e.g.
     * <pre>
     * function myDataHandler($parser, $data){}
     * </pre>
     *
     * @param string $data_method name of method
     * @return void
     * @see set_object
     */
    public function set_data_handler($data_method) {
        $this->state_parser->handler_object_data = $this->state_parser->handler_default;
        $this->state_parser->handler_method_data = $data_method;
    }

    /**
     * Sets the open and close tag handlers.
     * <br />The open handler method must accept three arguments; the parser,
     * the tag name and an array of attributes e.g.
     * <pre>
     * function myOpenHandler($parser, $tagname, $attrs=array()){}
     * </pre>
     * The close handler method must accept two arguments; the parser and
     * the tag name e.g.
     * <pre>
     * function myCloseHandler($parser, $tagname){}
     * </pre>
     *
     * @param string $opening_method name of open method
     * @param string $closing_method name of close method
     * @return void
     * @see set_object
     */
    public function set_element_handler($opening_method, $closing_method) {
        $this->state_parser->handler_object_element = $this->state_parser->handler_default;
        $this->state_parser->handler_method_opening = $opening_method;
        $this->state_parser->handler_method_closing = $closing_method;
    }

    /**
     * Sets the processing instruction handler method e.g. for PHP open
     * and close tags.<br />
     * The handler method must accept three arguments; the parser, the
     * PI target and data inside the PI
     * <pre>
     * function myPIHandler($parser, $target, $data){}
     * </pre>
     *
     * @param string $pi_method name of method
     * @return void
     * @see set_object
     */
    public function set_pi_handler($pi_method) {
        $this->state_parser->handler_object_pi = $this->state_parser->handler_default;
        $this->state_parser->handler_method_pi = $pi_method;
    }

    /**
     * Sets the XML escape handler method e.g. for comments and doctype
     * declarations.<br />
     * The handler method must accept two arguments; the parser and the
     * contents of the escaped section
     * <pre>
     * function myEscapeHandler($parser, $data){}
     * </pre>
     *
     * @param string $escape_method name of method
     * @return void
     * @see set_object
     */
    public function set_escape_handler($escape_method) {
        $this->state_parser->handler_object_escape = $this->state_parser->handler_default;
        $this->state_parser->handler_method_escape = $escape_method;
    }

    /**
     * Sets the JSP/ASP markup handler.<br />
     * The handler method must accept two arguments; the parser and
     * body of the JASP tag
     * <pre>
     * function myJaspHandler($parser, $data){}
     * </pre>
     *
     * @param string $jasp_method name of method
     * @return void
     * @see set_object
     */
    public function set_jasp_handler($jasp_method) {
        $this->state_parser->handler_object_jasp = $this->state_parser->handler_default;
        $this->state_parser->handler_method_jasp = $jasp_method;
    }

    /**
     * Returns the current string position of the "cursor" inside the XML
     * document.
     * <br />Intended for use from within a user defined handler called
     * via the $parser reference e.g.
     * <pre>
     * function myDataHandler($parser, $data) {
     *     echo( 'Current position: '.$parser->get_current_position() );
     * }
     * </pre>
     *
     * @return int
     * @see get_length
     */
    public function get_current_position() {
        return $this->state_parser->position;
    }

    /**
     * Returns the string length of the XML document being parsed.
     *
     * @return int
     */
    public function get_length() {
        return $this->state_parser->length;
    }

    /**
     * Start parsing some XML.
     *
     * @param string $data XML document
     * @return void
     */
    public function parse($data) {
        $this->state_parser->parse($data);
    }
}
695
?>