<?php
    // Taken from the Subversion project "lars-tiefland.prado", revision 1 (author: lars).
    /**
     *	base include file for SimpleTest
     *	@package	SimpleTest
     *	@subpackage	MockObjects
     *	@version	$Id: parser.php 1532 2006-12-01 12:28:55Z xue $
     */

    /**#@+
     * Lexer mode stack constants
     */
    // Lexer event codes. SimpleLexer passes one of these as the second
    // argument of every parser handler call. Each definition is guarded so
    // that a value defined before this file was loaded is left untouched.
    if (! defined('LEXER_ENTER')) {
        define('LEXER_ENTER', 1);       // token that entered a new mode
    }
    if (! defined('LEXER_MATCHED')) {
        define('LEXER_MATCHED', 2);     // token matched without a mode change
    }
    if (! defined('LEXER_UNMATCHED')) {
        define('LEXER_UNMATCHED', 3);   // text found between recognised tokens
    }
    if (! defined('LEXER_EXIT')) {
        define('LEXER_EXIT', 4);        // token that left the current mode
    }
    if (! defined('LEXER_SPECIAL')) {
        define('LEXER_SPECIAL', 5);     // token handled in a one-token mode
    }
    /**#@-*/

    /**
     *    Compounded regular expression. Any of
     *    the contained patterns could match and
     *    when one does, its label is returned.
     *    @package SimpleTest
     *    @subpackage WebTester
     */
    class ParallelRegex {
        protected $_patterns;
        protected $_labels;
        protected $_regex;
        protected $_case;

        /**
         *    Constructor. Starts with no patterns.
         *    @param boolean $case    True for case sensitive, false
         *                            for insensitive.
         *    @access public
         */
        function __construct($case) {
            $this->_case = $case;
            $this->_patterns = array();
            $this->_labels = array();
            $this->_regex = null;
        }

        /**
         *    Class-named initialiser, kept so that code calling
         *    it explicitly keeps working. It is declared after
         *    __construct() so that __construct() is the one the
         *    runtime picks as the constructor.
         *    @param boolean $case    True for case sensitive, false
         *                            for insensitive.
         *    @access public
         */
        function ParallelRegex($case) {
            // self:: rather than $this-> so that a subclass
            // constructor is never re-entered from here.
            self::__construct($case);
        }

        /**
         *    Adds a pattern with an optional label.
         *    @param string $pattern      Perl style regex, but ( and )
         *                                lose the usual meaning.
         *    @param string $label        Label of regex to be returned
         *                                on a match.
         *    @access public
         */
        function addPattern($pattern, $label = true) {
            $count = count($this->_patterns);
            $this->_patterns[$count] = $pattern;
            $this->_labels[$count] = $label;
            // Force the compound expression to be rebuilt on next use.
            $this->_regex = null;
        }

        /**
         *    Attempts to match all patterns at once against
         *    a string.
         *    @param string $subject      String to match against.
         *    @param string $match        Passed by reference. Receives
         *                                the first matched portion of
         *                                the subject, or an empty
         *                                string when nothing matched.
         *    @return boolean/string      False if nothing matched,
         *                                otherwise the label of the
         *                                pattern that matched (true
         *                                when no label was given).
         *    @access public
         */
        function match($subject, &$match) {
            $match = '';
            if (count($this->_patterns) == 0) {
                return false;
            }
            if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
                return false;
            }
            $match = $matches[0];
            // Group N belongs to pattern N - 1. Groups that did not take
            // part in the match are empty strings; compare strictly so
            // that a matched "0" is still recognised.
            for ($i = 1, $count = count($matches); $i < $count; $i++) {
                if ($matches[$i] !== '') {
                    return $this->_labels[$i - 1];
                }
            }
            return true;
        }

        /**
         *    Compounds the patterns into a single
         *    regular expression separated with the
         *    "or" operator. Caches the regex.
         *    Will automatically escape (, ) and / tokens.
         *    The stored patterns themselves are left
         *    untouched so the expression can be rebuilt
         *    after further patterns are added.
         *    @return string        Complete regex with delimiters
         *                          and flags.
         *    @access private
         */
        function _getCompoundedRegex() {
            if ($this->_regex === null) {
                $groups = array();
                foreach ($this->_patterns as $pattern) {
                    $groups[] = '(' . str_replace(
                            array('/', '(', ')'),
                            array('\/', '\(', '\)'),
                            $pattern) . ')';
                }
                $this->_regex = '/' . implode('|', $groups) . '/' . $this->_getPerlMatchingFlags();
            }
            return $this->_regex;
        }

        /**
         *    Accessor for perl regex mode flags to use.
         *    @return string       Perl regex flags.
         *    @access private
         */
        function _getPerlMatchingFlags() {
            return ($this->_case ? "msS" : "msSi");
        }
    }

    /**
     *    States for a stack machine.
     *    @package SimpleTest
     *    @subpackage WebTester
     */
    class SimpleStateStack {
        protected $_stack;

        /**
         *    Constructor. Starts in named state.
         *    @param string $start        Starting state name.
         *    @access public
         */
        function __construct($start) {
            $this->_stack = array($start);
        }

        /**
         *    Class-named initialiser, kept so that code calling
         *    it explicitly keeps working. It is declared after
         *    __construct() so that __construct() is the one the
         *    runtime picks as the constructor.
         *    @param string $start        Starting state name.
         *    @access public
         */
        function SimpleStateStack($start) {
            self::__construct($start);
        }

        /**
         *    Accessor for current state.
         *    @return string       State on top of the stack.
         *    @access public
         */
        function getCurrent() {
            return $this->_stack[count($this->_stack) - 1];
        }

        /**
         *    Adds a state to the stack and sets it
         *    to be the current state.
         *    @param string $state        New state.
         *    @access public
         */
        function enter($state) {
            array_push($this->_stack, $state);
        }

        /**
         *    Leaves the current state and reverts
         *    to the previous one. The starting state
         *    is never removed.
         *    @return boolean    False if we drop off
         *                       the bottom of the list.
         *    @access public
         */
        function leave() {
            if (count($this->_stack) == 1) {
                return false;
            }
            array_pop($this->_stack);
            return true;
        }
    }

    /**
     *    Accepts text and breaks it into tokens.
     *    Some optimisation to make sure the
     *    content is only scanned by the PHP regex
     *    parser once. Lexer modes must not start
     *    with leading underscores.
     *    @package SimpleTest
     *    @subpackage WebTester
     */
    class SimpleLexer {
        protected $_regexes;
        protected $_parser;
        protected $_mode;
        protected $_mode_handlers;
        protected $_case;

        /**
         *    Sets up the lexer in case insensitive matching
         *    by default.
         *    @param SimpleSaxParser $parser  Handling strategy by
         *                                    reference.
         *    @param string $start            Starting handler.
         *    @param boolean $case            True for case sensitive.
         *    @access public
         */
        function __construct($parser, $start = "accept", $case = false) {
            $this->_case = $case;
            $this->_regexes = array();
            $this->_parser = $parser;
            $this->_mode = new SimpleStateStack($start);
            $this->_mode_handlers = array($start => $start);
        }

        /**
         *    Class-named initialiser, kept so that subclasses and
         *    other code calling it explicitly keep working. It is
         *    declared after __construct() so that __construct() is
         *    the one the runtime picks as the constructor.
         *    @param SimpleSaxParser $parser  Handling strategy by
         *                                    reference.
         *    @param string $start            Starting handler.
         *    @param boolean $case            True for case sensitive.
         *    @access public
         */
        function SimpleLexer($parser, $start = "accept", $case = false) {
            // self:: rather than $this-> so that a subclass
            // constructor is never re-entered from here.
            self::__construct($parser, $start, $case);
        }

        /**
         *    Adds a token search pattern for a particular
         *    parsing mode. The pattern does not change the
         *    current mode.
         *    @param string $pattern      Perl style regex, but ( and )
         *                                lose the usual meaning.
         *    @param string $mode         Should only apply this
         *                                pattern when dealing with
         *                                this type of input.
         *    @access public
         */
        function addPattern($pattern, $mode = "accept") {
            $this->_registerPattern($pattern, $mode, true, $mode);
        }

        /**
         *    Adds a pattern that will enter a new parsing
         *    mode. Useful for entering parenthesis, strings,
         *    tags, etc.
         *    @param string $pattern      Perl style regex, but ( and )
         *                                lose the usual meaning.
         *    @param string $mode         Should only apply this
         *                                pattern when dealing with
         *                                this type of input.
         *    @param string $new_mode     Change parsing to this new
         *                                nested mode.
         *    @access public
         */
        function addEntryPattern($pattern, $mode, $new_mode) {
            $this->_registerPattern($pattern, $mode, $new_mode, $new_mode);
        }

        /**
         *    Adds a pattern that will exit the current mode
         *    and re-enter the previous one.
         *    @param string $pattern      Perl style regex, but ( and )
         *                                lose the usual meaning.
         *    @param string $mode         Mode to leave.
         *    @access public
         */
        function addExitPattern($pattern, $mode) {
            $this->_registerPattern($pattern, $mode, "__exit", $mode);
        }

        /**
         *    Adds a pattern that has a special mode. Acts as an entry
         *    and exit pattern in one go, effectively calling a special
         *    parser handler for this token only.
         *    @param string $pattern      Perl style regex, but ( and )
         *                                lose the usual meaning.
         *    @param string $mode         Should only apply this
         *                                pattern when dealing with
         *                                this type of input.
         *    @param string $special      Use this mode for this one token.
         *    @access public
         */
        function addSpecialPattern($pattern, $mode, $special) {
            $this->_registerPattern($pattern, $mode, "_$special", $special);
        }

        /**
         *    Shared implementation of the add*Pattern() methods.
         *    Creates the regex set for the mode on first use, adds
         *    the pattern under the given label and gives the handled
         *    mode a handler of the same name unless one is already
         *    mapped.
         *    @param string $pattern        Pattern to add.
         *    @param string $mode           Mode the pattern is searched in.
         *    @param mixed $label           Action reported on a match:
         *                                  true, a mode name, "__exit"
         *                                  or an underscore-prefixed
         *                                  special mode.
         *    @param string $handled_mode   Mode needing a default handler.
         *    @access private
         */
        function _registerPattern($pattern, $mode, $label, $handled_mode) {
            if (! isset($this->_regexes[$mode])) {
                $this->_regexes[$mode] = new ParallelRegex($this->_case);
            }
            $this->_regexes[$mode]->addPattern($pattern, $label);
            if (! isset($this->_mode_handlers[$handled_mode])) {
                $this->_mode_handlers[$handled_mode] = $handled_mode;
            }
        }

        /**
         *    Adds a mapping from a mode to another handler.
         *    @param string $mode        Mode to be remapped.
         *    @param string $handler     New target handler.
         *    @access public
         */
        function mapHandler($mode, $handler) {
            $this->_mode_handlers[$mode] = $handler;
        }

        /**
         *    Splits the page text into tokens. Will fail
         *    if the handlers report an error or if no
         *    content is consumed. If successful then each
         *    unparsed and parsed token invokes a call to the
         *    held listener.
         *    @param string $raw        Raw HTML text.
         *    @return boolean           True on success, else false.
         *    @access public
         */
        function parse($raw) {
            if (! isset($this->_parser)) {
                return false;
            }
            $length = strlen($raw);
            while (is_array($parsed = $this->_reduce($raw))) {
                list($raw, $unmatched, $matched, $mode) = $parsed;
                if (! $this->_dispatchTokens($unmatched, $matched, $mode)) {
                    return false;
                }
                if ($raw === '') {
                    return true;
                }
                // A match that consumed nothing would loop forever.
                if (strlen($raw) == $length) {
                    return false;
                }
                $length = strlen($raw);
            }
            if (! $parsed) {
                return false;
            }
            // No further tokens: the remainder is plain unmatched text.
            return $this->_invokeParser($raw, LEXER_UNMATCHED);
        }

        /**
         *    Sends the matched token and any leading unmatched
         *    text to the parser changing the lexer to a new
         *    mode if one is listed.
         *    @param string $unmatched    Unmatched leading portion.
         *    @param string $matched      Actual token match.
         *    @param string $mode         Mode after match. A boolean
         *                                mode causes no change.
         *    @return boolean             False if there was any error
         *                                from the parser.
         *    @access private
         */
        function _dispatchTokens($unmatched, $matched, $mode = false) {
            if (! $this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
                return false;
            }
            if (is_bool($mode)) {
                return $this->_invokeParser($matched, LEXER_MATCHED);
            }
            // The exit marker also starts with an underscore, so it
            // has to be tested before the special mode check.
            if ($this->_isModeEnd($mode)) {
                if (! $this->_invokeParser($matched, LEXER_EXIT)) {
                    return false;
                }
                return $this->_mode->leave();
            }
            if ($this->_isSpecialMode($mode)) {
                $this->_mode->enter($this->_decodeSpecial($mode));
                if (! $this->_invokeParser($matched, LEXER_SPECIAL)) {
                    return false;
                }
                return $this->_mode->leave();
            }
            // The new mode is entered first, so its own handler
            // receives the entering token.
            $this->_mode->enter($mode);
            return $this->_invokeParser($matched, LEXER_ENTER);
        }

        /**
         *    Tests to see if the new mode is actually to leave
         *    the current mode and pop an item from the matching
         *    mode stack.
         *    @param string $mode    Mode to test.
         *    @return boolean        True if this is the exit mode.
         *    @access private
         */
        function _isModeEnd($mode) {
            return ($mode === "__exit");
        }

        /**
         *    Test to see if the mode is one where this mode
         *    is entered for this token only and automatically
         *    leaves immediately afterwards.
         *    @param string $mode    Mode to test.
         *    @return boolean        True if the mode name starts
         *                           with an underscore.
         *    @access private
         */
        function _isSpecialMode($mode) {
            return (strncmp($mode, "_", 1) == 0);
        }

        /**
         *    Strips the magic underscore marking single token
         *    modes.
         *    @param string $mode    Mode to decode.
         *    @return string         Underlying mode name.
         *    @access private
         */
        function _decodeSpecial($mode) {
            return substr($mode, 1);
        }

        /**
         *    Calls the parser method mapped to the current
         *    mode. Empty content will be ignored. The lexer
         *    has a parser handler for each mode in the lexer.
         *    @param string $content        Text parsed.
         *    @param integer $is_match      One of the LEXER_* event
         *                                  codes describing the token.
         *    @return boolean               True for empty content,
         *                                  otherwise whatever the
         *                                  handler returned.
         *    @access private
         */
        function _invokeParser($content, $is_match) {
            if (($content === '') || ($content === false)) {
                return true;
            }
            $handler = $this->_mode_handlers[$this->_mode->getCurrent()];
            return $this->_parser->$handler($content, $is_match);
        }

        /**
         *    Tries to match a chunk of text and if successful
         *    removes the recognised chunk and any leading
         *    unparsed data. Empty strings will not be matched.
         *    A mode that has no patterns registered never matches.
         *    @param string $raw         The subject to parse. This is the
         *                               content that will be eaten.
         *    @return array/boolean      Four item list of the remaining
         *                               content, the unparsed leading
         *                               content, the recognised token
         *                               and finally the action the
         *                               parser is to take.
         *                               True if no match, false if there
         *                               is a parsing error.
         *    @access private
         */
        function _reduce($raw) {
            $current = $this->_mode->getCurrent();
            if (! isset($this->_regexes[$current])) {
                return true;
            }
            $match = '';
            if ($action = $this->_regexes[$current]->match($raw, $match)) {
                $unparsed_character_count = strpos($raw, $match);
                $unparsed = substr($raw, 0, $unparsed_character_count);
                $raw = substr($raw, $unparsed_character_count + strlen($match));
                return array($raw, $unparsed, $match, $action);
            }
            return true;
        }
    }

    /**
     *    Breaks HTML into SAX events.
     *    @package SimpleTest
     *    @subpackage WebTester
     */
    class SimpleHtmlLexer extends SimpleLexer {

        /**
         *    Sets up the lexer with case insensitive matching
         *    and adds the HTML handlers.
         *    @param SimpleSaxParser $parser  Handling strategy by
         *                                    reference.
         *    @access public
         */
        function __construct($parser) {
            // Initialise the base lexer through its class-named method,
            // which is callable whether or not the runtime also treats
            // it as a constructor.
            $this->SimpleLexer($parser, 'text');
            $this->mapHandler('text', 'acceptTextToken');
            $this->_addSkipping();
            foreach ($this->_getParsedTags() as $tag) {
                $this->_addTag($tag);
            }
            $this->_addInTagTokens();
        }

        /**
         *    Class-named initialiser, kept so that code calling
         *    it explicitly keeps working. It is declared after
         *    __construct() so that __construct() is the one the
         *    runtime picks as the constructor.
         *    @param SimpleSaxParser $parser  Handling strategy by
         *                                    reference.
         *    @access public
         */
        function SimpleHtmlLexer($parser) {
            self::__construct($parser);
        }

        /**
         *    List of parsed tags. Others are ignored.
         *    Order matters where one name is a prefix of
         *    another: 'frameset' is listed before 'frame'.
         *    @return array        List of searched for tags.
         *    @access private
         */
        function _getParsedTags() {
            return array('a', 'title', 'form', 'input', 'button', 'textarea', 'select',
                    'option', 'frameset', 'frame', 'label');
        }

        /**
         *    The lexer has to skip certain sections such
         *    as server code, client code and styles.
         *    @access private
         */
        function _addSkipping() {
            $this->mapHandler('css', 'ignore');
            $this->addEntryPattern('<style', 'text', 'css');
            $this->addExitPattern('</style>', 'css');
            $this->mapHandler('js', 'ignore');
            $this->addEntryPattern('<script', 'text', 'js');
            $this->addExitPattern('</script>', 'js');
            $this->mapHandler('comment', 'ignore');
            $this->addEntryPattern('<!--', 'text', 'comment');
            $this->addExitPattern('-->', 'comment');
        }

        /**
         *    Pattern matches to start and end a tag.
         *    @param string $tag          Name of tag to scan for.
         *    @access private
         */
        function _addTag($tag) {
            $this->addSpecialPattern("</$tag>", 'text', 'acceptEndToken');
            // The word boundary stops a short name from matching the
            // start of a longer one, e.g. "<a" inside "<abbr".
            $this->addEntryPattern('<' . $tag . '\b', 'text', 'tag');
        }

        /**
         *    Pattern matches to parse the inside of a tag
         *    including the attributes and their quoting.
         *    @access private
         */
        function _addInTagTokens() {
            $this->mapHandler('tag', 'acceptStartToken');
            $this->addSpecialPattern('\s+', 'tag', 'ignore');
            $this->_addAttributeTokens();
            $this->addExitPattern('/>', 'tag');
            $this->addExitPattern('>', 'tag');
        }

        /**
         *    Matches attributes that are either single quoted,
         *    double quoted or unquoted.
         *    @access private
         */
        function _addAttributeTokens() {
            $this->mapHandler('dq_attribute', 'acceptAttributeToken');
            $this->addEntryPattern('=\s*"', 'tag', 'dq_attribute');
            // A backslash-escaped quote must not end the value.
            $this->addPattern("\\\\\"", 'dq_attribute');
            $this->addExitPattern('"', 'dq_attribute');
            $this->mapHandler('sq_attribute', 'acceptAttributeToken');
            $this->addEntryPattern("=\s*'", 'tag', 'sq_attribute');
            $this->addPattern("\\\\'", 'sq_attribute');
            $this->addExitPattern("'", 'sq_attribute');
            $this->mapHandler('uq_attribute', 'acceptAttributeToken');
            $this->addSpecialPattern('=\s*[^>\s]*', 'tag', 'uq_attribute');
        }
    }

    /**
     *    Converts HTML tokens into selected SAX events.
     *    @package SimpleTest
     *    @subpackage WebTester
     */
    class SimpleHtmlSaxParser {
        protected $_lexer;
        protected $_listener;
        protected $_tag;
        protected $_attributes;
        protected $_current_attribute;

        /**
         *    Sets the listener.
         *    @param SimpleSaxListener $listener    SAX event handler.
         *    @access public
         */
        function __construct($listener) {
            $this->_listener = $listener;
            $this->_lexer = $this->createLexer($this);
            $this->_tag = '';
            $this->_attributes = array();
            $this->_current_attribute = '';
        }

        /**
         *    Class-named initialiser, kept so that code calling
         *    it explicitly keeps working. It is declared after
         *    __construct() so that __construct() is the one the
         *    runtime picks as the constructor.
         *    @param SimpleSaxListener $listener    SAX event handler.
         *    @access public
         */
        function SimpleHtmlSaxParser($listener) {
            // self:: rather than $this-> so that a subclass
            // constructor is never re-entered from here.
            self::__construct($listener);
        }

        /**
         *    Runs the content through the lexer which
         *    should call back to the acceptors.
         *    @param string $raw      Page text to parse.
         *    @return boolean         False if parse error.
         *    @access public
         */
        function parse($raw) {
            return $this->_lexer->parse($raw);
        }

        /**
         *    Sets up the matching lexer. Starts in 'text' mode.
         *    @param SimpleSaxParser $parser    Event generator, usually $self.
         *    @return SimpleLexer               Lexer suitable for this parser.
         *    @access public
         *    @static
         */
        static function &createLexer($parser) {
            $lexer = new SimpleHtmlLexer($parser);
            return $lexer;
        }

        /**
         *    Accepts a token from the tag mode. If the
         *    starting element completes then the element
         *    is dispatched and the current attributes
         *    set back to empty. The element or attribute
         *    name is converted to lower case.
         *    @param string $token     Incoming characters.
         *    @param integer $event    Lexer event type.
         *    @return boolean          False if parse error.
         *    @access public
         */
        function acceptStartToken($token, $event) {
            if ($event == LEXER_ENTER) {
                // Token is "<name": drop the angle bracket.
                $this->_tag = strtolower(substr($token, 1));
                return true;
            }
            if ($event == LEXER_EXIT) {
                $success = $this->_listener->startElement(
                        $this->_tag,
                        $this->_attributes);
                $this->_tag = '';
                $this->_attributes = array();
                // Stops a value without a name in a later tag from
                // being attached to an attribute name of this one.
                $this->_current_attribute = '';
                return $success;
            }
            // Anything else seen inside the tag is an attribute name.
            if ($token !== '=') {
                $this->_current_attribute = strtolower(self::decodeHtml($token));
                $this->_attributes[$this->_current_attribute] = '';
            }
            return true;
        }

        /**
         *    Accepts a token from the end tag mode.
         *    The element name is converted to lower case.
         *    @param string $token     Incoming characters.
         *    @param integer $event    Lexer event type.
         *    @return boolean          False if parse error.
         *    @access public
         */
        function acceptEndToken($token, $event) {
            if (! preg_match('/<\/(.*)>/', $token, $matches)) {
                return false;
            }
            return $this->_listener->endElement(strtolower($matches[1]));
        }

        /**
         *    Part of the tag data. Text inside a quoted value and
         *    complete unquoted values are appended to the attribute
         *    named last; every other event is ignored.
         *    @param string $token     Incoming characters.
         *    @param integer $event    Lexer event type.
         *    @return boolean          Always true.
         *    @access public
         */
        function acceptAttributeToken($token, $event) {
            if ($event == LEXER_UNMATCHED) {
                $this->_appendAttributeValue(self::decodeHtml($token));
            }
            if ($event == LEXER_SPECIAL) {
                // Unquoted values arrive with their leading "=".
                $this->_appendAttributeValue(
                        preg_replace('/^=\s*/' , '', self::decodeHtml($token)));
            }
            return true;
        }

        /**
         *    Appends text to the value of the attribute named
         *    last, creating the entry first if it is missing.
         *    @param string $text    Decoded value text to add.
         *    @access private
         */
        function _appendAttributeValue($text) {
            if (! isset($this->_attributes[$this->_current_attribute])) {
                $this->_attributes[$this->_current_attribute] = '';
            }
            $this->_attributes[$this->_current_attribute] .= $text;
        }

        /**
         *    A character entity. This is an unimplemented stub: the
         *    body is empty, so the method returns null rather than
         *    the documented boolean.
         *    @param string $token    Incoming characters.
         *    @param integer $event   Lexer event type.
         *    @access public
         */
        function acceptEntityToken($token, $event) {
        }

        /**
         *    Character data between tags regarded as
         *    important.
         *    @param string $token     Incoming characters.
         *    @param integer $event    Lexer event type.
         *    @return boolean          False if parse error.
         *    @access public
         */
        function acceptTextToken($token, $event) {
            return $this->_listener->addContent($token);
        }

        /**
         *    Incoming data to be ignored.
         *    @param string $token     Incoming characters.
         *    @param integer $event    Lexer event type.
         *    @return boolean          Always true.
         *    @access public
         */
        function ignore($token, $event) {
            return true;
        }

        /**
         *    Decodes any HTML entities, named or numeric,
         *    including both kinds of quote.
         *    @param string $html    Incoming HTML.
         *    @return string         Outgoing plain text.
         *    @access public
         *    @static
         */
        static function decodeHtml($html) {
            return html_entity_decode($html, ENT_QUOTES);
        }

        /**
         *    Turns HTML into text browser visible text. Images
         *    are converted to their alt text and tags are suppressed.
         *    Entities are converted to their visible representation.
         *    @param string $html        HTML to convert.
         *    @return string             Plain text.
         *    @access public
         *    @static
         */
        static function normalise($html) {
            // "s" lets a comment span several lines.
            $text = preg_replace('|<!--.*?-->|s', '', $html);
            // [^>] keeps each image match inside a single tag, so the
            // alt attribute of some later tag is never picked up.
            $text = preg_replace('|<img[^>]*?\salt\s*=\s*"([^"]*)"[^>]*>|i', ' \1 ', $text);
            $text = preg_replace('|<img[^>]*?\salt\s*=\s*\'([^\']*)\'[^>]*>|i', ' \1 ', $text);
            $text = preg_replace('|<img[^>]*?\salt\s*=\s*([a-zA-Z_]+)[^>]*>|i', ' \1 ', $text);
            $text = preg_replace('|<[^>]*>|', '', $text);
            $text = self::decodeHtml($text);
            $text = preg_replace('|\s+|', ' ', $text);
            return trim($text);
        }
    }

    /**
     *    SAX event handler. Every method of this base class
     *    has an empty body and so returns null; subclasses
     *    are expected to override the events they handle.
     *    @package SimpleTest
     *    @subpackage WebTester
     *    @abstract
     */
    class SimpleSaxListener {

        /**
         *    Sets the document to write to.
         *    @access public
         */
        function __construct() {
        }

        /**
         *    Class-named initialiser, kept so that subclasses
         *    calling it explicitly keep working. It is declared
         *    after __construct() so that __construct() is the one
         *    the runtime picks as the constructor.
         *    @access public
         */
        function SimpleSaxListener() {
        }

        /**
         *    Start of element event.
         *    @param string $name        Element name.
         *    @param hash $attributes    Name value pairs.
         *                               SimpleHtmlSaxParser sends
         *                               attributes without content
         *                               as empty strings.
         *    @return boolean            False on parse error.
         *    @access public
         */
        function startElement($name, $attributes) {
        }

        /**
         *    End of element event.
         *    @param string $name        Element name.
         *    @return boolean            False on parse error.
         *    @access public
         */
        function endElement($name) {
        }

        /**
         *    Unparsed, but relevant data.
         *    @param string $text        May include unparsed tags.
         *    @return boolean            False on parse error.
         *    @access public
         */
        function addContent($text) {
        }
    }
?>