Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
3
 
4
/**
5
 * XML_Beautifier/Tokenizer
6
 *
7
 * XML Beautifier package's Tokenizer
8
 *
9
 * PHP versions 4 and 5
10
 *
11
 * LICENSE:
12
 *
13
 * Copyright (c) 2003-2008 Stephan Schmidt <schst@php.net>
14
 * All rights reserved.
15
 *
16
 * Redistribution and use in source and binary forms, with or without
17
 * modification, are permitted provided that the following conditions
18
 * are met:
19
 *
20
 *    * Redistributions of source code must retain the above copyright
21
 *      notice, this list of conditions and the following disclaimer.
22
 *    * Redistributions in binary form must reproduce the above copyright
23
 *      notice, this list of conditions and the following disclaimer in the
24
 *      documentation and/or other materials provided with the distribution.
25
 *    * The name of the author may not be used to endorse or promote products
26
 *      derived from this software without specific prior written permission.
27
 *
28
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
29
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
30
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
32
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
36
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
 *
40
 * @category  XML
41
 * @package   XML_Beautifier
42
 * @author    Stephan Schmidt <schst@php.net>
43
 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
44
 * @license   http://opensource.org/licenses/bsd-license New BSD License
45
 * @version   CVS: $Id: Tokenizer.php 302598 2010-08-21 03:03:58Z clockwerx $
46
 * @link      http://pear.php.net/package/XML_Beautifier
47
 */
48
 
49
/**
50
 * XML_Parser is needed to parse the document
51
 */
52
require_once 'XML/Parser.php';
53
 
54
/**
55
 * Tokenizer for XML_Beautifier
56
 *
57
 * This class breaks an XML document into separate tokens
58
 * that will be rendered by an XML_Beautifier renderer.
59
 *
60
 * @category  XML
61
 * @package   XML_Beautifier
62
 * @author    Stephan Schmidt <schst@php.net>
63
 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
64
 * @license   http://opensource.org/licenses/bsd-license New BSD License
65
 * @version   Release: @package_version@
66
 * @link      http://pear.php.net/package/XML_Beautifier
67
 * @todo      tokenize DTD
68
 * @todo      check for xml:space attribute
69
 */
70
class XML_Beautifier_Tokenizer extends XML_Parser
{
    /**
     * current depth (number of currently open elements)
     * @var    integer
     * @access private
     */
    var $_depth = 0;

    /**
     * stack for all found elements
     *
     * Holds the finished top-level tokens followed by the chain of
     * currently open elements; the last entry is the innermost open element
     * while the depth is greater than zero.
     *
     * @var    array
     * @access private
     */
    var $_struct = array();

    /**
     * current parsing mode, either "xml" or "doctype"
     * @var    string
     * @access private
     */
    var $_mode = "xml";

    /**
     * indicates, whether parser is in cdata section
     * @var    boolean
     * @access private
     */
    var $_inCDataSection = false;

    /**
     * Tokenize a document
     *
     * @param string  $document filename or XML document
     * @param boolean $isFile   flag to indicate whether
     *                          the first parameter is a file
     *
     * @return mixed the list of tokens, or the error object returned by
     *               the parser if parsing failed
     */
    function tokenize($document, $isFile = true)
    {
        // NOTE(review): presumably turns off the parent parser's case
        // folding so tag names keep their original case - confirm against
        // the XML_Parser documentation.
        $this->folding = false;

        // Re-run the parent constructor to get a fresh parser for every
        // document. Prefer the PHP4-style constructor this class has always
        // called; fall back to __construct() when the installed parent no
        // longer provides it, instead of failing with an undefined method.
        if (method_exists($this, 'XML_Parser')) {
            $this->XML_Parser();
        } else {
            parent::__construct();
        }
        $this->_resetVars();

        if ($isFile === true) {
            $this->setInputFile($document);
            $result = $this->parse();
        } else {
            $result = $this->parseString($document);
        }

        if ($this->isError($result)) {
            return $result;
        }

        return $this->_struct;
    }

    /**
     * Start element handler for XML parser
     *
     * Pushes a new element token onto the stack and increases the depth.
     *
     * @param object $parser  XML parser object
     * @param string $element XML element
     * @param array  $attribs attributes of XML tag
     *
     * @return void
     * @access protected
     */
    function startHandler($parser, $element, $attribs)
    {
        $this->_struct[] = array(
            "type"     => XML_BEAUTIFIER_ELEMENT,
            "tagname"  => $element,
            "attribs"  => $attribs,
            "contains" => XML_BEAUTIFIER_EMPTY,
            // the element records the depth it was opened at,
            // everything inside it lives one level deeper
            "depth"    => $this->_depth++,
            "children" => array()
        );
    }

    /**
     * End element handler for XML parser
     *
     * Takes the finished element off the stack and attaches it to its
     * parent element; a root-level element stays on the stack.
     *
     * @param object $parser  XML parser object
     * @param string $element element
     *
     * @return void
     * @access protected
     */
    function endHandler($parser, $element)
    {
        $struct = array_pop($this->_struct);
        if ($struct["depth"] > 0) {
            // the entry below the closed element is its parent
            $parentIndex = count($this->_struct) - 1;

            $this->_struct[$parentIndex]["children"][] = $struct;
            $this->_struct[$parentIndex]["contains"]
                = $this->_struct[$parentIndex]["contains"] | XML_BEAUTIFIER_ELEMENT;
        } else {
            $this->_struct[] = $struct;
        }
        $this->_depth--;
    }

    /**
     * Handler for character data
     *
     * @param object $parser XML parser object
     * @param string $cdata  CDATA
     *
     * @return mixed true if the data was empty and has been skipped,
     *               nothing otherwise
     * @access protected
     */
    function cdataHandler($parser, $cdata)
    {
        if ((string)$cdata === '') {
            return true;
        }

        $type = ($this->_inCDataSection === true)
            ? XML_BEAUTIFIER_CDATA_SECTION
            : XML_BEAUTIFIER_CDATA;

        $this->_appendToParent(
            array(
                "type"  => $type,
                "data"  => $cdata,
                "depth" => $this->_depth
            )
        );
    }

    /**
     * Handler for processing instructions
     *
     * @param object $parser XML parser object
     * @param string $target target
     * @param string $data   data
     *
     * @return void
     * @access protected
     */
    function piHandler($parser, $target, $data)
    {
        $this->_appendToParent(
            array(
                "type"    => XML_BEAUTIFIER_PI,
                "target"  => $target,
                "data"    => $data,
                "depth"   => $this->_depth
            )
        );
    }

    /**
     * Handler for external entities
     *
     * Only the entity name is stored in the token; the remaining
     * arguments are not used.
     *
     * @param object $parser            XML parser object
     * @param string $open_entity_names entity name
     * @param string $base              ?? (unused?)
     * @param string $system_id         ?? (unused?)
     * @param string $public_id         ?? (unused?)
     *
     * @return bool
     * @access protected
     * @todo revisit parameter signature... doesn't seem to be correct
     * @todo PEAR CS - need to shorten arg list for 85-char rule
     */
    function entityrefHandler($parser, $open_entity_names, $base, $system_id, $public_id)
    {
        $this->_appendToParent(
            array(
                "type"    => XML_BEAUTIFIER_ENTITY,
                "name"    => $open_entity_names,
                "depth"   => $this->_depth
            )
        );
        return true;
    }

    /**
     * Handler for all other stuff
     *
     * Dispatches the data according to the current parsing mode.
     *
     * @param object $parser XML parser object
     * @param string $data   data
     *
     * @return void
     * @access protected
     */
    function defaultHandler($parser, $data)
    {
        switch ($this->_mode) {
        case "xml":
            $this->_handleXMLDefault($data);
            break;
        case "doctype":
            $this->_handleDoctype($data);
            break;
        }
    }

    /**
     * handler for all data inside the doctype declaration
     *
     * @param string $data data
     *
     * @return void
     * @access private
     * @todo improve doctype parsing to split the declaration into separate tokens
     */
    function _handleDoctype($data)
    {
        // NOTE(review): doctype mode is only left when a ">" directly
        // follows a "]" token, i.e. after an internal subset. A doctype
        // without an internal subset appears to never switch back to "xml"
        // mode - confirm how the parser chunks the declaration before
        // changing this.
        if (strpos($data, ">") !== false) {
            $last = $this->_getLastToken();
            // not every token carries a "data" key (e.g. elements)
            if (isset($last["data"]) && $last["data"] === "]") {
                $this->_mode = "xml";
            }
        }

        $this->_appendToParent(
            array(
                "type"    => XML_BEAUTIFIER_DT_DECLARATION,
                "data"    => $data,
                "depth"   => $this->_depth
            )
        );
    }

    /**
     * handler for all default XML data
     *
     * Recognizes comments, the start and end of CDATA sections, the XML
     * declaration and the start of a doctype declaration. Anything else is
     * stored as a default token.
     *
     * @param string $data data
     *
     * @return bool always true
     * @access private
     */
    function _handleXMLDefault($data)
    {
        $struct = null;

        if (strncmp("<!--", $data, 4) == 0) {
            /*
             * handle comment
             *
             * The "s" modifier lets the dot match newlines so multi-line
             * comments are captured, and ".*" also accepts empty comments.
             */
            $regs = array();
            if (preg_match('/<!--(.*)-->/s', $data, $regs)) {
                $comment = trim($regs[1]);
            } else {
                // no closing delimiter in this chunk: keep the text
                // instead of silently dropping it
                $comment = trim(substr($data, 4));
            }

            $struct = array(
                "type"    => XML_BEAUTIFIER_COMMENT,
                "data"    => $comment,
                "depth"   => $this->_depth
            );

        } elseif ($data == "<![CDATA[") {
            /*
             * handle start of cdata section, no token is created
             */
            $this->_inCDataSection = true;

        } elseif ($data == "]]>") {
            /*
             * handle end of cdata section, no token is created
             */
            $this->_inCDataSection = false;

        } elseif (strncmp("<?", $data, 2) == 0) {
            /*
             * handle XML declaration
             *
             * Pseudo-attributes may be quoted with single or double
             * quotes and may have whitespace around the equals sign.
             * Group 1 is the name, group 2 the quote, group 3 the value.
             */
            $match = array();
            preg_match_all(
                '/([a-zA-Z_]+)\s*=\s*(["\'])((?:\\\\.|(?!\2)[^\\\\])*)\2/',
                $data,
                $match
            );
            $attribs = array();
            foreach ($match[1] as $i => $name) {
                $attribs[$name] = $match[3][$i];
            }

            if (!isset($attribs["version"])) {
                $attribs["version"] = "1.0";
            }
            if (!isset($attribs["encoding"])) {
                $attribs["encoding"] = "UTF-8";
            }
            // NOTE(review): a missing standalone declaration is treated
            // as true here; kept as-is because renderers may rely on it.
            if (!isset($attribs["standalone"])) {
                $attribs["standalone"] = true;
            } else {
                $attribs["standalone"] = ($attribs["standalone"] === 'yes');
            }

            $struct = array(
                "type"       => XML_BEAUTIFIER_XML_DECLARATION,
                "version"    => $attribs["version"],
                "encoding"   => $attribs["encoding"],
                "standalone" => $attribs["standalone"],
                "depth"      => $this->_depth
            );

        } elseif (preg_match("/^<!DOCTYPE/i", $data)) {
            /*
             * handle start of doctype declaration, the following
             * default data is routed to _handleDoctype()
             */
            $this->_mode = "doctype";
            $struct      = array(
                "type"    => XML_BEAUTIFIER_DT_DECLARATION,
                "data"    => $data,
                "depth"   => $this->_depth
            );

        } else {
            /*
             * handle all other data
             */
            $struct = array(
                "type"    => XML_BEAUTIFIER_DEFAULT,
                "data"    => $data,
                "depth"   => $this->_depth
            );
        }

        if (!is_null($struct)) {
            $this->_appendToParent($struct);
        }
        return true;
    }

    /**
     * append a struct to the last struct on the stack
     *
     * Inside an element the struct becomes a child of the innermost open
     * element and its type is merged into the parent's "contains" flags.
     * At depth zero the struct is pushed onto the stack itself.
     *
     * @param array $struct structure to append
     *
     * @return bool always true
     * @access private
     */
    function _appendToParent($struct)
    {
        if ($this->_depth > 0) {
            $parentIndex = count($this->_struct) - 1;

            $this->_struct[$parentIndex]["children"][] = $struct;
            $this->_struct[$parentIndex]["contains"]
                = $this->_struct[$parentIndex]["contains"] | $struct["type"];
            return true;
        }

        $this->_struct[] = $struct;
        return true;
    }

    /**
     * get the last token
     *
     * Returns the last child of the struct on top of the stack, or that
     * struct itself if it has no "children" list. The stack is only read,
     * never modified.
     *
     * @access   private
     * @return   array|null last token, null if there is none
     */
    function _getLastToken()
    {
        $count = count($this->_struct);
        if ($count === 0) {
            return null;
        }

        $parent = $this->_struct[$count - 1];
        if (isset($parent["children"]) && is_array($parent["children"])) {
            $childCount = count($parent["children"]);
            if ($childCount === 0) {
                return null;
            }
            return $parent["children"][$childCount - 1];
        }

        return $parent;
    }

    /**
     * reset all used object properties
     *
     * This method is called before parsing a new document
     *
     * @return void
     * @access private
     */
    function _resetVars()
    {
        $this->_depth          = 0;
        $this->_struct         = array();
        $this->_mode           = "xml";
        $this->_inCDataSection = false;
    }
}
456
?>