Blame | Letzte Änderung | Log anzeigen | RSS feed
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * XML_Beautifier/Tokenizer
 *
 * XML Beautifier package's Tokenizer
 *
 * PHP versions 4 and 5
 *
 * LICENSE:
 *
 * Copyright (c) 2003-2008 Stephan Schmidt <schst@php.net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @category  XML
 * @package   XML_Beautifier
 * @author    Stephan Schmidt <schst@php.net>
 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
 * @license   http://opensource.org/licenses/bsd-license New BSD License
 * @version   CVS: $Id: Tokenizer.php 302598 2010-08-21 03:03:58Z clockwerx $
 * @link      http://pear.php.net/package/XML_Beautifier
 */

/**
 * XML_Parser is needed to parse the document
 */
require_once 'XML/Parser.php';

/**
 * Tokenizer for XML_Beautifier
 *
 * This class breaks an XML document into separate tokens
 * that will be rendered by an XML_Beautifier renderer.
 *
 * While parsing, $_struct is used as a stack: every open element sits on
 * it until its end tag is seen, at which point the element is folded into
 * the "children" list of the entry below it. After parsing, $_struct holds
 * the top-level tokens of the document.
 *
 * @category  XML
 * @package   XML_Beautifier
 * @author    Stephan Schmidt <schst@php.net>
 * @copyright 2003-2008 Stephan Schmidt <schst@php.net>
 * @license   http://opensource.org/licenses/bsd-license New BSD License
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/XML_Beautifier
 * @todo      tokenize DTD
 * @todo      check for xml:space attribute
 */
class XML_Beautifier_Tokenizer extends XML_Parser
{
    /**
     * current depth
     * @var integer
     * @access private
     */
    var $_depth = 0;

    /**
     * stack for all found elements
     * @var array
     * @access private
     */
    var $_struct = array();

    /**
     * current parsing mode ("xml" or "doctype")
     * @var string
     * @access private
     */
    var $_mode = "xml";

    /**
     * indicates, whether parser is in cdata section
     * @var boolean
     * @access private
     */
    var $_inCDataSection = false;

    /**
     * Tokenize a document
     *
     * @param string  $document filename or XML document
     * @param boolean $isFile   flag to indicate whether
     *                          the first parameter is a file
     *
     * @return mixed the token list (array) on success, otherwise the
     *               error result reported by the parser
     */
    function tokenize($document, $isFile = true)
    {
        // NOTE(review): presumably turns off XML_Parser's case folding so
        // tag names keep their original case - confirm against XML_Parser.
        $this->folding = false;
        $this->XML_Parser();
        $this->_resetVars();

        if ($isFile === true) {
            $this->setInputFile($document);
            $result = $this->parse();
        } else {
            $result = $this->parseString($document);
        }

        if ($this->isError($result)) {
            return $result;
        }

        return $this->_struct;
    }

    /**
     * Start element handler for XML parser
     *
     * Pushes a new, still empty element token onto the stack and
     * increases the depth for everything that follows.
     *
     * @param object $parser  XML parser object
     * @param string $element XML element
     * @param array  $attribs attributes of XML tag
     *
     * @return void
     * @access protected
     */
    function startHandler($parser, $element, $attribs)
    {
        $struct = array(
            "type"     => XML_BEAUTIFIER_ELEMENT,
            "tagname"  => $element,
            "attribs"  => $attribs,
            "contains" => XML_BEAUTIFIER_EMPTY,
            // the element itself lives at the current depth,
            // its content one level deeper
            "depth"    => $this->_depth++,
            "children" => array()
        );

        array_push($this->_struct, $struct);
    }

    /**
     * End element handler for XML parser
     *
     * Takes the finished element from the stack and, unless it is a
     * top-level element, stores it as a child of the element below it.
     *
     * @param object $parser  XML parser object
     * @param string $element element
     *
     * @return void
     * @access protected
     */
    function endHandler($parser, $element)
    {
        $struct = array_pop($this->_struct);

        if ($struct["depth"] > 0) {
            $parent = array_pop($this->_struct);
            array_push($parent["children"], $struct);
            $parent["contains"] = $parent["contains"] | XML_BEAUTIFIER_ELEMENT;
            array_push($this->_struct, $parent);
        } else {
            array_push($this->_struct, $struct);
        }

        $this->_depth--;
    }

    /**
     * Handler for character data
     *
     * Empty chunks are skipped; everything else becomes either a CDATA
     * section token or a plain character data token, depending on
     * whether a "<![CDATA[" marker is currently open.
     *
     * @param object $parser XML parser object
     * @param string $cdata  CDATA
     *
     * @return mixed true when an empty chunk is skipped, otherwise nothing
     * @access protected
     */
    function cdataHandler($parser, $cdata)
    {
        if ((string)$cdata === '') {
            return true;
        }

        if ($this->_inCDataSection === true) {
            $type = XML_BEAUTIFIER_CDATA_SECTION;
        } else {
            $type = XML_BEAUTIFIER_CDATA;
        }

        $struct = array(
            "type"  => $type,
            "data"  => $cdata,
            "depth" => $this->_depth
        );

        $this->_appendToParent($struct);
    }

    /**
     * Handler for processing instructions
     *
     * @param object $parser XML parser object
     * @param string $target target
     * @param string $data   data
     *
     * @return void
     * @access protected
     */
    function piHandler($parser, $target, $data)
    {
        $struct = array(
            "type"   => XML_BEAUTIFIER_PI,
            "target" => $target,
            "data"   => $data,
            "depth"  => $this->_depth
        );

        $this->_appendToParent($struct);
    }

    /**
     * Handler for external entities
     *
     * Only the entity name is recorded; the remaining arguments are
     * not used.
     *
     * @param object $parser            XML parser object
     * @param string $open_entity_names entity name
     * @param string $base              ?? (unused?)
     * @param string $system_id         ?? (unused?)
     * @param string $public_id         ?? (unused?)
     *
     * @return bool always true
     * @access protected
     * @todo revisit parameter signature... doesn't seem to be correct
     * @todo PEAR CS - need to shorten arg list for 85-char rule
     */
    function entityrefHandler($parser, $open_entity_names, $base, $system_id, $public_id)
    {
        $struct = array(
            "type"  => XML_BEAUTIFIER_ENTITY,
            "name"  => $open_entity_names,
            "depth" => $this->_depth
        );

        $this->_appendToParent($struct);

        return true;
    }

    /**
     * Handler for all other stuff
     *
     * Dispatches to the handler that matches the current parsing mode.
     *
     * @param object $parser XML parser object
     * @param string $data   data
     *
     * @return void
     * @access protected
     */
    function defaultHandler($parser, $data)
    {
        switch ($this->_mode) {
        case "xml":
            $this->_handleXMLDefault($data);
            break;
        case "doctype":
            $this->_handleDoctype($data);
            break;
        }
    }

    /**
     * handler for all data inside the doctype declaration
     *
     * Every chunk is stored as a doctype declaration token. The mode is
     * switched back to "xml" when a chunk containing ">" directly
     * follows a "]" token.
     *
     * @param string $data data
     *
     * @return void
     * @access private
     * @todo improve doctype parsing to split the declaration into separate tokens
     */
    function _handleDoctype($data)
    {
        if (strpos($data, ">") !== false) {
            $last = $this->_getLastToken();
            // Element tokens carry no "data" key, so check before reading it.
            // NOTE(review): a DOCTYPE without an internal subset never has
            // "]" as the previous token, so the mode appears to stay
            // "doctype" in that case - confirm against the parser's output.
            if (isset($last["data"]) && $last["data"] === "]") {
                $this->_mode = "xml";
            }
        }

        $struct = array(
            "type"  => XML_BEAUTIFIER_DT_DECLARATION,
            "data"  => $data,
            "depth" => $this->_depth
        );

        $this->_appendToParent($struct);
    }

    /**
     * handler for all default XML data
     *
     * Recognises comments, CDATA section markers, the XML declaration
     * and the start of a doctype declaration; anything else is stored
     * as a default token.
     *
     * @param string $data data
     *
     * @return bool always true
     * @access private
     */
    function _handleXMLDefault($data)
    {
        if (strncmp("<!--", $data, 4) == 0) {
            /*
             * handle comment
             *
             * The "s" modifier lets "." match newlines, so comments that
             * span several lines are captured, and ".*" accepts an empty
             * comment. Without a match the comment text stays empty.
             */
            $regs    = array();
            $comment = '';
            if (preg_match("/<!--(.*)-->/s", $data, $regs)) {
                $comment = trim($regs[1]);
            }

            $struct = array(
                "type"  => XML_BEAUTIFIER_COMMENT,
                "data"  => $comment,
                "depth" => $this->_depth
            );
        } elseif ($data === "<![CDATA[") {
            /*
             * handle start of cdata section
             */
            $this->_inCDataSection = true;
            $struct                = null;
        } elseif ($data === "]]>") {
            /*
             * handle end of cdata section
             */
            $this->_inCDataSection = false;
            $struct                = null;
        } elseif (strncmp("<?", $data, 2) == 0) {
            /*
             * handle XML declaration
             *
             * Pseudo-attributes may be quoted with either quote character
             * and may have whitespace around "=". The value is captured in
             * group 3; group 2 is the quote character.
             */
            $matches = array();
            preg_match_all(
                '/([a-zA-Z_]+)\s*=\s*(["\'])((?:\\\\.|(?!\2)[^\\\\])*)\2/',
                $data,
                $matches,
                PREG_SET_ORDER
            );

            $attribs = array();
            foreach ($matches as $match) {
                $attribs[$match[1]] = $match[3];
            }

            if (!isset($attribs["version"])) {
                $attribs["version"] = "1.0";
            }
            if (!isset($attribs["encoding"])) {
                $attribs["encoding"] = "UTF-8";
            }
            // a missing standalone attribute is treated as true,
            // a present one is true only for the literal value "yes"
            if (!isset($attribs["standalone"])) {
                $attribs["standalone"] = true;
            } else {
                $attribs["standalone"] = ($attribs["standalone"] === 'yes');
            }

            $struct = array(
                "type"       => XML_BEAUTIFIER_XML_DECLARATION,
                "version"    => $attribs["version"],
                "encoding"   => $attribs["encoding"],
                "standalone" => $attribs["standalone"],
                "depth"      => $this->_depth
            );
        } elseif (preg_match("/^<!DOCTYPE/i", $data)) {
            // everything that follows is handled by _handleDoctype()
            $this->_mode = "doctype";

            $struct = array(
                "type"  => XML_BEAUTIFIER_DT_DECLARATION,
                "data"  => $data,
                "depth" => $this->_depth
            );
        } else {
            /*
             * handle all other data
             */
            $struct = array(
                "type"  => XML_BEAUTIFIER_DEFAULT,
                "data"  => $data,
                "depth" => $this->_depth
            );
        }

        if (!is_null($struct)) {
            $this->_appendToParent($struct);
        }

        return true;
    }

    /**
     * append a struct to the last struct on the stack
     *
     * Inside an element the struct becomes a child of the element on
     * top of the stack and its type is merged into the element's
     * "contains" bit mask; at depth 0 it is pushed onto the stack itself.
     *
     * @param array $struct structure to append
     *
     * @return bool always true
     * @access private
     */
    function _appendToParent($struct)
    {
        if ($this->_depth > 0) {
            $parent = array_pop($this->_struct);
            array_push($parent["children"], $struct);
            $parent["contains"] = $parent["contains"] | $struct["type"];
            array_push($this->_struct, $parent);
            return true;
        }

        array_push($this->_struct, $struct);
        return true;
    }

    /**
     * get the last token
     *
     * Returns the last child of the entry on top of the stack, or the
     * entry itself when it has no "children" list. The stack is only
     * read, never modified.
     *
     * @access private
     * @return array|null the last token, or null if the stack (or the
     *                    children list of its top entry) is empty
     */
    function _getLastToken()
    {
        if (count($this->_struct) === 0) {
            return null;
        }

        // array_slice() peeks at the top entry without touching the stack
        $top    = array_slice($this->_struct, -1);
        $parent = $top[0];

        if (isset($parent["children"]) && is_array($parent["children"])) {
            if (count($parent["children"]) === 0) {
                return null;
            }
            $tail = array_slice($parent["children"], -1);
            return $tail[0];
        }

        return $parent;
    }

    /**
     * reset all used object properties
     *
     * This method is called before parsing a new document
     *
     * @return void
     * @access private
     */
    function _resetVars()
    {
        $this->_depth          = 0;
        $this->_struct         = array();
        $this->_mode           = "xml";
        $this->_inCDataSection = false;
    }
}
?>