Blame | Letzte Änderung | Log anzeigen | RSS feed
<?php
/* vim: set noai expandtab ts=4 st=4 sw=4: */

/**
 * Abstract class providing common functions to File_Sitemap related classes.
 *
 * PHP versions 5
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * * The names of its contributors may not be used to endorse or promote
 *   products derived from this software without specific prior written
 *   permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @category File
 * @package  File_Sitemap
 * @author   Charles Brunet <cbrunet@php.net>
 * @license  http://www.opensource.org/licenses/bsd-license.html BSD License
 * @version  CVS: $Id: Base.php 299129 2010-05-07 22:21:04Z cbrunet $
 * @link     http://pear.php.net/package/File_Sitemap
 */

require_once "File/Sitemap/Exception.php";

/**
 * Abstract class providing common functions to File_Sitemap related classes.
 *
 * It owns the DOMDocument holding the sitemap and offers helpers to locate
 * and update nodes, normalise URLs and dates, load/save the document
 * (optionally gzipped), ping search engines and validate the result.
 *
 * @category File
 * @package  File_Sitemap
 * @author   Charles Brunet <cbrunet@php.net>
 * @license  http://www.opensource.org/licenses/bsd-license.html BSD License
 * @version  Release: 0.1.2
 * @link     http://pear.php.net/package/File_Sitemap
 */
abstract class File_Sitemap_Base
{
    /**
     * The internal DOMDocument used by this class
     *
     * @var DOMDocument
     */
    protected $dom;

    /**
     * XML namespace
     */
    const XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9';

    /**
     * namespace of XMLSchema-instance
     */
    const XSI = 'http://www.w3.org/2001/XMLSchema-instance';

    /**
     * Constructor. Build an empty XML document, with xmlns and root element.
     *
     * @param string $root   Name of the root element
     * @param string $schema Location of the schema
     *
     * @return void
     */
    public function __construct($root, $schema)
    {
        $imp       = new DOMImplementation();
        $this->dom = $imp->createDocument(self::XMLNS, $root);

        $this->dom->version  = '1.0';
        $this->dom->encoding = 'UTF-8';

        // xsi:schemaLocation is a "namespace location" pair separated by
        // whitespace.
        $attr        = $this->dom->createAttributeNS(self::XSI, 'xsi:schemaLocation');
        $attr->value = self::XMLNS.' '.$schema;
        $this->dom->documentElement->appendChild($attr);
    }

    /**
     * Returns the DOMNode element which contains $url, or false if not found.
     *
     * Every <loc> element of the sitemap namespace is inspected; the parent
     * of the first one whose text equals $url is returned.
     *
     * @param string $url URL (loc) we are looking for.
     *
     * @return mixed DOMNode | false
     */
    protected function findLoc($url)
    {
        $locs = $this->dom->getElementsByTagNameNS(self::XMLNS, 'loc');
        foreach ($locs as $urlElem) {
            if ($urlElem->nodeValue == $url) {
                return $urlElem->parentNode;
            }
        }
        return false;
    }

    /**
     * Set to $nodeVal the value of the $nodeName child of $urlNode.
     *
     * If $urlNode doesn't have a $nodeName child, add it. New 'lastmod' and
     * 'changefreq' children are inserted before the first existing sibling
     * that must follow them ('changefreq'/'priority', respectively
     * 'priority'); any other new child is appended.
     *
     * @param DOMNode $urlNode  The parent of the node we want to update.
     * @param string  $nodeName The name of the node we want to update.
     * @param string  $nodeVal  The value we want to put into the node.
     *
     * @return void
     */
    protected function updateNode($urlNode, $nodeName, $nodeVal)
    {
        $beforeNode = null;

        // Names of the siblings the new node has to be placed in front of.
        switch ($nodeName) {
        case 'lastmod':
            $before = array('changefreq', 'priority');
            break;
        case 'changefreq':
            $before = array('priority');
            break;
        default:
            $before = array();
        }

        // replace the old value if the node already exists
        foreach ($urlNode->childNodes as $child) {
            if ($child->nodeName == $nodeName) {
                $child->nodeValue = $nodeVal;
                return;
            }
            // remember only the first sibling that must come after us
            if (is_null($beforeNode) && in_array($child->nodeName, $before, true)) {
                $beforeNode = $child;
            }
        }

        // node not found, we need to create it
        $elem = $this->dom->createElementNS(self::XMLNS, $nodeName, $nodeVal);
        if (is_null($beforeNode)) {
            $urlNode->appendChild($elem);
        } else {
            $urlNode->insertBefore($elem, $beforeNode);
        }
    }

    /**
     * Used as callback function to preg_replace_callback to urlencode char
     *
     * @param array $char $char[0] will be encoded
     *
     * @return string
     */
    private static function _myUrlEncode($char)
    {
        return rawurlencode($char[0]);
    }

    /**
     * Ensure url contains valid chars and isn't longer than 2048 chars.
     *
     * urlencode invalid chars. Convert invalid XML chars to entities.
     *
     * @param string $url The url we want to verify and encode.
     *
     * @return string
     *
     * @throws {@link File_Sitemap_Exception} URL doesn't begin with valid
     * protocol (http, https, ftp) or encoded URL longer than 2048 chars.
     */
    protected function parseURL($url)
    {
        $protocols = array(
            'http',
            'https',
            'ftp',
        );

        // Builds /^(http:\/\/|https:\/\/|ftp:\/\/)/. The separator is passed
        // first: the reversed implode() argument order was removed in PHP 8.
        $pattern = '/^('.implode(':\/\/|', $protocols).':\/\/)/';
        if (!preg_match($pattern, $url)) {
            throw new File_Sitemap_Exception(
                'URL must begin with a protocol ('.implode(', ', $protocols).').',
                File_Sitemap_Exception::PARSE_ERROR
            );
        }

        // encode XML special chars
        $url = strtr(
            $url,
            array(
                '&'  => '&amp;',
                '\'' => '&apos;',
                '"'  => '&quot;',
                '>'  => '&gt;',
                '<'  => '&lt;',
            )
        );

        // replace other chars with %nn form
        $url = preg_replace_callback(
            '/[^0-9a-zA-Z_'.':\/?#\[\]@!$&\'()*+,;=%~.-]/',
            'File_Sitemap_Base::_myUrlEncode',
            $url
        );

        // the limit applies to the encoded form, measured in bytes
        if (strlen($url) > 2048) {
            throw new File_Sitemap_Exception(
                'URL must not be longer than 2048 chars.',
                File_Sitemap_Exception::PARSE_ERROR
            );
        }
        return $url;
    }

    /**
     * Ensure that $datetime is a valid date time string
     *
     * If $datetime conforms to the spec, it is returned as is.
     * Else we try to decode it using the strtotime function and return it
     * formatted as a complete date, time and timezone offset.
     *
     * @param string $datetime The date (and time) to parse.
     *
     * @return string
     *
     * @see http://www.w3.org/TR/NOTE-datetime
     * @throws {@link File_Sitemap_Exception} Invalid date / time format.
     */
    protected function parseDateTime($datetime)
    {
        // YYYY[-MM[-DD[Thh:mm[:ss[.s]]TZD]]]
        $valid = preg_match(
            '/^\d{4}(-\d{2}(-\d{2}(T\d{2}:\d{2}(:\d{2}(\.\d+)?)?'
            .'([+-]\d{2}:\d{2}|Z))?)?)?$/',
            $datetime
        );
        if ($valid) {
            return $datetime;
        }

        // Try to convert it
        $timestamp = @strtotime($datetime);
        if ($timestamp === false) {
            throw new File_Sitemap_Exception(
                'unable to parse date time string.',
                File_Sitemap_Exception::PARSE_ERROR
            );
        }
        return date('Y-m-d\TH:i:sP', $timestamp);
    }

    /**
     * Remove DOMNode that contains url $loc from the document.
     *
     * Nothing happens when $loc is not part of the document.
     *
     * @param string $loc URL to remove
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} $loc is rejected by parseURL().
     */
    public function remove($loc)
    {
        $loc     = $this->parseURL($loc);
        $urlNode = $this->findLoc($loc);
        if ($urlNode !== false) {
            $this->dom->documentElement->removeChild($urlNode);
        }
    }

    /**
     * Load sitemap from file. The file can be gzipped or not.
     *
     * A file whose name ends with 'gz' is read through zlib; any other file
     * is handed directly to DOMDocument::load().
     *
     * @param string $file Filename (or URL).
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} The gzipped file cannot be
     * opened.
     */
    public function load($file)
    {
        if (substr($file, -2) == 'gz') {
            $gzfile = gzopen($file, 'r');
            if ($gzfile === false) {
                throw new File_Sitemap_Exception(
                    'error opening gziped sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
            // read the whole decompressed stream before parsing it
            $xml = '';
            while (!gzeof($gzfile)) {
                $xml .= gzread($gzfile, 10000);
            }
            gzclose($gzfile);
            $this->dom->loadXML($xml);
        } else {
            $this->dom->load($file);
        }
    }

    /**
     * Save sitemap to file.
     *
     * When compressing, '.gz' is appended to $file unless it already ends
     * with it.
     *
     * @param string  $file         Filename (or URL), including path.
     * @param boolean $compress     gzip the file? Default true.
     * @param boolean $formatOutput Nice format XML. Default false.
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} The gzipped file cannot be
     * opened for writing.
     */
    public function save($file, $compress = true, $formatOutput = false)
    {
        $this->dom->formatOutput = $formatOutput;
        if ($compress) {
            if (substr($file, -3) != '.gz') {
                $file .= '.gz';
            }
            // 'w9': write mode, maximum compression level
            $gzfile = gzopen($file, 'w9');
            if ($gzfile === false) {
                throw new File_Sitemap_Exception(
                    'error saving gziped sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
            gzwrite($gzfile, $this->dom->saveXML());
            gzclose($gzfile);
        } else {
            $this->dom->save($file);
        }
    }

    /**
     * Notify $site that a sitemap was updated at $url
     *
     * The sitemap URL is requested first; the ping sites are only contacted
     * when it answers with HTTP 200.
     *
     * @param string $url  URL of the sitemap file (must be valid)
     * @param mixed  $site string | array. URL (or array of URL) of the search
     *                     engine ping site
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} Sitemap file not reachable
     * or ping site error.
     */
    public function notify(
        $url,
        $site = 'http://www.google.com/webmasters/sitemaps/ping'
    ) {
        $this->_includeHTTPRequest();

        // check that $url exists
        $req = new HTTP_Request('');
        $req->setURL($url);
        $req->sendRequest();
        $code = $req->getResponseCode();
        if ($code != 200) {
            throw new File_Sitemap_Exception(
                'Cannot reach sitemap file. Error: '.$code,
                File_Sitemap_Exception::ERROR + $code
            );
        }

        // Ping the web search engine
        if (!is_array($site)) {
            $site = array($site);
        }
        $req->setMethod(HTTP_REQUEST_METHOD_GET);
        foreach ($site as $s) {
            $req->setURL($s);
            $req->addQueryString('sitemap', $url);
            $req->sendRequest();
            $code = $req->getResponseCode();
            if ($code != 200) {
                throw new File_Sitemap_Exception(
                    'Cannot reach '.$s.'. Error: '.$code,
                    File_Sitemap_Exception::ERROR + $code
                );
            }
        }
    }

    /**
     * Test that all url in sitemap are valid URL
     *
     * Each <loc> of the document is requested; a response code of 400 or
     * above counts as a failure.
     *
     * @param array &$results An array that will contain result codes.
     *                        key is the url, value is the response code
     *                        (200, 302, 404, etc.)
     *
     * @return boolean true if all URLs reached
     */
    public function test(&$results = array())
    {
        $this->_includeHTTPRequest();

        $req     = new HTTP_Request('');
        $allok   = true;
        $urllist = $this->dom->getElementsByTagNameNS(self::XMLNS, 'loc');
        foreach ($urllist as $urlnode) {
            $url = html_entity_decode($urlnode->nodeValue);
            $req->setURL($url);
            $req->sendRequest();
            $code          = $req->getResponseCode();
            $results[$url] = $code;
            if ($code >= 400) {
                $allok = false;
            }
        }
        return $allok;
    }

    /**
     * Validate the sitemap document against an XML Schema
     *
     * Be warned that it will issue some warnings if it doesn't validate.
     *
     * @param string $schema URL of the validating schema.
     *
     * @return boolean
     */
    public function validate($schema)
    {
        return $this->dom->schemaValidate($schema);
    }

    /**
     * Check for HTTP_Request package and include it
     *
     * The successful include is remembered in a static flag so that the
     * check runs only once.
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} HTTP_Request is not available.
     */
    private function _includeHTTPRequest()
    {
        static $included = false;
        if ($included) {
            return;
        }

        // HTTP_Request is optional: silence the include warning and report
        // its absence through an exception instead.
        @include_once 'HTTP/Request.php';
        if (!class_exists('HTTP_Request')) {
            throw new File_Sitemap_Exception(
                'HTTP_Request class not found.',
                File_Sitemap_Exception::ERROR
            );
        }
        $included = true;
    }
}
?>