Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/* vim: set noai expandtab ts=4 st=4 sw=4: */
3
 
4
/**
5
 * Abstract class providing common functions to File_Sitemap related classes.
6
 *
7
 * PHP version 5
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions are met:
11
 *
12
 *  * Redistributions of source code must retain the above copyright notice,
13
 *    this list of conditions and the following disclaimer.
14
 *  * Redistributions in binary form must reproduce the above copyright notice,
15
 *    this list of conditions and the following disclaimer in the documentation
16
 *    and/or other materials provided with the distribution.
17
 *  * The names of its contributors may not be used to endorse or promote
18
 *    products derived from this software without specific prior written
19
 *    permission.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31
 * POSSIBILITY OF SUCH DAMAGE.
32
 *
33
 * @category File
34
 * @package  File_Sitemap
35
 * @author   Charles Brunet <cbrunet@php.net>
36
 * @license  http://www.opensource.org/licenses/bsd-license.html BSD License
37
 * @version  CVS: $Id: Base.php 299129 2010-05-07 22:21:04Z cbrunet $
38
 * @link     http://pear.php.net/package/File_Sitemap
39
 */
40
 
41
require_once "File/Sitemap/Exception.php";
42
 
43
/**
 * Abstract class providing common functions to File_Sitemap related classes.
 *
 * Wraps a DOMDocument in the sitemaps.org namespace and offers helpers to
 * look up, update and remove entries, to normalise URLs and dates, to load
 * and save (optionally gzipped) files, and to send HTTP requests for
 * notification and link checking.
 *
 * @category File
 * @package  File_Sitemap
 * @author   Charles Brunet <cbrunet@php.net>
 * @license  http://www.opensource.org/licenses/bsd-license.html BSD License
 * @version  Release: 0.1.2
 * @link     http://pear.php.net/package/File_Sitemap
 */
abstract class File_Sitemap_Base
{

    /**
     * The internal DOMDocument used by this class
     *
     * @var DOMDocument
     */
    protected $dom;

    /**
     * XML namespace
     */
    const XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9';

    /**
     * namespace of XMLSchema-instance
     */
    const XSI = 'http://www.w3.org/2001/XMLSchema-instance';

    /**
     * Constructor. Build an empty XML document, with xmlns and root element.
     *
     * The root element is created in the sitemap namespace and receives an
     * xsi:schemaLocation attribute of the form "<XMLNS> <schema>".
     *
     * @param string $root   Name of the root element
     * @param string $schema Location of the schema
     *
     * @return void
     */
    public function __construct($root, $schema)
    {
        $imp = new DOMImplementation();

        $this->dom           = $imp->createDocument(self::XMLNS, $root);
        $this->dom->version  = '1.0';
        $this->dom->encoding = 'UTF-8';
        $attr                = $this->dom->createAttributeNS(
            self::XSI,
            'xsi:schemaLocation'
        );
        $attr->value         = self::XMLNS.' '.$schema;
        $this->dom->documentElement->appendChild($attr);
    }

    /**
     * Returns the DOMNode element which contains $url, or false if not found.
     *
     * Every <loc> element of the sitemap namespace is inspected; the parent
     * of the first one whose text equals $url is returned.
     *
     * NOTE(review): nodeValue is the decoded text of the element, whereas
     * parseURL() returns entity-encoded text (e.g. "&amp;"). URLs containing
     * XML special characters may therefore never match -- confirm against
     * the code that creates <loc> elements.
     *
     * @param string $url URL (loc) we are looking for.
     *
     * @return mixed DOMNode | false
     */
    protected function findLoc($url)
    {
        $locs = $this->dom->getElementsByTagNameNS(self::XMLNS, 'loc');
        foreach ($locs as $urlElem) {
            if ($urlElem->nodeValue == $url) {
                return $urlElem->parentNode;
            }
        }
        return false;
    }

    /**
     * Set to $nodeValue the value of the $nodeName child of $urlNode.
     *
     * If $urlNode doesn't have a $nodeName child, add it. A new 'lastmod'
     * is inserted before any existing 'changefreq' or 'priority' child and a
     * new 'changefreq' before any existing 'priority'; every other new node
     * is appended at the end.
     *
     * @param DOMNode $urlNode  The parent of the node we want to update.
     * @param string  $nodeName The name of the node we want to update.
     * @param string  $nodeVal  The value we want to put into the node.
     *
     * @return void
     */
    protected function updateNode($urlNode, $nodeName, $nodeVal)
    {
        $beforeNode = null;
        switch ($nodeName) {
        case 'lastmod':
            $before = array('changefreq', 'priority');
            break;
        case 'changefreq':
            $before = array('priority');
            break;
        default:
            $before = array();
        }

        // Overwrite the existing $nodeName child if there is one; while
        // scanning, remember the first sibling the new node must precede.
        foreach ($urlNode->childNodes as $child) {
            if ($child->nodeName == $nodeName) {
                $child->nodeValue = $nodeVal;
                return;
            }
            if (is_null($beforeNode) && in_array($child->nodeName, $before)) {
                $beforeNode = $child;
            }
        }

        // node not found, we need to create it
        $elem = $this->dom->createElementNS(self::XMLNS, $nodeName, $nodeVal);
        if (is_null($beforeNode)) {
            $urlNode->appendChild($elem);
        } else {
            $urlNode->insertBefore($elem, $beforeNode);
        }
    }

    /**
     * Used as callback function to preg_replace_callback to urlencode char
     *
     * @param array $char $char[0] will be encoded
     *
     * @return string
     */
    private static function _myUrlEncode($char)
    {
        return rawurlencode($char[0]);
    }

    /**
     * Ensure url contains valid chars and isn't longer than 2048 chars.
     *
     * urlencode invalid chars. Convert invalid XML chars to entities.
     *
     * @param string $url The url we want to verify and encode.
     *
     * @return string
     *
     * @throws {@link File_Sitemap_Exception} URL doesn't begin with valid
     *    protocol (http, https, ftp) or encoded URL longer than 2048 chars.
     */
    protected function parseURL($url)
    {
        $protocols = array('http',
                'https',
                'ftp',
                );

        // implode() takes the separator first; the legacy (array, string)
        // order was removed in PHP 8.0 and raises a TypeError there.
        $pattern = '/^('.implode(':\/\/|', $protocols).':\/\/)/';
        if (!preg_match($pattern, $url)) {
            throw new File_Sitemap_Exception(
                'URL must begin with a protocol ('.
                implode(', ', $protocols).').',
                File_Sitemap_Exception::PARSE_ERROR
            );
        }

        // encode XML special chars
        $url = strtr(
            $url, array(
                '&'=>'&amp;',
                '\''=>'&apos;',
                '"'=>'&quot;',
                '>'=>'&gt;',
                '<'=>'&lt;',
            )
        );
        // replace other chars with %nn form; '&', ';' and '%' are in the
        // allowed set, so the entities produced above and any percent
        // escapes already present in $url are left untouched.
        $url = preg_replace_callback(
            '/[^0-9a-zA-Z_'.
            ':\/?#\[\]@!$&\'()*+,;=%~.-]/',
            'File_Sitemap_Base::_myUrlEncode', $url
        );

        if (strlen($url) > 2048) {
            throw new File_Sitemap_Exception(
                'URL must not be longer than 2048 chars.',
                File_Sitemap_Exception::PARSE_ERROR
            );
        }

        return $url;
    }

    /**
     * Ensure that $datetime is a valid date time string
     *
     * If $datetime conforms to the spec, it is returned as is.
     * Else we try to decode it using the strtotime function and return it
     * formatted as 'Y-m-d\TH:i:sP'.
     *
     * @param string $datetime The date (and time) to parse.
     *
     * @return string
     *
     * @see http://www.w3.org/TR/NOTE-datetime
     * @throws {@link File_Sitemap_Exception} Invalid date / time format.
     */
    protected function parseDateTime($datetime)
    {
        // The D modifier makes '$' match only at the very end of the
        // subject, so a value with a trailing newline is not accepted
        // verbatim but normalised through strtotime() below.
        $valid = preg_match(
            '/^\d{4}(-\d{2}(-\d{2}(T\d{2}:\d{2}(:\d{2}(\.\d+)?)?'.
            '([+-]\d{2}:\d{2}|Z))?)?)?$/D',
            $datetime
        );
        if ($valid) {
            return $datetime;
        }

        // Try to convert it
        $timestamp = @strtotime($datetime);
        if ($timestamp === false) {
            throw new File_Sitemap_Exception(
                'unable to parse date time string.',
                File_Sitemap_Exception::PARSE_ERROR
            );
        }
        return date('Y-m-d\TH:i:sP', $timestamp);
    }

    /**
     * Remove DOMNode that contains url $loc from the document.
     *
     * $loc is first normalised with parseURL(); nothing happens when no
     * matching entry is found.
     *
     * @param string $loc URL to remove
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} $loc rejected by parseURL().
     */
    public function remove($loc)
    {
        $loc     = $this->parseURL($loc);
        $urlNode = $this->findLoc($loc);
        if ($urlNode !== false) {
            $this->dom->documentElement->removeChild($urlNode);
        }
    }

    /**
     * Load sitemap from file. The file can be gzipped or not.
     *
     * A name whose last two characters are 'gz' is read through the zlib
     * functions; anything else is handed to DOMDocument::load().
     *
     * @param string $file Filename (or URL).
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} File read error.
     */
    public function load($file)
    {
        if (substr($file, -2) == 'gz') {
            $gzfile = gzopen($file, 'r');
            if ($gzfile === false) {
                throw new File_Sitemap_Exception(
                    'error opening gziped sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
            $xml = '';
            while (!gzeof($gzfile)) {
                $chunk = gzread($gzfile, 10000);
                if ($chunk === false) {
                    // A failed read would otherwise be appended as '' and
                    // the loop could spin without ever reaching EOF.
                    gzclose($gzfile);
                    throw new File_Sitemap_Exception(
                        'error reading gziped sitemap file.',
                        File_Sitemap_Exception::FILE_ERROR
                    );
                }
                $xml .= $chunk;
            }
            gzclose($gzfile);
            // loadXML('') is an error on its own (ValueError on PHP 8),
            // so report an empty payload as a read error as well.
            if ($xml === '' || $this->dom->loadXML($xml) === false) {
                throw new File_Sitemap_Exception(
                    'error parsing gziped sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
        } else {
            if ($this->dom->load($file) === false) {
                throw new File_Sitemap_Exception(
                    'error loading sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
        }
    }

    /**
     * Save sitemap to file.
     *
     * When compressing, '.gz' is appended to $file unless it already ends
     * with it.
     *
     * @param string  $file         Filename (or URL), including path.
     * @param boolean $compress     gzip the file? Default true.
     * @param boolean $formatOutput Nice format XML. Default false.
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} File write error.
     */
    public function save($file, $compress = true, $formatOutput = false)
    {
        $this->dom->formatOutput = $formatOutput;

        if ($compress) {
            if (substr($file, -3) != '.gz') {
                $file .= '.gz';
            }
            $gzfile = gzopen($file, 'w9');
            if ($gzfile === false) {
                throw new File_Sitemap_Exception(
                    'error saving gziped sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
            $written = gzwrite($gzfile, $this->dom->saveXML());
            gzclose($gzfile);
            // A serialised document is never empty, so both false and 0
            // bytes written indicate a failed write.
            if ($written === false || $written === 0) {
                throw new File_Sitemap_Exception(
                    'error saving gziped sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
        } else {
            if ($this->dom->save($file) === false) {
                throw new File_Sitemap_Exception(
                    'error saving sitemap file.',
                    File_Sitemap_Exception::FILE_ERROR
                );
            }
        }
    }

    /**
     * Notify $site that a sitemap was updated at $url
     *
     * $url is requested first and must answer with code 200; afterwards
     * every ping site is requested with a 'sitemap' query parameter set to
     * $url.
     *
     * @param string $url  URL of the sitemap file (must be valid)
     * @param mixed  $site string | array. URL (or array of URL) of the search
     *    engine ping site
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} Sitemap file not reachable
     *    or ping site error.
     */
    public function notify($url,
        $site = 'http://www.google.com/webmasters/sitemaps/ping'
    ) {
        $this->_includeHTTPRequest();

        // check that $url exists
        $req = new HTTP_Request('');
        $req->setURL($url);
        $req->sendRequest();
        $code = $req->getResponseCode();

        if ($code != 200) {
            throw new File_Sitemap_Exception(
                'Cannot reach sitemap file. Error: '.$code,
                File_Sitemap_Exception::ERROR + $code
            );
        }

        // Ping the web search engine
        if (!is_array($site)) {
            $site = array($site);
        }

        $req->setMethod(HTTP_REQUEST_METHOD_GET);
        foreach ($site as $s) {
            $req->setURL($s);
            $req->addQueryString('sitemap', $url);
            $req->sendRequest();
            $code = $req->getResponseCode();

            if ($code != 200) {
                throw new File_Sitemap_Exception(
                    'Cannot reach '.$s.'. Error: '.$code,
                    File_Sitemap_Exception::ERROR + $code
                );
            }
        }
    }

    /**
     * Test that all url in sitemap are valid URL
     *
     * Each <loc> value is entity-decoded and requested; a response code of
     * 400 or above counts as a failure.
     *
     * @param array &$results An array that will contains result codes.
     *    key is the url, value is the response code (200, 302, 404, etc.)
     *
     * @return boolean true if all URLs reached
     *
     * @throws {@link File_Sitemap_Exception} HTTP_Request not available.
     */
    public function test(&$results = array())
    {
        $this->_includeHTTPRequest();

        $req   = new HTTP_Request('');
        $allok = true;

        $urllist = $this->dom->getElementsByTagNameNS(self::XMLNS, 'loc');
        foreach ($urllist as $urlnode) {
            $url = html_entity_decode($urlnode->nodeValue);
            $req->setURL($url);
            $req->sendRequest();
            $code          = $req->getResponseCode();
            $results[$url] = $code;
            if ($code >= 400) {
                $allok = false;
            }
        }
        return $allok;
    }

    /**
     * Validate the sitemap document against an XML schema
     *
     * Be warned that it will issue some warnings if it doesn't validate.
     *
     * @param string $schema URL of the validating schema.
     *
     * @return boolean
     */
    public function validate($schema)
    {
        return $this->dom->schemaValidate($schema);
    }

    /**
     * Check for HTTP_Request package and include it
     *
     * The include is attempted only until it has succeeded once; the
     * outcome is remembered in a static flag.
     *
     * @return void
     *
     * @throws {@link File_Sitemap_Exception} HTTP_Request class not found.
     */
    private function _includeHTTPRequest()
    {
        static $included = false;

        if ($included) {
            return;
        }

        // Warnings are suppressed on purpose: a missing package is
        // reported through the class_exists() check below instead.
        @include_once 'HTTP/Request.php';

        if (!class_exists('HTTP_Request')) {
            throw new File_Sitemap_Exception(
                'HTTP_Request class not found.',
                File_Sitemap_Exception::ERROR
            );
        }

        $included = true;
    }
}
456
 
457
?>