Subversion-Projekte lars-tiefland.prado

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
    /**
3
     *	base include file for SimpleTest
4
     *	@package	SimpleTest
5
     *	@subpackage	WebTester
6
     *	@version	$Id: url.php 1532 2006-12-01 12:28:55Z xue $
7
     */
8
 
9
    /**#@+
10
     *	include other SimpleTest class files
11
     */
12
    require_once(dirname(__FILE__) . '/encoding.php');
13
    /**#@-*/
14
 
15
    /**
16
     *    URL parser to replace parse_url() PHP function which
17
     *    got broken in PHP 4.3.0. Adds some browser specific
18
     *    functionality such as expandomatics.
19
     *    Guesses a bit trying to separate the host from
20
     *    the path and tries to keep a raw, possibly unparsable,
21
     *    request string as long as possible.
22
	 *    @package SimpleTest
23
	 *    @subpackage WebTester
24
     */
25
    class SimpleUrl {
        protected $_scheme;
        protected $_username;
        protected $_password;
        protected $_host;
        protected $_port;
        protected $_path;
        protected $_request;
        protected $_fragment;
        protected $_x;
        protected $_y;
        protected $_target;
        protected $_raw = false;

        /**
         *    Constructor. Parses URL into sections. Each _chomp*()
         *    helper receives the working URL by reference and strips
         *    the section it recognises, so the order of the calls
         *    below matters.
         *    @param string $url        Incoming URL.
         *    @access public
         */
        public function __construct($url) {
            list($x, $y) = $this->_chompCoordinates($url);
            $this->setCoordinates($x, $y);
            $this->_scheme = $this->_chompScheme($url);
            $login = $this->_chompLogin($url);
            $this->_username = $login[0];
            $this->_password = $login[1];
            $this->_host = $this->_chompHost($url);
            $this->_port = false;
            // The host guess still carries any ":port" suffix.
            if (is_string($this->_host) && preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
                $this->_host = $host_parts[1];
                $this->_port = (int)$host_parts[2];
            }
            $this->_path = $this->_chompPath($url);
            $this->_request = $this->_parseRequest($this->_chompRequest($url));
            // Whatever is left at this point can only be the fragment.
            $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
            $this->_target = false;
        }

        /**
         *    Old style (PHP 4) constructor name, kept so that code
         *    calling $this->SimpleUrl($url) keeps working. Simply
         *    delegates to __construct().
         *    @param string $url        Incoming URL.
         *    @access public
         */
        public function SimpleUrl($url) {
            $this->__construct($url);
        }

        /**
         *    Extracts the X, Y coordinate pair from an image map.
         *    @param string $url   URL so far, passed by reference. The
         *                         trailing "?x,y" will be removed.
         *    @return array        X, Y as a pair of integers, or a
         *                         pair of false if not present.
         *    @access private
         */
        public function _chompCoordinates(&$url) {
            if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
                $url = $matches[1];
                return array((int)$matches[2], (int)$matches[3]);
            }
            return array(false, false);
        }

        /**
         *    Extracts the scheme part of an incoming URL.
         *    @param string $url   URL so far, passed by reference. The
         *                         scheme and colon will be removed, the
         *                         "//" is left in place.
         *    @return string       Scheme part or false.
         *    @access private
         */
        public function _chompScheme(&$url) {
            if (preg_match('/(.*?):(\/\/)(.*)/', $url, $matches)) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
            return false;
        }

        /**
         *    Extracts the username and password from the
         *    incoming URL. The // prefix will be reattached
         *    to the URL after the doublet is extracted.
         *    @param string $url    URL so far, passed by reference. The
         *                          username and password are removed.
         *    @return array         Two item list of username and
         *                          password. Will urldecode() them.
         *                          Both are false if there is no login.
         *    @access private
         */
        public function _chompLogin(&$url) {
            $prefix = '';
            if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
                $prefix = $matches[1];
                $url = $matches[2];
            }
            // Only an "@" inside the authority part (before the first
            // "/", "?" or "#") separates a login; an "@" further along,
            // e.g. in a query value, must be left alone.
            if (preg_match('/^([^\/?#]*)@(.*)/', $url, $matches)) {
                $url = $prefix . $matches[2];
                // Limit of two so that a stray ":" stays in the password.
                $parts = explode(':', $matches[1], 2);
                return array(
                        urldecode($parts[0]),
                        isset($parts[1]) ? urldecode($parts[1]) : false);
            }
            $url = $prefix . $url;
            return array(false, false);
        }

        /**
         *    Extracts the host part of an incoming URL.
         *    Includes the port number part. Will extract
         *    the host if it starts with // or it has
         *    a top level domain or it has at least two
         *    dots.
         *    @param string $url    URL so far, passed by reference. The
         *                          host will be removed.
         *    @return string        Host part guess or false.
         *    @access private
         */
        public function _chompHost(&$url) {
            if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
                $url = $matches[3];
                return $matches[2];
            }
            if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
                $tlds = self::getAllTopLevelDomains();
                $has_tld = preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1]);
                $has_two_dots = preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1]);
                if ($has_tld || $has_two_dots) {
                    $url = $matches[2] . $matches[3];
                    return $matches[1];
                }
            }
            return false;
        }

        /**
         *    Extracts the path information from the incoming
         *    URL. Strips this path from the URL.
         *    @param string $url     URL so far, passed by reference. The
         *                           path will be removed.
         *    @return string         Path part or an empty string.
         *    @access private
         */
        public function _chompPath(&$url) {
            if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
                $url = $matches[2] . $matches[3];
                return ($matches[1] ? $matches[1] : '');
            }
            return '';
        }

        /**
         *    Strips off the request data.
         *    @param string $url  URL so far, passed by reference. The
         *                        request will be removed.
         *    @return string      Raw request part without the "?".
         *    @access private
         */
        public function _chompRequest(&$url) {
            if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
            return '';
        }

        /**
         *    Breaks the request down into an object. The raw
         *    string is also remembered so that it can be replayed
         *    untouched by getEncodedRequest().
         *    @param string $raw           Raw request.
         *    @return SimpleGetEncoding    Parsed data.
         *    @access private
         */
        public function _parseRequest($raw) {
            $this->_raw = $raw;
            $request = new SimpleGetEncoding();
            foreach (explode('&', $raw) as $pair) {
                if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                    $request->add($matches[1], urldecode($matches[2]));
                } elseif ($pair) {
                    // A bare key with no "=" is kept with an empty value.
                    $request->add($pair, '');
                }
            }
            return $request;
        }

        /**
         *    Accessor for protocol part.
         *    @param string $default    Value to use if not present.
         *    @return string            Scheme name, e.g "http".
         *    @access public
         */
        public function getScheme($default = false) {
            return $this->_scheme ? $this->_scheme : $default;
        }

        /**
         *    Accessor for user name.
         *    @return string    Username preceding host.
         *    @access public
         */
        public function getUsername() {
            return $this->_username;
        }

        /**
         *    Accessor for password.
         *    @return string    Password preceding host.
         *    @access public
         */
        public function getPassword() {
            return $this->_password;
        }

        /**
         *    Accessor for hostname. The port is held separately,
         *    see getPort().
         *    @param string $default    Value to use if not present.
         *    @return string            Hostname only.
         *    @access public
         */
        public function getHost($default = false) {
            return $this->_host ? $this->_host : $default;
        }

        /**
         *    Accessor for top level domain.
         *    @return string       Last part of host, or false if
         *                         the host has no dot in it.
         *    @access public
         */
        public function getTld() {
            // pathinfo() treats the text after the last dot as the
            // "extension", which for a host name is the TLD.
            $path_parts = pathinfo($this->getHost());
            return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
        }

        /**
         *    Accessor for port number.
         *    @return integer    TCP/IP port number or false if none
         *                       was given.
         *    @access public
         */
        public function getPort() {
            return $this->_port;
        }

        /**
         *    Accessor for path.
         *    @return string    Full path including leading slash if implied.
         *    @access public
         */
        public function getPath() {
            if (! $this->_path && $this->_host) {
                return '/';
            }
            return $this->_path;
        }

        /**
         *    Accessor for page if any. This may be a
         *    directory name if ambiguous.
         *    @return string     Page name.
         *    @access public
         */
        public function getPage() {
            if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
                return false;
            }
            return $matches[1];
        }

        /**
         *    Gets the path to the page.
         *    @return string       Path less the page, or false if
         *                         the path contains no "/".
         *    @access public
         */
        public function getBasePath() {
            if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
                return false;
            }
            return $matches[1];
        }

        /**
         *    Accessor for fragment at end of URL after the "#".
         *    @return string    Part after "#" or false if none.
         *    @access public
         */
        public function getFragment() {
            return $this->_fragment;
        }

        /**
         *    Sets image coordinates. Set to false to clear
         *    them. If either value is false then both are cleared.
         *    @param integer $x    Horizontal position.
         *    @param integer $y    Vertical position.
         *    @access public
         */
        public function setCoordinates($x = false, $y = false) {
            if (($x === false) || ($y === false)) {
                $this->_x = $this->_y = false;
                return;
            }
            $this->_x = (int)$x;
            $this->_y = (int)$y;
        }

        /**
         *    Accessor for horizontal image coordinate.
         *    @return integer        X value.
         *    @access public
         */
        public function getX() {
            return $this->_x;
        }

        /**
         *    Accessor for vertical image coordinate.
         *    @return integer        Y value.
         *    @access public
         */
        public function getY() {
            return $this->_y;
        }

        /**
         *    Accessor for current request parameters
         *    in URL string form. Will return the original request
         *    if at all possible even if it doesn't make much
         *    sense.
         *    @return string   Form is string "?a=1&b=2", etc.
         *    @access public
         */
        public function getEncodedRequest() {
            if ($this->_raw) {
                $encoded = $this->_raw;
            } else {
                $encoded = $this->_request->asUrlRequest();
            }
            if ($encoded) {
                // Guarantee exactly one leading "?".
                return '?' . preg_replace('/^\?/', '', $encoded);
            }
            return '';
        }

        /**
         *    Adds an additional parameter to the request. The
         *    remembered raw request is discarded.
         *    @param string $key            Name of parameter.
         *    @param string $value          Value as string.
         *    @access public
         */
        public function addRequestParameter($key, $value) {
            $this->_raw = false;
            $this->_request->add($key, $value);
        }

        /**
         *    Adds additional parameters to the request. The
         *    remembered raw request is discarded.
         *    @param hash/SimpleFormEncoding $parameters   Additional
         *                                                parameters.
         *    @access public
         */
        public function addRequestParameters($parameters) {
            $this->_raw = false;
            $this->_request->merge($parameters);
        }

        /**
         *    Clears down all parameters.
         *    @access public
         */
        public function clearRequest() {
            $this->_raw = false;
            $this->_request = new SimpleGetEncoding();
        }

        /**
         *    Gets the frame target if present. Although
         *    not strictly part of the URL specification it
         *    acts similarly to the browser.
         *    @return boolean/string    Frame name or false if none.
         *    @access public
         */
        public function getTarget() {
            return $this->_target;
        }

        /**
         *    Attaches a frame target. Also discards the
         *    remembered raw request.
         *    @param string $frame        Name of frame.
         *    @access public
         */
        public function setTarget($frame) {
            $this->_raw = false;
            $this->_target = $frame;
        }

        /**
         *    Renders the URL back into a string. The port is
         *    included when one was parsed, so that a URL such as
         *    "http://host:8080/" survives a round trip.
         *    @return string        URL in canonical form.
         *    @access public
         */
        public function asString() {
            $scheme = $identity = $host = $port = $path = $encoded = $fragment = '';
            if ($this->_username && $this->_password) {
                $identity = $this->_username . ':' . $this->_password . '@';
            }
            if ($this->getHost()) {
                $scheme = $this->getScheme() ? $this->getScheme() : 'http';
                $host = $this->getHost();
                $port = $this->getPort() ? ':' . $this->getPort() : '';
            }
            // Only absolute paths are rendered.
            if (substr($this->_path, 0, 1) == '/') {
                $path = $this->normalisePath($this->_path);
            }
            $encoded = $this->getEncodedRequest();
            $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
            $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
            return "$scheme://$identity$host$port$path$encoded$fragment$coords";
        }

        /**
         *    Replaces unknown sections to turn a relative
         *    URL into an absolute one. The base URL can
         *    be either a string or a SimpleUrl object.
         *    @param string/SimpleUrl $base       Base URL.
         *    @return SimpleUrl                   New absolute URL.
         *    @access public
         */
        public function makeAbsolute($base) {
            if (! is_object($base)) {
                $base = new SimpleUrl($base);
            }
            $scheme = $this->getScheme() ? $this->getScheme() : $base->getScheme();
            if ($this->getHost()) {
                $host = $this->getHost();
                $port = $this->getPort() ? ':' . $this->getPort() : '';
                $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
                if (! $identity) {
                    // Fall back on the login of the base URL.
                    $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
                }
            } else {
                $host = $base->getHost();
                $port = $base->getPort() ? ':' . $base->getPort() : '';
                $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
            }
            $path = $this->normalisePath($this->_extractAbsolutePath($base));
            $encoded = $this->getEncodedRequest();
            $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
            $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
            return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
        }

        /**
         *    Replaces unknown sections of the path with base parts
         *    to return a complete absolute one.
         *    @param SimpleUrl $base       Base URL.
         *    @return string               Absolute path.
         *    @access private
         */
        public function _extractAbsolutePath($base) {
            if ($this->getHost()) {
                return $this->_path;
            }
            if (! $this->_isRelativePath($this->_path)) {
                return $this->_path;
            }
            if ($this->_path) {
                return $base->getBasePath() . $this->_path;
            }
            return $base->getPath();
        }

        /**
         *    Simple test to see if a path part is relative.
         *    @param string $path        Path to test.
         *    @return boolean            True unless the path starts
         *                               with a "/".
         *    @access private
         */
        public function _isRelativePath($path) {
            return (substr($path, 0, 1) != '/');
        }

        /**
         *    Extracts the username and password for use in rendering
         *    a URL. Both must be set for an identity to be returned.
         *    @return string/boolean    Form of username:password or false.
         *    @access public
         */
        public function getIdentity() {
            if ($this->_username && $this->_password) {
                return $this->_username . ':' . $this->_password;
            }
            return false;
        }

        /**
         *    Replaces . and .. sections of the path. Each
         *    replacement is a single pass over the path.
         *    @param string $path    Unoptimised path.
         *    @return string         Path with dots removed if possible.
         *    @access public
         */
        public function normalisePath($path) {
            $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
            return preg_replace('|/\./|', '/', $path);
        }

        /**
         *    A pipe separated list of all TLDs that result in two part
         *    domain names.
         *    @return string        Pipe separated list.
         *    @access public
         *    @static
         */
        public static function getAllTopLevelDomains() {
            return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
        }
    }
525
?>