Blame | Letzte Änderung | Log anzeigen | RSS feed
<?php
/**
 * Base include file for SimpleTest.
 * @package SimpleTest
 * @subpackage WebTester
 * @version $Id: url.php 1532 2006-12-01 12:28:55Z xue $
 */

/**#@+
 * Include other SimpleTest class files.
 */
require_once(dirname(__FILE__) . '/encoding.php');
/**#@-*/

/**
 * URL parser to replace the parse_url() PHP function, which
 * got broken in PHP 4.3.0. Adds some browser specific
 * functionality such as expandomatics.
 *
 * Guesses a bit when trying to separate the host from
 * the path, and tries to keep a raw, possibly unparsable,
 * request string for as long as possible.
 *
 * Parsing works by "chomping": each _chomp*() method receives the
 * remaining URL by reference, strips the part it recognises from
 * the front and returns that part.
 *
 * @package SimpleTest
 * @subpackage WebTester
 */
class SimpleUrl {
    protected $_scheme;
    protected $_username;
    protected $_password;
    protected $_host;
    protected $_port;
    protected $_path;
    protected $_request;
    protected $_fragment;
    protected $_x;
    protected $_y;
    protected $_target;
    protected $_raw = false;

    /**
     * Constructor. Parses the URL into sections.
     * @param string $url Incoming URL.
     * @access public
     */
    public function __construct($url) {
        // Order matters: every step consumes its part of $url.
        list($x, $y) = $this->_chompCoordinates($url);
        $this->setCoordinates($x, $y);
        $this->_scheme = $this->_chompScheme($url);
        list($this->_username, $this->_password) = $this->_chompLogin($url);
        $this->_host = $this->_chompHost($url);
        $this->_port = false;
        // The host guess may still carry a ":port" suffix.
        if (preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
            $this->_host = $host_parts[1];
            $this->_port = (int)$host_parts[2];
        }
        $this->_path = $this->_chompPath($url);
        $this->_request = $this->_parseRequest($this->_chompRequest($url));
        // Whatever is left can only be the fragment.
        $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
        $this->_target = false;
    }

    /**
     * PHP 4 style constructor name, kept so that legacy code calling
     * SimpleUrl() explicitly keeps working. Re-parses the URL.
     * @param string $url Incoming URL.
     * @access public
     */
    public function SimpleUrl($url) {
        $this->__construct($url);
    }

    /**
     * Extracts the X, Y coordinate pair from an image map.
     * @param string $url URL so far. The coordinates will be
     *                    removed.
     * @return array X, Y as a pair of integers, or a pair of
     *               false values if there are no coordinates.
     * @access private
     */
    public function _chompCoordinates(&$url) {
        if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            $url = $matches[1];
            return array((int)$matches[2], (int)$matches[3]);
        }
        return array(false, false);
    }

    /**
     * Extracts the scheme part of an incoming URL.
     * Only a scheme at the very start of the URL counts, so a
     * "://" inside a path or query string is left alone.
     * @param string $url URL so far. The scheme will be
     *                    removed, the "//" is kept.
     * @return string Scheme part or false.
     * @access private
     */
    public function _chompScheme(&$url) {
        if (preg_match('/^([^\/:]*):(\/\/)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return false;
    }

    /**
     * Extracts the username and password from the
     * incoming URL. The // prefix will be reattached
     * to the URL after the doublet is extracted.
     * The login is only looked for before the first "/", "?"
     * or "#", so an "@" in the path or query is not mistaken
     * for it.
     * @param string $url URL so far. The username and
     *                    password are removed.
     * @return array Two item list of username and
     *               password. Will urldecode() them.
     * @access private
     */
    public function _chompLogin(&$url) {
        $prefix = '';
        if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (preg_match('/^([^\/?#]*)@(.*)/', $url, $matches)) {
            $url = $prefix . $matches[2];
            $parts = explode(':', $matches[1]);
            return array(
                urldecode($parts[0]),
                isset($parts[1]) ? urldecode($parts[1]) : false);
        }
        $url = $prefix . $url;
        return array(false, false);
    }

    /**
     * Extracts the host part of an incoming URL.
     * Includes the port number part. Will extract
     * the host if it starts with // or it has
     * a top level domain or it has at least two
     * dots.
     * @param string $url URL so far. The host will be
     *                    removed.
     * @return string Host part guess or false.
     * @access private
     */
    public function _chompHost(&$url) {
        // Explicit "//host" form: everything up to the path, query or fragment.
        if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        // No "//": guess from the shape of the leading section.
        if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
            $tlds = SimpleUrl::getAllTopLevelDomains();
            if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
        }
        return false;
    }

    /**
     * Extracts the path information from the incoming
     * URL. Strips this path from the URL.
     * @param string $url URL so far. The path will be
     *                    removed.
     * @return string Path part or an empty string.
     * @access private
     */
    public function _chompPath(&$url) {
        if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return ($matches[1] ? $matches[1] : '');
        }
        return '';
    }

    /**
     * Strips off the request data.
     * @param string $url URL so far. The request will be
     *                    removed.
     * @return string Raw request part without the leading "?".
     * @access private
     */
    public function _chompRequest(&$url) {
        if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return '';
    }

    /**
     * Breaks the request down into an object. The raw string is
     * remembered so it can be replayed unchanged later.
     * @param string $raw Raw request.
     * @return SimpleFormEncoding Parsed data.
     * @access private
     */
    public function _parseRequest($raw) {
        $this->_raw = $raw;
        $request = new SimpleGetEncoding();
        foreach (explode('&', $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add($matches[1], urldecode($matches[2]));
            } elseif ($pair) {
                // Bare key with no "=" sign.
                $request->add($pair, '');
            }
        }
        return $request;
    }

    /**
     * Accessor for protocol part.
     * @param string $default Value to use if not present.
     * @return string Scheme name, e.g "http".
     * @access public
     */
    public function getScheme($default = false) {
        return $this->_scheme ? $this->_scheme : $default;
    }

    /**
     * Accessor for user name.
     * @return string Username preceding host.
     * @access public
     */
    public function getUsername() {
        return $this->_username;
    }

    /**
     * Accessor for password.
     * @return string Password preceding host.
     * @access public
     */
    public function getPassword() {
        return $this->_password;
    }

    /**
     * Accessor for hostname. The port is split off by the
     * constructor and is available from getPort().
     * @param string $default Value to use if not present.
     * @return string Hostname only.
     * @access public
     */
    public function getHost($default = false) {
        return $this->_host ? $this->_host : $default;
    }

    /**
     * Accessor for top level domain.
     * @return string Last part of host, or false if the host
     *                has no dot separated suffix.
     * @access public
     */
    public function getTld() {
        // pathinfo() reports the text after the last "." as the 'extension'.
        $path_parts = pathinfo($this->getHost());
        return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
    }

    /**
     * Accessor for port number.
     * @return integer TCP/IP port number, or false if none given.
     * @access public
     */
    public function getPort() {
        return $this->_port;
    }

    /**
     * Accessor for path.
     * @return string Full path including leading slash if implied.
     * @access public
     */
    public function getPath() {
        if (! $this->_path && $this->_host) {
            return '/';
        }
        return $this->_path;
    }

    /**
     * Accessor for page if any. This may be a
     * directory name if ambiguous.
     * @return string Page name.
     * @access public
     */
    public function getPage() {
        if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     * Gets the path to the page.
     * @return string Path less the page, or false if the path
     *                contains no "/".
     * @access public
     */
    public function getBasePath() {
        if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     * Accessor for fragment at end of URL after the "#".
     * @return string Part after "#", or false if there is none.
     * @access public
     */
    public function getFragment() {
        return $this->_fragment;
    }

    /**
     * Sets image coordinates. Set to false to clear
     * them.
     * @param integer $x Horizontal position.
     * @param integer $y Vertical position.
     * @access public
     */
    public function setCoordinates($x = false, $y = false) {
        if (($x === false) || ($y === false)) {
            $this->_x = $this->_y = false;
            return;
        }
        $this->_x = (int)$x;
        $this->_y = (int)$y;
    }

    /**
     * Accessor for horizontal image coordinate.
     * @return integer X value.
     * @access public
     */
    public function getX() {
        return $this->_x;
    }

    /**
     * Accessor for vertical image coordinate.
     * @return integer Y value.
     * @access public
     */
    public function getY() {
        return $this->_y;
    }

    /**
     * Accessor for current request parameters
     * in URL string form. Will return the original request
     * if at all possible even if it doesn't make much
     * sense.
     * @return string Form is string "?a=1&b=2", etc.
     * @access public
     */
    public function getEncodedRequest() {
        if ($this->_raw) {
            $encoded = $this->_raw;
        } else {
            $encoded = $this->_request->asUrlRequest();
        }
        if ($encoded) {
            // Guarantee exactly one leading "?".
            return '?' . preg_replace('/^\?/', '', $encoded);
        }
        return '';
    }

    /**
     * Adds an additional parameter to the request.
     * Discards the remembered raw request string.
     * @param string $key Name of parameter.
     * @param string $value Value as string.
     * @access public
     */
    public function addRequestParameter($key, $value) {
        $this->_raw = false;
        $this->_request->add($key, $value);
    }

    /**
     * Adds additional parameters to the request.
     * Discards the remembered raw request string.
     * @param hash/SimpleFormEncoding $parameters Additional
     *                                            parameters.
     * @access public
     */
    public function addRequestParameters($parameters) {
        $this->_raw = false;
        $this->_request->merge($parameters);
    }

    /**
     * Clears down all parameters.
     * @access public
     */
    public function clearRequest() {
        $this->_raw = false;
        $this->_request = new SimpleGetEncoding();
    }

    /**
     * Gets the frame target if present. Although
     * not strictly part of the URL specification it
     * acts similarly to the browser.
     * @return boolean/string Frame name or false if none.
     * @access public
     */
    public function getTarget() {
        return $this->_target;
    }

    /**
     * Attaches a frame target.
     * Also discards the remembered raw request string.
     * @param string $frame Name of frame.
     * @access public
     */
    public function setTarget($frame) {
        $this->_raw = false;
        $this->_target = $frame;
    }

    /**
     * Renders the URL back into a string.
     *
     * NOTE(review): the port is not rendered here although
     * makeAbsolute() does include it - confirm whether that is
     * intended before relying on this for non-default ports.
     *
     * @return string URL in canonical form.
     * @access public
     */
    public function asString() {
        $scheme = $identity = $host = $path = $encoded = $fragment = '';
        if ($this->_username && $this->_password) {
            $identity = $this->_username . ':' . $this->_password . '@';
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme() ? $this->getScheme() : 'http';
            $host = $this->getHost();
        }
        // Only absolute paths are rendered.
        if (substr($this->_path, 0, 1) == '/') {
            $path = $this->normalisePath($this->_path);
        }
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return "$scheme://$identity$host$path$encoded$fragment$coords";
    }

    /**
     * Replaces unknown sections to turn a relative
     * URL into an absolute one. The base URL can
     * be either a string or a SimpleUrl object.
     * @param string/SimpleUrl $base Base URL.
     * @return SimpleUrl New absolute URL.
     * @access public
     */
    public function makeAbsolute($base) {
        if (! is_object($base)) {
            $base = new SimpleUrl($base);
        }
        $scheme = $this->getScheme() ? $this->getScheme() : $base->getScheme();
        if ($this->getHost()) {
            $host = $this->getHost();
            $port = $this->getPort() ? ':' . $this->getPort() : '';
            $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
            if (! $identity) {
                // Fall back to the credentials of the base URL.
                $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
            }
        } else {
            $host = $base->getHost();
            $port = $base->getPort() ? ':' . $base->getPort() : '';
            $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
        }
        $path = $this->normalisePath($this->_extractAbsolutePath($base));
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
    }

    /**
     * Replaces unknown sections of the path with base parts
     * to return a complete absolute one.
     * @param SimpleUrl $base Base URL.
     * @return string Absolute path.
     * @access private
     */
    public function _extractAbsolutePath($base) {
        if ($this->getHost()) {
            return $this->_path;
        }
        if (! $this->_isRelativePath($this->_path)) {
            return $this->_path;
        }
        if ($this->_path) {
            return $base->getBasePath() . $this->_path;
        }
        // No path of our own at all: reuse the base path.
        return $base->getPath();
    }

    /**
     * Simple test to see if a path part is relative.
     * @param string $path Path to test.
     * @return boolean True if the path does not start
     *                 with a "/".
     * @access private
     */
    public function _isRelativePath($path) {
        return (substr($path, 0, 1) != '/');
    }

    /**
     * Extracts the username and password for use in rendering
     * a URL.
     * @return string/boolean Form of username:password or false.
     * @access public
     */
    public function getIdentity() {
        if ($this->_username && $this->_password) {
            return $this->_username . ':' . $this->_password;
        }
        return false;
    }

    /**
     * Replaces . and .. sections of the path.
     *
     * "." segments are removed first so that they are never
     * mistaken for a real directory when resolving "..", and each
     * pass is repeated until the path stops changing so that
     * consecutive "./" or "../" segments are fully resolved.
     *
     * @param string $path Unoptimised path.
     * @return string Path with dots removed if possible.
     * @access public
     */
    public function normalisePath($path) {
        do {
            $previous = $path;
            $path = preg_replace('|/\./|', '/', $path);
        } while ($path !== $previous);
        do {
            $previous = $path;
            $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
        } while ($path !== $previous);
        return $path;
    }

    /**
     * A pipe separated list of all TLDs that result in two part
     * domain names.
     * @return string Pipe separated list.
     * @access public
     * @static
     */
    static function getAllTopLevelDomains() {
        return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
    }
}
?>