| 1 |
lars |
1 |
<?php
|
|
|
2 |
/**
|
|
|
3 |
* PHPUnit
|
|
|
4 |
*
|
|
|
5 |
* Copyright (c) 2002-2010, Sebastian Bergmann <sb@sebastian-bergmann.de>.
|
|
|
6 |
* All rights reserved.
|
|
|
7 |
*
|
|
|
8 |
* Redistribution and use in source and binary forms, with or without
|
|
|
9 |
* modification, are permitted provided that the following conditions
|
|
|
10 |
* are met:
|
|
|
11 |
*
|
|
|
12 |
* * Redistributions of source code must retain the above copyright
|
|
|
13 |
* notice, this list of conditions and the following disclaimer.
|
|
|
14 |
*
|
|
|
15 |
* * Redistributions in binary form must reproduce the above copyright
|
|
|
16 |
* notice, this list of conditions and the following disclaimer in
|
|
|
17 |
* the documentation and/or other materials provided with the
|
|
|
18 |
* distribution.
|
|
|
19 |
*
|
|
|
20 |
* * Neither the name of Sebastian Bergmann nor the names of his
|
|
|
21 |
* contributors may be used to endorse or promote products derived
|
|
|
22 |
* from this software without specific prior written permission.
|
|
|
23 |
*
|
|
|
24 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
25 |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
26 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
|
27 |
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
|
28 |
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
29 |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
|
30 |
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
31 |
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
|
32 |
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
33 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
|
34 |
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
35 |
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
36 |
*
|
|
|
37 |
* @category Testing
|
|
|
38 |
* @package PHPUnit
|
|
|
39 |
* @author Sebastian Bergmann <sb@sebastian-bergmann.de>
|
|
|
40 |
* @copyright 2002-2010 Sebastian Bergmann <sb@sebastian-bergmann.de>
|
|
|
41 |
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
|
|
|
42 |
* @link http://www.phpunit.de/
|
|
|
43 |
* @since File available since Release 3.2.0
|
|
|
44 |
*/
|
|
|
45 |
|
|
|
46 |
require_once 'PHPUnit/Util/Filter.php';
|
|
|
47 |
|
|
|
48 |
// Registers this file with PHPUnit_Util_Filter under the 'PHPUNIT' group.
// NOTE(review): presumably this keeps PHPUnit's own files out of coverage
// data and stack traces -- confirm against PHPUnit_Util_Filter.
PHPUnit_Util_Filter::addFileToFilter(__FILE__, 'PHPUNIT');
|
|
|
49 |
|
|
|
50 |
/**
|
|
|
51 |
* XML helpers.
|
|
|
52 |
*
|
|
|
53 |
* @category Testing
|
|
|
54 |
* @package PHPUnit
|
|
|
55 |
* @author Sebastian Bergmann <sb@sebastian-bergmann.de>
|
|
|
56 |
* @copyright 2002-2010 Sebastian Bergmann <sb@sebastian-bergmann.de>
|
|
|
57 |
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
|
|
|
58 |
* @version Release: 3.4.15
|
|
|
59 |
* @link http://www.phpunit.de/
|
|
|
60 |
* @since Class available since Release 3.2.0
|
|
|
61 |
*/
|
|
|
62 |
class PHPUnit_Util_XML
|
|
|
63 |
{
|
|
|
64 |
/**
 * Escapes a string so that it can be embedded in XML output.
 *
 * The string is converted to UTF-8, run through htmlspecialchars()
 * (ENT_COMPAT) and every control character matched by the pattern below
 * is replaced by a hexadecimal numeric character reference for its own
 * byte value (for example "\x01" becomes "&#x01;").
 *
 * @param  string $string
 * @return string
 * @author Kore Nordmann <mail@kore-nordmann.de>
 * @since  Method available since Release 3.4.6
 */
public static function prepareString($string)
{
    // The "e" (eval) modifier of preg_replace() was deprecated in PHP 5.5
    // and removed in PHP 7.0, so a callback is used instead. The callback
    // works on the whole match because the pattern has no capture group.
    return preg_replace_callback(
        '/[\\x00-\\x04\\x0b\\x0c\\x0e-\\x1f\\x7f]/',
        array(__CLASS__, 'escapeControlCharacter'),
        htmlspecialchars(
            self::convertToUtf8($string), ENT_COMPAT, 'UTF-8'
        )
    );
}

/**
 * preg_replace_callback() callback used by prepareString().
 *
 * @param  array $matches Match data; index 0 holds the matched character.
 * @return string         Numeric character reference for that character.
 */
private static function escapeControlCharacter(array $matches)
{
    return sprintf('&#x%02x;', ord($matches[0]));
}
|
|
|
80 |
|
|
|
81 |
/**
 * Returns the given string encoded as UTF-8.
 *
 * A string that isUtf8() already accepts is returned untouched. Anything
 * else is converted with mb_convert_encoding() when that function exists
 * and with utf8_encode() otherwise.
 *
 * @param  string $string
 * @return string
 * @since  Method available since Release 3.2.19
 */
protected static function convertToUtf8($string)
{
    if (self::isUtf8($string)) {
        return $string;
    }

    return function_exists('mb_convert_encoding')
         ? mb_convert_encoding($string, 'UTF-8')
         : utf8_encode($string);
}
|
|
|
100 |
|
|
|
101 |
/**
 * Checks whether a string is structurally valid UTF-8.
 *
 * Every lead byte must announce a 1- to 4-byte sequence and must be
 * followed by the matching number of continuation bytes (10xxxxxx).
 * Only this byte structure is checked; overlong encodings and surrogate
 * code points are not detected.
 *
 * @param  string $string
 * @return boolean
 * @since  Method available since Release 3.3.0
 */
protected static function isUtf8($string)
{
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $byte = ord($string[$i]);

        if ($byte < 0x80) {
            // 0xxxxxxx: single byte
            $n = 0;
        } else if (($byte & 0xE0) == 0xC0) {
            // 110xxxxx: one continuation byte follows
            $n = 1;
        } else if (($byte & 0xF0) == 0xE0) {
            // 1110xxxx: two continuation bytes follow
            $n = 2;
        } else if (($byte & 0xF8) == 0xF0) {
            // 11110xxx: three continuation bytes follow. The mask has to
            // be 0xF8; with 0xF0 the bytes 0xF8-0xFF, which never occur
            // in UTF-8, would be accepted as lead bytes as well.
            $n = 3;
        } else {
            return FALSE;
        }

        for ($j = 0; $j < $n; $j++) {
            // A truncated sequence or a non-continuation byte is invalid.
            if ((++$i == $length) || ((ord($string[$i]) & 0xC0) != 0x80)) {
                return FALSE;
            }
        }
    }

    return TRUE;
}
|
|
|
142 |
|
|
|
143 |
/**
 * Reads a file and loads its contents into a DOMDocument via load().
 *
 * PHP error reporting is switched off while the file is read and is
 * restored right afterwards.
 *
 * @param  string  $filename
 * @param  boolean $isHtml   Passed through to load().
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception when the file cannot be read
 * @since  Method available since Release 3.3.0
 */
public static function loadFile($filename, $isHtml = FALSE)
{
    $previousLevel = error_reporting(0);
    $xml           = file_get_contents($filename);
    error_reporting($previousLevel);

    if ($xml !== FALSE) {
        return self::load($xml, $isHtml, $filename);
    }

    throw new PHPUnit_Framework_Exception(
        sprintf('Could not read "%s".', $filename)
    );
}
|
|
|
168 |
|
|
|
169 |
/**
 * Load an $actual document into a DOMDocument. This is called
 * from the selector assertions.
 *
 * If $actual is already a DOMDocument, it is returned with
 * no changes. Otherwise, $actual is loaded into a new DOMDocument
 * as either HTML or XML, depending on the value of $isHtml.
 *
 * While the document is parsed, libxml's internal error handling is
 * switched on and PHP error reporting is switched off; both settings
 * are restored before the method returns or throws.
 *
 * Note: prior to PHPUnit 3.3.0, this method loaded a file and
 * not a string as it currently does. To load a file into a
 * DOMDocument, use loadFile() instead.
 *
 * @param  string|DOMDocument $actual
 * @param  boolean            $isHtml
 * @param  string             $filename Only used in the exception message.
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception when the document cannot be parsed
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function load($actual, $isHtml = FALSE, $filename = '')
{
    if ($actual instanceof DOMDocument) {
        return $actual;
    }

    $internal  = libxml_use_internal_errors(TRUE);
    $reporting = error_reporting(0);
    $dom       = new DOMDocument;

    if ($isHtml) {
        $loaded = $dom->loadHTML($actual);
    } else {
        $loaded = $dom->loadXML($actual);
    }

    // The libxml errors have to be collected BEFORE the previous setting
    // is restored: disabling internal error handling also clears the
    // error buffer, which would leave the exception message empty.
    $message = '';

    if ($loaded === FALSE) {
        foreach (libxml_get_errors() as $error) {
            $message .= $error->message;
        }
    }

    libxml_use_internal_errors($internal);
    error_reporting($reporting);

    if ($loaded === FALSE) {
        if ($filename != '') {
            throw new PHPUnit_Framework_Exception(
                sprintf(
                    'Could not load "%s".%s',
                    $filename,
                    $message != '' ? "\n" . $message : ''
                )
            );
        }

        throw new PHPUnit_Framework_Exception($message);
    }

    return $dom;
}
|
|
|
231 |
|
|
|
232 |
/**
 * Returns the content of a node as a string.
 *
 * A node with exactly one child yields its nodeValue. In every other
 * case the serialized XML of all child nodes, produced by the owner
 * document's saveXML(), is concatenated and returned.
 *
 * @param  DOMNode $node
 * @return string
 * @since  Method available since Release 3.4.0
 */
public static function nodeToText(DOMNode $node)
{
    $children = $node->childNodes;

    if ($children->length == 1) {
        return $node->nodeValue;
    }

    $pieces = array();

    foreach ($children as $child) {
        $pieces[] = $node->ownerDocument->saveXML($child);
    }

    return implode('', $pieces);
}
|
|
|
253 |
|
|
|
254 |
/**
 * Removes all direct children of a node that are DOMCharacterData
 * instances (text, CDATA sections, comments).
 *
 * Only the immediate children are inspected; deeper levels are left
 * untouched. The node is modified in place.
 *
 * @param  DOMNode $node
 * @since  Method available since Release 3.3.0
 * @author Mattis Stordalen Flister <mattis@xait.no>
 */
public static function removeCharacterDataNodes(DOMNode $node)
{
    if (!$node->hasChildNodes()) {
        return;
    }

    // Walk from the last child to the first so that removing a child
    // does not shift the positions that are still to be visited.
    $index = $node->childNodes->length;

    while ($index-- > 0) {
        $candidate = $node->childNodes->item($index);

        if ($candidate instanceof DOMCharacterData) {
            $node->removeChild($candidate);
        }
    }
}
|
|
|
271 |
|
|
|
272 |
/**
 * "Convert" a DOMElement object into a PHP variable.
 *
 * Supported tag names:
 *  - array:   each direct <element> child contributes one entry; its
 *             optional "key" attribute is the array key and its first
 *             child element is converted recursively to the value.
 *  - object:  instantiates the class named by the "class" attribute;
 *             the child elements of the first child element are
 *             converted recursively and passed as constructor arguments.
 *  - boolean: TRUE when the node value is "true", FALSE otherwise.
 *  - integer, double, string: the node value cast with settype().
 *
 * Any other tag name yields NULL.
 *
 * @param  DOMElement $element
 * @return mixed
 * @since  Method available since Release 3.4.0
 */
public static function xmlToVariable(DOMElement $element)
{
    $variable = NULL;

    switch ($element->tagName) {
        case 'array': {
            $variable = array();

            // Only direct <element> children belong to this array.
            // getElementsByTagName() would also return the entries of
            // nested arrays and flatten them into the outer one.
            foreach ($element->childNodes as $entry) {
                if (!$entry instanceof DOMElement ||
                    $entry->tagName != 'element') {
                    continue;
                }

                $item  = self::firstChildElement($entry);
                $value = $item !== NULL ? self::xmlToVariable($item) : NULL;

                if ($entry->hasAttribute('key')) {
                    $variable[(string)$entry->getAttribute('key')] = $value;
                } else {
                    $variable[] = $value;
                }
            }
        }
        break;

        case 'object': {
            $className = $element->getAttribute('class');
            $container = self::firstChildElement($element);

            if ($container !== NULL) {
                $constructorArgs = array();

                foreach ($container->childNodes as $argument) {
                    if ($argument instanceof DOMElement) {
                        $constructorArgs[] = self::xmlToVariable($argument);
                    }
                }

                $class    = new ReflectionClass($className);
                $variable = $class->newInstanceArgs($constructorArgs);
            } else {
                $variable = new $className;
            }
        }
        break;

        case 'boolean': {
            $variable = $element->nodeValue == 'true' ? TRUE : FALSE;
        }
        break;

        case 'integer':
        case 'double':
        case 'string': {
            $variable = $element->nodeValue;

            // The tag name doubles as the type name for settype().
            settype($variable, $element->tagName);
        }
        break;
    }

    return $variable;
}

/**
 * Returns the first child of a node that is a DOMElement.
 *
 * Skipping non-element children makes the conversion independent of
 * whether the XML is indented (leading whitespace text node) or compact.
 *
 * @param  DOMNode $node
 * @return DOMElement|NULL NULL when the node has no element child.
 */
private static function firstChildElement(DOMNode $node)
{
    foreach ($node->childNodes as $child) {
        if ($child instanceof DOMElement) {
            return $child;
        }
    }

    return NULL;
}
|
|
|
337 |
|
|
|
338 |
/**
 * Validate list of keys in the associative array.
 *
 * $validKeys may mix both forms: an integer-indexed entry names a valid
 * key whose default is NULL, a string-indexed entry names a valid key
 * together with its default value. Every key of $hash has to be one of
 * the valid keys. Valid keys that are missing from $hash (or are NULL
 * there) are filled in with their default.
 *
 * @param  array $hash
 * @param  array $validKeys
 * @return array $hash completed with the defaults
 * @throws InvalidArgumentException when $hash contains an unknown key
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function assertValidKeys(array $hash, array $validKeys)
{
    $valids = array();

    // Normalize validation keys so that we can use both indexed and
    // associative arrays.
    foreach ($validKeys as $key => $val) {
        if (is_int($key)) {
            $valids[$val] = NULL;
        } else {
            $valids[$key] = $val;
        }
    }

    // Check for invalid keys. A key lookup is used instead of a loose
    // in_array(): with loose comparison an integer key such as 0 equals
    // any non-numeric string on PHP < 8 and would pass as valid.
    $unknown = array();

    foreach ($hash as $key => $value) {
        if (!array_key_exists($key, $valids)) {
            $unknown[] = $key;
        }
    }

    if (!empty($unknown)) {
        throw new InvalidArgumentException(
            'Unknown key(s): ' . implode(', ', $unknown)
        );
    }

    // Add default values for any valid keys that are empty.
    foreach ($valids as $key => $value) {
        if (!isset($hash[$key])) {
            $hash[$key] = $value;
        }
    }

    return $hash;
}
|
|
|
383 |
|
|
|
384 |
/**
 * Parse a CSS selector into an associative array suitable for
 * use with findNodes().
 *
 * The selector is split on whitespace and processed from right to left.
 * Each simple selector becomes an array that may hold the keys 'tag',
 * 'id', 'class', 'attributes' and 'content'; the selector to its right
 * is nested under 'descendant', or under 'child' when the two are
 * separated by '>'. The array for the left-most selector is returned.
 *
 * Supported attribute forms are [name=value] (exact match),
 * [name~=value] (whole word) and [name*=value] (substring). The last two
 * are translated into "regexp:" values; the selector value is treated
 * as literal text.
 *
 * @param  string $selector
 * @param  mixed  $content  When a string, it is stored as 'content' in
 *                          every generated array.
 * @return array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function convertSelectToTag($selector, $content = TRUE)
{
    $selector = trim(preg_replace("/\s+/", " ", $selector));

    // substitute spaces within attribute value
    while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
        $selector = preg_replace(
            '/(\[[^\]]+"[^"]+)\s([^"]+"\])/', "$1__SPACE__$2", $selector
        );
    }

    // explode() returns a one-element array when there is no space.
    $elements    = explode(' ', $selector);
    $previousTag = array();
    $tag         = array();

    foreach (array_reverse($elements) as $element) {
        $element = str_replace('__SPACE__', ' ', $element);

        // child selector
        if ($element == '>') {
            $previousTag = array(
                'child' => isset($previousTag['descendant'])
                         ? $previousTag['descendant']
                         : NULL
            );

            continue;
        }

        $tag = array();

        // match element tag
        preg_match("/^([^\.#\[]*)/", $element, $eltMatches);

        if (!empty($eltMatches[1])) {
            $tag['tag'] = $eltMatches[1];
        }

        // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
        // and classes (\.[^\.#\[]*))
        preg_match_all(
            "/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/", $element, $matches
        );

        if (!empty($matches[1])) {
            $classes = array();
            $attrs   = array();

            foreach ($matches[1] as $match) {
                // id matched
                if (substr($match, 0, 1) == '#') {
                    $tag['id'] = substr($match, 1);
                }

                // class matched
                else if (substr($match, 0, 1) == '.') {
                    $classes[] = substr($match, 1);
                }

                // attribute matched
                else if (substr($match, 0, 1) == '[' &&
                         substr($match, -1, 1) == ']') {
                    $attribute = substr($match, 1, strlen($match) - 2);
                    $attribute = str_replace('"', '', $attribute);

                    // Split at the FIRST operator only, so that a value
                    // which itself contains "=", "~=" or "*=" is kept
                    // intact.
                    if (preg_match('/^(.*?)(~=|\*=|=)(.*)$/s', $attribute, $parts)) {
                        $key = $parts[1];

                        // preg_quote() keeps characters such as "/" or
                        // "." in the value from breaking or altering
                        // the generated pattern.
                        switch ($parts[2]) {
                            // match single word
                            case '~=': {
                                $value = 'regexp:/.*\b' .
                                         preg_quote($parts[3], '/') .
                                         '\b.*/';
                            }
                            break;

                            // match substring
                            case '*=': {
                                $value = 'regexp:/.*' .
                                         preg_quote($parts[3], '/') .
                                         '.*/';
                            }
                            break;

                            // exact match
                            default: {
                                $value = $parts[3];
                            }
                        }
                    }

                    // attribute without operator, e.g. [disabled]
                    else {
                        $key   = $attribute;
                        $value = NULL;
                    }

                    $attrs[$key] = $value;
                }
            }

            if ($classes) {
                $tag['class'] = join(' ', $classes);
            }

            if ($attrs) {
                $tag['attributes'] = $attrs;
            }
        }

        // tag content
        if (is_string($content)) {
            $tag['content'] = $content;
        }

        // determine previous child/descendants
        if (!empty($previousTag['descendant'])) {
            $tag['descendant'] = $previousTag['descendant'];
        }

        else if (!empty($previousTag['child'])) {
            $tag['child'] = $previousTag['child'];
        }

        $previousTag = array('descendant' => $tag);
    }

    return $tag;
}
|
|
|
508 |
|
|
|
509 |
/**
 * Parse an $actual document and return an array of DOMNodes
 * matching the CSS $selector. If no node matches, FALSE is returned.
 *
 * To only return nodes containing a certain content, give
 * the $content to match as a string. Otherwise, setting
 * $content to TRUE will return all nodes matching $selector.
 *
 * The $actual document may be a DOMDocument or a string
 * containing XML or HTML, identified by $isHtml.
 *
 * @param  string  $selector
 * @param  mixed   $content
 * @param  mixed   $actual
 * @param  boolean $isHtml
 * @return false|array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function cssSelect($selector, $content, $actual, $isHtml = TRUE)
{
    $matcher = self::convertSelectToTag($selector, $content);
    $dom     = self::load($actual, $isHtml);

    // $isHtml has to reach findNodes() as well; otherwise XML documents
    // would be searched with the HTML (case-insensitive) tag lookup.
    return self::findNodes($dom, $matcher, $isHtml);
}
|
|
|
538 |
|
|
|
539 |
/**
 * Parse out the options from the tag using DOM object tree.
 *
 * Candidate elements are collected by tag name ($options['tag'], or a
 * fixed list of HTML tag names when no tag is given) and are then
 * narrowed down by every filter that is set in $options:
 *
 *  - id, class:  shortcuts that are merged into 'attributes'
 *  - attributes: name => value pairs; a value of the form "regexp:/../"
 *                is matched as a regular expression, 'class' matches
 *                when every listed class is present, anything else is
 *                compared as a plain string
 *  - content:    substring of the node text, or "regexp:/../"
 *  - parent, ancestor: matcher arrays; the node's parent (or one of its
 *                ancestors) has to be the FIRST node found for them
 *  - child, descendant: matcher arrays; the node is appended to the
 *                result once per matching child (or descendant)
 *  - children:   array with 'count', 'less_than', 'greater_than', 'only'
 *
 * @param  DOMDocument $dom
 * @param  array       $options
 * @param  boolean     $isHtml  TRUE to look tags up case-insensitively
 * @return false|array FALSE as soon as a stage leaves no node
 * @throws InvalidArgumentException for unknown keys in $options
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE)
{
    $valid = array(
        'id', 'class', 'tag', 'content', 'attributes', 'parent',
        'child', 'ancestor', 'descendant', 'children'
    );

    $nodes    = array();
    $filtered = array();
    $options  = self::assertValidKeys($options, $valid);

    // find the element by id
    if ($options['id']) {
        $options['attributes']['id'] = $options['id'];
    }

    if ($options['class']) {
        $options['attributes']['class'] = $options['class'];
    }

    // find the element by a tag type
    if ($options['tag']) {
        $tags = array($options['tag']);
    }

    // no tag selected, get them all
    else {
        $tags = array(
            'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
            'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
            'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl',
            'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2',
            'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe',
            'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
            'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
            'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
            'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
            'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title',
            'tr', 'tt', 'ul', 'var'
        );
    }

    foreach ($tags as $tag) {
        if ($isHtml) {
            $elements = self::getElementsByCaseInsensitiveTagName(
                $dom, $tag
            );
        } else {
            $elements = $dom->getElementsByTagName($tag);
        }

        foreach ($elements as $element) {
            $nodes[] = $element;
        }
    }

    if (empty($nodes)) {
        return FALSE;
    }

    // filter by attributes
    if ($options['attributes']) {
        foreach ($nodes as $node) {
            $invalid = FALSE;

            foreach ($options['attributes'] as $name => $value) {
                // match by regexp if like "regexp:/foo/i"
                if (preg_match('/^regexp\s*:\s*(.*)/i', $value, $matches)) {
                    if (!preg_match($matches[1], $node->getAttribute($name))) {
                        $invalid = TRUE;
                    }
                }

                // class can match only a part
                else if ($name == 'class') {
                    // split to individual classes
                    $findClasses = explode(
                        ' ', preg_replace("/\s+/", " ", $value)
                    );

                    $allClasses = explode(
                        ' ',
                        preg_replace("/\s+/", " ", $node->getAttribute($name))
                    );

                    // make sure each class given is in the actual node
                    foreach ($findClasses as $findClass) {
                        if (!in_array($findClass, $allClasses)) {
                            $invalid = TRUE;
                        }
                    }
                }

                // match by exact string
                else {
                    if ($node->getAttribute($name) != $value) {
                        $invalid = TRUE;
                    }
                }
            }

            // if every attribute given matched
            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by content
    if ($options['content'] !== NULL) {
        foreach ($nodes as $node) {
            $invalid = FALSE;

            // match by regexp if like "regexp:/foo/i"
            if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
                if (!preg_match($matches[1], self::getNodeText($node))) {
                    $invalid = TRUE;
                }
            }

            // match by exact string
            else if (strstr(self::getNodeText($node), $options['content']) === FALSE) {
                $invalid = TRUE;
            }

            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by parent node
    if ($options['parent']) {
        $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
        $parentNode  = isset($parentNodes[0]) ? $parentNodes[0] : NULL;

        foreach ($nodes as $node) {
            // A node with a different parent is skipped; it must not
            // end the scan of the remaining candidates.
            if ($parentNode === $node->parentNode) {
                $filtered[] = $node;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by child node
    if ($options['child']) {
        $childNodes = self::findNodes($dom, $options['child'], $isHtml);
        $childNodes = !empty($childNodes) ? $childNodes : array();

        foreach ($nodes as $node) {
            foreach ($node->childNodes as $child) {
                foreach ($childNodes as $childNode) {
                    // the node is appended once per matching child
                    if ($childNode === $child) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by ancestor
    if ($options['ancestor']) {
        $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
        $ancestorNode  = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;

        foreach ($nodes as $node) {
            $parent = $node->parentNode;

            // Stop at the HTML document node, or at the top of the tree
            // for documents that were loaded as XML (their document node
            // has a different type, so the walk ends when parentNode
            // becomes NULL).
            while ($parent !== NULL &&
                   $parent->nodeType != XML_HTML_DOCUMENT_NODE) {
                if ($parent === $ancestorNode) {
                    $filtered[] = $node;
                }

                $parent = $parent->parentNode;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by descendant
    if ($options['descendant']) {
        $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
        $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();

        foreach ($nodes as $node) {
            foreach (self::getDescendants($node) as $descendant) {
                foreach ($descendantNodes as $descendantNode) {
                    // the node is appended once per matching descendant
                    if ($descendantNode === $descendant) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by children
    if ($options['children']) {
        $validChild   = array('count', 'greater_than', 'less_than', 'only');
        $childOptions = self::assertValidKeys(
            $options['children'], $validChild
        );

        // The nodes matching 'only' do not depend on the candidate, so
        // they are looked up once.
        $onlyNodes = array();

        if ($childOptions['only']) {
            $onlyNodes = self::findNodes(
                $dom, $childOptions['only'], $isHtml
            );

            $onlyNodes = !empty($onlyNodes) ? $onlyNodes : array();
        }

        foreach ($nodes as $node) {
            // Collected per node; carrying the list over from the
            // previous candidate would distort the counts below.
            $children = array();

            foreach ($node->childNodes as $childNode) {
                if ($childNode->nodeType !== XML_CDATA_SECTION_NODE &&
                    $childNode->nodeType !== XML_TEXT_NODE) {
                    $children[] = $childNode;
                }
            }

            // we must have children to pass this filter
            if (empty($children)) {
                continue;
            }

            $numChildren = count($children);

            // A failed check only rejects the current node ("continue");
            // the remaining candidates are still examined.

            // exact count of children
            if ($childOptions['count'] !== NULL) {
                if ($numChildren !== $childOptions['count']) {
                    continue;
                }
            }

            // range count of children
            else if ($childOptions['less_than'] !== NULL &&
                     $childOptions['greater_than'] !== NULL) {
                if ($numChildren >= $childOptions['less_than'] ||
                    $numChildren <= $childOptions['greater_than']) {
                    continue;
                }
            }

            // less than a given count
            else if ($childOptions['less_than'] !== NULL) {
                if ($numChildren >= $childOptions['less_than']) {
                    continue;
                }
            }

            // more than a given count
            else if ($childOptions['greater_than'] !== NULL) {
                if ($numChildren <= $childOptions['greater_than']) {
                    continue;
                }
            }

            // match each child against a specific tag
            if ($childOptions['only']) {
                // try to match each child to one of the 'only' nodes
                foreach ($children as $child) {
                    $matched = FALSE;

                    foreach ($onlyNodes as $onlyNode) {
                        if ($onlyNode === $child) {
                            $matched = TRUE;
                        }
                    }

                    if (!$matched) {
                        continue 2;
                    }
                }
            }

            $filtered[] = $node;
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            // FALSE, like every other stage that ends up empty
            return FALSE;
        }
    }

    // return the nodes that match all criteria
    return !empty($nodes) ? $nodes : array();
}
|
|
|
886 |
|
|
|
887 |
/**
 * Collects every descendant of a node into one flat list.
 *
 * Text and CDATA section nodes are left out. For each remaining child
 * its own descendants are listed first, followed by the child itself.
 *
 * @param  DOMNode $node
 * @return array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getDescendants(DOMNode $node)
{
    $descendants = array();

    if (!$node->childNodes) {
        return $descendants;
    }

    foreach ($node->childNodes as $child) {
        $type = $child->nodeType;

        if ($type !== XML_CDATA_SECTION_NODE && $type !== XML_TEXT_NODE) {
            foreach (self::getDescendants($child) as $nested) {
                $descendants[] = $nested;
            }

            $descendants[] = $child;
        }
    }

    return $descendants;
}
|
|
|
913 |
|
|
|
914 |
/**
 * Looks elements up by tag name, trying two spellings of the name.
 *
 * The lower-case form of $tag is searched first. Only when that search
 * finds nothing is the upper-case form searched. Mixed-case spellings
 * are not tried.
 *
 * @param  DOMDocument $dom
 * @param  string      $tag
 * @return DOMNodeList
 * @since  Method available since Release 3.4.0
 */
protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag)
{
    $lowerCaseMatches = $dom->getElementsByTagName(strtolower($tag));

    return $lowerCaseMatches->length != 0
         ? $lowerCaseMatches
         : $dom->getElementsByTagName(strtoupper($tag));
}
|
|
|
932 |
|
|
|
933 |
/**
 * Get the text contained in a node and all of its descendants.
 *
 * The data of every text node is trimmed and followed by a single space;
 * all other child nodes are processed recursively. Runs of spaces in
 * the result are collapsed into one space.
 *
 * @param  DOMNode $node
 * @return string
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getNodeText(DOMNode $node)
{
    if (!$node->childNodes instanceof DOMNodeList) {
        return '';
    }

    $result = '';

    foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType === XML_TEXT_NODE) {
            $result .= trim($childNode->data) . ' ';
        } else {
            $result .= self::getNodeText($childNode);
        }
    }

    // Whitespace-only text nodes contribute a bare space each, which
    // leaves runs of spaces behind; reduce every run to one space.
    return preg_replace('/ {2,}/', ' ', $result);
}
|
|
|
960 |
}
|
|
|
961 |
?>
|