| 1 |
lars |
1 |
<?php
|
|
|
2 |
// $Header: /cvsroot/html2ps/css.parse.inc.php,v 1.28 2007/03/15 18:37:31 Konstantin Exp $
|
|
|
3 |
|
|
|
4 |
require_once(HTML2PS_DIR.'css.rules.page.inc.php');
|
|
|
5 |
require_once(HTML2PS_DIR.'css.property.collection.php');
|
|
|
6 |
require_once(HTML2PS_DIR.'css.parse.properties.php');
|
|
|
7 |
|
|
|
8 |
// Regular-expression fragments describing the pieces of a CSS selector.
// They carry no delimiters or anchors: the selector parser in this file
// concatenates them into complete patterns.

// Class name (the part after "."); not referenced by the code visible in this file
define('SELECTOR_CLASS_REGEXP', "[\w\d_-]+");

// Element id (the part after "#")
define('SELECTOR_ID_REGEXP', "[\w\d_-]+");

// Attribute name, as in "[name]"
define('SELECTOR_ATTR_REGEXP', "[\w]+");

// name=value pair with optional quotes around the value, as in "[name=value]";
// group 1 captures the attribute name, group 2 the value
define('SELECTOR_ATTR_VALUE_REGEXP', "([\w]+)=['\"]?([\w]+)['\"]?");

// name~=value pair with optional quotes around the value, as in "[name~=value]";
// group 1 captures the attribute name, group 2 the value
define('SELECTOR_ATTR_VALUE_WORD_REGEXP', "([\w]+)~=['\"]?([\w]+)['\"]?");
|
|
|
13 |
|
|
|
14 |
// Parse the 'style' attribute value of the current node
|
|
|
15 |
//
|
|
|
16 |
/**
 * Turn the inline 'style' attribute of a node into a CSS rule and apply it.
 *
 * The attribute text is parsed as a list of property declarations, wrapped
 * in a rule with the universal selector (SELECTOR_ANY), and that rule is
 * applied to the node itself.
 *
 * @param object $root      Node whose 'style' attribute is read; it is also the target of the rule
 * @param mixed  &$state    Passed through unchanged to CSSRule::apply()
 * @param object &$pipeline Passed to the property parser and to CSSRule; also supplies the base URL
 */
function parse_style_attr($root, &$state, &$pipeline) {
  // Some pages wrap the inline declarations in curly braces:
  //
  //   <input maxLength=256 size=45 name=searchfor value="" style="{width:350px}">
  //
  // HTML 4.01 specifies the declaration block syntax *without* curly brace
  // delimiters for inline CSS, but many browsers accept the braces anyway.
  // For compatibility, strip one leading "{" and one trailing "}".
  // The two patterns are applied in order, the second to the result of the first.
  $declarations = preg_replace(
    array("/^\s*{/", "/}\s*$/"),
    "",
    $root->get_attribute("style")
  );

  $parsed_properties = parse_css_properties($declarations, $pipeline);

  $inline_rule = new CSSRule(
    array(
      array(SELECTOR_ANY),
      $parsed_properties,
      $pipeline->get_base_url(),
      $root
    ),
    $pipeline
  );

  $inline_rule->apply($root, $state, $pipeline);
}
|
|
|
46 |
|
|
|
47 |
// TODO: make a real parser instead of if-then-else mess
|
|
|
48 |
//
|
|
|
49 |
// Selector grammar (according to CSS 2.1, paragraph 5.1 & 5.2):
|
|
|
50 |
// Note that this particular grammar is not LL(1), but it can still be converted to
|
|
|
51 |
// that form
|
|
|
52 |
//
|
|
|
53 |
// COMPOSITE_SELECTOR ::= SELECTOR ("," SELECTOR)*
|
|
|
54 |
//
|
|
|
55 |
// SELECTOR ::= SIMPLE_SELECTOR (COMBINATOR SIMPLE_SELECTOR)*
|
|
|
56 |
//
|
|
|
57 |
// COMBINATOR ::= WHITESPACE* COMBINATOR_SYMBOL WHITESPACE*
|
|
|
58 |
// COMBINATOR_SYMBOL ::= " " | ">" | "+"
|
|
|
59 |
//
|
|
|
60 |
// SIMPLE_SELECTOR ::= TYPE_SELECTOR (ADDITIONAL_SELECTOR)*
|
|
|
61 |
// SIMPLE_SELECTOR ::= UNIVERSAL_SELECTOR (ADDITIONAL_SELECTOR)*
|
|
|
62 |
// SIMPLE_SELECTOR ::= (ADDITIONAL_SELECTOR)*
|
|
|
63 |
//
|
|
|
64 |
// CSS 2.1, p. 5.3: if the universal selector is not the only component of a simple selector, the "*" may be omitted
|
|
|
65 |
// SIMPLE_SELECTOR ::= (ADDITIONAL_SELECTOR)*
|
|
|
66 |
//
|
|
|
67 |
// TYPE_SELECTOR ::= TAG_NAME
|
|
|
68 |
//
|
|
|
69 |
// UNIVERSAL_SELECTOR ::= "*"
|
|
|
70 |
//
|
|
|
71 |
// ADDITIONAL_SELECTOR ::= ATTRIBUTE_SELECTOR | ID_SELECTOR | PSEUDOCLASS | CLASS_SELECTOR | PSEUDOELEMENT
|
|
|
72 |
//
|
|
|
73 |
// ATTRIBUTE_SELECTOR ::= "[" ATTRIBUTE_NAME "]"
|
|
|
74 |
// ATTRIBUTE_SELECTOR ::= "[" ATTRIBUTE_NAME "=" ATTR_VALUE "]"
|
|
|
75 |
// ATTRIBUTE_SELECTOR ::= "[" ATTRIBUTE_NAME "~=" ATTR_VALUE "]"
|
|
|
76 |
// ATTRIBUTE_SELECTOR ::= "[" ATTRIBUTE_NAME "|=" ATTR_VALUE "]"
|
|
|
77 |
//
|
|
|
78 |
// CLASS_SELECTOR ::= "." CLASS_NAME
|
|
|
79 |
//
|
|
|
80 |
// ID_SELECTOR ::= "#" ID_VALUE
|
|
|
81 |
//
|
|
|
82 |
// PSEUDOCLASS ::= ":first-child" |
|
|
|
83 |
// ":link" |
|
|
|
84 |
// ":visited" | // ignored in our case
|
|
|
85 |
// ":hover" | // dynamic - ignored in our case
|
|
|
86 |
// ":active" | // dynamic - ignored in our case
|
|
|
87 |
// ":focus" | // dynamic - ignored in our case
|
|
|
88 |
// ":lang(" LANG ")" | // dynamic - ignored in our case
|
|
|
89 |
//
|
|
|
90 |
// PSEUDOELEMENT ::= ":first-line" |
|
|
|
91 |
// ":first-letter" |
|
|
|
92 |
// ":before" |
|
|
|
93 |
// ":after" |
|
|
|
94 |
//
|
|
|
95 |
// ATTR_VALUE ::= IDENTIFIER | STRING
|
|
|
96 |
// CLASS_NAME ::= IDENTIFIER
|
|
|
97 |
// ID_VALUE ::= IDENTIFIER
|
|
|
98 |
//
|
|
|
99 |
/**
 * Parse one CSS selector (no commas) into a nested selector array.
 *
 * The selector is trimmed and lower-cased, then tested against a fixed
 * series of patterns; the first one that matches decides the result, and
 * the remaining parts are parsed recursively. Every returned array has a
 * SELECTOR_* constant as its first element:
 *
 *   array(SELECTOR_SEQUENCE, array(<selector>, <selector>))
 *   array(SELECTOR_DIRECT_PARENT | SELECTOR_PARENT, <selector>)
 *   array(SELECTOR_ATTR, name)
 *   array(SELECTOR_ATTR_VALUE | SELECTOR_ATTR_VALUE_WORD, name, value)
 *   array(SELECTOR_LANGUAGE, lang)
 *   array(SELECTOR_ID | SELECTOR_CLASS | SELECTOR_TAG, name)
 *   array(SELECTOR_ANY) and the argument-less pseudo-class/-element markers
 *
 * Anything that matches none of the patterns (including an empty string,
 * an unsupported pseudo-class, or an attribute selector with no preceding
 * simple selector) is returned verbatim as array(SELECTOR_TAG, $text).
 *
 * @param string $raw_selector Selector text; surrounding whitespace is allowed
 * @return array Nested selector description as outlined above
 */
function parse_css_selector($raw_selector) {
  // Note the 'trim' call. It is required as there could be leading/trailing spaces in $raw_selector
  $raw_selector = strtolower(trim($raw_selector));

  // Direct parent/child selectors (for example 'table > tr')
  if (preg_match("/^(\S.*)\s*>\s*([^\s]+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
      parse_css_selector($matches[2]),
      array(SELECTOR_DIRECT_PARENT, parse_css_selector($matches[1]))));
  }

  // Parent/child selectors (for example 'table td')
  if (preg_match("/^(\S.*)\s+([^\s]+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
      parse_css_selector($matches[2]),
      array(SELECTOR_PARENT, parse_css_selector($matches[1]))));
  }

  // Attribute presence (for example 'a[name]')
  if (preg_match("/^(.+)\[(".SELECTOR_ATTR_REGEXP.")\]$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
      parse_css_selector($matches[1]),
      array(SELECTOR_ATTR, $matches[2])));
  }

  // Attribute equals value (for example 'input[type=text]')
  if (preg_match("/^(.+)\[".SELECTOR_ATTR_VALUE_REGEXP."\]$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
      parse_css_selector($matches[1]),
      array(SELECTOR_ATTR_VALUE, $matches[2], css_remove_value_quotes($matches[3]))));
  }

  // Attribute contains word (for example 'a[rel~=next]')
  if (preg_match("/^(.+)\[".SELECTOR_ATTR_VALUE_WORD_REGEXP."\]$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(
      parse_css_selector($matches[1]),
      array(SELECTOR_ATTR_VALUE_WORD, $matches[2], css_remove_value_quotes($matches[3]))));
  }

  // Pseudoclasses & pseudoelements
  if (preg_match("/^([#\.\s\w_-]*):(\w+)$/", $raw_selector, $matches)) {
    // A bare ':link' etc. applies to any element
    if ($matches[1] === "") {
      $matches[1] = "*";
    }

    // Names not listed here fall through to the checks below
    switch ($matches[2]) {
    case "lowlink":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOCLASS_LINK_LOW_PRIORITY)));
    case "link":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOCLASS_LINK)));
    case "before":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOELEMENT_BEFORE)));
    case "after":
      return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_PSEUDOELEMENT_AFTER)));
    }
  }

  // :lang() pseudoclass
  if (preg_match("/^([#\.\s\w_-]+):lang\((\w+)\)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), array(SELECTOR_LANGUAGE, $matches[2])));
  }

  // Something followed by a class (for example 'p.note'); the last '.name' is split off
  if (preg_match("/^(\S+)(\.\S+)$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(parse_css_selector($matches[1]), parse_css_selector($matches[2])));
  }

  // Stand-alone '#id' or '.class'.
  // substr() is used instead of a string offset: the legacy "$str{0}" form is
  // a parse error since PHP 8.0, and reading offset 0 of an empty string
  // raises a warning, whereas substr() simply yields no character.
  switch (substr($raw_selector, 0, 1)) {
  case '#':
    return array(SELECTOR_ID, substr($raw_selector, 1));
  case '.':
    return array(SELECTOR_CLASS, substr($raw_selector, 1));
  }

  // Tag with id (for example 'div#header')
  if (preg_match("/^(\w+)#(".SELECTOR_ID_REGEXP.")$/", $raw_selector, $matches)) {
    return array(SELECTOR_SEQUENCE, array(array(SELECTOR_ID, $matches[2]), array(SELECTOR_TAG, $matches[1])));
  }

  // Universal selector
  if ($raw_selector === "*") {
    return array(SELECTOR_ANY);
  }

  // Everything else is treated as a tag name
  return array(SELECTOR_TAG, $raw_selector);
}
|
|
|
180 |
|
|
|
181 |
/**
 * Parse a comma-separated group of selectors.
 *
 * The text is split on "," and every non-empty piece is handed to
 * parse_css_selector(); the results are returned in source order.
 *
 * @param string $raw_selectors Selector group, e.g. "h1, h2.title"
 * @return array List of parsed selectors (empty when no piece has any content)
 */
function parse_css_selectors($raw_selectors) {
  $selectors = array();

  foreach (explode(",", $raw_selectors) as $selector_string) {
    // See comment on SELECTOR_ANY regarding why this code is commented
    // Remove the '* html' string from the selector
    // $selector_string = preg_replace('/^\s*\*\s+html/','',$selector_string);

    $selector_string = trim($selector_string);

    // Support for non-valid CSS similar to: "selector1,selector2, {rules}"
    // In this case we'll get three selectors; last will be empty string.
    // An explicit comparison is used because empty() would also discard "0".
    if ($selector_string !== '') {
      $selectors[] = parse_css_selector($selector_string);
    }
  }

  return $selectors;
}
|
|
|
204 |
|
|
|
205 |
// function &parse_css_property($property, &$pipeline) {
|
|
|
206 |
// if (preg_match("/^(.*?)\s*:\s*(.*)/",$property, $matches)) {
|
|
|
207 |
// $name = strtolower(trim($matches[1]));
|
|
|
208 |
// $code = CSS::name2code($name);
|
|
|
209 |
// if (is_null($code)) {
|
|
|
210 |
// error_log(sprintf("Unsupported CSS property: '%s'", $name));
|
|
|
211 |
// $null = null;
|
|
|
212 |
// return $null;
|
|
|
213 |
// };
|
|
|
214 |
|
|
|
215 |
// $collection =& new CSSPropertyCollection();
|
|
|
216 |
// $collection->add_property(CSSPropertyDeclaration::create($code, trim($matches[2]), $pipeline));
|
|
|
217 |
// return $collection;
|
|
|
218 |
// } elseif (preg_match("/@import\s+\"(.*)\";/",$property, $matches)) {
|
|
|
219 |
// // @import "<url>"
|
|
|
220 |
// $collection =& css_import(trim($matches[1]), $pipeline);
|
|
|
221 |
// return $collection;
|
|
|
222 |
// } elseif (preg_match("/@import\s+url\((.*)\);/",$property, $matches)) {
|
|
|
223 |
// // @import url()
|
|
|
224 |
// $collection =& css_import(trim($matches[1]), $pipeline);
|
|
|
225 |
// return $collection;
|
|
|
226 |
// } elseif (preg_match("/@import\s+(.*);/",$property, $matches)) {
|
|
|
227 |
// // @import <url>
|
|
|
228 |
// $collection =& css_import(trim($matches[1]), $pipeline);
|
|
|
229 |
// return $collection;
|
|
|
230 |
// } else {
|
|
|
231 |
// $collection =& new CSSPropertyCollection();
|
|
|
232 |
// return $collection;
|
|
|
233 |
// };
|
|
|
234 |
// }
|
|
|
235 |
|
|
|
236 |
?>
|