| 1 |
lars |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
require_once(HTML2PS_DIR.'encoding.inc.php');
|
|
|
4 |
require_once(HTML2PS_DIR.'encoding.entities.inc.php');
|
|
|
5 |
require_once(HTML2PS_DIR.'encoding.glyphs.inc.php');
|
|
|
6 |
require_once(HTML2PS_DIR.'encoding.iso-8859-1.inc.php');
|
|
|
7 |
require_once(HTML2PS_DIR.'encoding.iso-8859-2.inc.php');
|
|
|
8 |
require_once(HTML2PS_DIR.'encoding.iso-8859-3.inc.php');
|
|
|
9 |
require_once(HTML2PS_DIR.'encoding.iso-8859-4.inc.php');
|
|
|
10 |
require_once(HTML2PS_DIR.'encoding.iso-8859-5.inc.php');
|
|
|
11 |
require_once(HTML2PS_DIR.'encoding.iso-8859-6.inc.php');
|
|
|
12 |
require_once(HTML2PS_DIR.'encoding.iso-8859-7.inc.php');
|
|
|
13 |
require_once(HTML2PS_DIR.'encoding.iso-8859-8.inc.php');
|
|
|
14 |
require_once(HTML2PS_DIR.'encoding.iso-8859-9.inc.php');
|
|
|
15 |
require_once(HTML2PS_DIR.'encoding.iso-8859-10.inc.php');
|
|
|
16 |
require_once(HTML2PS_DIR.'encoding.iso-8859-11.inc.php');
|
|
|
17 |
require_once(HTML2PS_DIR.'encoding.iso-8859-13.inc.php');
|
|
|
18 |
require_once(HTML2PS_DIR.'encoding.iso-8859-14.inc.php');
|
|
|
19 |
require_once(HTML2PS_DIR.'encoding.iso-8859-15.inc.php');
|
|
|
20 |
require_once(HTML2PS_DIR.'encoding.koi8-r.inc.php');
|
|
|
21 |
require_once(HTML2PS_DIR.'encoding.cp866.inc.php');
|
|
|
22 |
require_once(HTML2PS_DIR.'encoding.windows-1250.inc.php');
|
|
|
23 |
require_once(HTML2PS_DIR.'encoding.windows-1251.inc.php');
|
|
|
24 |
require_once(HTML2PS_DIR.'encoding.windows-1252.inc.php');
|
|
|
25 |
require_once(HTML2PS_DIR.'encoding.dingbats.inc.php');
|
|
|
26 |
require_once(HTML2PS_DIR.'encoding.symbol.inc.php');
|
|
|
27 |
|
|
|
28 |
// TODO: this works for PS encoding names only
|
|
|
29 |
/**
 * Keeps track of single-byte encoding vectors (the predefined ones found
 * in $g_utf8_converters plus the 'customN' vectors created on demand) and
 * of the UTF-8 sequence => character code lookup table built from them.
 *
 * Syntax is deliberately kept at the PHP 4/5 level used by the rest of the
 * file ('var' properties, array() literals, no type declarations).
 */
class ManagerEncoding {
  /**
   * Encoding vectors added through register_encoding(), keyed by
   * encoding name.
   */
  var $_encodings = array();

  /**
   * Number of the current custom encoding vector
   */
  var $_custom_vector_index = 0;

  /**
   * Lookup table: UTF-8 sequence => array(encoding name => character code).
   * Stays unset until load_mapping(), generate_mapping() or
   * add_custom_char() writes to it.
   */
  var $_utf8_mapping;

  /**
   * Creates the first custom encoding vector ('custom1').
   *
   * Declared before the legacy class-named constructor so that PHP 5 does
   * not complain about a redefined constructor.
   */
  function __construct() {
    $this->new_custom_encoding_vector();
  }

  /**
   * Legacy PHP 4 style constructor. PHP 8 no longer treats a method named
   * after the class as a constructor, so the real work lives in
   * __construct(); this method only remains for code that calls it by name.
   */
  function ManagerEncoding() {
    $this->__construct();
  }

  /**
   * Add new custom symbol not present in the existing encoding
   * vectors.
   *
   * Note: encoding vector this character was placed to should be
   * extracted via get_current_custom_encoding_name immediately after
   * add_custom_char call, because a later call may switch to a new vector.
   *
   * NOTE(review): when the UTF-8 mapping table has not been loaded yet,
   * the assignment below creates it with this single entry, and
   * get_mapping() will then never load the cache file. Confirm callers
   * always call get_mapping() first.
   *
   * @param mixed $char character to add; stored in the vector as given
   *                    and passed to code_to_utf8() unchanged
   *
   * @return char index of this character in custom encoding vector
   */
  function add_custom_char($char) {
    // Check if current encoding vector is full; if it is, we should
    // add a new one.
    if ($this->is_custom_encoding_full()) {
      $this->new_custom_encoding_vector();
    }

    // Name of the custom encoding the new character goes to
    $vector_name = $this->get_current_custom_encoding_name();

    // The next free (zero-based) slot is the current number of entries
    $index = count($this->_encodings[$vector_name]);
    $code  = chr($index);

    // Add new character to the custom encoding vector
    $this->_encodings[$vector_name][$code] = $char;

    // Add new character to the UTF8 mapping table
    $this->_utf8_mapping[code_to_utf8($char)][$vector_name] = $code;

    return $code;
  }

  /**
   * Rebuild the UTF-8 mapping table from $g_utf8_converters and write it,
   * serialized, to the given cache file.
   *
   * The in-memory table is replaced even when the file cannot be written;
   * in that case the failure is logged and the cache is simply not stored.
   *
   * @param string $mapping_file path of the cache file to (over)write
   */
  function generate_mapping($mapping_file) {
    global $g_utf8_converters;

    $this->_utf8_mapping = array();
    if (is_array($g_utf8_converters)) {
      foreach (array_keys($g_utf8_converters) as $encoding) {
        // Element 0 of a converter maps character code => unicode code;
        // flip it to iterate unicode code => character code.
        $flipped = array_flip($g_utf8_converters[$encoding][0]);
        foreach ($flipped as $utf => $code) {
          $this->_utf8_mapping[code_to_utf8($utf)][$encoding] = $code;
        }
      }
    }

    $file = fopen($mapping_file, 'w');
    if ($file === false) {
      // Do not hand 'false' to fwrite()/fclose(); the table built above
      // remains usable for the current request.
      error_log(sprintf("Cannot write UTF-8 mapping cache file '%s'", $mapping_file));
      return;
    }

    fwrite($file, serialize($this->_utf8_mapping));
    fclose($file);
  }

  /**
   * Returns (by reference) the instance stored in the global variable
   * $g_manager_encodings.
   *
   * NOTE(review): the method does not use $this. If callers invoke it as
   * ManagerEncoding::get(), PHP 8 requires it to be declared static -
   * confirm against the call sites.
   */
  function &get() {
    global $g_manager_encodings;
    return $g_manager_encodings;
  }

  /**
   * Resolve an encoding alias using $g_encoding_aliases.
   *
   * @param string $encoding encoding name or alias
   *
   * @return string the aliased name, or $encoding itself when no alias
   *                is registered for it
   */
  function get_canonized_encoding_name($encoding) {
    global $g_encoding_aliases;

    if (isset($g_encoding_aliases[$encoding])) {
      return $g_encoding_aliases[$encoding];
    }

    return $encoding;
  }

  /**
   * @return string name of the custom encoding vector currently being filled
   */
  function get_current_custom_encoding_name() {
    return $this->get_custom_encoding_name($this->get_custom_vector_index());
  }

  /**
   * @param int $index custom encoding vector number
   *
   * @return string 'custom' followed by the number, e.g. 'custom3'
   */
  function get_custom_encoding_name($index) {
    return sprintf('custom%d', $index);
  }

  /**
   * @return int number of the current custom encoding vector
   */
  function get_custom_vector_index() {
    return $this->_custom_vector_index;
  }

  /**
   * Get glyph names for every character code of an encoding.
   *
   * @param string $encoding Encoding name
   *
   * @return Array character code => glyph name; null (after logging an
   *               error) if the encoding is not known
   */
  function get_encoding_glyphs($encoding) {
    $vector = $this->get_encoding_vector($encoding);
    if (is_null($vector)) {
      error_log(sprintf("Cannot get encoding vector for encoding '%s'", $encoding));
      return null;
    }

    return $this->vector_to_glyphs($vector);
  }

  /**
   * Get an encoding vector (array containing 256 elements; every
   * element is an ucs-2 encoded character)
   *
   * Predefined encodings from $g_utf8_converters take precedence over
   * vectors added with register_encoding(). Character codes missing from
   * the vector are filled with 0xFFFF in the returned copy.
   *
   * @param $encoding Encoding name
   *
   * @return Array encoding vector; null if this encoding is not known to the script
   */
  function get_encoding_vector($encoding) {
    global $g_utf8_converters;

    $encoding = $this->get_canonized_encoding_name($encoding);

    if (isset($g_utf8_converters[$encoding])) {
      $vector = $g_utf8_converters[$encoding][0];
    } elseif (isset($this->_encodings[$encoding])) {
      $vector = $this->_encodings[$encoding];
    } else {
      return null;
    }

    for ($i = 0; $i <= 255; $i++) {
      $code = chr($i);
      if (!isset($vector[$code])) {
        $vector[$code] = 0xFFFF;
      }
    }

    return $vector;
  }

  /**
   * Build the reverse glyph lookup for an encoding.
   *
   * Vector entries whose value has no name in $GLOBALS['g_unicode_glyphs']
   * are skipped.
   *
   * @param string $encoding Encoding name
   *
   * @return Array glyph name => list of character codes mapped to that
   *               glyph; empty array (after logging an error) if the
   *               encoding is not known
   */
  function get_glyph_to_code_mapping($encoding) {
    $vector = $this->get_encoding_vector($encoding);
    if (is_null($vector)) {
      // An unknown encoding used to reach the foreach below with null
      error_log(sprintf("Cannot get encoding vector for encoding '%s'", $encoding));
      return array();
    }

    $result = array();
    foreach ($vector as $code => $uccode) {
      if (isset($GLOBALS['g_unicode_glyphs'][$uccode])) {
        $result[$GLOBALS['g_unicode_glyphs'][$uccode]][] = $code;
      }
    }

    return $result;
  }

  /**
   * Look up the encodings containing the given character. On first use
   * the mapping table is loaded from CACHE_DIR . 'utf8.mappings.dat'.
   *
   * @param string $char UTF-8 sequence used as the lookup key
   *
   * @return Array encoding name => character code; null if the
   *               character is not in the table
   */
  function get_mapping($char) {
    if (!isset($this->_utf8_mapping)) {
      $this->load_mapping(CACHE_DIR . 'utf8.mappings.dat');
    }

    if (!isset($this->_utf8_mapping[$char])) {
      return null;
    }

    return $this->_utf8_mapping[$char];
  }

  /**
   * Read one UTF-8 character starting at $ptr and advance $ptr past it.
   *
   * The length is derived from the lead byte only. The bit patterns are
   * tested from the longest sequence to the shortest, because the mask of
   * a shorter sequence also matches the lead byte of a longer one. No
   * check is made that $ptr lies inside the string or that the
   * continuation bytes are present.
   *
   * @param string $raw_content UTF-8 encoded data
   * @param int    $ptr         byte offset of the character; updated in place
   *
   * @return string bytes of the character
   */
  function get_next_utf8_char($raw_content, &$ptr) {
    $lead = ord($raw_content[$ptr]);

    if (($lead & 0xF0) == 0xF0) {
      $charlen = 4;
    } elseif (($lead & 0xE0) == 0xE0) {
      $charlen = 3;
    } elseif (($lead & 0xC0) == 0xC0) {
      $charlen = 2;
    } else {
      $charlen = 1;
    }

    $char = substr($raw_content, $ptr, $charlen);
    $ptr += $charlen;

    return $char;
  }

  /**
   * Render an encoding as a PostScript encoding array definition
   * ("/name [ /glyph ... ] readonly def") with ten glyph names per line.
   * Codes without a known glyph name are written as /.notdef.
   *
   * @param string $encoding Encoding name; also used as the PostScript name
   *
   * @return string PostScript source
   */
  function get_ps_encoding_vector($encoding) {
    $vector = $this->get_encoding_vector($encoding);

    $result = "/".$encoding." [ \n";
    for ($i = 0; $i < 256; $i++) {
      if ($i % 10 == 0) {
        $result .= "\n";
      }

      $code = chr($i);

      // isset($vector[$code]) has to be evaluated first: the second check
      // uses that element as an array key.
      if (isset($vector[$code]) && isset($GLOBALS['g_unicode_glyphs'][$vector[$code]])) {
        $result .= " /".$GLOBALS['g_unicode_glyphs'][$vector[$code]];
      } else {
        $result .= " /.notdef";
      }
    }
    $result .= " ] readonly def";

    return $result;
  }

  /**
   * @param string $encoding Encoding name
   *
   * @return int result of preg_match(): 1 if the name has the form
   *             'custom<digits>', 0 otherwise
   */
  function is_custom_encoding($encoding) {
    return preg_match('/^custom\d+$/', $encoding);
  }

  /**
   * @return bool true when the current custom vector holds 256 entries
   *              or more
   */
  function is_custom_encoding_full() {
    return count($this->_encodings[$this->get_current_custom_encoding_name()]) >= 256;
  }

  /**
   * Load the UTF-8 mapping table from the cache file. The table is
   * regenerated (and the cache rewritten) when the file is not readable
   * or does not contain a serialized array.
   *
   * The file is expected to be this script's own cache: unserialize()
   * must not be pointed at data coming from an untrusted source.
   *
   * @param string $mapping_file path of the cache file
   */
  function load_mapping($mapping_file) {
    if (is_readable($mapping_file)) {
      $mapping = unserialize(file_get_contents($mapping_file));
      if (is_array($mapping)) {
        $this->_utf8_mapping = $mapping;
        return;
      }

      // Keeping 'false' here would make every later lookup miss without
      // ever reloading the table.
      error_log(sprintf("UTF-8 mapping cache file '%s' is corrupt; regenerating", $mapping_file));
    }

    $this->generate_mapping($mapping_file);
  }

  /**
   * Create new custom 256-characters encoding vector. Character codes
   * 0 through 32 (33 entries) are pre-filled for system use, so the first
   * character added afterwards gets code 33.
   *
   * Custom encoding vectors have names 'customX' when X stand for the
   * encoding index.
   *
   * NOTE(review): the reserved slots hold chr($i) strings, while
   * get_encoding_vector() pads with the integer 0xFFFF - confirm which
   * representation code_to_utf8() expects.
   */
  function new_custom_encoding_vector() {
    $initial_vector = array();
    for ($i = 0; $i <= 32; $i++) {
      $initial_vector[chr($i)] = chr($i);
    }

    $name = $this->get_custom_encoding_name($this->next_custom_vector_index());
    $this->register_encoding($name, $initial_vector);
  }

  /**
   * Returns index for the next custom encoding; the stored index is
   * incremented as a side effect.
   */
  function next_custom_vector_index() {
    return ++$this->_custom_vector_index;
  }

  /**
   * Store (or replace) an encoding vector under the given name.
   *
   * @param string $name   Encoding name
   * @param Array  $vector character code => character
   */
  function register_encoding($name, $vector) {
    $this->_encodings[$name] = $vector;
  }

  /**
   * Convert a string from a single-byte encoding to UTF-8, one byte at a
   * time, through the encoding vector and code_to_utf8().
   *
   * NOTE(review): there is no guard for an unknown encoding (null vector).
   *
   * @param string $word     text in the source encoding
   * @param string $encoding Encoding name
   *
   * @return string converted text
   */
  function to_utf8($word, $encoding) {
    $vector = $this->get_encoding_vector($encoding);

    $converted = '';
    for ($i = 0, $size = strlen($word); $i < $size; $i++) {
      // Square brackets: the $word{$i} form is a parse error in PHP 8
      $converted .= code_to_utf8($vector[$word[$i]]);
    }

    return $converted;
  }

  /**
   * Translate an encoding vector into glyph names.
   *
   * @param Array $vector character code => unicode code
   *
   * @return Array character code => glyph name
   */
  function vector_to_glyphs($vector) {
    $result = array();

    foreach ($vector as $code => $ucs2) {
      if (isset($GLOBALS['g_unicode_glyphs'][$ucs2])) {
        $result[$code] = $GLOBALS['g_unicode_glyphs'][$ucs2];
      } elseif ($ucs2 == 0xFFFF) {
        // 0xFFFF is the filler get_encoding_vector() uses for unmapped codes
        $result[$code] = ".notdef";
      } else {
        // Use "Unicode and Glyph Names" mapping from Adobe
        // http://partners.adobe.com/public/developer/opentype/index_glyph.html
        $result[$code] = sprintf("u%04X", $ucs2);
      }
    }

    return $result;
  }
}
|
|
|
294 |
|
|
|
295 |
// Create the shared ManagerEncoding instance and publish it in the global
// variable that ManagerEncoding::get() returns. The 'global' statement
// keeps this working when the file is included from inside a function.
global $g_manager_encodings;
$g_manager_encodings = new ManagerEncoding();
|
|
|
297 |
?>
|