| 1 |
lars |
1 |
<?php
|
|
|
2 |
/**
|
|
|
3 |
* Converts to and from JSON format.
|
|
|
4 |
*
|
|
|
5 |
* JSON (JavaScript Object Notation) is a lightweight data-interchange
|
|
|
6 |
* format. It is easy for humans to read and write. It is easy for machines
|
|
|
7 |
* to parse and generate. It is based on a subset of the JavaScript
|
|
|
8 |
* Programming Language, Standard ECMA-262 3rd Edition - December 1999.
|
|
|
9 |
* This feature can also be found in Python. JSON is a text format that is
|
|
|
10 |
* completely language independent but uses conventions that are familiar
|
|
|
11 |
* to programmers of the C-family of languages, including C, C++, C#, Java,
|
|
|
12 |
* JavaScript, Perl, TCL, and many others. These properties make JSON an
|
|
|
13 |
* ideal data-interchange language.
|
|
|
14 |
*
|
|
|
15 |
* This package provides a simple encoder and decoder for JSON notation. It
|
|
|
16 |
* is intended for use with client-side Javascript applications that make
|
|
|
17 |
* use of HTTPRequest to perform server communication functions - data can
|
|
|
18 |
* be encoded into JSON notation for use in a client-side javascript, or
|
|
|
19 |
* decoded from incoming Javascript requests. JSON format is native to
|
|
|
20 |
* Javascript, and can be directly eval()'ed with no further parsing
|
|
|
21 |
* overhead
|
|
|
22 |
*
|
|
|
23 |
* All strings should be in ASCII or UTF-8 format!
|
|
|
24 |
*
|
|
|
25 |
* PHP version 5 or later (the class uses class constants, visibility keywords and __construct)
|
|
|
26 |
*
|
|
|
27 |
* LICENSE: Redistribution and use in source and binary forms, with or
|
|
|
28 |
* without modification, are permitted provided that the following
|
|
|
29 |
* conditions are met: Redistributions of source code must retain the
|
|
|
30 |
* above copyright notice, this list of conditions and the following
|
|
|
31 |
* disclaimer. Redistributions in binary form must reproduce the above
|
|
|
32 |
* copyright notice, this list of conditions and the following disclaimer
|
|
|
33 |
* in the documentation and/or other materials provided with the
|
|
|
34 |
* distribution.
|
|
|
35 |
*
|
|
|
36 |
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
|
|
37 |
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
|
38 |
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
|
|
39 |
* NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
40 |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
|
41 |
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
|
42 |
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
|
43 |
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
|
|
44 |
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
|
45 |
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
|
|
46 |
* DAMAGE.
|
|
|
47 |
*
|
|
|
48 |
* @package System.Web.Javascripts
|
|
|
49 |
* @author Michal Migurski <mike-json@teczno.com>
|
|
|
50 |
* @author Matt Knapp <mdknapp[at]gmail[dot]com>
|
|
|
51 |
* @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
|
|
|
52 |
* @copyright 2005 Michal Migurski
|
|
|
53 |
* @license http://www.opensource.org/licenses/bsd-license.php
|
|
|
54 |
* @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
|
|
|
55 |
*/
|
|
|
56 |
|
|
|
57 |
/**
|
|
|
58 |
* Converts to and from JSON format.
|
|
|
59 |
*
|
|
|
60 |
* @package System.Web.Javascripts
|
|
|
61 |
* @author Michal Migurski <mike-json@teczno.com>
|
|
|
62 |
* @author Matt Knapp <mdknapp[at]gmail[dot]com>
|
|
|
63 |
* @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
|
|
|
64 |
* @copyright 2005 Michal Migurski
|
|
|
65 |
* @license http://www.php.net/license/3_0.txt PHP License 3.0
|
|
|
66 |
*/
|
|
|
67 |
class TJSON
{
    /**
     * Stack marker for TJSON::decode(): start of one comma-separated value slice.
     */
    const JSON_SLICE = 1;

    /**
     * Stack marker for TJSON::decode(): currently inside a quoted string.
     */
    const JSON_IN_STR = 2;

    /**
     * Stack marker for TJSON::decode(): currently inside a nested "[...]".
     */
    const JSON_IN_ARR = 4;

    /**
     * Stack marker for TJSON::decode(): currently inside a nested "{...}".
     */
    const JSON_IN_OBJ = 8;

    /**
     * Stack marker for TJSON::decode(): currently inside a block comment.
     */
    const JSON_IN_CMT = 16;

    /**
     * Behavior switch for TJSON::decode(): "{...}" becomes an associative array.
     */
    const JSON_LOOSE_TYPE = 10;

    /**
     * Behavior switch for TJSON::decode(): "{...}" becomes a stdClass object.
     */
    const JSON_STRICT_TYPE = 11;

    /**
     * @var int decoding mode, self::JSON_STRICT_TYPE or self::JSON_LOOSE_TYPE.
     *          Declared explicitly (it used to be created dynamically by the
     *          constructor); kept public so existing external reads keep working.
     */
    public $use;

    /**
     * Constructs a new JSON converter.
     *
     * @param int $use object behavior used by decode():
     *                 self::JSON_STRICT_TYPE (default) - "{...}" creates stdClass objects;
     *                 self::JSON_LOOSE_TYPE - "{...}" creates associative arrays.
     */
    public function __construct($use=self::JSON_STRICT_TYPE)
    {
        $this->use = $use;
    }

    /**
     * Encodes an arbitrary variable into JSON format.
     *
     * Strings are first converted to UTF-8 when the Prado application reports
     * a different charset; after that they are expected to be ASCII or UTF-8.
     * Every non-ASCII character is written as one "\uXXXX" escape per UTF-16
     * code unit. Bytes that cannot start a UTF-8 sequence are dropped.
     *
     * @param mixed $var any number, boolean, string, array, or object to be encoded.
     *                   A non-empty array whose keys are not exactly 0..n-1 is
     *                   written as a JSON object; objects are written from
     *                   get_object_vars().
     *
     * @return mixed JSON string representation of the input; note that integers
     *               and floats are returned as native PHP numbers (unchanged
     *               historical behavior), and unsupported types yield ''.
     * @access public
     */
    public function encode($var)
    {
        switch (gettype($var)) {
            case 'boolean':
                return $var ? 'true' : 'false';

            case 'NULL':
                return 'null';

            case 'integer':
                return (int) $var;

            case 'double':
            case 'float':
                return (float) $var;

            case 'string':
                if (($g = Prado::getApplication()->getGlobalization(false)) !== null &&
                    strtoupper($enc = $g->getCharset()) != 'UTF-8') {
                    $var = iconv($enc, 'UTF-8', $var);
                }

                // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
                $ascii = '';
                $strlen_var = strlen($var);

                // Walk the string byte by byte, escaping or transcoding as needed.
                for ($c = 0; $c < $strlen_var; ++$c) {
                    $ord_var_c = ord($var[$c]);

                    switch (true) {
                        case $ord_var_c == 0x08:
                            $ascii .= '\b';
                            break;
                        case $ord_var_c == 0x09:
                            $ascii .= '\t';
                            break;
                        case $ord_var_c == 0x0A:
                            $ascii .= '\n';
                            break;
                        case $ord_var_c == 0x0C:
                            $ascii .= '\f';
                            break;
                        case $ord_var_c == 0x0D:
                            $ascii .= '\r';
                            break;

                        case $ord_var_c == 0x22:
                        case $ord_var_c == 0x2F:
                        case $ord_var_c == 0x5C:
                            // double quote, slash, backslash
                            $ascii .= '\\' . $var[$c];
                            break;

                        case $ord_var_c < 0x20:
                            // remaining control characters have no short escape
                            // and must not appear raw inside a JSON string
                            $ascii .= sprintf('\u%04x', $ord_var_c);
                            break;

                        case $ord_var_c <= 0x7F:
                            // characters U-00000020 - U-0000007F (same as ASCII)
                            $ascii .= $var[$c];
                            break;

                        default:
                            // Lead byte of a multibyte sequence; the bit mask
                            // gives the sequence length.
                            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                            if (($ord_var_c & 0xE0) == 0xC0) {
                                $seq_len = 2;       // mask 110XXXXX
                            } elseif (($ord_var_c & 0xF0) == 0xE0) {
                                $seq_len = 3;       // mask 1110XXXX
                            } elseif (($ord_var_c & 0xF8) == 0xF0) {
                                $seq_len = 4;       // mask 11110XXX
                            } elseif (($ord_var_c & 0xFC) == 0xF8) {
                                $seq_len = 5;       // mask 111110XX
                            } elseif (($ord_var_c & 0xFE) == 0xFC) {
                                $seq_len = 6;       // mask 1111110X
                            } else {
                                // stray continuation byte or 0xFE/0xFF: dropped
                                break;
                            }

                            // substr() never reads past the end of a truncated
                            // sequence, unlike indexing each byte separately.
                            $char = substr($var, $c, $seq_len);
                            $c += $seq_len - 1;

                            // One \uXXXX escape per UTF-16 code unit, so that
                            // supplementary characters become a surrogate pair.
                            $hex = bin2hex($this->utf8_to_utf16be($char));
                            $hex_len = strlen($hex);
                            for ($h = 0; $h < $hex_len; $h += 4) {
                                $ascii .= sprintf('\u%04s', substr($hex, $h, 4));
                            }
                            break;
                    }
                }

                return '"' . $ascii . '"';

            case 'array':
                /*
                 * If the keys are not exactly 0..n-1 the array is written as
                 * a JSON object. This also covers sparse numeric arrays, which
                 * would otherwise be re-indexed (losing possibly relevant
                 * keys) or make some JS engines allocate up to the max index.
                 */
                if (is_array($var) && count($var) && (array_keys($var) !== range(0, count($var) - 1))) {
                    return '{' .
                        implode(',', array_map(array($this, 'name_value'),
                                               array_keys($var),
                                               array_values($var)))
                        . '}';
                }

                // treat it like a regular array
                return '[' . implode(',', array_map(array($this, 'encode'), $var)) . ']';

            case 'object':
                $vars = get_object_vars($var);
                return '{' .
                    implode(',', array_map(array($this, 'name_value'),
                                           array_keys($vars),
                                           array_values($vars)))
                    . '}';

            default:
                return '';
        }
    }

    /**
     * Encodes an arbitrary variable into JSON format; alias for encode().
     * @see TJSON::encode()
     *
     * @param mixed $var any number, boolean, string, array, or object to be encoded.
     *
     * @return mixed whatever encode() returns for $var
     * @access public
     */
    public function enc($var)
    {
        return $this->encode($var);
    }

    /**
     * Callback used by encode() to build one JSON name-value pair.
     *
     * @param string $name  name of key to use (cast to string before encoding)
     * @param mixed  $value value to be encoded
     *
     * @return string JSON-formatted name-value pair, like '"name":value'
     */
    protected function name_value($name, $value)
    {
        return $this->encode(strval($name)) . ':' . $this->encode($value);
    }

    /**
     * Reduces a string by removing leading and trailing comments and whitespace.
     *
     * @param string $str string value to strip of comments and whitespace
     *
     * @return string string value stripped of comments and whitespace
     */
    protected function reduce_string($str)
    {
        $str = preg_replace(array(

                // eliminate single line comments in '// ...' form
                '#^\s*//(.+)$#m',

                // eliminate multi-line comments in '/* ... */' form, at start of string
                '#^\s*/\*(.+)\*/#Us',

                // eliminate multi-line comments in '/* ... */' form, at end of string
                '#/\*(.+)\*/\s*$#Us'

            ), '', $str);

        // eliminate extraneous space
        return trim($str);
    }

    /**
     * Decodes a JSON string into the appropriate PHP value.
     *
     * The parser is lenient: it accepts single-quoted strings, unquoted
     * object keys and comments. Input that is not recognised yields null.
     *
     * @param string $str JSON-formatted string
     *
     * @return mixed number, boolean, string, array, or object corresponding
     *               to the given JSON input string. "{...}" becomes a stdClass
     *               object or an associative array depending on the mode
     *               passed to the constructor. Strings are returned in
     *               ASCII or UTF-8 format.
     * @access public
     */
    public function decode($str)
    {
        $str = $this->reduce_string($str);

        switch (strtolower($str)) {
            case 'true':
                return true;

            case 'false':
                return false;

            case 'null':
                return null;
        }

        if (is_numeric($str)) {
            // Return an integer when the value has no fractional part,
            // a float otherwise.
            return ((float) $str == (int) $str)
                ? (int) $str
                : (float) $str;
        }

        // ".*" (not ".+") so that the empty string "" is recognised too.
        if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
            // STRINGS RETURNED IN UTF-8 FORMAT
            $delim = substr($str, 0, 1);
            $chrs = (string) substr($str, 1, -1);
            $utf8 = '';
            $strlen_chrs = strlen($chrs);

            for ($c = 0; $c < $strlen_chrs; ++$c) {
                $substr_chrs_c_2 = substr($chrs, $c, 2);
                $ord_chrs_c = ord($chrs[$c]);

                switch (true) {
                    case $substr_chrs_c_2 === '\b':
                        $utf8 .= chr(0x08);
                        ++$c;
                        break;
                    case $substr_chrs_c_2 === '\t':
                        $utf8 .= chr(0x09);
                        ++$c;
                        break;
                    case $substr_chrs_c_2 === '\n':
                        $utf8 .= chr(0x0A);
                        ++$c;
                        break;
                    case $substr_chrs_c_2 === '\f':
                        $utf8 .= chr(0x0C);
                        ++$c;
                        break;
                    case $substr_chrs_c_2 === '\r':
                        $utf8 .= chr(0x0D);
                        ++$c;
                        break;

                    case $substr_chrs_c_2 === '\\"':
                    case $substr_chrs_c_2 === '\\\'':
                    case $substr_chrs_c_2 === '\\\\':
                    case $substr_chrs_c_2 === '\\/':
                        // Skip the backslash and keep the escaped character.
                        // For the "other" quote type only the backslash is
                        // skipped here; the quote itself is copied by the
                        // next iteration.
                        if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
                            ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
                            $utf8 .= $chrs[++$c];
                        }
                        break;

                    case (bool) preg_match('/^\\\\u[0-9A-F]{4}$/i', substr($chrs, $c, 6)):
                        // escaped UTF-16 code unit
                        $unit = hexdec(substr($chrs, $c + 2, 4));
                        $utf16 = pack('n', $unit);
                        $c += 5;

                        // A high surrogate directly followed by an escaped low
                        // surrogate encodes one supplementary character:
                        // decode both units together.
                        if ($unit >= 0xD800 && $unit <= 0xDBFF &&
                            preg_match('/^\\\\u(D[C-F][0-9A-F]{2})$/i', substr($chrs, $c + 1, 6), $low)) {
                            $utf16 .= pack('n', hexdec($low[1]));
                            $c += 6;
                        }

                        $utf8 .= $this->utf16be_to_utf8($utf16);
                        break;

                    case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
                        $utf8 .= $chrs[$c];
                        break;

                    default:
                        // Raw UTF-8 multibyte sequence: copy it through.
                        // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                        if (($ord_chrs_c & 0xE0) == 0xC0) {
                            $seq_len = 2;       // mask 110XXXXX
                        } elseif (($ord_chrs_c & 0xF0) == 0xE0) {
                            $seq_len = 3;       // mask 1110XXXX
                        } elseif (($ord_chrs_c & 0xF8) == 0xF0) {
                            $seq_len = 4;       // mask 11110XXX
                        } elseif (($ord_chrs_c & 0xFC) == 0xF8) {
                            $seq_len = 5;       // mask 111110XX
                        } elseif (($ord_chrs_c & 0xFE) == 0xFC) {
                            $seq_len = 6;       // mask 1111110X
                        } else {
                            // control character or stray byte: dropped
                            break;
                        }

                        $utf8 .= substr($chrs, $c, $seq_len);
                        $c += $seq_len - 1;
                        break;
                }
            }

            return $utf8;
        }

        if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
            // array, or object notation
            $is_array = ($str[0] == '[');

            if ($is_array) {
                $arr = array();
            } elseif ($this->use == self::JSON_LOOSE_TYPE) {
                $obj = array();
            } else {
                $obj = new stdClass();
            }

            // Parser state stack; the bottom entry is the first value slice.
            $stk = array(array('what'  => self::JSON_SLICE,
                               'where' => 0,
                               'delim' => false));

            $chrs = $this->reduce_string((string) substr($str, 1, -1));

            if ($chrs === '') {
                return $is_array ? $arr : $obj;
            }

            $strlen_chrs = strlen($chrs);
            $containers = array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ);

            // Runs one step past the last character so the final slice is flushed.
            for ($c = 0; $c <= $strlen_chrs; ++$c) {
                $top = end($stk);
                $chr = ($c < $strlen_chrs) ? $chrs[$c] : '';
                $substr_chrs_c_2 = substr($chrs, $c, 2);

                if (($c == $strlen_chrs) || (($chr === ',') && ($top['what'] == self::JSON_SLICE))) {
                    // found a comma that is not inside a string, array, etc.,
                    // OR we've reached the end of the character list
                    $slice = substr($chrs, $top['where'], ($c - $top['where']));
                    array_push($stk, array('what' => self::JSON_SLICE, 'where' => ($c + 1), 'delim' => false));

                    if ($is_array) {
                        // we are in an array, so just append the element
                        array_push($arr, $this->decode($slice));

                    } elseif (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                        // "name":value pair
                        $key = $this->decode($parts[1]);
                        $val = $this->decode($parts[2]);

                        if ($this->use == self::JSON_LOOSE_TYPE) {
                            $obj[$key] = $val;
                        } else {
                            $obj->{$key} = $val;
                        }

                    } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                        // name:value pair, where name is unquoted
                        $key = $parts[1];
                        $val = $this->decode($parts[2]);

                        if ($this->use == self::JSON_LOOSE_TYPE) {
                            $obj[$key] = $val;
                        } else {
                            $obj->{$key} = $val;
                        }
                    }

                } elseif ((($chr === '"') || ($chr === "'")) && ($top['what'] != self::JSON_IN_STR)) {
                    // found a quote, and we are not inside a string
                    array_push($stk, array('what' => self::JSON_IN_STR, 'where' => $c, 'delim' => $chr));

                } elseif (($top['what'] == self::JSON_IN_STR) && ($chr === $top['delim'])) {
                    // Found the delimiter while inside a string. It closes the
                    // string only if preceded by an even number of backslashes
                    // (an odd run means the quote itself is escaped).
                    $backslashes = 0;
                    for ($i = $c - 1; $i > $top['where'] && $chrs[$i] === '\\'; --$i) {
                        ++$backslashes;
                    }

                    if ($backslashes % 2 == 0) {
                        array_pop($stk);
                    }

                } elseif (($chr === '[') && in_array($top['what'], $containers, true)) {
                    // found a left-bracket, and we are in an array, object, or slice
                    array_push($stk, array('what' => self::JSON_IN_ARR, 'where' => $c, 'delim' => false));

                } elseif (($chr === ']') && ($top['what'] == self::JSON_IN_ARR)) {
                    // found a right-bracket, and we're in an array
                    array_pop($stk);

                } elseif (($chr === '{') && in_array($top['what'], $containers, true)) {
                    // found a left-brace, and we are in an array, object, or slice
                    array_push($stk, array('what' => self::JSON_IN_OBJ, 'where' => $c, 'delim' => false));

                } elseif (($chr === '}') && ($top['what'] == self::JSON_IN_OBJ)) {
                    // found a right-brace, and we're in an object
                    array_pop($stk);

                } elseif (($substr_chrs_c_2 === '/*') && in_array($top['what'], $containers, true)) {
                    // found a comment start, and we are in an array, object, or slice
                    array_push($stk, array('what' => self::JSON_IN_CMT, 'where' => $c, 'delim' => false));
                    $c++;

                } elseif (($substr_chrs_c_2 === '*/') && ($top['what'] == self::JSON_IN_CMT)) {
                    // found a comment end, and we're in one now
                    array_pop($stk);
                    $c++;

                    // Blank out the whole comment so it does not end up in a slice.
                    $cmt_len = $c - $top['where'] + 1;
                    $chrs = substr_replace($chrs, str_repeat(' ', $cmt_len), $top['where'], $cmt_len);
                }
            }

            return $is_array ? $arr : $obj;
        }

        // not recognisable as JSON
        return null;
    }

    /**
     * Decodes a JSON string into the appropriate PHP value; alias for decode().
     * @see TJSON::decode()
     *
     * @param string $var JSON-formatted string
     *
     * @return mixed whatever decode() returns for $var
     */
    public function dec($var)
    {
        return $this->decode($var);
    }

    /**
     * Returns UTF-8 encoded text as a list of Unicode code points.
     *
     * Sequences of up to four bytes are understood; continuation bytes are
     * not validated.
     *
     * @author Scott Michael Reynen <scott@randomchaos.com>
     * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
     * @see    unicode_to_utf8()
     *
     * @param string $str UTF-8 text (taken by reference, not modified)
     *
     * @return array list of integer code points
     */
    protected function utf8_to_unicode( &$str )
    {
        $unicode = array();
        $values = array();
        $lookingFor = 1;
        $length = strlen($str);

        for ($i = 0; $i < $length; $i++) {
            $thisValue = ord($str[$i]);

            if ($thisValue < 128) {
                $unicode[] = $thisValue;
                continue;
            }

            if (count($values) == 0) {
                // lead byte decides how many bytes make up the character
                $lookingFor = ($thisValue < 224) ? 2 : (($thisValue < 240) ? 3 : 4);
            }

            $values[] = $thisValue;

            if (count($values) == $lookingFor) {
                if ($lookingFor == 4) {
                    $number = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                            + (($values[2] % 64) * 64) + ($values[3] % 64);
                } elseif ($lookingFor == 3) {
                    $number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
                } else {
                    $number = (($values[0] % 32) * 64) + ($values[1] % 64);
                }

                $unicode[] = $number;
                $values = array();
                $lookingFor = 1;
            }
        }

        return $unicode;
    }

    /**
     * Converts a list of Unicode code points back to its UTF-8 representation.
     *
     * @author Scott Michael Reynen <scott@randomchaos.com>
     * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
     * @see    utf8_to_unicode()
     *
     * @param array $str list of integer code points (taken by reference, not modified)
     *
     * @return string UTF-8 text using one to four bytes per code point
     */
    protected function unicode_to_utf8( &$str )
    {
        $utf8 = '';

        foreach ($str as $unicode) {
            if ($unicode < 128) {
                $utf8 .= chr($unicode);
            } elseif ($unicode < 2048) {
                $utf8 .= chr(192 + ($unicode >> 6));
                $utf8 .= chr(128 + ($unicode & 63));
            } elseif ($unicode < 65536) {
                $utf8 .= chr(224 + ($unicode >> 12));
                $utf8 .= chr(128 + (($unicode >> 6) & 63));
                $utf8 .= chr(128 + ($unicode & 63));
            } else {
                $utf8 .= chr(240 + ($unicode >> 18));
                $utf8 .= chr(128 + (($unicode >> 12) & 63));
                $utf8 .= chr(128 + (($unicode >> 6) & 63));
                $utf8 .= chr(128 + ($unicode & 63));
            }
        }

        return $utf8;
    }

    /**
     * UTF-8 to UTF-16BE conversion.
     *
     * Uses mb_convert_encoding() when available; otherwise converts by hand,
     * writing code points above U+FFFF as a surrogate pair.
     *
     * @param string $str UTF-8 text (taken by reference, not modified)
     * @param bool   $bom whether to prefix the result with the UTF-16BE byte order mark
     *
     * @return string UTF-16BE encoded bytes
     */
    protected function utf8_to_utf16be(&$str, $bom = false)
    {
        $out = $bom ? "\xFE\xFF" : '';

        if (function_exists('mb_convert_encoding')) {
            return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
        }

        $uni = $this->utf8_to_unicode($str);
        foreach ($uni as $cp) {
            if ($cp > 0xFFFF) {
                // supplementary plane: high surrogate + low surrogate
                $cp -= 0x10000;
                $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
            } else {
                $out .= pack('n', $cp);
            }
        }

        return $out;
    }

    /**
     * UTF-16BE to UTF-8 conversion.
     *
     * A high surrogate immediately followed by a low surrogate is combined
     * into one code point; an unpaired surrogate is passed through as-is.
     *
     * @param string $str UTF-16BE encoded bytes (taken by reference, not modified)
     *
     * @return string UTF-8 text
     */
    protected function utf16be_to_utf8(&$str)
    {
        $units = array_values(unpack('n*', $str));
        $count = count($units);
        $uni = array();

        for ($i = 0; $i < $count; ++$i) {
            $unit = $units[$i];

            if ($unit >= 0xD800 && $unit <= 0xDBFF && ($i + 1) < $count &&
                $units[$i + 1] >= 0xDC00 && $units[$i + 1] <= 0xDFFF) {
                $unit = 0x10000 + (($unit - 0xD800) << 10) + ($units[++$i] - 0xDC00);
            }

            $uni[] = $unit;
        }

        return $this->unicode_to_utf8($uni);
    }

}
|
|
|
761 |
|
|
|
762 |
?>
|