Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/**
3
 * This file contains the class XML_Query2XML_ISO9075Mapper.
4
 *
5
 * PHP version 5
6
 *
7
 * @category  XML
8
 * @package   XML_Query2XML
9
 * @author    Lukas Feiler <lukas.feiler@lukasfeiler.com>
10
 * @copyright 2006 Lukas Feiler
11
 * @license   http://www.gnu.org/copyleft/lesser.html  LGPL Version 2.1
12
 * @version   CVS: $Id: ISO9075Mapper.php 302637 2010-08-22 14:34:31Z lukasfeiler $
13
 * @link      http://pear.php.net/package/XML_Query2XML
14
 */
15
 
16
/**
17
 * PEAR_Exception is used as the parent for XML_Query2XML_ISO9075Mapper_Exception.
18
 */
19
require_once 'PEAR/Exception.php';
20
 
21
/**
22
 * I18N_UnicodeString is used for converting UTF-8 to Unicode and vice versa.
23
 */
24
require_once 'I18N/UnicodeString.php';
25
 
26
/**
27
 * Maps SQL identifiers to XML names according to Final Committee Draft for
28
 * ISO/IEC 9075-14:2005, section "9.1 Mapping SQL <identifier>s to XML Names".
29
 *
30
 * ISO/IEC 9075-14:2005 is available online at
31
 * http://www.sqlx.org/SQL-XML-documents/5FCD-14-XML-2004-07.pdf
32
 *
33
 * A lot of characters are legal in SQL identifiers but cannot be used within
34
 * XML names. To begin with, SQL identifiers can contain any Unicode character
35
 * while XML names are limited to a certain set of characters. E.g the
36
 * SQL identifier "<21yrs in age" obviously is not a valid XML name.
37
 * '#', '{', and '}' are also not allowed. Fully escaped SQL identifiers
38
 * also must not contain a colon (':') or start with "xml" (in any case
39
 * combination). Illegal characters are mapped to a string of the form
40
 * _xUUUU_ where UUUU is the Unicode value of the character.
41
 *
42
 * The following is a table of example mappings:
43
 * <pre>
44
 * +----------------+------------------------+------------------------------------+
45
 * | SQL-Identifier | Fully escaped XML name | Comment                            |
46
 * +----------------+------------------------+------------------------------------+
47
 * | dept:id        | dept_x003A_id          | ":" is illegal                     |
48
 * | xml_name       | _x0078_ml_name         | must not start with [Xx][Mm][Ll]   |
49
 * | XML_name       | _x0058_ML_name         | must not start with [Xx][Mm][Ll]   |
50
 * | hire date      | hire_x0020_date        | space is illegal too               |
51
 * | Works@home     | Works_x0040_home       | "@" is illegal                     |
52
 * | file_xls       | file_x005F_xls         | "_" gets mapped if followed by "x" |
53
 * | FIRST_NAME     | FIRST_NAME             | no problem here                    |
54
 * +----------------+------------------------+------------------------------------+
55
 * </pre>
56
 *
57
 * @category  XML
58
 * @package   XML_Query2XML
59
 * @author    Lukas Feiler <lukas.feiler@lukasfeiler.com>
60
 * @copyright 2006 Lukas Feiler
61
 * @license   http://www.gnu.org/copyleft/lesser.html  LGPL Version 2.1
62
 * @version   Release: 1.7.2
63
 * @link      http://pear.php.net/package/XML_Query2XML
64
 */
65
class XML_Query2XML_ISO9075Mapper
{
    /**
     * Maps an SQL identifier to a fully escaped XML name according to
     * FCD ISO/IEC 9075-14:2005, section 9.1.
     *
     * Every character of the identifier is translated on its own:
     * - a colon always becomes _x003A_ (paragraph 4 lit b; this class always
     *   performs a full escape),
     * - an underscore that is directly followed by a lowercase "x" becomes
     *   _x005F_ (lit c),
     * - a character that is not a valid XML 1.1 NameChar - or, at position 0,
     *   not a valid XML 1.1 NameStartChar - becomes _xUUUU_ for code points
     *   up to U+FFFF and _xUUUUUU_ for code points above that (lit e),
     * - any other character is copied unchanged (lit f).
     * Afterwards, if the identifier starts with "xml" in any case combination,
     * its first character is replaced by _x0078_ or _x0058_ (lit d).
     *
     * Hexadecimal digits are always written in uppercase so that computed
     * escapes look like the fixed ones (_x003A_, _x005F_).
     *
     * @param string $sqlIdentifier The SQL identifier as a UTF-8 string.
     *
     * @return string The fully escaped XML name.
     * @throws XML_Query2XML_ISO9075Mapper_Exception If $sqlIdentifier was a
     *                                               malformed UTF-8 string.
     */
    public static function map($sqlIdentifier)
    {
        /*
         * S as defined in section 9.1, paragraph 1 with the difference that
         * the characters are numbered S[0], S[1], ..., S[N-1] and each S[i]
         * holds the Unicode code point of the character.
         */
        $S = self::_utf8ToUnicode($sqlIdentifier);
        $N = count($S);

        /*
         * X as defined in section 9.1, paragraph 4 with the difference that
         * the index runs from 0 (zero) to N-1.
         * Paragraph 4 lit a (characters without a mapping to Unicode) needs
         * no code: the input already is Unicode.
         */
        $X = array();

        for ($i = 0; $i < $N; $i++) {
            $c = (int) $S[$i];

            if ($c === 0x3A) {
                /*
                 * lit b: S[i] is <colon>. Sub-cases i) and ii) both yield
                 * _x003A_; iii) only applies to partially escaped names,
                 * which this class never produces.
                 */
                $X[$i] = '_x003A_';
            } elseif ($c === 0x5F && $i < $N - 1 && (int) $S[$i + 1] === 0x78) {
                // lit c: <underscore> followed by the lowercase letter x.
                $X[$i] = '_x005F_';
            } elseif (!self::_isValidNameChar($c)
                || ($i === 0 && !self::_isValidNameStartChar($c))
            ) {
                // lit e: not usable at this position of an XML 1.1 name.
                $X[$i] = self::_escapeCodePoint($c);
            } else {
                // lit f: the character is simply copied into the result.
                $X[$i] = self::_unicodeToUtf8($S[$i]);
            }
        }

        /*
         * lit d: N >= 3 and the identifier starts with [Xx][Mm][Ll].
         * Only X[0] is replaced; the "m" and "l" were already copied above.
         */
        if ($N >= 3
            && ((int) $S[1] === 0x4D || (int) $S[1] === 0x6D)
            && ((int) $S[2] === 0x4C || (int) $S[2] === 0x6C)
        ) {
            if ((int) $S[0] === 0x78) {
                // i) S[0] is the lowercase letter x.
                $X[0] = '_x0078_';
            } elseif ((int) $S[0] === 0x58) {
                // ii) S[0] is the uppercase letter X.
                $X[0] = '_x0058_';
            }
        }

        /*
         * section 9.1, paragraph 5: XMLN is the concatenation of
         * X[0], X[1], ..., X[N-1] in order from left to right.
         */
        return implode('', $X);
    }

    /**
     * Builds the escape sequence for a single code point as required by
     * section 9.1, paragraph 4 lit e ii).
     *
     * Code points up to U+FFFF are written with four hexadecimal digits
     * (_xU5U6U7U8_), all others with six (_xU3U4U5U6U7U8_).
     *
     * @param int $c A unicode character as an integer.
     *
     * @return string The escape sequence, e.g. _x0020_ for a space.
     */
    private static function _escapeCodePoint($c)
    {
        if ($c > 0xFFFF) {
            return sprintf('_x%06X_', $c);
        }
        return sprintf('_x%04X_', $c);
    }

    /**
     * Returns whether $c is a valid XML 1.1 NameStartChar.
     * NameStartChar is defined as:
     * NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] |
     *                   [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
     *                   [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
     *                   [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
     *                   [#x10000-#xEFFFF]
     *
     * @param int $c A unicode character as an integer.
     *
     * @return boolean Whether $c is a valid NameStartChar.
     * @link http://www.w3.org/TR/xml11/
     */
    private static function _isValidNameStartChar($c)
    {
        return $c == 0x3A                             // ":"
            || ($c >= 0x41 && $c <= 0x5A)             // [A-Z]
            || $c == 0x5F                             // "_"
            || ($c >= 0x61 && $c <= 0x7A)             // [a-z]
            || ($c >= 0xC0 && $c <= 0xD6)
            || ($c >= 0xD8 && $c <= 0xF6)
            || ($c >= 0xF8 && $c <= 0x2FF)
            || ($c >= 0x370 && $c <= 0x37D)
            || ($c >= 0x37F && $c <= 0x1FFF)
            || ($c >= 0x200C && $c <= 0x200D)
            || ($c >= 0x2070 && $c <= 0x218F)
            || ($c >= 0x2C00 && $c <= 0x2FEF)
            || ($c >= 0x3001 && $c <= 0xD7FF)
            || ($c >= 0xF900 && $c <= 0xFDCF)
            || ($c >= 0xFDF0 && $c <= 0xFFFD)
            || ($c >= 0x10000 && $c <= 0xEFFFF);
    }

    /**
     * Returns whether $c is a valid XML 1.1 NameChar.
     * NameChar is defined as:
     * NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] |
     *              [#x203F-#x2040]
     *
     * @param int $c A unicode character as an integer.
     *
     * @return boolean Whether $c is a valid NameChar.
     * @link http://www.w3.org/TR/xml11/
     */
    private static function _isValidNameChar($c)
    {
        return self::_isValidNameStartChar($c)
            || $c == 0x2D                             // "-"
            || $c == 0x2E                             // "."
            || ($c >= 0x30 && $c <= 0x39)             // [0-9]
            || $c == 0xB7
            || ($c >= 0x0300 && $c <= 0x036F)
            || ($c >= 0x203F && $c <= 0x2040);
    }

    /**
     * Converts a single unicode character represented by an integer
     * to a UTF-8 character.
     *
     * @param int $char The unicode character as an integer
     *
     * @return string The UTF-8 character.
     */
    private static function _unicodeToUtf8($char)
    {
        return I18N_UnicodeString::unicodeCharToUtf8($char);
    }

    /**
     * Converts a UTF-8 string into unicode integers.
     *
     * The conversion is delegated to I18N_UnicodeString::utf8ToUnicode().
     * If that call hands back a PEAR_Error (or an instance of a subclass)
     * its message is rethrown as an exception.
     *
     * @param string $string A string containing Unicode values encoded in UTF-8
     *
     * @return array The array of Unicode values.
     * @throws XML_Query2XML_ISO9075Mapper_Exception If a malformed UTF-8 string
     *                                               was passed as argument.
     */
    private static function _utf8ToUnicode($string)
    {
        $codePoints = I18N_UnicodeString::utf8ToUnicode($string);
        if ($codePoints instanceof PEAR_Error) {
            /*
             * unit tests:
             *  testMapException1()
             *  testMapException2()
             *  testMapException3()
             */
            throw new XML_Query2XML_ISO9075Mapper_Exception(
                $codePoints->getMessage()
            );
        }
        return $codePoints;
    }
}
291
 
292
/**
293
 * Only XML_Query2XML_ISO9075Mapper will throw this exception.
294
 * It does not extend XML_Query2XML_Exception because the
295
 * class XML_Query2XML_ISO9075Mapper should be usable without
296
 * XML_Query2XML. XML_Query2XML itself will never throw this
297
 * exception.
298
 *
299
 * @category XML
300
 * @package  XML_Query2XML
301
 * @author   Lukas Feiler <lukas.feiler@lukasfeiler.com>
302
 * @license  http://www.gnu.org/copyleft/lesser.html  LGPL Version 2.1
303
 * @link     http://pear.php.net/package/XML_Query2XML
304
 */
305
class XML_Query2XML_ISO9075Mapper_Exception extends PEAR_Exception
{
    /**
     * Creates the exception from a plain message; the message is handed
     * to the PEAR_Exception constructor unchanged.
     *
     * @param string $message The error message.
     */
    public function __construct($message)
    {
        parent::__construct($message);
    }
}
318
?>