Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php

/*
 * This file is part of the symfony package.
 * (c) Fabien Potencier <fabien.potencier@symfony-project.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

// The inline YAML parser lives next to this file and is used by sfYamlParser.
require_once dirname(__FILE__).'/sfYamlInline.php';

// Declare PREG_BAD_UTF8_OFFSET_ERROR (value 5) when the runtime has not
// already done so, so that the rest of this file can reference it
// unconditionally.
defined('PREG_BAD_UTF8_OFFSET_ERROR') || define('PREG_BAD_UTF8_OFFSET_ERROR', 5);

/**
 * sfYamlParser parses YAML strings to convert them to PHP arrays.
 *
 * The parser walks a cleaned-up copy of the input line by line. Indented
 * blocks are extracted as strings and handed to a nested sfYamlParser that
 * shares the same reference (anchor) table.
 *
 * @package    symfony
 * @subpackage yaml
 * @author     Fabien Potencier <fabien.potencier@symfony-project.com>
 * @version    SVN: $Id: sfYamlParser.class.php 10832 2008-08-13 07:46:08Z fabien $
 */
class sfYamlParser
{
  protected
    $offset        = 0,       // line offset added to line numbers in error messages
    $lines         = array(), // lines of the document currently being parsed
    $currentLineNb = -1,      // index into $lines of the current line (-1 = before the first line)
    $currentLine   = '',      // content of the current line
    $refs          = array(); // anchor name => value, shared by reference with nested parsers

  /**
   * Constructor
   *
   * @param integer $offset The offset of YAML document (used for line numbers in error messages)
   */
  public function __construct($offset = 0)
  {
    $this->offset = $offset;
  }

  /**
   * Parses a YAML string to a PHP value.
   *
   * When mbstring function overloading of string functions is active, the
   * internal encoding is switched to UTF-8 for the duration of the parsing and
   * restored afterwards, including when the parsing fails with an exception.
   *
   * @param  string $value A YAML string
   *
   * @return mixed  A PHP value
   *
   * @throws InvalidArgumentException If the YAML is not valid
   */
  public function parse($value)
  {
    $this->currentLineNb = -1;
    $this->currentLine = '';
    $this->lines = explode("\n", $this->cleanup($value));

    $mbEncoding = null;
    if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2)
    {
      $mbEncoding = mb_internal_encoding();
      mb_internal_encoding('UTF-8');
    }

    try
    {
      $data = $this->doParse();
    }
    catch (Exception $e)
    {
      // do not leak the temporary UTF-8 internal encoding to the caller
      if (null !== $mbEncoding)
      {
        mb_internal_encoding($mbEncoding);
      }

      throw $e;
    }

    if (null !== $mbEncoding)
    {
      mb_internal_encoding($mbEncoding);
    }

    return $data;
  }

  /**
   * Parses the lines previously stored in $this->lines.
   *
   * Note: a merge key ("<<") replaces the data collected so far for the
   * current mapping instead of being merged into it.
   *
   * @return mixed A PHP value (null when the document yields no data)
   *
   * @throws InvalidArgumentException If the YAML is not valid
   */
  protected function doParse()
  {
    $data = array();
    while ($this->moveToNextLine())
    {
      if ($this->isCurrentLineEmpty())
      {
        continue;
      }

      // tab?
      if (preg_match('#^\t+#', $this->currentLine))
      {
        throw new InvalidArgumentException(sprintf('A YAML file cannot contain tabs as indentation at line %d (%s).', $this->getRealCurrentLineNb() + 1, $this->currentLine));
      }

      // $isRef:       false, or the anchor name declared on this line
      // $isInPlace:   false, or the alias name used with a merge key
      // $isProcessed: false, or the array produced by a merge key
      $isRef = $isInPlace = $isProcessed = false;
      if (preg_match('#^\-(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values))
      {
        // sequence item
        if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches))
        {
          $isRef = $matches['ref'];
          $values['value'] = $matches['value'];
        }

        // array
        if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#'))
        {
          // the item content is the indented block that follows
          $c = $this->getRealCurrentLineNb() + 1;
          $data[] = $this->parseNested($this->getNextEmbedBlock(), $c);
        }
        else
        {
          if (preg_match('/^([^ ]+)\: +({.*?)$/u', $values['value'], $matches))
          {
            // "- key: { inline mapping }"
            $data[] = array($matches[1] => sfYamlInline::load($matches[2]));
          }
          else
          {
            $data[] = $this->parseValue($values['value']);
          }
        }
      }
      else if (preg_match('#^(?P<key>'.sfYamlInline::REGEX_QUOTED_STRING.'|[^ \{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values))
      {
        // mapping entry
        $key = sfYamlInline::parseScalar($values['key']);

        if ('<<' === $key)
        {
          if (isset($values['value']) && '*' === substr($values['value'], 0, 1))
          {
            $isInPlace = substr($values['value'], 1);
            if (!array_key_exists($isInPlace, $this->refs))
            {
              throw new InvalidArgumentException(sprintf('Reference "%s" does not exist at line %s (%s).', $isInPlace, $this->getRealCurrentLineNb() + 1, $this->currentLine));
            }
          }
          else
          {
            // compute the offset before the embedded block is consumed, so
            // that the nested parser reports correct line numbers
            $c = $this->getRealCurrentLineNb() + 1;
            if (isset($values['value']) && $values['value'] !== '')
            {
              $value = $values['value'];
            }
            else
            {
              $value = $this->getNextEmbedBlock();
            }
            $parsed = $this->parseNested($value, $c);

            $merged = array();
            if (!is_array($parsed))
            {
              throw new InvalidArgumentException(sprintf("YAML merge keys used with a scalar value instead of an array at line %s (%s)", $this->getRealCurrentLineNb() + 1, $this->currentLine));
            }
            else if (isset($parsed[0]))
            {
              // Numeric array, merge individual elements
              foreach (array_reverse($parsed) as $parsedItem)
              {
                if (!is_array($parsedItem))
                {
                  throw new InvalidArgumentException(sprintf("Merge items must be arrays at line %s (%s).", $this->getRealCurrentLineNb() + 1, $parsedItem));
                }
                $merged = array_merge($parsedItem, $merged);
              }
            }
            else
            {
              // Associative array, merge
              $merged = array_merge($merged, $parsed);
            }

            $isProcessed = $merged;
          }
        }
        else if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches))
        {
          $isRef = $matches['ref'];
          $values['value'] = $matches['value'];
        }

        if ($isProcessed)
        {
          // Merge keys
          $data = $isProcessed;
        }
        // hash
        else if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#'))
        {
          // if next line is less indented or equal, then it means that the current value is null
          if ($this->isNextLineIndented())
          {
            $data[$key] = null;
          }
          else
          {
            $c = $this->getRealCurrentLineNb() + 1;
            $data[$key] = $this->parseNested($this->getNextEmbedBlock(), $c);
          }
        }
        else
        {
          // strict check: an alias named "0" is a valid, non-false name
          if (false !== $isInPlace)
          {
            $data = $this->refs[$isInPlace];
          }
          else
          {
            $data[$key] = $this->parseValue($values['value']);
          }
        }
      }
      else
      {
        // 1-liner followed by newline
        if (2 == count($this->lines) && empty($this->lines[1]))
        {
          $value = sfYamlInline::load($this->lines[0]);
          if (is_array($value))
          {
            $first = reset($value);
            // only strings can be aliases; the first element may also be
            // a nested array, a number, a boolean, null or false (empty array)
            if (is_string($first) && '*' === substr($first, 0, 1))
            {
              $data = array();
              foreach ($value as $alias)
              {
                $data[] = $this->refs[substr($alias, 1)];
              }
              $value = $data;
            }
          }

          return $value;
        }

        // none of the patterns matched: report the PCRE failure if there was one
        switch (preg_last_error())
        {
          case PREG_INTERNAL_ERROR:
            $error = 'Internal PCRE error on line';
            break;
          case PREG_BACKTRACK_LIMIT_ERROR:
            $error = 'pcre.backtrack_limit reached on line';
            break;
          case PREG_RECURSION_LIMIT_ERROR:
            $error = 'pcre.recursion_limit reached on line';
            break;
          case PREG_BAD_UTF8_ERROR:
            $error = 'Malformed UTF-8 data on line';
            break;
          case PREG_BAD_UTF8_OFFSET_ERROR:
            $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point on line';
            break;
          default:
            $error = 'Unable to parse line';
        }

        throw new InvalidArgumentException(sprintf('%s %d (%s).', $error, $this->getRealCurrentLineNb() + 1, $this->currentLine));
      }

      // strict check: an anchor named "0" must be registered too
      if (false !== $isRef)
      {
        $this->refs[$isRef] = end($data);
      }
    }

    return empty($data) ? null : $data;
  }

  /**
   * Parses a chunk of YAML with a nested parser sharing this parser's references.
   *
   * @param  string  $yaml   The YAML chunk to parse
   * @param  integer $offset The line offset given to the nested parser (used in its error messages)
   *
   * @return mixed   The PHP value returned by the nested parser
   *
   * @throws InvalidArgumentException If the YAML chunk is not valid
   */
  protected function parseNested($yaml, $offset)
  {
    $parser = new sfYamlParser($offset);
    // bind by reference so that anchors declared in the chunk are visible here, and vice versa
    $parser->refs =& $this->refs;

    return $parser->parse($yaml);
  }

  /**
   * Returns the current line number (takes the offset into account).
   *
   * @return integer The current line number
   */
  protected function getRealCurrentLineNb()
  {
    return $this->currentLineNb + $this->offset;
  }

  /**
   * Returns the current line indentation.
   *
   * @return integer The number of leading spaces of the current line
   */
  protected function getCurrentLineIndentation()
  {
    return strlen($this->currentLine) - strlen(ltrim($this->currentLine, ' '));
  }

  /**
   * Returns the next embed block of YAML.
   *
   * The block starts on the line following the current one. Its indentation
   * is removed from every collected line. Comment lines inside the block are
   * skipped, blank lines are kept.
   *
   * @return string A YAML string
   *
   * @throws InvalidArgumentException If the block is not indented, or if a line is
   *                                  indented less than the block without being at column 0
   */
  protected function getNextEmbedBlock()
  {
    $this->moveToNextLine();

    $newIndent = $this->getCurrentLineIndentation();

    if (!$this->isCurrentLineEmpty() && 0 == $newIndent)
    {
      throw new InvalidArgumentException(sprintf('Indentation problem at line %d (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
    }

    $data = array(substr($this->currentLine, $newIndent));

    while ($this->moveToNextLine())
    {
      if ($this->isCurrentLineEmpty())
      {
        if ($this->isCurrentLineBlank())
        {
          $data[] = substr($this->currentLine, $newIndent);
        }

        continue;
      }

      // from here on the line is neither blank nor a comment
      $indent = $this->getCurrentLineIndentation();

      if ($indent >= $newIndent)
      {
        $data[] = substr($this->currentLine, $newIndent);
      }
      else if (0 == $indent)
      {
        // back at column 0: the block is over, let the caller see this line
        $this->moveToPreviousLine();

        break;
      }
      else
      {
        throw new InvalidArgumentException(sprintf('Indentation problem at line %d (%s)', $this->getRealCurrentLineNb() + 1, $this->currentLine));
      }
    }

    return implode("\n", $data);
  }

  /**
   * Moves the parser to the next line.
   *
   * @return Boolean false if the parser is already on the last line, true otherwise
   */
  protected function moveToNextLine()
  {
    if ($this->currentLineNb >= count($this->lines) - 1)
    {
      return false;
    }

    $this->currentLine = $this->lines[++$this->currentLineNb];

    return true;
  }

  /**
   * Moves the parser to the previous line.
   *
   * No bounds check is made: it must only be called after a successful move forward.
   */
  protected function moveToPreviousLine()
  {
    $this->currentLine = $this->lines[--$this->currentLineNb];
  }

  /**
   * Parses a YAML value.
   *
   * Handles aliases (*name), block scalars (| and >) and delegates everything
   * else to sfYamlInline::load().
   *
   * @param  string $value A YAML value
   *
   * @return mixed  A PHP value
   *
   * @throws InvalidArgumentException If the value is an alias to an unknown reference
   */
  protected function parseValue($value)
  {
    if ('*' === substr($value, 0, 1))
    {
      // drop the leading "*" and any trailing comment ("#" preceded by whitespace)
      $value = preg_replace('/\s+#.*$/', '', substr($value, 1));

      if (!array_key_exists($value, $this->refs))
      {
        throw new InvalidArgumentException(sprintf('Reference "%s" does not exist (%s).', $value, $this->currentLine));
      }
      return $this->refs[$value];
    }

    if (preg_match('/^(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?$/', $value, $matches))
    {
      $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

      // cast before abs(): the modifiers may be '', '+' or '-', which abs()
      // rejects with a TypeError on PHP 8
      return $this->parseFoldedScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));
    }
    else
    {
      return sfYamlInline::load($value);
    }
  }

  /**
   * Parses a folded scalar.
   *
   * @param  string  $separator   The separator that was used to begin this folded scalar (| or >)
   * @param  string  $indicator   The indicator that was used to begin this folded scalar (+ or -)
   * @param  integer $indentation The indentation that was used to begin this folded scalar
   *
   * @return string  The text value
   */
  protected function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
  {
    // "|" keeps line breaks, ">" joins lines with a space
    $separator = '|' == $separator ? "\n" : ' ';
    $text = '';

    $notEOF = $this->moveToNextLine();

    while ($notEOF && $this->isCurrentLineBlank())
    {
      $text .= "\n";

      $notEOF = $this->moveToNextLine();
    }

    if (!$notEOF)
    {
      return '';
    }

    if (!preg_match('#^(?P<indent>'.($indentation ? str_repeat(' ', $indentation) : ' +').')(?P<text>.*)$#u', $this->currentLine, $matches))
    {
      $this->moveToPreviousLine();

      return '';
    }

    $textIndent = $matches['indent'];
    // null means "no continuation line seen yet"; using the integer 0 here
    // compares differently against an indent string on PHP < 8 and PHP >= 8
    $previousIndent = null;

    $text .= $matches['text'].$separator;
    while ($this->currentLineNb + 1 < count($this->lines))
    {
      $this->moveToNextLine();

      if (preg_match('#^(?P<indent> {'.strlen($textIndent).',})(?P<text>.+)$#u', $this->currentLine, $matches))
      {
        // when folding, a change of indentation between two continuation
        // lines turns the previous separator into a line break
        if (' ' == $separator && null !== $previousIndent && $previousIndent !== $matches['indent'])
        {
          $text = substr($text, 0, -1)."\n";
        }
        $previousIndent = $matches['indent'];

        // lines indented deeper than the first one keep their extra indent and end with a line break
        $text .= str_repeat(' ', $diff = strlen($matches['indent']) - strlen($textIndent)).$matches['text'].($diff ? "\n" : $separator);
      }
      else if (preg_match('#^(?P<text> *)$#', $this->currentLine, $matches))
      {
        $text .= preg_replace('#^ {1,'.strlen($textIndent).'}#', '', $matches['text'])."\n";
      }
      else
      {
        // first line that is not part of the scalar: give it back to the caller
        $this->moveToPreviousLine();

        break;
      }
    }

    if (' ' == $separator)
    {
      // replace last separator by a newline
      $text = preg_replace('/ (\n*)$/', "\n$1", $text);
    }

    switch ($indicator)
    {
      case '':
        // default: keep a single trailing newline
        $text = preg_replace('#\n+$#s', "\n", $text);
        break;
      case '+':
        // keep all trailing newlines
        break;
      case '-':
        // strip all trailing newlines
        $text = preg_replace('#\n+$#s', '', $text);
        break;
    }

    return $text;
  }

  /**
   * Tells whether the next non-empty line is NOT indented deeper than the current one.
   *
   * Despite the method name, true is returned when the indentation of the
   * next non-empty line is less than or equal to the indentation of the
   * current line. False is returned when that line is indented deeper, or
   * when there is no such line before the end of the document.
   *
   * @return Boolean true if the next non-empty line is indented less than or equal to the current one, false otherwise
   */
  protected function isNextLineIndented()
  {
    $currentIndentation = $this->getCurrentLineIndentation();
    $notEOF = $this->moveToNextLine();

    while ($notEOF && $this->isCurrentLineEmpty())
    {
      $notEOF = $this->moveToNextLine();
    }

    if (false === $notEOF)
    {
      return false;
    }

    $ret = false;
    if ($this->getCurrentLineIndentation() <= $currentIndentation)
    {
      $ret = true;
    }

    $this->moveToPreviousLine();

    return $ret;
  }

  /**
   * Returns true if the current line is blank or if it is a comment line.
   *
   * @return Boolean Returns true if the current line is empty or if it is a comment line, false otherwise
   */
  protected function isCurrentLineEmpty()
  {
    return $this->isCurrentLineBlank() || $this->isCurrentLineComment();
  }

  /**
   * Returns true if the current line is blank.
   *
   * @return Boolean Returns true if the current line is blank, false otherwise
   */
  protected function isCurrentLineBlank()
  {
    return '' == trim($this->currentLine, ' ');
  }

  /**
   * Returns true if the current line is a comment line.
   *
   * @return Boolean Returns true if the current line is a comment line, false otherwise
   */
  protected function isCurrentLineComment()
  {
    //checking explicitly the first char of the trim is faster than loops or strpos
    $ltrimmedLine = ltrim($this->currentLine, ' ');

    // guard against blank lines, which have no first character
    return '' !== $ltrimmedLine && $ltrimmedLine[0] === '#';
  }

  /**
   * Cleanups a YAML string to be parsed.
   *
   * Normalizes line endings, makes sure the string ends with a newline and
   * strips the %YAML header, the leading comments and the document markers.
   * The parser offset is increased by the number of removed leading lines.
   *
   * @param  string $value The input YAML string
   *
   * @return string A cleaned up YAML string
   */
  protected function cleanup($value)
  {
    $value = str_replace(array("\r\n", "\r"), "\n", $value);

    if (!preg_match("#\n$#", $value))
    {
      $value .= "\n";
    }

    // strip YAML header
    // (no "s" modifier: "." must stop at the end of the header line, otherwise
    // the greedy ".*\n" would remove the whole document)
    $count = 0;
    $stripped = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $count);
    if (null !== $stripped)
    {
      // preg_replace() returns null on error (e.g. malformed UTF-8); in that
      // case the input is kept so that the error surfaces while parsing
      $value = $stripped;
      $this->offset += $count;
    }

    // remove leading comments
    $trimmedValue = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $count);
    if ($count == 1)
    {
      // items have been removed, update the offset
      $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
      $value = $trimmedValue;
    }

    // remove start of the document marker (---)
    $trimmedValue = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $count);
    if ($count == 1)
    {
      // items have been removed, update the offset
      $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
      $value = $trimmedValue;

      // remove end of the document marker (...)
      $value = preg_replace('#\.\.\.\s*$#s', '', $value);
    }

    return $value;
  }
}