Subversion-Projekte lars-tiefland.laravel_shop

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
148 lars 1
<?php
2
 
3
namespace Faker\Provider;
4
 
5
abstract class Text extends Base
{
    /**
     * Source text the word table is built from. Empty here; read by
     * getExplodedText() through late static binding.
     *
     * @var string
     */
    protected static $baseText = '';

    /**
     * Delimiter used by explode() and implode() to split text into words and
     * to join words back together.
     *
     * @var string
     */
    protected static $separator = ' ';

    /**
     * Amount added to the running length for every appended word in
     * generateText(), accounting for the separator.
     *
     * @var int
     */
    protected static $separatorLen = 1;

    /**
     * Cached result of splitting the base text; null until first use.
     *
     * @var array|null
     */
    protected $explodedText;

    /**
     * Cached lookup tables built by getConsecutiveWords(), keyed by index size.
     *
     * @var array
     */
    protected $consecutiveWords = [];

    /**
     * When true, validStart() only accepts words beginning with an uppercase letter.
     *
     * @var bool
     */
    protected static $textStartsWithUppercase = true;

    /**
     * Generate a text string by the Markov chain algorithm.
     *
     * Shortcut for realTextBetween() with the minimum length set to 80% of
     * $maxNbChars (rounded).
     *
     * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key'
     *
     * @param int $maxNbChars Maximum number of characters the text should contain (minimum: 10)
     * @param int $indexSize  Determines how many words are considered for the generation of the next word.
     *                        The minimum is 1, and it produces a higher level of randomness, although the
     *                        generated text usually doesn't make sense. Higher index sizes (up to 5)
     *                        produce more correct text, at the price of less randomness.
     *
     * @throws \InvalidArgumentException if an argument is out of range (see realTextBetween())
     * @throws \OverflowException        if no text of suitable length could be generated
     *
     * @return string
     */
    public function realText($maxNbChars = 200, $indexSize = 2)
    {
        return $this->realTextBetween((int) round($maxNbChars * 0.8), $maxNbChars, $indexSize);
    }

    /**
     * Generate a text string by the Markov chain algorithm.
     *
     * Depending on the $maxNbChars, returns a random valid looking text. The algorithm
     * generates a weighted table with the specified number of words as the index and the
     * possible following words as the value.
     *
     * Candidate texts are generated repeatedly until one is strictly longer than
     * $minNbChars; the method gives up once the attempt counter reaches 100.
     *
     * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key'
     *
     * @param int $minNbChars Lower length bound; must be at least 1 and smaller than $maxNbChars.
     *                        The returned text is strictly longer than this value.
     * @param int $maxNbChars Maximum number of characters the text should contain (minimum: 10)
     * @param int $indexSize  Determines how many words are considered for the generation of the next word.
     *                        The minimum is 1, and it produces a higher level of randomness, although the
     *                        generated text usually doesn't make sense. Higher index sizes (up to 5)
     *                        produce more correct text, at the price of less randomness.
     *
     * @throws \InvalidArgumentException if $minNbChars < 1, $maxNbChars < 10, $indexSize is outside 1..5,
     *                                   or $minNbChars >= $maxNbChars
     * @throws \OverflowException        if no sufficiently long text was produced within the retry limit
     *
     * @return string
     */
    public function realTextBetween($minNbChars = 160, $maxNbChars = 200, $indexSize = 2)
    {
        if ($minNbChars < 1) {
            throw new \InvalidArgumentException('minNbChars must be at least 1');
        }

        if ($maxNbChars < 10) {
            throw new \InvalidArgumentException('maxNbChars must be at least 10');
        }

        if ($indexSize < 1) {
            throw new \InvalidArgumentException('indexSize must be at least 1');
        }

        if ($indexSize > 5) {
            throw new \InvalidArgumentException('indexSize must be at most 5');
        }

        if ($minNbChars >= $maxNbChars) {
            throw new \InvalidArgumentException('minNbChars must be smaller than maxNbChars');
        }

        $words = $this->getConsecutiveWords($indexSize);
        $iterations = 0;

        do {
            ++$iterations;

            // bail out instead of looping forever when the corpus cannot satisfy the bounds
            if ($iterations >= 100) {
                throw new \OverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations));
            }

            $result = $this->generateText($maxNbChars, $words);
        } while (static::strlen($result) <= $minNbChars);

        return $result;
    }

    /**
     * Walk the lookup table from a random starting key, collecting words until
     * the length budget is used up or the chain has no continuation.
     *
     * @param int   $maxNbChars Length budget for the collected words (each word counts
     *                          its own length plus the separator length)
     * @param array $words      Lookup table as returned by getConsecutiveWords()
     *
     * @return string The collected words joined with the separator and passed through appendEnd()
     */
    protected function generateText($maxNbChars, $words)
    {
        $result = [];
        $resultLength = 0;
        // take a random starting point
        $next = static::randomKey($words);

        while ($resultLength < $maxNbChars && isset($words[$next])) {
            // fetch a random word to append
            $word = static::randomElement($words[$next]);

            // calculate next index: slide the window one word forward
            $currentWords = static::explode($next);
            $currentWords[] = $word;
            array_shift($currentWords);
            $next = static::implode($currentWords);

            // keep walking the chain until a word that may start the text is found
            if ($resultLength === 0 && !static::validStart($word)) {
                continue;
            }

            // append the element
            $result[] = $word;
            $resultLength += static::strlen($word) + static::$separatorLen;
        }

        // Remove the element that caused the text to overflow. When the loop
        // stopped because the chain ran out of continuations, the last word
        // did not overflow the budget and is kept.
        if ($resultLength >= $maxNbChars) {
            array_pop($result);
        }

        // build result
        return static::appendEnd(static::implode($result));
    }

    /**
     * Build (and cache per index size) the lookup table mapping each run of
     * $indexSize consecutive words to the list of words that follow it in the
     * base text. A follower appears once per occurrence, so picking a random
     * element is weighted by frequency.
     *
     * @param int $indexSize Number of consecutive words forming a key
     *
     * @return array Map of joined key words to a list of following words
     */
    protected function getConsecutiveWords($indexSize)
    {
        if (!isset($this->consecutiveWords[$indexSize])) {
            // array copy: shifting below does not touch the cached exploded text
            $parts = $this->getExplodedText();
            $words = [];
            $index = [];

            // the first $indexSize words form the initial key window
            for ($i = 0; $i < $indexSize; ++$i) {
                $index[] = array_shift($parts);
            }

            foreach ($parts as $word) {
                // register $word as a follower of the current window
                $words[static::implode($index)][] = $word;

                // slide the window one word forward
                array_shift($index);
                $index[] = $word;
            }

            // cache look up words for performance
            $this->consecutiveWords[$indexSize] = $words;
        }

        return $this->consecutiveWords[$indexSize];
    }

    /**
     * Return the base text split into words, collapsing every run of
     * whitespace into a single space first. The result is cached on the instance.
     *
     * @return array
     */
    protected function getExplodedText()
    {
        if ($this->explodedText === null) {
            $this->explodedText = static::explode(preg_replace('/\s+/u', ' ', static::$baseText));
        }

        return $this->explodedText;
    }

    /**
     * Split a text into words using the configured separator.
     *
     * @param string $text
     *
     * @return array
     */
    protected static function explode($text)
    {
        return explode(static::$separator, $text);
    }

    /**
     * Join words using the configured separator.
     *
     * @param array $words
     *
     * @return string
     */
    protected static function implode($words)
    {
        return implode(static::$separator, $words);
    }

    /**
     * Length of a text in UTF-8 characters when the mbstring extension is
     * available, otherwise in bytes.
     *
     * @param string $text
     *
     * @return int
     */
    protected static function strlen($text)
    {
        return function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen($text);
    }

    /**
     * Tell whether a word may be the first word of a generated text.
     *
     * Every word qualifies unless $textStartsWithUppercase is set, in which
     * case the word must begin with an uppercase letter.
     *
     * @param string $word
     *
     * @return bool
     */
    protected static function validStart($word)
    {
        if (!static::$textStartsWithUppercase) {
            return true;
        }

        // preg_match() yields 1, 0 or false; normalise to a real boolean
        return preg_match('/^\p{Lu}/u', $word) === 1;
    }

    /**
     * Strip trailing spaces and punctuation from a text and terminate it with a full stop.
     *
     * Stripped characters: space, comma, hyphen, full stop, colon, semicolon,
     * en dash (U+2013) and em dash (U+2014). The hyphen is escaped so it is
     * matched literally instead of forming a ",-:" range, which would also
     * swallow trailing digits and slashes. The full stop is listed so the
     * appended one is never doubled.
     *
     * @param string $text
     *
     * @return string
     */
    protected static function appendEnd($text)
    {
        return preg_replace('/[ ,\-.:;\x{2013}\x{2014}]+$/us', '', $text) . '.';
    }
}