| 148 |
lars |
1 |
<?php declare(strict_types=1);
|
|
|
2 |
/*
|
|
|
3 |
* This file is part of sebastian/diff.
|
|
|
4 |
*
|
|
|
5 |
* (c) Sebastian Bergmann <sebastian@phpunit.de>
|
|
|
6 |
*
|
|
|
7 |
* For the full copyright and license information, please view the LICENSE
|
|
|
8 |
* file that was distributed with this source code.
|
|
|
9 |
*/
|
|
|
10 |
namespace SebastianBergmann\Diff;
|
|
|
11 |
|
|
|
12 |
use const PHP_INT_SIZE;
|
|
|
13 |
use const PREG_SPLIT_DELIM_CAPTURE;
|
|
|
14 |
use const PREG_SPLIT_NO_EMPTY;
|
|
|
15 |
use function array_shift;
|
|
|
16 |
use function array_unshift;
|
|
|
17 |
use function array_values;
|
|
|
18 |
use function count;
|
|
|
19 |
use function current;
|
|
|
20 |
use function end;
|
|
|
21 |
use function get_class;
|
|
|
22 |
use function gettype;
|
|
|
23 |
use function is_array;
|
|
|
24 |
use function is_object;
|
|
|
25 |
use function is_string;
|
|
|
26 |
use function key;
|
|
|
27 |
use function min;
|
|
|
28 |
use function preg_split;
|
|
|
29 |
use function prev;
|
|
|
30 |
use function reset;
|
|
|
31 |
use function sprintf;
|
|
|
32 |
use function substr;
|
|
|
33 |
use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
|
|
|
34 |
use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;
|
|
|
35 |
|
|
|
36 |
/**
 * Computes the difference between two sequences of tokens (typically the
 * lines of two strings) and renders it through a diff output builder.
 */
final class Differ
{
    public const OLD = 0;

    public const ADDED = 1;

    public const REMOVED = 2;

    public const DIFF_LINE_END_WARNING = 3;

    public const NO_LINE_END_EOF_WARNING = 4;

    /**
     * @var DiffOutputBuilderInterface
     */
    private $outputBuilder;

    /**
     * @param null|DiffOutputBuilderInterface|string $outputBuilder Builder instance, null for the
     *                                                              default unified builder, or
     *                                                              (deprecated) a string passed on to
     *                                                              UnifiedDiffOutputBuilder
     *
     * @throws InvalidArgumentException when $outputBuilder is of any other type
     */
    public function __construct($outputBuilder = null)
    {
        if ($outputBuilder instanceof DiffOutputBuilderInterface) {
            $this->outputBuilder = $outputBuilder;

            return;
        }

        if (null === $outputBuilder) {
            $this->outputBuilder = new UnifiedDiffOutputBuilder;

            return;
        }

        if (is_string($outputBuilder)) {
            // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
            // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
            // @deprecated
            $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);

            return;
        }

        if (is_object($outputBuilder)) {
            $actual = 'instance of "' . get_class($outputBuilder) . '"';
        } elseif (is_array($outputBuilder)) {
            // Concatenating an array into a string raises an "Array to string
            // conversion" warning, which would mask the exception below when
            // warnings are converted to exceptions. Report the type only.
            $actual = gettype($outputBuilder);
        } else {
            $actual = gettype($outputBuilder) . ' "' . $outputBuilder . '"';
        }

        throw new InvalidArgumentException(
            sprintf(
                'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.',
                $actual
            )
        );
    }

    /**
     * Returns the diff between two arrays or strings as string.
     *
     * Inputs that are neither array nor string are cast to string first.
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param null|LongestCommonSubsequenceCalculator $lcs calculator to use; chosen automatically when null
     */
    public function diff($from, $to, ?LongestCommonSubsequenceCalculator $lcs = null): string
    {
        $diff = $this->diffToArray(
            $this->normalizeDiffInput($from),
            $this->normalizeDiffInput($to),
            $lcs
        );

        return $this->outputBuilder->getDiff($diff);
    }

    /**
     * Returns the diff between two arrays or strings as array.
     *
     * Each array element contains two elements:
     *   - [0] => mixed $token
     *   - [1] => 2|1|0
     *
     * - 2: REMOVED: $token was removed from $from
     * - 1: ADDED: $token was added to $from
     * - 0: OLD: $token is not changed in $to
     *
     * When the line endings of both sides do not match, an additional
     * DIFF_LINE_END_WARNING entry is prepended to the result.
     *
     * @param array|string                            $from
     * @param array|string                            $to
     * @param null|LongestCommonSubsequenceCalculator $lcs calculator to use; chosen automatically when null
     *
     * @throws InvalidArgumentException when $from or $to is neither an array nor a string
     */
    public function diffToArray($from, $to, ?LongestCommonSubsequenceCalculator $lcs = null): array
    {
        if (is_string($from)) {
            $from = $this->splitStringByLines($from);
        } elseif (!is_array($from)) {
            throw new InvalidArgumentException('"from" must be an array or string.');
        }

        if (is_string($to)) {
            $to = $this->splitStringByLines($to);
        } elseif (!is_array($to)) {
            throw new InvalidArgumentException('"to" must be an array or string.');
        }

        // Strip the common prefix/suffix so the LCS only runs on the part that differs.
        [$from, $to, $start, $end] = self::getArrayDiffParted($from, $to);

        if ($lcs === null) {
            $lcs = $this->selectLcsImplementation($from, $to);
        }

        $common = $lcs->calculate(array_values($from), array_values($to));
        $diff   = [];

        foreach ($start as $token) {
            $diff[] = [$token, self::OLD];
        }

        foreach ($common as $token) {
            // Everything in front of the next common token is exclusive to one side.
            while (reset($from) !== $token) {
                $diff[] = [array_shift($from), self::REMOVED];
            }

            while (reset($to) !== $token) {
                $diff[] = [array_shift($to), self::ADDED];
            }

            $diff[] = [$token, self::OLD];

            array_shift($from);
            array_shift($to);
        }

        // Whatever is left after the last common token. Iterating (instead of
        // shifting until null) keeps tokens that are themselves null.
        foreach ($from as $token) {
            $diff[] = [$token, self::REMOVED];
        }

        foreach ($to as $token) {
            $diff[] = [$token, self::ADDED];
        }

        foreach ($end as $token) {
            $diff[] = [$token, self::OLD];
        }

        if ($this->detectUnmatchedLineEndings($diff)) {
            array_unshift($diff, ["#Warning: Strings contain different line endings!\n", self::DIFF_LINE_END_WARNING]);
        }

        return $diff;
    }

    /**
     * Casts variable to string if it is not a string or array.
     *
     * @return array|string
     */
    private function normalizeDiffInput($input)
    {
        if (is_array($input) || is_string($input)) {
            return $input;
        }

        return (string) $input;
    }

    /**
     * Splits a string into its lines; every line keeps its trailing line break.
     */
    private function splitStringByLines(string $input): array
    {
        return preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Picks the LCS implementation based on the estimated memory footprint
     * of the time-efficient one.
     */
    private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
    {
        // We do not want to use the time-efficient implementation if its memory
        // footprint will probably exceed this value. Note that the footprint
        // calculation is only an estimation for the matrix and the LCS method
        // will typically allocate a bit more memory than this.
        $memoryLimit = 100 * 1024 * 1024;

        if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
            return new MemoryEfficientLongestCommonSubsequenceCalculator;
        }

        return new TimeEfficientLongestCommonSubsequenceCalculator;
    }

    /**
     * Calculates the estimated memory footprint for the DP-based method.
     *
     * The estimate is a per-item size (chosen by PHP_INT_SIZE) multiplied by
     * the square of the shorter input's length.
     *
     * @return float|int
     */
    private function calculateEstimatedFootprint(array $from, array $to)
    {
        $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;

        return $itemSize * min(count($from), count($to)) ** 2;
    }

    /**
     * Returns true if line ends don't match in a diff.
     */
    private function detectUnmatchedLineEndings(array $diff): bool
    {
        $newLineBreaks = ['' => true];
        $oldLineBreaks = ['' => true];

        foreach ($diff as $entry) {
            if (self::OLD === $entry[1]) {
                // Unchanged tokens exist on both sides.
                $ln                 = $this->getLinebreak($entry[0]);
                $oldLineBreaks[$ln] = true;
                $newLineBreaks[$ln] = true;
            } elseif (self::ADDED === $entry[1]) {
                $newLineBreaks[$this->getLinebreak($entry[0])] = true;
            } elseif (self::REMOVED === $entry[1]) {
                $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
            }
        }

        // if either input or output is a single line without breaks then no warning should be raised
        if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
            return false;
        }

        // two way compare
        foreach ($newLineBreaks as $break => $set) {
            if (!isset($oldLineBreaks[$break])) {
                return true;
            }
        }

        foreach ($oldLineBreaks as $break => $set) {
            if (!isset($newLineBreaks[$break])) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the line break ("\r", "\n" or "\r\n") a token ends with, or an
     * empty string when the token is not a string or has no trailing break.
     */
    private function getLinebreak($line): string
    {
        if (!is_string($line)) {
            return '';
        }

        $lc = substr($line, -1);

        if ("\r" === $lc) {
            return "\r";
        }

        if ("\n" !== $lc) {
            return '';
        }

        if ("\r\n" === substr($line, -2)) {
            return "\r\n";
        }

        return "\n";
    }

    /**
     * Splits off the common leading and trailing elements of two arrays.
     *
     * Both arguments are taken by reference and are reduced to their
     * differing middle part.
     *
     * @return array [remaining $from, remaining $to, common start, common end]
     */
    private static function getArrayDiffParted(array &$from, array &$to): array
    {
        $start = [];
        $end   = [];

        reset($to);

        // Common prefix: key and value must both match.
        foreach ($from as $k => $v) {
            $toK = key($to);

            if ($toK !== $k || $v !== $to[$k]) {
                break;
            }

            $start[$k] = $v;

            unset($from[$k], $to[$k]);
        }

        end($from);
        end($to);

        // Common suffix: walk both arrays backwards while the values match.
        do {
            $fromK = key($from);
            $toK   = key($to);

            if (null === $fromK || null === $toK || current($from) !== current($to)) {
                break;
            }

            // Step back before unsetting the elements the pointers are on.
            prev($from);
            prev($to);

            $end = [$fromK => $from[$fromK]] + $end;
            unset($from[$fromK], $to[$toK]);
        } while (true);

        return [$from, $to, $start, $end];
    }
}
|