Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
//
3
//  FPDI - Version 1.4.2
4
//
5
//    Copyright 2004-2011 Setasign - Jan Slabon
6
//
7
//  Licensed under the Apache License, Version 2.0 (the "License");
8
//  you may not use this file except in compliance with the License.
9
//  You may obtain a copy of the License at
10
//
11
//      http://www.apache.org/licenses/LICENSE-2.0
12
//
13
//  Unless required by applicable law or agreed to in writing, software
14
//  distributed under the License is distributed on an "AS IS" BASIS,
15
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
//  See the License for the specific language governing permissions and
17
//  limitations under the License.
18
//
19
 
20
// Numeric identifiers for the kinds of values the parser produces.
// Each constant is defined only when it does not exist yet.
defined('PDF_TYPE_NULL')       || define('PDF_TYPE_NULL', 0);
defined('PDF_TYPE_NUMERIC')    || define('PDF_TYPE_NUMERIC', 1);
defined('PDF_TYPE_TOKEN')      || define('PDF_TYPE_TOKEN', 2);
defined('PDF_TYPE_HEX')        || define('PDF_TYPE_HEX', 3);
defined('PDF_TYPE_STRING')     || define('PDF_TYPE_STRING', 4);
defined('PDF_TYPE_DICTIONARY') || define('PDF_TYPE_DICTIONARY', 5);
defined('PDF_TYPE_ARRAY')      || define('PDF_TYPE_ARRAY', 6);
defined('PDF_TYPE_OBJDEC')     || define('PDF_TYPE_OBJDEC', 7);
defined('PDF_TYPE_OBJREF')     || define('PDF_TYPE_OBJREF', 8);
defined('PDF_TYPE_OBJECT')     || define('PDF_TYPE_OBJECT', 9);
defined('PDF_TYPE_STREAM')     || define('PDF_TYPE_STREAM', 10);
defined('PDF_TYPE_BOOLEAN')    || define('PDF_TYPE_BOOLEAN', 11);
defined('PDF_TYPE_REAL')       || define('PDF_TYPE_REAL', 12);
46
 
47
require_once('pdf_context.php');
48
 
49
if (!class_exists('pdf_parser', false)) {
50
 
51
    class pdf_parser {
52
 
53
    	/**
54
         * Filename
55
         * @var string
56
         */
57
        var $filename;
58
 
59
        /**
60
         * File resource
61
         * @var resource
62
         */
63
        var $f;
64
 
65
        /**
66
         * PDF Context
67
         * @var object pdf_context-Instance
68
         */
69
        var $c;
70
 
71
        /**
72
         * xref-Data
73
         * @var array
74
         */
75
        var $xref;
76
 
77
        /**
78
         * root-Object
79
         * @var array
80
         */
81
        var $root;
82
 
83
        /**
84
         * PDF version of the loaded document
85
         * @var string
86
         */
87
        var $pdfVersion;
88
 
89
        /**
90
	     * For reading encrypted documents and xref/objectstreams are in use
91
	     *
92
	     * @var boolean
93
	     */
94
	    var $readPlain = true;
95
 
96
        /**
         * Constructor
         *
         * Opens the source file, detects the PDF version, reads the xref
         * data, rejects encrypted documents and resolves the /Root object.
         * Failures are reported through error().
         *
         * @param string $filename  Source-Filename
         */
        function __construct($filename) {
            $this->filename = $filename;

            $this->f = @fopen($this->filename, 'rb');

            if (!$this->f)
                $this->error(sprintf('Cannot open %s !', $filename));

            $this->getPDFVersion();

            $this->c = new pdf_context($this->f);

            // Read xref-Data
            $this->xref = array();
            $this->pdf_read_xref($this->xref, $this->pdf_find_xref());

            // Check for Encryption
            $this->getEncryption();

            // Read root
            $this->pdf_read_root();
        }

        /**
         * Old-style constructor name.
         *
         * A method named like its class is no longer treated as a
         * constructor by PHP 8, so the initialisation lives in __construct().
         * This method stays available for code that calls it explicitly,
         * e.g. parent::pdf_parser($filename) in a subclass.
         *
         * @param string $filename  Source-Filename
         */
        function pdf_parser($filename) {
            // self:: (not $this->) so a subclass' own __construct() is never re-entered
            self::__construct($filename);
        }
123
 
124
        /**
         * Close the source file if it is still open and drop the handle property
         */
        function closeFile() {
            if (!isset($this->f) || !is_resource($this->f)) {
                return;
            }

            fclose($this->f);
            unset($this->f);
        }
133
 
134
        /**
         * Output an error message and terminate the script
         *
         * @param string $msg  Error-Message
         */
        function error($msg) {
            $output = '<b>PDF-Parser Error:</b> ' . $msg;
            exit($output);
        }
142
 
143
        /**
         * Report an error when the trailer contains an /Encrypt entry
         */
        function getEncryption() {
            $hasEncryptEntry = isset($this->xref['trailer'][1]['/Encrypt']);

            if (!$hasEncryptEntry) {
                return;
            }

            $this->error('File is encrypted!');
        }
151
 
152
    	/**
153
         * Find/Return /Root
154
         *
155
         * @return array
156
         */
157
        function pdf_find_root() {
158
            if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) {
159
                $this->error('Wrong Type of Root-Element! Must be an indirect reference');
160
            }
161
 
162
            return $this->xref['trailer'][1]['/Root'];
163
        }
164
 
165
        /**
         * Resolve the trailer's /Root reference and keep the result in $this->root
         */
        function pdf_read_root() {
            $rootReference = $this->pdf_find_root();

            $this->root = $this->pdf_resolve_object($this->c, $rootReference);
        }
172
 
173
        /**
         * Get PDF-Version
         *
         * Looks for a "digit.digit" pattern within the first 16 bytes of the
         * file and stores the match in $this->pdfVersion. Without a match the
         * property is left untouched.
         *
         * @return string|null the current value of $this->pdfVersion
         */
        function getPDFVersion() {
            fseek($this->f, 0);
            $header = fread($this->f, 16);

            preg_match('/\d\.\d/', $header, $found);

            if (isset($found[0])) {
                $this->pdfVersion = $found[0];
            }

            return $this->pdfVersion;
        }
185
 
186
        /**
         * Find the xref-Table
         *
         * Reads the last 1500 bytes of the file (or the whole file when it is
         * shorter), takes the text after the last "startxref" keyword and
         * returns the first number found there.
         *
         * @return integer the number following "startxref"
         */
        function pdf_find_xref() {
            $tailLength = 1500;

            if (fseek($this->f, -$tailLength, SEEK_END) === -1) {
                // file is shorter than the tail window: read from the start
                fseek($this->f, 0);
            }
            $tail = fread($this->f, $tailLength);

            // Cut everything up to and including the last "startxref".
            // When the keyword is missing nothing remains, so the check below fails.
            $keyword = 'startxref';
            $keywordPos = strrpos($tail, $keyword);
            if ($keywordPos === false) {
                $cutAt = strlen($tail);
            } else {
                $cutAt = $keywordPos + strlen($keyword);
            }
            $tail = substr($tail, $cutAt);

            if (!preg_match('/\s*(\d+).*$/s', $tail, $found)) {
                $this->error('Unable to find pointer to xref table');
            }

            return (int) $found[1];
        }
207
 
208
        /**
         * Read xref-table
         *
         * Parses the xref table near $offset together with its trailer and
         * then follows the trailer's /Prev entry to read older tables too.
         *
         * Keys written to $result:
         *  - 'xref_location': position of the "xref" keyword of the first table read
         *  - 'max_object':    largest "start + count" of all subsection headers
         *  - 'xref':          [object number][generation] => offset, or null
         *                     for entries that are not marked "n"
         *  - 'trailer':       trailer value of the first table read
         * Entries and the trailer that are already present are not overwritten.
         *
         * @param array $result Array of xref-table
         * @param integer $offset of xref-table
         * @return boolean always true; problems are reported through error()
         */
        function pdf_read_xref(&$result, $offset) {
            // start up to 20 bytes earlier to cope with faulty documents
            $o_pos = $offset - min(20, $offset);
            fseek($this->f, $o_pos);

            $data = fread($this->f, 100);

            $xrefPos = strrpos($data, 'xref');

            if ($xrefPos === false) {
                // No keyword: check whether an object declaration sits at the offset instead
                fseek($this->f, $offset);
                $c = new pdf_context($this->f);
                $xrefStreamObjDec = $this->pdf_read_value($c);

                if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) {
                    $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename));
                } else {
                    $this->error('Unable to find xref table.');
                }
            }

            if (!isset($result['xref_location'])) {
                $result['xref_location'] = $o_pos + $xrefPos;
                $result['max_object'] = 0;
            }

            $cycles = -1;
            $bytesPerCycle = 100;

            // set the handle directly after the "xref"-keyword
            fseek($this->f, $o_pos = $o_pos + $xrefPos + 4);
            $data = fread($this->f, $bytesPerCycle);

            // Read further chunks until "trailer" shows up. The search offset lags
            // behind the newest chunk, so a keyword spanning two reads is still found.
            while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false && !feof($this->f)) {
                $data .= fread($this->f, $bytesPerCycle);
            }

            if ($trailerPos === false) {
                $this->error('Trailer keyword not found after xref table');
            }

            $data = substr($data, 0, $trailerPos);

            // get Line-Ending: check the first 100 bytes for linebreaks
            preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m);

            if (count(array_unique($m[0])) === 1) {
                // One consistent line ending. Use the first match as delimiter:
                // a second match is not guaranteed to exist.
                $lines = explode($m[0][0], $data);
            } else {
                // Mixed line endings, or none at all within the inspected bytes
                $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
            }

            unset($data, $m);

            $start = 1;

            foreach ($lines as $rawLine) {
                $line = trim($rawLine);
                if (!$line) {
                    continue;
                }

                $pieces = explode(' ', $line);
                switch (count($pieces)) {
                    case 2:
                        // subsection header: "<first object number> <entry count>"
                        $start = (int)$pieces[0];
                        $end   = $start + (int)$pieces[1];
                        if ($end > $result['max_object'])
                            $result['max_object'] = $end;
                        break;
                    case 3:
                        // entry: "<offset> <generation> <n|f>"
                        if (!isset($result['xref'][$start]))
                            $result['xref'][$start] = array();

                        if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
                            $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                        }
                        $start++;
                        break;
                    default:
                        $this->error('Unexpected data in xref table');
                }
            }

            unset($lines, $rawLine, $pieces, $line, $start, $end, $gen);

            // 7 = strlen("trailer"): continue right behind the keyword
            fseek($this->f, $o_pos + $trailerPos + 7);

            $trailerContext = new pdf_context($this->f);
            $trailer = $this->pdf_read_value($trailerContext);

            unset($trailerContext);

            if (!isset($result['trailer'])) {
                $result['trailer'] = $trailer;
            }

            if (isset($trailer[1]['/Prev'])) {
                $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]);
            }

            unset($trailer);

            return true;
        }
323
 
324
        /**
325
         * Reads an Value
326
         *
327
         * @param object $c pdf_context
328
         * @param string $token a Token
329
         * @return mixed
330
         */
331
        function pdf_read_value(&$c, $token = null) {
332
        	if (is_null($token)) {
333
        	    $token = $this->pdf_read_token($c);
334
        	}
335
 
336
            if ($token === false) {
337
        	    return false;
338
        	}
339
 
340
        	switch ($token) {
341
                case	'<':
342
        			// This is a hex string.
343
        			// Read the value, then the terminator
344
 
345
                    $pos = $c->offset;
346
 
347
        			while(1) {
348
 
349
                        $match = strpos ($c->buffer, '>', $pos);
350
 
351
        				// If you can't find it, try
352
        				// reading more data from the stream
353
 
354
        				if ($match === false) {
355
        					if (!$c->increase_length()) {
356
        						return false;
357
        					} else {
358
                            	continue;
359
                        	}
360
        				}
361
 
362
        				$result = substr ($c->buffer, $c->offset, $match - $c->offset);
363
        				$c->offset = $match + 1;
364
 
365
        				return array (PDF_TYPE_HEX, $result);
366
                    }
367
 
368
                    break;
369
        		case	'<<':
370
        			// This is a dictionary.
371
 
372
        			$result = array();
373
 
374
        			// Recurse into this function until we reach
375
        			// the end of the dictionary.
376
        			while (($key = $this->pdf_read_token($c)) !== '>>') {
377
        				if ($key === false) {
378
        					return false;
379
        				}
380
 
381
        				if (($value =   $this->pdf_read_value($c)) === false) {
382
        					return false;
383
        				}
384
 
385
        				// Catch missing value
386
        				if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') {
387
        				    $result[$key] = array(PDF_TYPE_NULL);
388
        				    break;
389
        				}
390
 
391
        				$result[$key] = $value;
392
        			}
393
 
394
        			return array (PDF_TYPE_DICTIONARY, $result);
395
 
396
        		case	'[':
397
        			// This is an array.
398
 
399
        			$result = array();
400
 
401
        			// Recurse into this function until we reach
402
        			// the end of the array.
403
        			while (($token = $this->pdf_read_token($c)) !== ']') {
404
                        if ($token === false) {
405
        					return false;
406
        				}
407
 
408
        				if (($value = $this->pdf_read_value($c, $token)) === false) {
409
                            return false;
410
        				}
411
 
412
        				$result[] = $value;
413
        			}
414
 
415
                    return array (PDF_TYPE_ARRAY, $result);
416
 
417
        		case	'('		:
418
                    // This is a string
419
                    $pos = $c->offset;
420
 
421
                    $openBrackets = 1;
422
        			do {
423
                        for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
424
                            switch (ord($c->buffer[$pos])) {
425
                                case 0x28: // '('
426
                                    $openBrackets++;
427
                                    break;
428
                                case 0x29: // ')'
429
                                    $openBrackets--;
430
                                    break;
431
                                case 0x5C: // backslash
432
                                    $pos++;
433
                            }
434
                        }
435
        			} while($openBrackets != 0 && $c->increase_length());
436
 
437
        			$result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
438
        			$c->offset = $pos;
439
 
440
        			return array (PDF_TYPE_STRING, $result);
441
 
442
                case 'stream':
443
                	$o_pos = ftell($c->file)-strlen($c->buffer);
444
    		        $o_offset = $c->offset;
445
 
446
    		        $c->reset($startpos = $o_pos + $o_offset);
447
 
448
    		        $e = 0; // ensure line breaks in front of the stream
449
    		        if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13))
450
    		        	$e++;
451
    		        if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10))
452
    		        	$e++;
453
 
454
    		        if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) {
455
    		        	$tmp_c = new pdf_context($this->f);
456
    		        	$tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']);
457
    		        	$length = $tmp_length[1][1];
458
    		        } else {
459
    		        	$length = $this->actual_obj[1][1]['/Length'][1];
460
    		        }
461
 
462
    		        if ($length > 0) {
463
        		        $c->reset($startpos + $e,$length);
464
        		        $v = $c->buffer;
465
    		        } else {
466
    		            $v = '';
467
    		        }
468
    		        $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream")
469
 
470
    		        return array(PDF_TYPE_STREAM, $v);
471
 
472
    	        default	:
473
                	if (is_numeric ($token)) {
474
                        // A numeric token. Make sure that
475
        				// it is not part of something else.
476
        				if (($tok2 = $this->pdf_read_token ($c)) !== false) {
477
                            if (is_numeric ($tok2)) {
478
 
479
        						// Two numeric tokens in a row.
480
        						// In this case, we're probably in
481
        						// front of either an object reference
482
        						// or an object specification.
483
        						// Determine the case and return the data
484
        						if (($tok3 = $this->pdf_read_token ($c)) !== false) {
485
                                    switch ($tok3) {
486
        								case 'obj':
487
                                            return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2);
488
        								case 'R':
489