Subversion-Projekte lars-tiefland.ci

Revision

Revision 2242 | Revision 2257 | Zur aktuellen Revision | Details | Vergleich mit vorheriger | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
68 lars 1
<?php
2
/**
3
 * CodeIgniter
4
 *
5
 * An open source application development framework for PHP
6
 *
7
 * This content is released under the MIT License (MIT)
8
 *
2254 lars 9
 * Copyright (c) 2014 - 2017, British Columbia Institute of Technology
68 lars 10
 *
11
 * Permission is hereby granted, free of charge, to any person obtaining a copy
12
 * of this software and associated documentation files (the "Software"), to deal
13
 * in the Software without restriction, including without limitation the rights
14
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
 * copies of the Software, and to permit persons to whom the Software is
16
 * furnished to do so, subject to the following conditions:
17
 *
18
 * The above copyright notice and this permission notice shall be included in
19
 * all copies or substantial portions of the Software.
20
 *
21
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
 * THE SOFTWARE.
28
 *
29
 * @package	CodeIgniter
30
 * @author	EllisLab Dev Team
31
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
2254 lars 32
 * @copyright	Copyright (c) 2014 - 2017, British Columbia Institute of Technology (http://bcit.ca/)
68 lars 33
 * @license	http://opensource.org/licenses/MIT	MIT License
34
 * @link	https://codeigniter.com
35
 * @since	Version 1.0.0
36
 * @filesource
37
 */
38
defined('BASEPATH') OR exit('No direct script access allowed');
39
 
40
/**
41
 * Security Class
42
 *
43
 * @package		CodeIgniter
44
 * @subpackage	Libraries
45
 * @category	Security
46
 * @author		EllisLab Dev Team
47
 * @link		https://codeigniter.com/user_guide/libraries/security.html
48
 */
49
class CI_Security {
50
 
51
	/**
	 * Character sequences removed from file names by sanitize_filename()
	 *
	 * @var	array
	 */
	public $filename_bad_chars = array(
		'../',
		'<!--',
		'-->',
		'<',
		'>',
		"'",
		'"',
		'&',
		'$',
		'#',
		'{',
		'}',
		'[',
		']',
		'=',
		';',
		'?',
		'%20',		// space
		'%22',		// "
		'%3c',		// <
		'%253c',	// < (double-encoded)
		'%3e',		// >
		'%0e',		// 0x0E control character (not ">", which is %3e)
		'%28',		// (
		'%29',		// )
		'%2528',	// ( (double-encoded)
		'%26',		// &
		'%24',		// $
		'%3f',		// ?
		'%3b',		// ;
		'%3d'		// =
	);

	/**
	 * Character set
	 *
	 * Default value only; the constructor replaces it with the
	 * upper-cased 'charset' configuration item.
	 *
	 * @var	string
	 */
	public $charset = 'UTF-8';

	/**
	 * XSS Hash
	 *
	 * Random marker used by _decode_entity() to protect URL query
	 * separators while entities are decoded. Created lazily by xss_hash().
	 *
	 * @var	string
	 */
	protected $_xss_hash;

	/**
	 * CSRF Hash
	 *
	 * Random hash stored in the Cross Site Request Forgery protection cookie.
	 *
	 * @var	string
	 */
	protected $_csrf_hash;

	/**
	 * CSRF Expire time
	 *
	 * Lifetime of the CSRF protection cookie, in seconds.
	 * Defaults to two hours.
	 *
	 * @var	int
	 */
	protected $_csrf_expire = 7200;

	/**
	 * CSRF Token name
	 *
	 * Name of the POST field that carries the CSRF token.
	 *
	 * @var	string
	 */
	protected $_csrf_token_name = 'ci_csrf_token';

	/**
	 * CSRF Cookie name
	 *
	 * Name of the cookie that carries the CSRF hash.
	 *
	 * @var	string
	 */
	protected $_csrf_cookie_name = 'ci_csrf_token';

	/**
	 * Literal strings that are never allowed, mapped to their replacement
	 *
	 * Applied by _do_never_allowed() with str_replace().
	 *
	 * @var	array
	 */
	protected $_never_allowed_str = array(
		'document.cookie' => '[removed]',
		'document.write' => '[removed]',
		'.parentNode' => '[removed]',
		'.innerHTML' => '[removed]',
		'-moz-binding' => '[removed]',
		'<!--' => '&lt;!--',
		'-->' => '--&gt;',
		'<![CDATA[' => '&lt;![CDATA[',
		'<comment>' => '&lt;comment&gt;',
		'<%' => '&lt;&#37;'
	);

	/**
	 * Regular expression fragments that are never allowed
	 *
	 * _do_never_allowed() wraps each fragment in '#...#is' and replaces
	 * every match with '[removed]'.
	 *
	 * @var	array
	 */
	protected $_never_allowed_regex = array(
		'javascript\s*:',
		'(document|(document\.)?window)\.(location|on\w*)',
		'expression\s*(\(|&\#40;)', // CSS and IE
		'vbscript\s*:', // IE
		'wscript\s*:', // IE
		'jscript\s*:', // IE
		'vbs\s*:', // IE
		'Redirect\s+30\d',
		"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
	);
164
 
165
	/**
	 * Class constructor
	 *
	 * When the 'csrf_protection' config item is truthy, the CSRF settings
	 * are read from the configuration, the cookie prefix is applied and the
	 * CSRF hash is prepared. The character set is always loaded from config.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		if (config_item('csrf_protection'))
		{
			// Let configured values override the built-in CSRF defaults
			foreach (array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $setting)
			{
				$configured = config_item($setting);
				if ($configured !== NULL)
				{
					$this->{'_'.$setting} = $configured;
				}
			}

			// Prepend the application specific cookie prefix, if there is one
			$prefix = config_item('cookie_prefix');
			if ($prefix)
			{
				$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
			}

			// Reuse the hash from the cookie or generate a new one
			$this->_csrf_set_hash();
		}

		$this->charset = strtoupper(config_item('charset'));

		log_message('info', 'Security Class Initialized');
	}
198
 
199
	// --------------------------------------------------------------------
200
 
201
	/**
	 * CSRF Verify
	 *
	 * For non-POST requests this only (re)sends the CSRF cookie. For POST
	 * requests, unless the URI matches one of the 'csrf_exclude_uris'
	 * patterns, the posted token must be identical to the cookie value;
	 * otherwise csrf_show_error() is called.
	 *
	 * @return	CI_Security	(for non-POST requests: whatever csrf_set_cookie() returns)
	 */
	public function csrf_verify()
	{
		// If it's not a POST request we will set the CSRF cookie
		if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
		{
			return $this->csrf_set_cookie();
		}

		// Check if URI has been whitelisted from CSRF checks
		if ($exclude_uris = config_item('csrf_exclude_uris'))
		{
			$uri = load_class('URI', 'core');
			foreach ($exclude_uris as $excluded)
			{
				// Each entry is used as a regular expression anchored to the whole URI string
				if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
				{
					return $this;
				}
			}
		}

		// Check CSRF token validity, but don't error on mismatch just yet - we'll want to regenerate.
		// Both values are user-controlled and may be arrays (e.g. "token[]=x"); hash_equals()
		// only accepts strings, so anything that is not a string is treated as invalid.
		$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
			&& is_string($_POST[$this->_csrf_token_name])
			&& is_string($_COOKIE[$this->_csrf_cookie_name])
			&& hash_equals($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name]);

		// We kill this since we're done and we don't want to pollute the _POST array
		unset($_POST[$this->_csrf_token_name]);

		// Regenerate on every submission?
		if (config_item('csrf_regenerate'))
		{
			// Nothing should last forever
			unset($_COOKIE[$this->_csrf_cookie_name]);
			$this->_csrf_hash = NULL;
		}

		$this->_csrf_set_hash();
		$this->csrf_set_cookie();

		if ($valid !== TRUE)
		{
			$this->csrf_show_error();
		}

		log_message('info', 'CSRF token verified');
		return $this;
	}
253
 
254
	// --------------------------------------------------------------------
255
 
256
	/**
	 * CSRF Set Cookie
	 *
	 * Sends the current CSRF hash to the client as a cookie that expires
	 * after $_csrf_expire seconds.
	 *
	 * @codeCoverageIgnore
	 * @return	CI_Security|bool	FALSE when 'cookie_secure' is enabled but
	 *				the request is not HTTPS (no cookie is sent)
	 */
	public function csrf_set_cookie()
	{
		$secure_only = (bool) config_item('cookie_secure');

		// A secure-only cookie is not sent over a non-HTTPS request
		if ($secure_only && ! is_https())
		{
			return FALSE;
		}

		$expires_at = time() + $this->_csrf_expire;

		setcookie(
			$this->_csrf_cookie_name,
			$this->_csrf_hash,
			$expires_at,
			config_item('cookie_path'),
			config_item('cookie_domain'),
			$secure_only,
			config_item('cookie_httponly')
		);
		log_message('info', 'CSRF cookie sent');

		return $this;
	}
285
 
286
	// --------------------------------------------------------------------
287
 
288
	/**
	 * Show CSRF Error
	 *
	 * Reports a failed CSRF check through show_error() with HTTP status 403.
	 *
	 * @return	void
	 */
	public function csrf_show_error()
	{
		$message = 'The action you have requested is not allowed.';
		show_error($message, 403);
	}
297
 
298
	// --------------------------------------------------------------------
299
 
300
	/**
	 * Get CSRF Hash
	 *
	 * Accessor for the protected CSRF hash.
	 *
	 * @see		CI_Security::$_csrf_hash
	 * @return 	string	Current CSRF hash (NULL if none has been set yet)
	 */
	public function get_csrf_hash()
	{
		$hash = $this->_csrf_hash;

		return $hash;
	}
310
 
311
	// --------------------------------------------------------------------
312
 
313
	/**
	 * Get CSRF Token Name
	 *
	 * Accessor for the name of the POST field that carries the CSRF token.
	 *
	 * @see		CI_Security::$_csrf_token_name
	 * @return	string	CSRF token name
	 */
	public function get_csrf_token_name()
	{
		$name = $this->_csrf_token_name;

		return $name;
	}
323
 
324
	// --------------------------------------------------------------------
325
 
326
	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting Hacks can be
	 * prevented.  This method does a fair amount of work but
	 * it is extremely thorough, designed to prevent even the
	 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
	 * of course, but I haven't been able to get anything past
	 * the filter.
	 *
	 * Note: Should only be used to deal with data upon submission.
	 *	 It's not something that should be used for general
	 *	 runtime processing.
	 *
	 * Arrays are cleaned element by element (recursively); the
	 * $is_image flag is not forwarded to those recursive calls.
	 *
	 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
	 * 		Based in part on some code and ideas from Bitflux.
	 *
	 * @link	http://ha.ckers.org/xss.html
	 * 		To help develop this script I used this great list of
	 *		vulnerabilities along with a few other hacks I've
	 *		harvested from examining vulnerabilities in other programs.
	 *
	 * @param	string|string[]	$str		Input data
	 * @param 	bool		$is_image	Whether the input is an image
	 * @return	string|array|bool	Cleaned string, array of cleaned values, or
	 *					(when $is_image is TRUE) whether the data
	 *					passed through the filter unchanged
	 */
	public function xss_clean($str, $is_image = FALSE)
	{
		// Is the string an array?
		if (is_array($str))
		{
			// Plain by-value iteration: the elements are written back through
			// $str[$key], so no reference (and no dangling reference) is needed.
			foreach ($str as $key => $value)
			{
				$str[$key] = $this->xss_clean($value);
			}

			return $str;
		}

		// Remove Invisible Characters
		$str = remove_invisible_characters($str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Use rawurldecode() so it does not remove plus signs
		 */
		if (stripos($str, '%') !== false)
		{
			// Decode repeatedly so multiply-encoded input is fully unwrapped
			do
			{
				$oldstr = $str;
				$str = rawurldecode($str);
				$str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
			}
			while ($oldstr !== $str);
			unset($oldstr);
		}

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 */
		$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
		$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

		// Remove Invisible Characters Again!
		$str = remove_invisible_characters($str);

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * NOTE: we deal with spaces between characters later.
		 * NOTE: preg_replace was found to be amazingly slow here on
		 * large blocks of data, so we use str_replace.
		 */
		$str = str_replace("\t", ' ', $str);

		// Capture converted string for later comparison
		$converted_string = $str;

		// Remove Strings that are never allowed
		$str = $this->_do_never_allowed($str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 * <?xml
		 *
		 * But it doesn't seem to pose a problem.
		 */
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and
			// closing tags every so often so we skip those and only
			// do the long opening tags.
			$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
		}
		else
		{
			$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
		}

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 */
		$words = array(
			'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
			'vbs', 'script', 'base64', 'applet', 'alert', 'document',
			'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
		);

		foreach ($words as $word)
		{
			// Allow optional whitespace after every letter of the word
			$word = implode('\s*', str_split($word)).'\s*';

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			// (substr() drops the trailing '\s*' that was appended above)
			$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 * We used to do some version comparisons and use of stripos(),
		 * but it is dog slow compared to these simplified non-capturing
		 * preg_match(), especially if the pattern exists in the string
		 *
		 * Note: It was reported that not only space characters, but all in
		 * the following pattern can be parsed as separators between a tag name
		 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
		 * ... however, remove_invisible_characters() above already strips the
		 * hex-encoded ones, so we'll skip them below.
		 */
		do
		{
			$original = $str;

			if (preg_match('/<a/i', $str))
			{
				$str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
			}

			if (preg_match('/<img/i', $str))
			{
				$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
			}

			if (preg_match('/script|xss/i', $str))
			{
				$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
			}
		}
		while ($original !== $str);
		unset($original);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 */
		$pattern = '#'
			.'<((?<slash>/*\s*)((?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
			.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
			// optional attributes
			.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
			.'[^\s\042\047>/=]+' // attribute characters
			// optional attribute-value
				.'(?:\s*=' // attribute-value separator
					.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
				.')?' // end optional attribute-value group
			.')*)' // end optional attributes group
			.'[^>]*)(?<closeTag>\>)?#isS';

		// Note: It would be nice to optimize this for speed, BUT
		//       only matching the naughty elements here results in
		//       false positives and in turn - vulnerabilities!
		do
		{
			$old_str = $str;
			$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
		}
		while ($old_str !== $str);
		unset($old_str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed. Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:	eval&#40;'some code'&#41;
		 */
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
			'\\1\\2&#40;\\3&#41;',
			$str
		);

		// Final clean up
		// This adds a bit of extra precaution in case
		// something got through the above filters
		$str = $this->_do_never_allowed($str);

		/*
		 * Images are Handled in a Special Way
		 * - Essentially, we want to know that after all of the character
		 * conversion is done whether any unwanted, likely XSS, code was found.
		 * If not, we return TRUE, as the image is clean.
		 * However, if the string post-conversion does not match the
		 * string post-removal of XSS, then it fails, as there was unwanted XSS
		 * code found and removed/changed during processing.
		 */
		if ($is_image === TRUE)
		{
			return ($str === $converted_string);
		}

		return $str;
	}
566
 
567
	// --------------------------------------------------------------------
568
 
569
	/**
	 * XSS Hash
	 *
	 * Returns the XSS hash, creating it on first use: the hex form of
	 * 16 random bytes, or an md5 of uniqid() when no random bytes
	 * could be obtained.
	 *
	 * @see		CI_Security::$_xss_hash
	 * @return	string	XSS hash
	 */
	public function xss_hash()
	{
		// Already generated during this request
		if ($this->_xss_hash !== NULL)
		{
			return $this->_xss_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_xss_hash = bin2hex($bytes);
		}

		return $this->_xss_hash;
	}
589
 
590
	// --------------------------------------------------------------------
591
 
592
	/**
	 * Get random bytes
	 *
	 * Tries, in order: random_bytes(), mcrypt_create_iv(), /dev/urandom
	 * and openssl_random_pseudo_bytes().
	 *
	 * @param	int	$length	Output length (a positive integer or digit string)
	 * @return	string|bool	Random bytes, or FALSE on invalid length or when
	 *				no source could deliver them
	 */
	public function get_random_bytes($length)
	{
		if (empty($length) OR ! ctype_digit((string) $length))
		{
			return FALSE;
		}

		// Normalize once so that every source below receives a real integer
		// (a digit string such as '16' passes the validation above)
		$length = (int) $length;

		if (function_exists('random_bytes'))
		{
			try
			{
				return random_bytes($length);
			}
			catch (Exception $e)
			{
				// If random_bytes() can't do the job, we can't either ...
				// There's no point in using fallbacks.
				log_message('error', $e->getMessage());
				return FALSE;
			}
		}

		// Unfortunately, none of the following PRNGs is guaranteed to exist ...
		if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
		{
			return $output;
		}

		if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
		{
			// Try not to waste entropy ...
			is_php('5.4') && stream_set_chunk_size($fp, $length);
			$output = fread($fp, $length);
			fclose($fp);

			// fread() may return fewer bytes than requested; only accept a
			// complete read. The byte-offset check is used instead of strlen()
			// so the result does not depend on mbstring function overloading.
			if ($output !== FALSE && isset($output[$length - 1]))
			{
				return $output;
			}
		}

		if (function_exists('openssl_random_pseudo_bytes'))
		{
			return openssl_random_pseudo_bytes($length);
		}

		return FALSE;
	}
647
 
648
	// --------------------------------------------------------------------
649
 
650
	/**
	 * HTML Entities Decode
	 *
	 * A replacement for html_entity_decode()
	 *
	 * The reason we are not using html_entity_decode() by itself is because
	 * while it is not technically correct to leave out the semicolon
	 * at the end of an entity most browsers will still interpret the entity
	 * correctly. html_entity_decode() does not convert entities without
	 * semicolons, so we are left with our own little solution here. Bummer.
	 *
	 * Decoding is repeated until the string no longer changes, so
	 * multiply-encoded entities are fully decoded.
	 *
	 * @link	http://php.net/html-entity-decode
	 *
	 * @param	string	$str		Input
	 * @param	string	$charset	Character set (defaults to $this->charset)
	 * @return	string
	 */
	public function entity_decode($str, $charset = NULL)
	{
		// Nothing to decode without an ampersand
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		// Entity table, built on the first call only (with that call's charset)
		static $_entities;

		isset($charset) OR $charset = $this->charset;
		$flag = is_php('5.4')
			? ENT_COMPAT | ENT_HTML5
			: ENT_COMPAT;

		if ( ! isset($_entities))
		{
			$_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

			// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
			// entities to the array manually
			if ($flag === ENT_COMPAT)
			{
				$_entities[':'] = '&colon;';
				$_entities['('] = '&lpar;';
				$_entities[')'] = '&rpar;';
				$_entities["\n"] = '&NewLine;';
				$_entities["\t"] = '&Tab;';
			}
		}

		do
		{
			$str_compare = $str;

			// Decode standard entities, avoiding false positives
			// (named entities that are missing their trailing semicolon)
			if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
			{
				$replace = array();
				$matches = array_unique(array_map('strtolower', $matches[0]));

				// Iterate by value: the elements are only read, and a by-reference
				// loop variable would stay bound to the array after the loop
				foreach ($matches as $match)
				{
					if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
					{
						$replace[$match] = $char;
					}
				}

				$str = str_replace(array_keys($replace), array_values($replace), $str);
			}

			// Decode numeric & UTF16 two byte entities
			// (a missing trailing semicolon is added first so html_entity_decode() accepts them)
			$str = html_entity_decode(
				preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
				$flag,
				$charset
			);

			// Without ENT_HTML5, apply the (manually extended) table ourselves
			if ($flag === ENT_COMPAT)
			{
				$str = str_replace(array_values($_entities), array_keys($_entities), $str);
			}
		}
		while ($str_compare !== $str);
		return $str;
	}
732
 
733
	// --------------------------------------------------------------------
734
 
735
	/**
	 * Sanitize Filename
	 *
	 * Strips invisible characters and every sequence listed in
	 * $filename_bad_chars from a file name. Unless $relative_path is
	 * set, './' and '/' are stripped as well.
	 *
	 * @param	string	$str		Input file name
	 * @param 	bool	$relative_path	Whether to preserve paths
	 * @return	string
	 */
	public function sanitize_filename($str, $relative_path = FALSE)
	{
		$blacklist = $this->filename_bad_chars;

		if ( ! $relative_path)
		{
			// Path separators are only tolerated when paths must be preserved
			array_push($blacklist, './', '/');
		}

		$str = remove_invisible_characters($str, FALSE);

		// Repeat until stable: removing one sequence may join the
		// surrounding characters into another blacklisted sequence
		do
		{
			$before = $str;
			$str = str_replace($blacklist, '', $str);
		}
		while ($before !== $str);

		return stripslashes($str);
	}
763
 
764
	// ----------------------------------------------------------------
765
 
766
	/**
	 * Strip Image Tags
	 *
	 * Replaces every <img> tag with the value of its src attribute.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	public function strip_image_tags($str)
	{
		// src value wrapped in single or double quotes
		$quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';

		// src value without quotes
		$bare_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

		// In both patterns the second capture group holds the src value
		return preg_replace(array($quoted_src, $bare_src), '\\2', $str);
	}
783
 
784
	// ----------------------------------------------------------------
785
 
786
	/**
	 * URL-decode taking spaces into account
	 *
	 * Callback that removes whitespace from a percent-encoded sequence and
	 * decodes the result. A match that contains no whitespace is returned
	 * untouched.
	 *
	 * @see		https://github.com/bcit-ci/CodeIgniter/issues/4877
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _urldecodespaces($matches)
	{
		$raw = $matches[0];
		$compact = preg_replace('#\s+#', '', $raw);

		// No whitespace was present - leave the match exactly as it was
		if ($compact === $raw)
		{
			return $raw;
		}

		return rawurldecode($compact);
	}
801
 
802
	// ----------------------------------------------------------------
803
 
804
	/**
	 * Compact Exploded Words
	 *
	 * Callback method for xss_clean() that strips the whitespace out of
	 * things like 'j a v a s c r i p t'. The second capture group (the
	 * character following the word) is appended unchanged.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _compact_exploded_words($matches)
	{
		$word = preg_replace('/\s+/s', '', $matches[1]);

		return $word.$matches[2];
	}
818
 
819
	// --------------------------------------------------------------------
820
 
821
	/**
	 * Sanitize Naughty HTML
	 *
	 * Callback method for xss_clean() to remove naughty HTML elements.
	 *
	 * Unclosed tags and blacklisted elements are escaped; for any other
	 * tag the attributes are filtered one by one and blacklisted ones
	 * are replaced with 'xss=removed'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches	Uses index 1 and the named groups 'slash',
	 *					'tagName', 'attributes' and 'closeTag'
	 * @return	string
	 */
	protected function _sanitize_naughty_html($matches)
	{
		static $naughty_tags    = array(
			'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
			'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
			'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
			'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
		);

		static $evil_attributes = array(
			'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
		);

		// Unclosed tags are escaped as they are
		if (empty($matches['closeTag']))
		{
			return '&lt;'.$matches[1];
		}

		// Blacklisted elements are escaped completely
		if (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
		{
			return '&lt;'.$matches[1].'&gt;';
		}

		// Nothing to filter when no attributes were captured
		if ( ! isset($matches['attributes']))
		{
			return $matches[0];
		}

		// The already filtered attributes are collected here
		$kept = array();

		// Attribute-catching pattern
		$attributes_pattern = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// optional attribute-value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
			.'#i';

		// Blacklist pattern for evil attribute names
		$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

		// Each iteration filters a single attribute
		do
		{
			// Strip any non-alpha characters that may precede an attribute.
			// Browsers often parse these incorrectly and that has been a
			// source of numerous XSS issues we've had.
			$matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);

			if ( ! preg_match($attributes_pattern, $matches['attributes'], $found, PREG_OFFSET_CAPTURE))
			{
				// No (valid) attribute found? Discard everything else inside the tag
				break;
			}

			// Evil by name, or an equals sign followed by no value at all
			$is_evil = preg_match($is_evil_pattern, $found['name'][0])
				OR (trim($found['value'][0]) === '');

			$kept[] = $is_evil ? 'xss=removed' : $found[0][0];

			// Continue right behind the attribute that was just handled
			$matches['attributes'] = substr($matches['attributes'], $found[0][1] + strlen($found[0][0]));
		}
		while ($matches['attributes'] !== '');

		$kept = empty($kept)
			? ''
			: ' '.implode(' ', $kept);

		return '<'.$matches['slash'].$matches['tagName'].$kept.'>';
	}
909
 
910
	// --------------------------------------------------------------------
911
 
912
	/**
	 * JS Link Removal
	 *
	 * Callback method for xss_clean() to sanitize links.
	 *
	 * Working on one matched tag at a time limits the PCRE backtracks,
	 * making it more performance friendly and preventing
	 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in PHP 5.2+ on
	 * link-heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match	$match[0] is the whole tag, $match[1] its attribute part
	 * @return	string
	 */
	protected function _js_link_removal($match)
	{
		// Only well-formed, quoted attributes survive the filter
		$attributes = $this->_filter_attributes($match[1]);

		// Cut out an href up to and including the first dangerous token
		$cleaned = preg_replace(
			'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
			'',
			$attributes
		);

		// Swap the original attribute part of the tag for the cleaned one
		return str_replace($match[1], $cleaned, $match[0]);
	}
937
 
938
	// --------------------------------------------------------------------
939
 
940
	/**
	 * JS Image Removal
	 *
	 * Callback method for xss_clean() to sanitize image tags.
	 *
	 * Working on one matched tag at a time limits the PCRE backtracks,
	 * making it more performance friendly and preventing
	 * PREG_BACKTRACK_LIMIT_ERROR from being triggered in PHP 5.2+ on
	 * image tag heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match	$match[0] is the whole tag, $match[1] its attribute part
	 * @return	string
	 */
	protected function _js_img_removal($match)
	{
		// Only well-formed, quoted attributes survive the filter
		$attributes = $this->_filter_attributes($match[1]);

		// Cut out a src up to and including the first dangerous token
		$cleaned = preg_replace(
			'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
			'',
			$attributes
		);

		// Swap the original attribute part of the tag for the cleaned one
		return str_replace($match[1], $cleaned, $match[0]);
	}
965
 
966
	// --------------------------------------------------------------------
967
 
968
	/**
	 * Attribute Conversion
	 *
	 * Callback that escapes angle brackets and doubles backslashes
	 * inside a matched attribute.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _convert_attribute($match)
	{
		// Single-character keys whose replacements contain none of the keys,
		// so one translation pass gives the same result as chained replaces
		$escapes = array(
			'>' => '&gt;',
			'<' => '&lt;',
			'\\' => '\\\\'
		);

		return strtr($match[0], $escapes);
	}
979
 
980
	// --------------------------------------------------------------------
981
 
982
	/**
	 * Filter Attributes
	 *
	 * Keeps only the quoted name="value" attributes found in the input,
	 * strips any slash-star comments from them and returns them
	 * concatenated.
	 *
	 * @used-by	CI_Security::_js_img_removal()
	 * @used-by	CI_Security::_js_link_removal()
	 * @param	string	$str
	 * @return	string	Empty string when no quoted attribute was found
	 */
	protected function _filter_attributes($str)
	{
		if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
		{
			return '';
		}

		$filtered = '';

		foreach ($found[0] as $attribute)
		{
			$filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
		}

		return $filtered;
	}
1005
 
1006
	// --------------------------------------------------------------------
1007
 
1008
	/**
	 * HTML Entity Decode Callback
	 *
	 * Decodes the entities of the matched text while keeping the '&' of
	 * URL query parameters (e.g. "&name=value") intact: those ampersands
	 * are swapped for the XSS hash before decoding and restored afterwards.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _decode_entity($match)
	{
		$placeholder = $this->xss_hash();

		// Protect GET variables in URLs
		$protected = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $placeholder.'\\1=\\2', $match[0]);

		// Decode, then un-protect URL GET vars
		$decoded = $this->entity_decode($protected, $this->charset);

		return str_replace($placeholder, '&', $decoded);
	}
1028
 
1029
	// --------------------------------------------------------------------
1030
 
1031
	/**
	 * Do Never Allowed
	 *
	 * Applies the $_never_allowed_str replacements, then replaces every
	 * match of the $_never_allowed_regex patterns with '[removed]'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param 	string
	 * @return 	string
	 */
	protected function _do_never_allowed($str)
	{
		// Literal strings first
		$needles = array_keys($this->_never_allowed_str);
		$str = str_replace($needles, $this->_never_allowed_str, $str);

		// Then the patterns: case-insensitive, with the dot matching newlines
		foreach ($this->_never_allowed_regex as $fragment)
		{
			$pattern = '#'.$fragment.'#is';
			$str = preg_replace($pattern, '[removed]', $str);
		}

		return $str;
	}
1049
 
1050
	// --------------------------------------------------------------------
1051
 
1052
	/**
	 * Set CSRF Hash
	 *
	 * Makes sure $_csrf_hash holds a value and returns it. A well-formed
	 * hash found in the CSRF cookie is reused; otherwise a new random one
	 * is generated. No cookie is sent from here.
	 *
	 * @return	string
	 */
	protected function _csrf_set_hash()
	{
		// Already determined - nothing to do
		if ($this->_csrf_hash !== NULL)
		{
			return $this->_csrf_hash;
		}

		$cookie_name = $this->_csrf_cookie_name;

		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail
		if (isset($_COOKIE[$cookie_name]) && is_string($_COOKIE[$cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1)
		{
			$this->_csrf_hash = $_COOKIE[$cookie_name];
			return $this->_csrf_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_csrf_hash = bin2hex($bytes);
		}

		return $this->_csrf_hash;
	}
1079
 
1080
}