Subversion-Projekte lars-tiefland.ci

Revision

Revision 2257 | Details | Vergleich mit vorheriger | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
68 lars 1
<?php
2
/**
3
 * CodeIgniter
4
 *
5
 * An open source application development framework for PHP
6
 *
7
 * This content is released under the MIT License (MIT)
8
 *
2414 lars 9
 * Copyright (c) 2014 - 2019, British Columbia Institute of Technology
68 lars 10
 *
11
 * Permission is hereby granted, free of charge, to any person obtaining a copy
12
 * of this software and associated documentation files (the "Software"), to deal
13
 * in the Software without restriction, including without limitation the rights
14
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
 * copies of the Software, and to permit persons to whom the Software is
16
 * furnished to do so, subject to the following conditions:
17
 *
18
 * The above copyright notice and this permission notice shall be included in
19
 * all copies or substantial portions of the Software.
20
 *
21
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
 * THE SOFTWARE.
28
 *
29
 * @package	CodeIgniter
30
 * @author	EllisLab Dev Team
31
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
2414 lars 32
 * @copyright	Copyright (c) 2014 - 2019, British Columbia Institute of Technology (https://bcit.ca/)
33
 * @license	https://opensource.org/licenses/MIT	MIT License
68 lars 34
 * @link	https://codeigniter.com
35
 * @since	Version 1.0.0
36
 * @filesource
37
 */
38
defined('BASEPATH') OR exit('No direct script access allowed');
39
 
40
/**
41
 * Security Class
42
 *
43
 * @package		CodeIgniter
44
 * @subpackage	Libraries
45
 * @category	Security
46
 * @author		EllisLab Dev Team
47
 * @link		https://codeigniter.com/user_guide/libraries/security.html
48
 */
49
class CI_Security {
50
 
51
	/**
	 * Substrings removed from file names by sanitize_filename()
	 *
	 * The entries are handed to str_replace() as-is, so the
	 * percent-encoded forms are matched literally (not decoded)
	 * and the list order is the replacement order.
	 *
	 * @var	array
	 */
	public $filename_bad_chars = array(
		'../',
		'<!--',
		'-->',
		'<',
		'>',
		"'",
		'"',
		'&',
		'$',
		'#',
		'{',
		'}',
		'[',
		']',
		'=',
		';',
		'?',
		'%20',
		'%22',
		'%3c',		// <
		'%253c',	// < (double-encoded)
		'%3e',		// >
		'%0e',		// originally annotated as ">" - NOTE(review): 0x0E is not ">", confirm intent
		'%28',		// (
		'%29',		// )
		'%2528',	// ( (double-encoded)
		'%26',		// &
		'%24',		// $
		'%3f',		// ?
		'%3b',		// ;
		'%3d'		// =
	);

	/**
	 * Character set
	 *
	 * Default value only; the constructor replaces it with the
	 * upper-cased 'charset' configuration item.
	 *
	 * @var	string
	 */
	public $charset = 'UTF-8';

	/**
	 * XSS Hash
	 *
	 * Random marker used to protect URL query separators while
	 * entities are being decoded. Generated lazily by xss_hash().
	 *
	 * @var	string
	 */
	protected $_xss_hash;

	/**
	 * CSRF Hash
	 *
	 * Token value stored in the Cross Site Request Forgery
	 * protection cookie.
	 *
	 * @var	string
	 */
	protected $_csrf_hash;

	/**
	 * CSRF Expire time
	 *
	 * Lifetime of the CSRF protection cookie, in seconds.
	 * The default is two hours.
	 *
	 * @var	int
	 */
	protected $_csrf_expire = 7200;

	/**
	 * CSRF Token name
	 *
	 * Name of the POST field that carries the CSRF token.
	 *
	 * @var	string
	 */
	protected $_csrf_token_name = 'ci_csrf_token';

	/**
	 * CSRF Cookie name
	 *
	 * Name of the cookie that carries the CSRF token.
	 *
	 * @var	string
	 */
	protected $_csrf_cookie_name = 'ci_csrf_token';

	/**
	 * Literal strings that are never allowed
	 *
	 * Maps each forbidden substring to its replacement.
	 *
	 * @var	array
	 */
	protected $_never_allowed_str = array(
		'document.cookie'   => '[removed]',
		'(document).cookie' => '[removed]',
		'document.write'    => '[removed]',
		'(document).write'  => '[removed]',
		'.parentNode'       => '[removed]',
		'.innerHTML'        => '[removed]',
		'-moz-binding'      => '[removed]',
		'<!--'              => '&lt;!--',
		'-->'               => '--&gt;',
		'<![CDATA['         => '&lt;![CDATA[',
		'<comment>'         => '&lt;comment&gt;',
		'<%'                => '&lt;&#37;'
	);

	/**
	 * Regular expressions that are never allowed
	 *
	 * Pattern bodies only; _do_never_allowed() adds the delimiters
	 * and modifiers and replaces every match with '[removed]'.
	 *
	 * @var	array
	 */
	protected $_never_allowed_regex = array(
		'javascript\s*:',
		'(\(?document\)?|\(?window\)?(\.document)?)\.(location|on\w*)',
		'expression\s*(\(|&\#40;)', // CSS and IE
		'vbscript\s*:', // IE, surprise!
		'wscript\s*:', // IE
		'jscript\s*:', // IE
		'vbs\s*:', // IE
		'Redirect\s+30\d',
		"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
	);
166
 
167
	/**
	 * Class constructor
	 *
	 * When CSRF protection is enabled, pulls the CSRF settings from
	 * the configuration, prefixes the cookie name and initialises the
	 * token. The character set is always taken from the configuration.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		if (config_item('csrf_protection'))
		{
			// Each configured item overrides the matching "_"-prefixed property
			$overridable = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');

			foreach ($overridable as $item)
			{
				$configured = config_item($item);

				if ($configured === NULL)
				{
					continue;
				}

				$property = '_'.$item;
				$this->$property = $configured;
			}

			// Prepend the application specific cookie prefix, if there is one
			$prefix = config_item('cookie_prefix');

			if ($prefix)
			{
				$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
			}

			// Reuse the token from the cookie or create a new one
			$this->_csrf_set_hash();
		}

		$this->charset = strtoupper(config_item('charset'));

		log_message('info', 'Security Class Initialized');
	}
200
 
201
	// --------------------------------------------------------------------
202
 
203
	/**
	 * CSRF Verify
	 *
	 * For non-POST requests only the CSRF cookie is (re)sent. For POST
	 * requests the submitted token is compared against the cookie,
	 * unless the current URI matches one of the 'csrf_exclude_uris'
	 * patterns. The token is removed from $_POST, the hash and cookie
	 * are refreshed, and csrf_show_error() is called on a mismatch.
	 *
	 * @return	CI_Security|bool	$this, or FALSE when csrf_set_cookie() refuses to send the cookie
	 */
	public function csrf_verify()
	{
		// REQUEST_METHOD may be missing (e.g. CLI); treat that as "not a POST"
		// instead of reading an undefined index and passing NULL to strtoupper()
		$method = isset($_SERVER['REQUEST_METHOD'])
			? strtoupper($_SERVER['REQUEST_METHOD'])
			: '';

		// If it's not a POST request we will set the CSRF cookie
		if ($method !== 'POST')
		{
			return $this->csrf_set_cookie();
		}

		// Check if URI has been whitelisted from CSRF checks
		if ($exclude_uris = config_item('csrf_exclude_uris'))
		{
			$uri = load_class('URI', 'core');
			foreach ($exclude_uris as $excluded)
			{
				// Each entry is a regular expression anchored to the whole URI string
				if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
				{
					return $this;
				}
			}
		}

		// Check CSRF token validity, but don't error on mismatch just yet - we'll want to regenerate
		$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
			&& is_string($_POST[$this->_csrf_token_name]) && is_string($_COOKIE[$this->_csrf_cookie_name])
			&& hash_equals($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name]);

		// We kill this since we're done and we don't want to pollute the _POST array
		unset($_POST[$this->_csrf_token_name]);

		// Regenerate on every submission?
		if (config_item('csrf_regenerate'))
		{
			// Nothing should last forever
			unset($_COOKIE[$this->_csrf_cookie_name]);
			$this->_csrf_hash = NULL;
		}

		$this->_csrf_set_hash();
		$this->csrf_set_cookie();

		if ($valid !== TRUE)
		{
			$this->csrf_show_error();
		}

		log_message('info', 'CSRF token verified');
		return $this;
	}
256
 
257
	// --------------------------------------------------------------------
258
 
259
	/**
	 * CSRF Set Cookie
	 *
	 * Sends the current CSRF hash as a cookie, using the cookie
	 * path, domain, secure and httponly settings from the configuration.
	 *
	 * @codeCoverageIgnore
	 * @return	CI_Security|bool	$this, or FALSE when a secure cookie is required but the request is not HTTPS
	 */
	public function csrf_set_cookie()
	{
		$expires_at = time() + $this->_csrf_expire;
		$secure_only = (bool) config_item('cookie_secure');

		// A secure-only cookie is not sent over a plain HTTP connection
		if ($secure_only && ! is_https())
		{
			return FALSE;
		}

		$path = config_item('cookie_path');
		$domain = config_item('cookie_domain');
		$http_only = config_item('cookie_httponly');

		setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expires_at, $path, $domain, $secure_only, $http_only);
		log_message('info', 'CSRF cookie sent');

		return $this;
	}
288
 
289
	// --------------------------------------------------------------------
290
 
291
	/**
	 * Show CSRF Error
	 *
	 * Reports a rejected request through show_error() with
	 * HTTP status 403.
	 *
	 * @return	void
	 */
	public function csrf_show_error()
	{
		$message = 'The action you have requested is not allowed.';
		$status_code = 403;

		show_error($message, $status_code);
	}
300
 
301
	// --------------------------------------------------------------------
302
 
303
	/**
	 * Get CSRF Hash
	 *
	 * Accessor for the current CSRF token value.
	 *
	 * @see		CI_Security::$_csrf_hash
	 * @return 	string	CSRF hash
	 */
	public function get_csrf_hash()
	{
		$hash = $this->_csrf_hash;

		return $hash;
	}
313
 
314
	// --------------------------------------------------------------------
315
 
316
	/**
	 * Get CSRF Token Name
	 *
	 * Accessor for the name of the CSRF token field.
	 *
	 * @see		CI_Security::$_csrf_token_name
	 * @return	string	CSRF token name
	 */
	public function get_csrf_token_name()
	{
		$token_name = $this->_csrf_token_name;

		return $token_name;
	}
326
 
327
	// --------------------------------------------------------------------
328
 
329
	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting Hacks can be
	 * prevented.  This method does a fair amount of work but
	 * it is extremely thorough, designed to prevent even the
	 * most obscure XSS attempts.  Nothing is ever 100% foolproof,
	 * of course, but I haven't been able to get anything past
	 * the filter.
	 *
	 * Note: Should only be used to deal with data upon submission.
	 *	 It's not something that should be used for general
	 *	 runtime processing.
	 *
	 * Arrays are cleaned element by element (recursively, and always
	 * in non-image mode). In image mode the return value is a boolean
	 * telling whether the filters left the input unchanged.
	 *
	 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
	 * 		Based in part on some code and ideas from Bitflux.
	 *
	 * @link	http://ha.ckers.org/xss.html
	 * 		To help develop this script I used this great list of
	 *		vulnerabilities along with a few other hacks I've
	 *		harvested from examining vulnerabilities in other programs.
	 *
	 * @param	string|string[]	$str		Input data
	 * @param 	bool		$is_image	Whether the input is an image
	 * @return	string|string[]|bool	Cleaned data, or TRUE/FALSE when $is_image is TRUE
	 */
	public function xss_clean($str, $is_image = FALSE)
	{
		// Is the string an array?
		if (is_array($str))
		{
			// Iterate by value: the element is rewritten through its key,
			// so no reference (and no dangling reference) is needed.
			foreach ($str as $key => $value)
			{
				$str[$key] = $this->xss_clean($value);
			}

			return $str;
		}

		// Remove Invisible Characters
		$str = remove_invisible_characters($str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Use rawurldecode() so it does not remove plus signs
		 */
		if (stripos($str, '%') !== false)
		{
			// Repeat until stable so multiply-encoded input is fully decoded
			do
			{
				$oldstr = $str;
				$str = rawurldecode($str);
				$str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
			}
			while ($oldstr !== $str);
			unset($oldstr);
		}

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 */
		$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
		$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

		// Remove Invisible Characters Again!
		$str = remove_invisible_characters($str);

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * NOTE: we deal with spaces between characters later.
		 * NOTE: preg_replace was found to be amazingly slow here on
		 * large blocks of data, so we use str_replace.
		 */
		$str = str_replace("\t", ' ', $str);

		// Capture converted string for later comparison
		$converted_string = $str;

		// Remove Strings that are never allowed
		$str = $this->_do_never_allowed($str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 * <?xml
		 *
		 * But it doesn't seem to pose a problem.
		 */
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and
			// closing tags every so often so we skip those and only
			// do the long opening tags.
			$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
		}
		else
		{
			$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
		}

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 */
		$words = array(
			'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
			'vbs', 'script', 'base64', 'applet', 'alert', 'document',
			'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
		);

		foreach ($words as $word)
		{
			$word = implode('\s*', str_split($word)).'\s*';

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 * We used to do some version comparisons and use of stripos(),
		 * but it is dog slow compared to these simplified non-capturing
		 * preg_match(), especially if the pattern exists in the string
		 *
		 * Note: It was reported that not only space characters, but all in
		 * the following pattern can be parsed as separators between a tag name
		 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
		 * ... however, remove_invisible_characters() above already strips the
		 * hex-encoded ones, so we'll skip them below.
		 */
		do
		{
			$original = $str;

			if (preg_match('/<a/i', $str))
			{
				$str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
			}

			if (preg_match('/<img/i', $str))
			{
				$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
			}

			if (preg_match('/script|xss/i', $str))
			{
				$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
			}
		}
		while ($original !== $str);
		unset($original);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 */
		$pattern = '#'
			.'<((?<slash>/*\s*)((?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
			.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
			// optional attributes
			.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
			.'[^\s\042\047>/=]+' // attribute characters
			// optional attribute-value
				.'(?:\s*=' // attribute-value separator
					.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
				.')?' // end optional attribute-value group
			.')*)' // end optional attributes group
			.'[^>]*)(?<closeTag>\>)?#isS';

		// Note: It would be nice to optimize this for speed, BUT
		//       only matching the naughty elements here results in
		//       false positives and in turn - vulnerabilities!
		do
		{
			$old_str = $str;
			$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
		}
		while ($old_str !== $str);
		unset($old_str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed. Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:	eval&#40;'some code'&#41;
		 */
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
			'\\1\\2&#40;\\3&#41;',
			$str
		);

		// Same thing, but for "tag functions" (e.g. eval`some code`)
		// See https://github.com/bcit-ci/CodeIgniter/issues/5420
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)`(.*?)`#si',
			'\\1\\2&#96;\\3&#96;',
			$str
		);

		// Final clean up
		// This adds a bit of extra precaution in case
		// something got through the above filters
		$str = $this->_do_never_allowed($str);

		/*
		 * Images are Handled in a Special Way
		 * - Essentially, we want to know that after all of the character
		 * conversion is done whether any unwanted, likely XSS, code was found.
		 * If not, we return TRUE, as the image is clean.
		 * However, if the string post-conversion does not match the
		 * string post-removal of XSS, then it fails, as there was unwanted XSS
		 * code found and removed/changed during processing.
		 */
		if ($is_image === TRUE)
		{
			return ($str === $converted_string);
		}

		return $str;
	}
577
 
578
	// --------------------------------------------------------------------
579
 
580
	/**
	 * XSS Hash
	 *
	 * Returns the XSS hash, creating it on first use from 16 random
	 * bytes (hex-encoded) or, when get_random_bytes() fails, from an
	 * md5() of uniqid().
	 *
	 * @see		CI_Security::$_xss_hash
	 * @return	string	XSS hash
	 */
	public function xss_hash()
	{
		// Already generated during this request
		if ($this->_xss_hash !== NULL)
		{
			return $this->_xss_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_xss_hash = bin2hex($bytes);
		}

		return $this->_xss_hash;
	}
600
 
601
	// --------------------------------------------------------------------
602
 
603
	/**
	 * Get random bytes
	 *
	 * Tries, in order: random_bytes(), mcrypt_create_iv(),
	 * /dev/urandom and openssl_random_pseudo_bytes().
	 *
	 * @param	int	$length	Output length
	 * @return	string	Random bytes, or FALSE if $length is not a positive
	 *			integer or no source could deliver
	 */
	public function get_random_bytes($length)
	{
		// Only non-empty, all-digit lengths are accepted
		$is_usable_length = ( ! empty($length) && ctype_digit((string) $length));

		if ( ! $is_usable_length)
		{
			return FALSE;
		}

		if (function_exists('random_bytes'))
		{
			try
			{
				// The cast is required to avoid TypeError
				return random_bytes((int) $length);
			}
			catch (Exception $e)
			{
				// If random_bytes() can't do the job, we can't either ...
				// There's no point in using fallbacks.
				log_message('error', $e->getMessage());
				return FALSE;
			}
		}

		// Unfortunately, none of the following PRNGs is guaranteed to exist ...
		if (defined('MCRYPT_DEV_URANDOM'))
		{
			$bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);

			if ($bytes !== FALSE)
			{
				return $bytes;
			}
		}

		if (is_readable('/dev/urandom'))
		{
			$handle = fopen('/dev/urandom', 'rb');

			if ($handle !== FALSE)
			{
				// Try not to waste entropy ...
				if (is_php('5.4'))
				{
					stream_set_chunk_size($handle, $length);
				}

				$bytes = fread($handle, $length);
				fclose($handle);

				if ($bytes !== FALSE)
				{
					return $bytes;
				}
			}
		}

		if (function_exists('openssl_random_pseudo_bytes'))
		{
			return openssl_random_pseudo_bytes($length);
		}

		return FALSE;
	}
658
 
659
	// --------------------------------------------------------------------
660
 
661
	/**
	 * HTML Entities Decode
	 *
	 * A replacement for html_entity_decode()
	 *
	 * The reason we are not using html_entity_decode() by itself is because
	 * while it is not technically correct to leave out the semicolon
	 * at the end of an entity most browsers will still interpret the entity
	 * correctly. html_entity_decode() does not convert entities without
	 * semicolons, so we are left with our own little solution here. Bummer.
	 *
	 * Decoding is repeated until the string stops changing, so nested
	 * encodings are resolved as well.
	 *
	 * @link	http://php.net/html-entity-decode
	 *
	 * @param	string	$str		Input
	 * @param	string	$charset	Character set (defaults to CI_Security::$charset)
	 * @return	string
	 */
	public function entity_decode($str, $charset = NULL)
	{
		// Nothing to decode without an ampersand
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		// Translation tables are cached per character set, so a call with a
		// different $charset does not reuse a table built for another encoding
		static $_entities = array();

		isset($charset) OR $charset = $this->charset;
		$flag = is_php('5.4')
			? ENT_COMPAT | ENT_HTML5
			: ENT_COMPAT;

		if ( ! isset($_entities[$charset]))
		{
			$table = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

			// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
			// entities to the array manually
			if ($flag === ENT_COMPAT)
			{
				$table[':'] = '&colon;';
				$table['('] = '&lpar;';
				$table[')'] = '&rpar;';
				$table["\n"] = '&NewLine;';
				$table["\t"] = '&Tab;';
			}

			$_entities[$charset] = $table;
		}

		$entities = $_entities[$charset];

		do
		{
			$str_compare = $str;

			// Decode standard entities, avoiding false positives
			if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
			{
				$replace = array();
				$matches = array_unique(array_map('strtolower', $matches[0]));

				// By-value iteration: nothing is written back to $matches
				foreach ($matches as $match)
				{
					if (($char = array_search($match.';', $entities, TRUE)) !== FALSE)
					{
						$replace[$match] = $char;
					}
				}

				$str = str_replace(array_keys($replace), array_values($replace), $str);
			}

			// Decode numeric & UTF16 two byte entities
			// (a missing trailing semicolon is added first)
			$str = html_entity_decode(
				preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
				$flag,
				$charset
			);

			if ($flag === ENT_COMPAT)
			{
				$str = str_replace(array_values($entities), array_keys($entities), $str);
			}
		}
		while ($str_compare !== $str);
		return $str;
	}
743
 
744
	// --------------------------------------------------------------------
745
 
746
	/**
	 * Sanitize Filename
	 *
	 * Removes invisible characters and every entry of
	 * CI_Security::$filename_bad_chars from a file name, repeating
	 * until nothing more can be removed, then strips slashes.
	 *
	 * @param	string	$str		Input file name
	 * @param 	bool	$relative_path	Whether to preserve paths
	 * @return	string
	 */
	public function sanitize_filename($str, $relative_path = FALSE)
	{
		$forbidden = $this->filename_bad_chars;

		// Unless paths are to be kept, path separators go as well
		if ( ! $relative_path)
		{
			array_push($forbidden, './', '/');
		}

		$str = remove_invisible_characters($str, FALSE);

		// A removal can expose a new forbidden sequence, so loop until stable
		while (TRUE)
		{
			$stripped = str_replace($forbidden, '', $str);

			if ($stripped === $str)
			{
				break;
			}

			$str = $stripped;
		}

		return stripslashes($str);
	}
774
 
775
	// ----------------------------------------------------------------
776
 
777
	/**
	 * Strip Image Tags
	 *
	 * Replaces every <img> tag with the value of its src attribute.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	public function strip_image_tags($str)
	{
		// src value enclosed in single or double quotes
		$quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';

		// src value without quotes
		$unquoted_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

		return preg_replace(array($quoted_src, $unquoted_src), '\\2', $str);
	}
794
 
795
	// ----------------------------------------------------------------
796
 
797
	/**
	 * URL-decode taking spaces into account
	 *
	 * Callback for xss_clean(): when a percent-sequence contains
	 * whitespace, the whitespace is removed and the result is decoded;
	 * otherwise the match is returned unchanged.
	 *
	 * @see		https://github.com/bcit-ci/CodeIgniter/issues/4877
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _urldecodespaces($matches)
	{
		$sequence = $matches[0];
		$compacted = preg_replace('#\s+#', '', $sequence);

		if ($compacted !== $sequence)
		{
			return rawurldecode($compacted);
		}

		return $sequence;
	}
812
 
813
	// ----------------------------------------------------------------
814
 
815
	/**
	 * Compact Exploded Words
	 *
	 * Callback method for xss_clean(): strips all whitespace from
	 * the captured word (e.g. 'j a v a s c r i p t') and re-appends
	 * the captured character that followed it.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _compact_exploded_words($matches)
	{
		$compacted_word = preg_replace('/\s+/s', '', $matches[1]);
		$following_char = $matches[2];

		return $compacted_word.$following_char;
	}
829
 
830
	// --------------------------------------------------------------------
831
 
832
	/**
	 * Sanitize Naughty HTML
	 *
	 * Callback method for xss_clean(). Unclosed tags and tags from the
	 * naughty list are escaped; for any other tag the attributes are
	 * re-emitted one by one, with blacklisted or value-less ones
	 * replaced by "xss=removed".
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _sanitize_naughty_html($matches)
	{
		static $naughty_tags    = array(
			'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
			'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
			'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
			'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
		);

		static $evil_attributes = array(
			'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
		);

		// An unclosed tag only gets its opening bracket escaped
		if (empty($matches['closeTag']))
		{
			return '&lt;'.$matches[1];
		}

		// A naughty element is escaped as a whole
		if (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
		{
			return '&lt;'.$matches[1].'&gt;';
		}

		// No attributes group captured: leave the match as it is
		if ( ! isset($matches['attributes']))
		{
			return $matches[0];
		}

		// Attributes that made it through the filter
		$kept = array();

		// Matches a single name=value pair
		$attributes_pattern = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// attribute-value separator followed by a single, double or non-quoted value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))'
			.'#i';

		// Matches a blacklisted attribute name
		$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

		// The part of the attribute string that still has to be processed
		$remaining = $matches['attributes'];

		// One attribute is consumed per iteration
		do
		{
			// Drop any non-alpha characters in front of the attribute.
			// Browsers often parse these incorrectly, which has been
			// behind numerous XSS issues.
			$remaining = preg_replace('#^[^a-z]+#i', '', $remaining);

			if ( ! preg_match($attributes_pattern, $remaining, $attribute, PREG_OFFSET_CAPTURE))
			{
				// Nothing (valid) left: whatever remains inside the tag is discarded
				break;
			}

			$has_evil_name = preg_match($is_evil_pattern, $attribute['name'][0]);

			// Blacklisted name, or an equals sign with an empty unquoted value
			if ($has_evil_name OR trim($attribute['value'][0]) === '')
			{
				$kept[] = 'xss=removed';
			}
			else
			{
				$kept[] = $attribute[0][0];
			}

			// Continue right after the attribute that was just handled
			$remaining = substr($remaining, $attribute[0][1] + strlen($attribute[0][0]));
		}
		while ($remaining !== '');

		$attribute_string = empty($kept)
			? ''
			: ' '.implode(' ', $kept);

		return '<'.$matches['slash'].$matches['tagName'].$attribute_string.'>';
	}
920
 
921
	// --------------------------------------------------------------------
922
 
923
	/**
	 * JS Link Removal
	 *
	 * Callback method for xss_clean() to sanitize links: the captured
	 * attribute string is filtered, any href leading up to a disallowed
	 * token is cut out, and the result is put back into the tag.
	 *
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on link-heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _js_link_removal($match)
	{
		$raw_attributes = $match[1];

		$safe_attributes = preg_replace(
			'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
			'',
			$this->_filter_attributes($raw_attributes)
		);

		return str_replace($raw_attributes, $safe_attributes, $match[0]);
	}
948
 
949
	// --------------------------------------------------------------------
950
 
951
	/**
	 * JS Image Removal
	 *
	 * Callback method for xss_clean() to sanitize image tags: the
	 * captured attribute string is filtered, any src leading up to a
	 * disallowed token is cut out, and the result is put back into the tag.
	 *
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on image tag heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _js_img_removal($match)
	{
		$raw_attributes = $match[1];

		$safe_attributes = preg_replace(
			'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|base64\s*,)#si',
			'',
			$this->_filter_attributes($raw_attributes)
		);

		return str_replace($raw_attributes, $safe_attributes, $match[0]);
	}
976
 
977
	// --------------------------------------------------------------------
978
 
979
	/**
	 * Attribute Conversion
	 *
	 * Callback for xss_clean(): escapes angle brackets and doubles
	 * backslashes inside the matched attribute.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _convert_attribute($match)
	{
		$search = array('>', '<', '\\');
		$replacement = array('&gt;', '&lt;', '\\\\');

		return str_replace($search, $replacement, $match[0]);
	}
990
 
991
	// --------------------------------------------------------------------
992
 
993
	/**
	 * Filter Attributes
	 *
	 * Keeps only the quoted name="value" pairs found in the input,
	 * concatenated in their original order, with C-style comments
	 * stripped from each of them.
	 *
	 * @used-by	CI_Security::_js_img_removal()
	 * @used-by	CI_Security::_js_link_removal()
	 * @param	string	$str
	 * @return	string
	 */
	protected function _filter_attributes($str)
	{
		// No quoted attribute at all: nothing survives
		if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
		{
			return '';
		}

		$cleaned = array();

		foreach ($found[0] as $attribute)
		{
			$cleaned[] = preg_replace('#/\*.*?\*/#s', '', $attribute);
		}

		return implode('', $cleaned);
	}
1016
 
1017
	// --------------------------------------------------------------------
1018
 
1019
	/**
	 * HTML Entity Decode Callback
	 *
	 * Decodes the entities of the matched text while keeping the "&"
	 * of URL query variables (&name=value) intact: those ampersands
	 * are swapped for the XSS hash before decoding and restored after.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _decode_entity($match)
	{
		$placeholder = $this->xss_hash();

		// Protect GET variables in URLs
		$protected = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $placeholder.'\\1=\\2', $match[0]);

		// Decode, then un-protect URL GET vars
		$decoded = $this->entity_decode($protected, $this->charset);

		return str_replace($placeholder, '&', $decoded);
	}
1039
 
1040
	// --------------------------------------------------------------------
1041
 
1042
	/**
	 * Do Never Allowed
	 *
	 * Applies the literal replacements from $_never_allowed_str, then
	 * replaces every match of each $_never_allowed_regex pattern
	 * (case-insensitive, dot matches newline) with '[removed]'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param 	string
	 * @return 	string
	 */
	protected function _do_never_allowed($str)
	{
		$forbidden = array_keys($this->_never_allowed_str);
		$substitutes = array_values($this->_never_allowed_str);
		$str = str_replace($forbidden, $substitutes, $str);

		foreach ($this->_never_allowed_regex as $expression)
		{
			$pattern = '#'.$expression.'#is';
			$str = preg_replace($pattern, '[removed]', $str);
		}

		return $str;
	}
1060
 
1061
	// --------------------------------------------------------------------
1062
 
1063
	/**
	 * Set CSRF Hash and Cookie
	 *
	 * Ensures $_csrf_hash is populated: an existing value is kept,
	 * otherwise a well-formed (32 hex characters) cookie value is
	 * adopted, otherwise a new random hash is generated.
	 *
	 * @return	string
	 */
	protected function _csrf_set_hash()
	{
		if ($this->_csrf_hash !== NULL)
		{
			return $this->_csrf_hash;
		}

		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail
		$cookie_name = $this->_csrf_cookie_name;
		$cookie_is_usable = isset($_COOKIE[$cookie_name])
			&& is_string($_COOKIE[$cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1;

		if ($cookie_is_usable)
		{
			return $this->_csrf_hash = $_COOKIE[$cookie_name];
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_csrf_hash = bin2hex($bytes);
		}

		return $this->_csrf_hash;
	}
1090
 
1091
}