Subversion-Projekte lars-tiefland.ci

Revision

Revision 2254 | Zur aktuellen Revision | Details | Vergleich mit vorheriger | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
68 lars 1
<?php
2
/**
3
 * CodeIgniter
4
 *
5
 * An open source application development framework for PHP
6
 *
7
 * This content is released under the MIT License (MIT)
8
 *
2257 lars 9
 * Copyright (c) 2014 - 2018, British Columbia Institute of Technology
68 lars 10
 *
11
 * Permission is hereby granted, free of charge, to any person obtaining a copy
12
 * of this software and associated documentation files (the "Software"), to deal
13
 * in the Software without restriction, including without limitation the rights
14
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
 * copies of the Software, and to permit persons to whom the Software is
16
 * furnished to do so, subject to the following conditions:
17
 *
18
 * The above copyright notice and this permission notice shall be included in
19
 * all copies or substantial portions of the Software.
20
 *
21
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
 * THE SOFTWARE.
28
 *
29
 * @package	CodeIgniter
30
 * @author	EllisLab Dev Team
31
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
2257 lars 32
 * @copyright	Copyright (c) 2014 - 2018, British Columbia Institute of Technology (http://bcit.ca/)
68 lars 33
 * @license	http://opensource.org/licenses/MIT	MIT License
34
 * @link	https://codeigniter.com
35
 * @since	Version 1.0.0
36
 * @filesource
37
 */
38
// Refuse to run unless BASEPATH is already defined, i.e. the file was included
// by code that set it up rather than being requested directly.
defined('BASEPATH') OR exit('No direct script access allowed');
39
 
40
/**
41
 * Security Class
42
 *
43
 * @package		CodeIgniter
44
 * @subpackage	Libraries
45
 * @category	Security
46
 * @author		EllisLab Dev Team
47
 * @link		https://codeigniter.com/user_guide/libraries/security.html
48
 */
49
class CI_Security {
50
 
51
	/**
	 * Substrings removed from file names by sanitize_filename()
	 *
	 * The entries are handed to str_replace() as-is, so they are
	 * processed in the order listed here.
	 *
	 * @var	array
	 */
	public $filename_bad_chars = array(
		// Path traversal and HTML comment/tag delimiters
		'../',
		'<!--',
		'-->',
		'<',
		'>',
		// Quotes and shell/URL meta characters
		"'",
		'"',
		'&',
		'$',
		'#',
		'{',
		'}',
		'[',
		']',
		'=',
		';',
		'?',
		// URL-encoded forms
		'%20',		// space
		'%22',		// "
		'%3c',		// <
		'%253c',	// < (double-encoded)
		'%3e',		// >
		'%0e',		// 0x0E control character
		'%28',		// (
		'%29',		// )
		'%2528',	// ( (double-encoded)
		'%26',		// &
		'%24',		// $
		'%3f',		// ?
		'%3b',		// ;
		'%3d'		// =
	);
74
 
75
	/**
	 * Character set used when decoding entities
	 *
	 * The constructor replaces this default with the upper-cased
	 * 'charset' config item.
	 *
	 * @var	string
	 */
	public $charset = 'UTF-8';

	/**
	 * XSS hash
	 *
	 * Random marker generated lazily by xss_hash(); used to protect
	 * URL query strings while entities are being decoded.
	 *
	 * @var	string
	 */
	protected $_xss_hash = NULL;

	/**
	 * CSRF hash
	 *
	 * Token value stored in the CSRF cookie and compared against the
	 * submitted form field.
	 *
	 * @var	string
	 */
	protected $_csrf_hash = NULL;

	/**
	 * CSRF cookie lifetime
	 *
	 * Number of seconds added to the current time when the CSRF cookie
	 * is sent. The default is two hours.
	 *
	 * @var	int
	 */
	protected $_csrf_expire = 7200;

	/**
	 * CSRF token name
	 *
	 * Name of the POST field that carries the CSRF token.
	 *
	 * @var	string
	 */
	protected $_csrf_token_name = 'ci_csrf_token';

	/**
	 * CSRF cookie name
	 *
	 * Name of the cookie that carries the CSRF token; the constructor
	 * prepends the configured cookie prefix, if any.
	 *
	 * @var	string
	 */
	protected $_csrf_cookie_name = 'ci_csrf_token';
129
 
130
	/**
	 * Literal strings that are always replaced
	 *
	 * Maps each forbidden substring to its replacement; applied by
	 * _do_never_allowed() through a single str_replace() call.
	 *
	 * @var	array
	 */
	protected $_never_allowed_str = array(
		// Script-ish property access and CSS bindings are dropped
		'document.cookie'   => '[removed]',
		'(document).cookie' => '[removed]',
		'document.write'    => '[removed]',
		'(document).write'  => '[removed]',
		'.parentNode'       => '[removed]',
		'.innerHTML'        => '[removed]',
		'-moz-binding'      => '[removed]',
		// Markup delimiters are turned into entities
		'<!--'              => '&lt;!--',
		'-->'               => '--&gt;',
		'<![CDATA['         => '&lt;![CDATA[',
		'<comment>'         => '&lt;comment&gt;',
		'<%'                => '&lt;&#37;'
	);
149
 
150
	/**
	 * Regular expression fragments that are always removed
	 *
	 * _do_never_allowed() wraps each fragment in '#...#is' and replaces
	 * every match with '[removed]'.
	 *
	 * @var	array
	 */
	protected $_never_allowed_regex = array(
		'javascript\s*:',
		'(\(?document\)?|\(?window\)?(\.document)?)\.(location|on\w*)',
		// CSS and IE
		'expression\s*(\(|&\#40;)',
		// IE-specific script schemes
		'vbscript\s*:',
		'wscript\s*:',
		'jscript\s*:',
		'vbs\s*:',
		'Redirect\s+30\d',
		// Optionally quoted base64 data: URIs
		"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
	);
166
 
167
	/**
	 * Class constructor
	 *
	 * When the 'csrf_protection' config item is truthy, loads the CSRF
	 * settings from the configuration, applies the cookie prefix and
	 * initializes the CSRF hash. The character set is always taken from
	 * the 'charset' config item.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		if (config_item('csrf_protection'))
		{
			// Each config item overrides the same-named, underscore-prefixed
			// property; items that are not set keep the class default.
			foreach (array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $setting)
			{
				$configured = config_item($setting);
				if ($configured !== NULL)
				{
					$this->{'_'.$setting} = $configured;
				}
			}

			// Append application specific cookie prefix
			$cookie_prefix = config_item('cookie_prefix');
			if ($cookie_prefix)
			{
				$this->_csrf_cookie_name = $cookie_prefix.$this->_csrf_cookie_name;
			}

			$this->_csrf_set_hash();
		}

		// The cast keeps strtoupper() from receiving NULL when the item is
		// not configured (deprecated as of PHP 8.1); the result is still ''.
		$this->charset = strtoupper((string) config_item('charset'));

		log_message('info', 'Security Class Initialized');
	}
200
 
201
	// --------------------------------------------------------------------
202
 
203
	/**
	 * CSRF Verify
	 *
	 * Compares the CSRF token submitted with a POST request against the
	 * one stored in the CSRF cookie. Requests using any other method only
	 * get the cookie (re)sent, and POST requests whose URI matches an
	 * entry of the 'csrf_exclude_uris' config item are not checked.
	 * A missing, non-string or mismatching token triggers csrf_show_error().
	 *
	 * @return	CI_Security|bool	$this; for non-POST requests, whatever
	 *					csrf_set_cookie() returned
	 */
	public function csrf_verify()
	{
		// If it's not a POST request we will set the CSRF cookie
		if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
		{
			return $this->csrf_set_cookie();
		}

		// Check if URI has been whitelisted from CSRF checks
		if ($exclude_uris = config_item('csrf_exclude_uris'))
		{
			$uri = load_class('URI', 'core');
			foreach ($exclude_uris as $excluded)
			{
				if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
				{
					return $this;
				}
			}
		}

		// Check CSRF token validity, but don't error on mismatch just yet - we'll want to regenerate.
		// Both values must be strings: a request can turn either one into an
		// array (e.g. token[]=x), which hash_equals() does not accept - it
		// would raise a warning or a TypeError instead of a clean mismatch.
		$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
			&& is_string($_POST[$this->_csrf_token_name])
			&& is_string($_COOKIE[$this->_csrf_cookie_name])
			&& hash_equals($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name]);

		// We kill this since we're done and we don't want to pollute the _POST array
		unset($_POST[$this->_csrf_token_name]);

		// Regenerate on every submission?
		if (config_item('csrf_regenerate'))
		{
			// Dropping both the cookie value and the cached hash forces
			// _csrf_set_hash() below to generate a fresh token
			unset($_COOKIE[$this->_csrf_cookie_name]);
			$this->_csrf_hash = NULL;
		}

		$this->_csrf_set_hash();
		$this->csrf_set_cookie();

		if ($valid !== TRUE)
		{
			// NOTE(review): presumably show_error() ends the request here - confirm
			$this->csrf_show_error();
		}

		log_message('info', 'CSRF token verified');
		return $this;
	}
255
 
256
	// --------------------------------------------------------------------
257
 
258
	/**
	 * CSRF Set Cookie
	 *
	 * Sends the CSRF cookie holding the current CSRF hash. Nothing is
	 * sent when 'cookie_secure' is enabled but the request is not HTTPS.
	 *
	 * @codeCoverageIgnore
	 * @return	CI_Security|bool	$this, or FALSE if the cookie was withheld
	 */
	public function csrf_set_cookie()
	{
		$secure_only = (bool) config_item('cookie_secure');

		// A secure-only cookie must not go out over plain HTTP
		if ($secure_only && ! is_https())
		{
			return FALSE;
		}

		$expires_at = time() + $this->_csrf_expire;

		setcookie(
			$this->_csrf_cookie_name,
			$this->_csrf_hash,
			$expires_at,
			config_item('cookie_path'),
			config_item('cookie_domain'),
			$secure_only,
			config_item('cookie_httponly')
		);

		log_message('info', 'CSRF cookie sent');

		return $this;
	}
287
 
288
	// --------------------------------------------------------------------
289
 
290
	/**
	 * Show CSRF Error
	 *
	 * Reports a failed CSRF check through show_error() with HTTP
	 * status code 403.
	 *
	 * @return	void
	 */
	public function csrf_show_error()
	{
		$status_code = 403;

		show_error('The action you have requested is not allowed.', $status_code);
	}
299
 
300
	// --------------------------------------------------------------------
301
 
302
	/**
	 * Get CSRF Hash
	 *
	 * Accessor for the current CSRF token value.
	 *
	 * @see		CI_Security::$_csrf_hash
	 * @return	string	CSRF hash (NULL while no hash has been set)
	 */
	public function get_csrf_hash()
	{
		return $this->_csrf_hash;
	}
312
 
313
	// --------------------------------------------------------------------
314
 
315
	/**
	 * Get CSRF Token Name
	 *
	 * Accessor for the name of the form field carrying the CSRF token.
	 *
	 * @see		CI_Security::$_csrf_token_name
	 * @return	string	CSRF token name
	 */
	public function get_csrf_token_name()
	{
		return $this->_csrf_token_name;
	}
325
 
326
	// --------------------------------------------------------------------
327
 
328
	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting Hacks can be
	 * prevented. The input is repeatedly decoded (URL encoding,
	 * character entities), stripped of strings that are never allowed,
	 * and has dangerous tags, attributes and function calls neutralized.
	 * Nothing is ever 100% foolproof, of course.
	 *
	 * Note: Should only be used to deal with data upon submission.
	 *	 It's not something that should be used for general
	 *	 runtime processing.
	 *
	 * Arrays are cleaned element by element (recursively); the
	 * $is_image flag is not passed on to the elements.
	 *
	 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
	 * 		Based in part on some code and ideas from Bitflux.
	 *
	 * @link	http://ha.ckers.org/xss.html
	 * 		To help develop this script I used this great list of
	 *		vulnerabilities along with a few other hacks I've
	 *		harvested from examining vulnerabilities in other programs.
	 *
	 * @param	string|string[]	$str		Input data
	 * @param 	bool		$is_image	Whether the input is an image
	 * @return	string|string[]|bool	The cleaned data; when $is_image is TRUE,
	 *					a boolean telling whether the filter left
	 *					the (decoded) input untouched
	 */
	public function xss_clean($str, $is_image = FALSE)
	{
		// Is the string an array?
		if (is_array($str))
		{
			// Iterate by value: the result is written back through the key,
			// so a reference would only leave a dangling alias behind
			foreach ($str as $key => $value)
			{
				$str[$key] = $this->xss_clean($value);
			}

			return $str;
		}

		// Remove Invisible Characters
		$str = remove_invisible_characters($str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Use rawurldecode() so it does not remove plus signs
		 */
		if (strpos($str, '%') !== FALSE)
		{
			// Decode until stable, so multiply-encoded input is fully unwrapped
			do
			{
				$oldstr = $str;
				$str = rawurldecode($str);
				$str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
			}
			while ($oldstr !== $str);
			unset($oldstr);
		}

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 */
		$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
		$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

		// Remove Invisible Characters Again!
		$str = remove_invisible_characters($str);

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like "ja<TAB>vascript".
		 * NOTE: we deal with spaces between characters later.
		 * NOTE: preg_replace was found to be amazingly slow here on
		 * large blocks of data, so we use str_replace.
		 */
		$str = str_replace("\t", ' ', $str);

		// Capture converted string for later comparison
		$converted_string = $str;

		// Remove Strings that are never allowed
		$str = $this->_do_never_allowed($str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 * <?xml
		 *
		 * But it doesn't seem to pose a problem.
		 */
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and
			// closing tags every so often so we skip those and only
			// do the long opening tags.
			$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
		}
		else
		{
			$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
		}

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 */
		$words = array(
			'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
			'vbs', 'script', 'base64', 'applet', 'alert', 'document',
			'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
		);

		foreach ($words as $word)
		{
			// Allow optional whitespace after every letter of the word
			$word = implode('\s*', str_split($word)).'\s*';

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			// (the substr() call drops the trailing '\s*' again)
			$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 * We used to do some version comparisons and use of stripos(),
		 * but it is dog slow compared to these simplified non-capturing
		 * preg_match(), especially if the pattern exists in the string
		 *
		 * Note: It was reported that not only space characters, but all in
		 * the following pattern can be parsed as separators between a tag name
		 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
		 * ... however, remove_invisible_characters() above already strips the
		 * hex-encoded ones, so we'll skip them below.
		 */
		do
		{
			$original = $str;

			if (preg_match('/<a/i', $str))
			{
				$str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
			}

			if (preg_match('/<img/i', $str))
			{
				$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
			}

			if (preg_match('/script|xss/i', $str))
			{
				$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
			}
		}
		while ($original !== $str);
		unset($original);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 */
		$pattern = '#'
			.'<((?<slash>/*\s*)((?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
			.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
			// optional attributes
			.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
			.'[^\s\042\047>/=]+' // attribute characters
			// optional attribute-value
				.'(?:\s*=' // attribute-value separator
					.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
				.')?' // end optional attribute-value group
			.')*)' // end optional attributes group
			.'[^>]*)(?<closeTag>\>)?#isS';

		// Note: It would be nice to optimize this for speed, BUT
		//       only matching the naughty elements here results in
		//       false positives and in turn - vulnerabilities!
		do
		{
			$old_str = $str;
			$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
		}
		while ($old_str !== $str);
		unset($old_str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed. Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:	eval&#40;'some code'&#41;
		 */
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
			'\\1\\2&#40;\\3&#41;',
			$str
		);

		// Same thing, but for "tag functions" (e.g. eval`some code`)
		// See https://github.com/bcit-ci/CodeIgniter/issues/5420
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)`(.*?)`#si',
			'\\1\\2&#96;\\3&#96;',
			$str
		);

		// Final clean up
		// This adds a bit of extra precaution in case
		// something got through the above filters
		$str = $this->_do_never_allowed($str);

		/*
		 * Images are Handled in a Special Way
		 * - Essentially, we want to know that after all of the character
		 * conversion is done whether any unwanted, likely XSS, code was found.
		 * If not, we return TRUE, as the image is clean.
		 * However, if the string post-conversion does not match the
		 * string post-removal of XSS, then it fails, as there was unwanted XSS
		 * code found and removed/changed during processing.
		 */
		if ($is_image === TRUE)
		{
			return ($str === $converted_string);
		}

		return $str;
	}
576
 
577
	// --------------------------------------------------------------------
578
 
579
	/**
	 * XSS Hash
	 *
	 * Returns the XSS hash, creating it on first use: 16 random bytes
	 * as hexadecimal, or an md5() of uniqid() output when no random
	 * bytes could be obtained.
	 *
	 * @see		CI_Security::$_xss_hash
	 * @return	string	XSS hash
	 */
	public function xss_hash()
	{
		// Already generated - reuse it
		if ($this->_xss_hash !== NULL)
		{
			return $this->_xss_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_xss_hash = bin2hex($bytes);
		}

		return $this->_xss_hash;
	}
599
 
600
	// --------------------------------------------------------------------
601
 
602
	/**
	 * Get random bytes
	 *
	 * Tries, in this order: random_bytes(), mcrypt_create_iv(),
	 * /dev/urandom and openssl_random_pseudo_bytes().
	 *
	 * @param	int	$length	Output length
	 * @return	string|bool	Random bytes, or FALSE if $length is empty or
	 *				not made of digits only, or no source delivered
	 */
	public function get_random_bytes($length)
	{
		// Reject empty values and anything that isn't purely digits
		if (empty($length) OR ! ctype_digit((string) $length))
		{
			return FALSE;
		}

		if (function_exists('random_bytes'))
		{
			try
			{
				// The cast is required to avoid TypeError
				return random_bytes((int) $length);
			}
			catch (Exception $e)
			{
				// If random_bytes() can't do the job, we can't either ...
				// There's no point in using fallbacks.
				log_message('error', $e->getMessage());
				return FALSE;
			}
		}

		// Unfortunately, none of the following PRNGs is guaranteed to exist ...
		if (defined('MCRYPT_DEV_URANDOM'))
		{
			$bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
			if ($bytes !== FALSE)
			{
				return $bytes;
			}
		}

		if (is_readable('/dev/urandom'))
		{
			$handle = fopen('/dev/urandom', 'rb');
			if ($handle !== FALSE)
			{
				// Try not to waste entropy ...
				if (is_php('5.4'))
				{
					stream_set_chunk_size($handle, $length);
				}

				$bytes = fread($handle, $length);
				fclose($handle);

				if ($bytes !== FALSE)
				{
					return $bytes;
				}
			}
		}

		if (function_exists('openssl_random_pseudo_bytes'))
		{
			return openssl_random_pseudo_bytes($length);
		}

		return FALSE;
	}
657
 
658
	// --------------------------------------------------------------------
659
 
660
	/**
	 * HTML Entities Decode
	 *
	 * A replacement for html_entity_decode()
	 *
	 * The reason we are not using html_entity_decode() by itself is because
	 * while it is not technically correct to leave out the semicolon
	 * at the end of an entity most browsers will still interpret the entity
	 * correctly. html_entity_decode() does not convert entities without
	 * semicolons, so we are left with our own little solution here. Bummer.
	 *
	 * Decoding is repeated until the string no longer changes, so nested
	 * encodings are unwrapped as well.
	 *
	 * @link	http://php.net/html-entity-decode
	 *
	 * @param	string	$str		Input
	 * @param	string	$charset	Character set; defaults to $this->charset
	 * @return	string
	 */
	public function entity_decode($str, $charset = NULL)
	{
		// Nothing to decode without an ampersand
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		// Entity table (character => lower-cased entity), built once per request
		static $_entities;

		isset($charset) OR $charset = $this->charset;
		$flag = is_php('5.4')
			? ENT_COMPAT | ENT_HTML5
			: ENT_COMPAT;

		if ( ! isset($_entities))
		{
			$_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

			// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
			// entities to the array manually
			if ($flag === ENT_COMPAT)
			{
				$_entities[':'] = '&colon;';
				$_entities['('] = '&lpar;';
				$_entities[')'] = '&rpar;';
				$_entities["\n"] = '&NewLine;';
				$_entities["\t"] = '&Tab;';
			}
		}

		do
		{
			$str_compare = $str;

			// Decode standard entities, avoiding false positives
			// (only named entities that are missing their semicolon match here)
			if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
			{
				$replace = array();
				$matches = array_unique(array_map('strtolower', $matches[0]));

				// By value on purpose: nothing is written back, and a reference
				// would outlive the loop across do/while iterations
				foreach ($matches as $match)
				{
					if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
					{
						$replace[$match] = $char;
					}
				}

				$str = str_replace(array_keys($replace), array_values($replace), $str);
			}

			// Decode numeric & UTF16 two byte entities
			// (a missing semicolon is appended first so html_entity_decode() accepts them)
			$str = html_entity_decode(
				preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
				$flag,
				$charset
			);

			if ($flag === ENT_COMPAT)
			{
				$str = str_replace(array_values($_entities), array_keys($_entities), $str);
			}
		}
		while ($str_compare !== $str);

		return $str;
	}
742
 
743
	// --------------------------------------------------------------------
744
 
745
	/**
	 * Sanitize Filename
	 *
	 * Strips invisible characters and every substring listed in
	 * $filename_bad_chars from the name, repeating until nothing is
	 * left to remove, then un-escapes backslashes.
	 *
	 * @param	string	$str		Input file name
	 * @param 	bool	$relative_path	Whether to preserve paths; if FALSE,
	 *					'./' and '/' are stripped as well
	 * @return	string
	 */
	public function sanitize_filename($str, $relative_path = FALSE)
	{
		$blacklist = $this->filename_bad_chars;

		if ( ! $relative_path)
		{
			array_push($blacklist, './', '/');
		}

		$str = remove_invisible_characters($str, FALSE);

		// A removal can expose a new forbidden sequence (e.g. '..././'),
		// so keep going until the string is stable
		do
		{
			$before = $str;
			$str = str_replace($blacklist, '', $str);
		}
		while ($before !== $str);

		return stripslashes($str);
	}
773
 
774
	// ----------------------------------------------------------------
775
 
776
	/**
	 * Strip Image Tags
	 *
	 * Replaces every <img> tag with the value of its src attribute.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	public function strip_image_tags($str)
	{
		// src value wrapped in single or double quotes
		$quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';

		// src value without quotes
		$unquoted_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

		// In both patterns the second capture group holds the src value
		return preg_replace(array($quoted_src, $unquoted_src), '\\2', $str);
	}
793
 
794
	// ----------------------------------------------------------------
795
 
796
	/**
	 * URL-decode taking spaces into account
	 *
	 * Callback for xss_clean(): when the matched percent-sequence
	 * contains whitespace, the whitespace is removed and the result is
	 * URL-decoded; otherwise the match is returned unchanged.
	 *
	 * @see		https://github.com/bcit-ci/CodeIgniter/issues/4877
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _urldecodespaces($matches)
	{
		$raw     = $matches[0];
		$compact = preg_replace('#\s+#', '', $raw);

		if ($compact === $raw)
		{
			return $raw;
		}

		return rawurldecode($compact);
	}
811
 
812
	// ----------------------------------------------------------------
813
 
814
	/**
	 * Compact Exploded Words
	 *
	 * Callback method for xss_clean(): strips all whitespace from the
	 * first capture group (e.g. 'j a v a s c r i p t') and re-appends
	 * the second one untouched.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _compact_exploded_words($matches)
	{
		$word     = preg_replace('/\s+/s', '', $matches[1]);
		$trailing = $matches[2];

		return $word.$trailing;
	}
828
 
829
	// --------------------------------------------------------------------
830
 
831
	/**
	 * Sanitize Naughty HTML
	 *
	 * Callback method for xss_clean(). Unclosed tags and blacklisted
	 * elements are escaped; for any other tag, each attribute is
	 * inspected and the blacklisted or value-less ones are replaced
	 * with 'xss=removed'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches	Match of the tag pattern built in xss_clean()
	 * @return	string
	 */
	protected function _sanitize_naughty_html($matches)
	{
		static $naughty_tags    = array(
			'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
			'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
			'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
			'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
		);

		static $evil_attributes = array(
			'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
		);

		// An unclosed tag is escaped, whatever it is
		if (empty($matches['closeTag']))
		{
			return '&lt;'.$matches[1];
		}

		// Blacklisted elements are escaped as a whole
		if (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
		{
			return '&lt;'.$matches[1].'&gt;';
		}

		// Without an attributes group there is nothing to filter
		if ( ! isset($matches['attributes']))
		{
			return $matches[0];
		}

		// Attributes that made it through the filter (or their placeholder)
		$kept = array();

		// Attribute-catching pattern
		$attributes_pattern = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// optional attribute-value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
			.'#i';

		// Blacklist pattern for evil attribute names
		$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

		// Unprocessed tail of the attribute string; one attribute is
		// consumed from its front per iteration
		$remaining = $matches['attributes'];

		do
		{
			// Strip any non-alpha characters that may precede an attribute.
			// Browsers often parse these incorrectly and that has been a
			// source of numerous XSS issues we've had.
			$remaining = preg_replace('#^[^a-z]+#i', '', $remaining);

			if ( ! preg_match($attributes_pattern, $remaining, $attribute, PREG_OFFSET_CAPTURE))
			{
				// No (valid) attribute found? Discard everything else inside the tag
				break;
			}

			// Replace the attribute if its name is blacklisted, or if it has
			// an equals sign but an empty, unquoted value
			$kept[] = (preg_match($is_evil_pattern, $attribute['name'][0]) OR trim($attribute['value'][0]) === '')
				? 'xss=removed'
				: $attribute[0][0];

			// Continue right after the attribute that was just handled
			$remaining = substr($remaining, $attribute[0][1] + strlen($attribute[0][0]));
		}
		while ($remaining !== '');

		$rendered = empty($kept)
			? ''
			: ' '.implode(' ', $kept);

		return '<'.$matches['slash'].$matches['tagName'].$rendered.'>';
	}
919
 
920
	// --------------------------------------------------------------------
921
 
922
	/**
	 * JS Link Removal
	 *
	 * Callback method for xss_clean() to sanitize links: the tag's
	 * attribute string is swapped for a filtered copy from which
	 * script-bearing href prefixes have been cut.
	 *
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on link-heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match	[0] the whole tag, [1] its attribute string
	 * @return	string
	 */
	protected function _js_link_removal($match)
	{
		// Keep only the quoted attributes
		$attributes = $this->_filter_attributes($match[1]);

		// Cut everything from 'href=' up to and including the first dangerous token
		$cleaned = preg_replace(
			'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
			'',
			$attributes
		);

		return str_replace($match[1], $cleaned, $match[0]);
	}
947
 
948
	// --------------------------------------------------------------------
949
 
950
	/**
	 * JS Image Removal
	 *
	 * Callback method for xss_clean() to sanitize image tags: the tag's
	 * attribute string is swapped for a filtered copy from which
	 * script-bearing src prefixes have been cut.
	 *
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on image tag heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match	[0] the whole tag, [1] its attribute string
	 * @return	string
	 */
	protected function _js_img_removal($match)
	{
		// Keep only the quoted attributes
		$attributes = $this->_filter_attributes($match[1]);

		// Cut everything from 'src=' up to and including the first dangerous token
		$cleaned = preg_replace(
			'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|base64\s*,)#si',
			'',
			$attributes
		);

		return str_replace($match[1], $cleaned, $match[0]);
	}
975
 
976
	// --------------------------------------------------------------------
977
 
978
	/**
	 * Attribute Conversion
	 *
	 * Callback for xss_clean(): within the matched attribute, angle
	 * brackets become entities and backslashes are doubled.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _convert_attribute($match)
	{
		$search      = array('>', '<', '\\');
		$replacement = array('&gt;', '&lt;', '\\\\');

		return str_replace($search, $replacement, $match[0]);
	}
989
 
990
	// --------------------------------------------------------------------
991
 
992
	/**
	 * Filter Attributes
	 *
	 * Returns the concatenation of all quoted attributes found in the
	 * given string, with any C-style comments inside them removed.
	 * Everything that is not a quoted attribute is dropped.
	 *
	 * @used-by	CI_Security::_js_img_removal()
	 * @used-by	CI_Security::_js_link_removal()
	 * @param	string	$str
	 * @return	string
	 */
	protected function _filter_attributes($str)
	{
		if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
		{
			return '';
		}

		$filtered = '';
		foreach ($matches[0] as $attribute)
		{
			$filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
		}

		return $filtered;
	}
1015
 
1016
	// --------------------------------------------------------------------
1017
 
1018
	/**
	 * HTML Entity Decode Callback
	 *
	 * Decodes the entities of the matched text while leaving the
	 * ampersands of URL query strings ('&name=value') alone.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _decode_entity($match)
	{
		$marker = $this->xss_hash();

		// Protect GET variables in URLs by swapping their '&' for the random marker
		$shielded = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $marker.'\\1=\\2', $match[0]);

		// Decode, then un-protect URL GET vars
		$decoded = $this->entity_decode($shielded, $this->charset);

		return str_replace($marker, '&', $decoded);
	}
1038
 
1039
	// --------------------------------------------------------------------
1040
 
1041
	/**
	 * Do Never Allowed
	 *
	 * Applies the literal replacements of $_never_allowed_str, then
	 * replaces every match of the $_never_allowed_regex fragments
	 * with '[removed]'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param 	string	$str
	 * @return 	string
	 */
	protected function _do_never_allowed($str)
	{
		$forbidden    = array_keys($this->_never_allowed_str);
		$replacements = array_values($this->_never_allowed_str);
		$str = str_replace($forbidden, $replacements, $str);

		foreach ($this->_never_allowed_regex as $fragment)
		{
			$str = preg_replace('#'.$fragment.'#is', '[removed]', $str);
		}

		return $str;
	}
1059
 
1060
	// --------------------------------------------------------------------
1061
 
1062
	/**
	 * Set CSRF Hash
	 *
	 * Makes sure a CSRF hash is available: an already set hash is kept,
	 * otherwise a well-formed value from the CSRF cookie is adopted,
	 * otherwise a new random one is generated.
	 *
	 * @return	string	The CSRF hash
	 */
	protected function _csrf_set_hash()
	{
		if ($this->_csrf_hash !== NULL)
		{
			return $this->_csrf_hash;
		}

		$cookie_name = $this->_csrf_cookie_name;

		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail
		if (isset($_COOKIE[$cookie_name])
			&& is_string($_COOKIE[$cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1)
		{
			$this->_csrf_hash = $_COOKIE[$cookie_name];
			return $this->_csrf_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_csrf_hash = bin2hex($bytes);
		}

		return $this->_csrf_hash;
	}
1089
 
1090
}