Subversion-Projekte lars-tiefland.ci

Revision

Revision 68 | Revision 2049 | Zur aktuellen Revision | Details | Vergleich mit vorheriger | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
68 lars 1
<?php
2
/**
3
 * CodeIgniter
4
 *
5
 * An open source application development framework for PHP
6
 *
7
 * This content is released under the MIT License (MIT)
8
 *
9
 * Copyright (c) 2014 - 2016, British Columbia Institute of Technology
10
 *
11
 * Permission is hereby granted, free of charge, to any person obtaining a copy
12
 * of this software and associated documentation files (the "Software"), to deal
13
 * in the Software without restriction, including without limitation the rights
14
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
 * copies of the Software, and to permit persons to whom the Software is
16
 * furnished to do so, subject to the following conditions:
17
 *
18
 * The above copyright notice and this permission notice shall be included in
19
 * all copies or substantial portions of the Software.
20
 *
21
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
 * THE SOFTWARE.
28
 *
29
 * @package	CodeIgniter
30
 * @author	EllisLab Dev Team
31
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
32
 * @copyright	Copyright (c) 2014 - 2016, British Columbia Institute of Technology (http://bcit.ca/)
33
 * @license	http://opensource.org/licenses/MIT	MIT License
34
 * @link	https://codeigniter.com
35
 * @since	Version 1.0.0
36
 * @filesource
37
 */
38
defined('BASEPATH') OR exit('No direct script access allowed');
39
 
40
/**
41
 * Security Class
42
 *
43
 * @package		CodeIgniter
44
 * @subpackage	Libraries
45
 * @category	Security
46
 * @author		EllisLab Dev Team
47
 * @link		https://codeigniter.com/user_guide/libraries/security.html
48
 */
49
class CI_Security {
50
 
51
	/**
52
	 * List of sanitize filename strings
53
	 *
54
	 * @var	array
55
	 */
56
	public $filename_bad_chars =	array(
57
		'../', '<!--', '-->', '<', '>',
58
		"'", '"', '&', '$', '#',
59
		'{', '}', '[', ']', '=',
60
		';', '?', '%20', '%22',
61
		'%3c',		// <
62
		'%253c',	// <
63
		'%3e',		// >
64
		'%0e',		// >
65
		'%28',		// (
66
		'%29',		// )
67
		'%2528',	// (
68
		'%26',		// &
69
		'%24',		// $
70
		'%3f',		// ?
71
		'%3b',		// ;
72
		'%3d'		// =
73
	);
74
 
75
	/**
76
	 * Character set
77
	 *
78
	 * Will be overridden by the constructor.
79
	 *
80
	 * @var	string
81
	 */
82
	public $charset = 'UTF-8';
83
 
84
	/**
85
	 * XSS Hash
86
	 *
87
	 * Random Hash for protecting URLs.
88
	 *
89
	 * @var	string
90
	 */
91
	protected $_xss_hash;
92
 
93
	/**
94
	 * CSRF Hash
95
	 *
96
	 * Random hash for Cross Site Request Forgery protection cookie
97
	 *
98
	 * @var	string
99
	 */
100
	protected $_csrf_hash;
101
 
102
	/**
103
	 * CSRF Expire time
104
	 *
105
	 * Expiration time for Cross Site Request Forgery protection cookie.
106
	 * Defaults to two hours (in seconds).
107
	 *
108
	 * @var	int
109
	 */
110
	protected $_csrf_expire =	7200;
111
 
112
	/**
113
	 * CSRF Token name
114
	 *
115
	 * Token name for Cross Site Request Forgery protection cookie.
116
	 *
117
	 * @var	string
118
	 */
119
	protected $_csrf_token_name =	'ci_csrf_token';
120
 
121
	/**
122
	 * CSRF Cookie name
123
	 *
124
	 * Cookie name for Cross Site Request Forgery protection cookie.
125
	 *
126
	 * @var	string
127
	 */
128
	protected $_csrf_cookie_name =	'ci_csrf_token';
129
 
130
	/**
131
	 * List of never allowed strings
132
	 *
133
	 * @var	array
134
	 */
135
	protected $_never_allowed_str =	array(
1257 lars 136
		'document.cookie' => '[removed]',
137
		'document.write'  => '[removed]',
138
		'.parentNode'     => '[removed]',
139
		'.innerHTML'      => '[removed]',
140
		'-moz-binding'    => '[removed]',
141
		'<!--'            => '&lt;!--',
142
		'-->'             => '--&gt;',
143
		'<![CDATA['       => '&lt;![CDATA[',
144
		'<comment>'	  => '&lt;comment&gt;',
145
		'<%'              => '&lt;&#37;'
68 lars 146
	);
147
 
148
	/**
149
	 * List of never allowed regex replacements
150
	 *
151
	 * @var	array
152
	 */
153
	protected $_never_allowed_regex = array(
154
		'javascript\s*:',
155
		'(document|(document\.)?window)\.(location|on\w*)',
156
		'expression\s*(\(|&\#40;)', // CSS and IE
157
		'vbscript\s*:', // IE, surprise!
158
		'wscript\s*:', // IE
159
		'jscript\s*:', // IE
160
		'vbs\s*:', // IE
161
		'Redirect\s+30\d',
162
		"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
163
	);
164
 
165
	/**
	 * Class constructor
	 *
	 * When the 'csrf_protection' config item is truthy, every non-NULL
	 * 'csrf_expire', 'csrf_token_name' and 'csrf_cookie_name' config value
	 * is copied onto the matching underscore-prefixed property, the
	 * configured 'cookie_prefix' (if any) is prepended to the CSRF cookie
	 * name and the CSRF hash is initialised.
	 *
	 * The configured charset is always stored upper-cased.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		if (config_item('csrf_protection'))
		{
			// Settings that may be overridden from the configuration
			$overridable = array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name');

			foreach ($overridable as $setting)
			{
				$configured = config_item($setting);
				if ($configured === NULL)
				{
					// Not configured: keep the class default
					continue;
				}

				$property = '_'.$setting;
				$this->$property = $configured;
			}

			// Application specific cookie prefix goes in front of the cookie name
			$prefix = config_item('cookie_prefix');
			if ($prefix)
			{
				$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
			}

			$this->_csrf_set_hash();
		}

		$this->charset = strtoupper(config_item('charset'));

		log_message('info', 'Security Class Initialized');
	}
198
 
199
	// --------------------------------------------------------------------
200
 
201
	/**
	 * CSRF Verify
	 *
	 * For non-POST requests only the CSRF cookie is (re)sent. For POST
	 * requests the submitted token must match the cookie token unless the
	 * current URI matches one of the 'csrf_exclude_uris' patterns.
	 *
	 * Both tokens must be strings; they are compared with hash_equals()
	 * when that function is available so that the comparison time does
	 * not depend on how many leading characters match.
	 *
	 * @return	CI_Security|bool	$this, or whatever csrf_set_cookie()
	 *					returns for non-POST requests
	 */
	public function csrf_verify()
	{
		// If it's not a POST request we will set the CSRF cookie
		if (strtoupper($_SERVER['REQUEST_METHOD']) !== 'POST')
		{
			return $this->csrf_set_cookie();
		}

		// Check if URI has been whitelisted from CSRF checks
		if ($exclude_uris = config_item('csrf_exclude_uris'))
		{
			$uri = load_class('URI', 'core');
			foreach ($exclude_uris as $excluded)
			{
				if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
				{
					return $this;
				}
			}
		}

		// Do the tokens exist in both the _POST and _COOKIE arrays,
		// and are they plain strings (not arrays injected via token[]=...)?
		$valid = isset($_POST[$this->_csrf_token_name], $_COOKIE[$this->_csrf_cookie_name])
			&& is_string($_POST[$this->_csrf_token_name])
			&& is_string($_COOKIE[$this->_csrf_cookie_name]);

		// Do the tokens match?
		if ($valid)
		{
			$valid = function_exists('hash_equals')
				? hash_equals($_COOKIE[$this->_csrf_cookie_name], $_POST[$this->_csrf_token_name])
				: ($_POST[$this->_csrf_token_name] === $_COOKIE[$this->_csrf_cookie_name]);
		}

		if ( ! $valid)
		{
			$this->csrf_show_error();
		}

		// We kill this since we're done and we don't want to pollute the _POST array
		unset($_POST[$this->_csrf_token_name]);

		// Regenerate on every submission?
		if (config_item('csrf_regenerate'))
		{
			// Nothing should last forever
			unset($_COOKIE[$this->_csrf_cookie_name]);
			$this->_csrf_hash = NULL;
		}

		$this->_csrf_set_hash();
		$this->csrf_set_cookie();

		log_message('info', 'CSRF token verified');
		return $this;
	}
251
 
252
	// --------------------------------------------------------------------
253
 
254
	/**
	 * CSRF Set Cookie
	 *
	 * Sends the CSRF cookie holding the current hash, valid for
	 * $_csrf_expire seconds from now. Nothing is sent when the
	 * 'cookie_secure' config item is enabled but the request is not HTTPS.
	 *
	 * @codeCoverageIgnore
	 * @return	CI_Security|bool	$this once the cookie was handed to
	 *					setcookie(), FALSE when a secure cookie
	 *					was requested over a non-HTTPS connection
	 */
	public function csrf_set_cookie()
	{
		$secure_cookie = (bool) config_item('cookie_secure');

		// A secure-only cookie must not be sent over plain HTTP
		if ($secure_cookie && ! is_https())
		{
			return FALSE;
		}

		$expire = time() + $this->_csrf_expire;

		setcookie(
			$this->_csrf_cookie_name,
			$this->_csrf_hash,
			$expire,
			config_item('cookie_path'),
			config_item('cookie_domain'),
			$secure_cookie,
			config_item('cookie_httponly')
		);

		log_message('info', 'CSRF cookie sent');

		return $this;
	}
283
 
284
	// --------------------------------------------------------------------
285
 
286
	/**
	 * Show CSRF Error
	 *
	 * Hands a fixed "not allowed" message and HTTP status 403 to show_error().
	 *
	 * @return	void
	 */
	public function csrf_show_error()
	{
		$message = 'The action you have requested is not allowed.';
		$status_code = 403;

		show_error($message, $status_code);
	}
295
 
296
	// --------------------------------------------------------------------
297
 
298
	/**
	 * Get CSRF Hash
	 *
	 * Accessor for the protected CSRF hash property.
	 *
	 * @see		CI_Security::$_csrf_hash
	 * @return 	string	CSRF hash (NULL while no hash has been set)
	 */
	public function get_csrf_hash()
	{
		$hash = $this->_csrf_hash;

		return $hash;
	}
308
 
309
	// --------------------------------------------------------------------
310
 
311
	/**
	 * Get CSRF Token Name
	 *
	 * Accessor for the protected CSRF token name property.
	 *
	 * @see		CI_Security::$_csrf_token_name
	 * @return	string	CSRF token name
	 */
	public function get_csrf_token_name()
	{
		$token_name = $this->_csrf_token_name;

		return $token_name;
	}
321
 
322
	// --------------------------------------------------------------------
323
 
324
	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting hacks can be
	 * prevented. The input is URL-decoded, entities inside tags are
	 * decoded, never-allowed strings are removed, exploded keywords are
	 * compacted, dangerous links/images/tags/attributes are neutralised
	 * and the parentheses of disallowed function calls are converted to
	 * entities. Nothing is ever 100% foolproof.
	 *
	 * Arrays are cleaned recursively, element by element (the $is_image
	 * flag is not forwarded to the elements).
	 *
	 * Note: Should only be used to deal with data upon submission.
	 *	 It's not something that should be used for general
	 *	 runtime processing.
	 *
	 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
	 * 		Based in part on some code and ideas from Bitflux.
	 *
	 * @link	http://ha.ckers.org/xss.html
	 * 		List of vulnerabilities used while developing this filter.
	 *
	 * @param	string|string[]	$str		Input data
	 * @param 	bool		$is_image	Whether the input is an image
	 * @return	string|string[]|bool	Cleaned string (or array of cleaned
	 *					values); for images, TRUE when nothing
	 *					had to be changed and FALSE otherwise
	 */
	public function xss_clean($str, $is_image = FALSE)
	{
		// Is the string an array?
		if (is_array($str))
		{
			// each() was removed in PHP 8.0, so walk over the keys instead
			foreach (array_keys($str) as $key)
			{
				$str[$key] = $this->xss_clean($str[$key]);
			}

			return $str;
		}

		// Remove Invisible Characters
		$str = remove_invisible_characters($str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Use rawurldecode() so it does not remove plus signs
		 */
		if (stripos($str, '%') !== false)
		{
			// Repeat until stable so that multiply-encoded input is fully decoded
			do
			{
				$oldstr = $str;
				$str = rawurldecode($str);
				$str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
			}
			while ($oldstr !== $str);
			unset($oldstr);
		}

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 */
		$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
		$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

		// Remove Invisible Characters Again!
		$str = remove_invisible_characters($str);

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * NOTE: we deal with spaces between characters later.
		 * NOTE: preg_replace was found to be amazingly slow here on
		 * large blocks of data, so we use str_replace.
		 */
		$str = str_replace("\t", ' ', $str);

		// Capture converted string for later comparison
		$converted_string = $str;

		// Remove Strings that are never allowed
		$str = $this->_do_never_allowed($str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 * <?xml
		 *
		 * But it doesn't seem to pose a problem.
		 */
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and
			// closing tags every so often so we skip those and only
			// do the long opening tags.
			$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
		}
		else
		{
			$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
		}

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 */
		$words = array(
			'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
			'vbs', 'script', 'base64', 'applet', 'alert', 'document',
			'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
		);

		foreach ($words as $word)
		{
			$word = implode('\s*', str_split($word)).'\s*';

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 * We used to do some version comparisons and use of stripos(),
		 * but it is dog slow compared to these simplified non-capturing
		 * preg_match(), especially if the pattern exists in the string
		 *
		 * Note: It was reported that not only space characters, but all in
		 * the following pattern can be parsed as separators between a tag name
		 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
		 * ... however, remove_invisible_characters() above already strips the
		 * hex-encoded ones, so we'll skip them below.
		 */
		do
		{
			$original = $str;

			if (preg_match('/<a/i', $str))
			{
				$str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
			}

			if (preg_match('/<img/i', $str))
			{
				$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
			}

			if (preg_match('/script|xss/i', $str))
			{
				$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
			}
		}
		while ($original !== $str);
		unset($original);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 */
		$pattern = '#'
			.'<((?<slash>/*\s*)(?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)' // tag start and name, followed by a non-tag character
			.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
			// optional attributes
			.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
			.'[^\s\042\047>/=]+' // attribute characters
			// optional attribute-value
				.'(?:\s*=' // attribute-value separator
					.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
				.')?' // end optional attribute-value group
			.')*)' // end optional attributes group
			.'[^>]*)(?<closeTag>\>)?#isS';

		// Note: It would be nice to optimize this for speed, BUT
		//       only matching the naughty elements here results in
		//       false positives and in turn - vulnerabilities!
		do
		{
			$old_str = $str;
			$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
		}
		while ($old_str !== $str);
		unset($old_str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed. Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:	eval&#40;'some code'&#41;
		 */
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
			'\\1\\2&#40;\\3&#41;',
			$str
		);

		// Final clean up
		// This adds a bit of extra precaution in case
		// something got through the above filters
		$str = $this->_do_never_allowed($str);

		/*
		 * Images are Handled in a Special Way
		 * - Essentially, we want to know that after all of the character
		 * conversion is done whether any unwanted, likely XSS, code was found.
		 * If not, we return TRUE, as the image is clean.
		 * However, if the string post-conversion does not match the
		 * string post-removal of XSS, then it fails, as there was unwanted XSS
		 * code found and removed/changed during processing.
		 */
		if ($is_image === TRUE)
		{
			return ($str === $converted_string);
		}

		return $str;
	}
564
 
565
	// --------------------------------------------------------------------
566
 
567
	/**
	 * XSS Hash
	 *
	 * Lazily creates the XSS hash on first use and returns the same
	 * value on every later call. The hash is the hex form of 16 random
	 * bytes, or an md5 of uniqid() when no random bytes are available.
	 *
	 * @see		CI_Security::$_xss_hash
	 * @return	string	XSS hash
	 */
	public function xss_hash()
	{
		// Already generated: reuse it
		if ($this->_xss_hash !== NULL)
		{
			return $this->_xss_hash;
		}

		$random = $this->get_random_bytes(16);

		if ($random === FALSE)
		{
			$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_xss_hash = bin2hex($random);
		}

		return $this->_xss_hash;
	}
587
 
588
	// --------------------------------------------------------------------
589
 
590
	/**
	 * Get random bytes
	 *
	 * Tries, in order: random_bytes(), mcrypt_create_iv() with
	 * MCRYPT_DEV_URANDOM, reading /dev/urandom directly and
	 * openssl_random_pseudo_bytes().
	 *
	 * @param	int	$length	Output length (positive integer or digit string)
	 * @return	string|bool	Random bytes, or FALSE when $length is
	 *				invalid or no source could deliver
	 */
	public function get_random_bytes($length)
	{
		$is_valid_length = ! empty($length) && ctype_digit((string) $length);
		if ( ! $is_valid_length)
		{
			return FALSE;
		}

		if (function_exists('random_bytes'))
		{
			try
			{
				// The cast is required to avoid TypeError
				return random_bytes((int) $length);
			}
			catch (Exception $e)
			{
				// If random_bytes() can't do the job, we can't either ...
				// There's no point in using fallbacks.
				log_message('error', $e->getMessage());
				return FALSE;
			}
		}

		// Unfortunately, none of the following PRNGs is guaranteed to exist ...
		if (defined('MCRYPT_DEV_URANDOM'))
		{
			$bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
			if ($bytes !== FALSE)
			{
				return $bytes;
			}
		}

		if (is_readable('/dev/urandom'))
		{
			$handle = fopen('/dev/urandom', 'rb');
			if ($handle !== FALSE)
			{
				// Try not to waste entropy ...
				if (is_php('5.4'))
				{
					stream_set_chunk_size($handle, $length);
				}

				$bytes = fread($handle, $length);
				fclose($handle);

				if ($bytes !== FALSE)
				{
					return $bytes;
				}
			}
		}

		return function_exists('openssl_random_pseudo_bytes')
			? openssl_random_pseudo_bytes($length)
			: FALSE;
	}
645
 
646
	// --------------------------------------------------------------------
647
 
648
	/**
	 * HTML Entities Decode
	 *
	 * A replacement for html_entity_decode()
	 *
	 * The reason we are not using html_entity_decode() by itself is because
	 * while it is not technically correct to leave out the semicolon
	 * at the end of an entity most browsers will still interpret the entity
	 * correctly. html_entity_decode() does not convert entities without
	 * semicolons, so named entities lacking one are looked up manually and
	 * numeric ones get a semicolon appended before decoding. Decoding is
	 * repeated until the string no longer changes.
	 *
	 * The entity translation table is built once and cached in a static
	 * variable for the remainder of the request.
	 *
	 * @link	http://php.net/html-entity-decode
	 *
	 * @param	string	$str		Input
	 * @param	string	$charset	Character set (defaults to $this->charset)
	 * @return	string
	 */
	public function entity_decode($str, $charset = NULL)
	{
		// Nothing to decode without an ampersand
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		static $_entities;

		isset($charset) OR $charset = $this->charset;
		$flag = is_php('5.4')
			? ENT_COMPAT | ENT_HTML5
			: ENT_COMPAT;

		if ( ! isset($_entities))
		{
			$_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

			// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
			// entities to the array manually
			if ($flag === ENT_COMPAT)
			{
				$_entities[':'] = '&colon;';
				$_entities['('] = '&lpar;';
				$_entities[')'] = '&rpar;';
				$_entities["\n"] = '&NewLine;';
				$_entities["\t"] = '&Tab;';
			}
		}

		do
		{
			$str_compare = $str;

			// Decode standard entities, avoiding false positives
			if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
			{
				$replace = array();
				$matches = array_unique(array_map('strtolower', $matches[0]));

				// Iterate by value: nothing is written back to $matches, and a
				// by-reference loop variable would stay bound after the loop
				foreach ($matches as $match)
				{
					if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
					{
						$replace[$match] = $char;
					}
				}

				$str = str_replace(array_keys($replace), array_values($replace), $str);
			}

			// Decode numeric & UTF16 two byte entities
			$str = html_entity_decode(
				preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
				$flag,
				$charset
			);

			// Without ENT_HTML5 support, apply the cached table (including
			// the manually added HTML 5 entities) ourselves
			if ($flag === ENT_COMPAT)
			{
				$str = str_replace(array_values($_entities), array_keys($_entities), $str);
			}
		}
		while ($str_compare !== $str);
		return $str;
	}
730
 
731
	// --------------------------------------------------------------------
732
 
733
	/**
	 * Sanitize Filename
	 *
	 * Strips every string listed in $filename_bad_chars from the name,
	 * repeating until nothing more is removed, then applies
	 * stripslashes(). Unless $relative_path is set, './' and '/' are
	 * stripped as well.
	 *
	 * @param	string	$str		Input file name
	 * @param 	bool	$relative_path	Whether to preserve paths
	 * @return	string
	 */
	public function sanitize_filename($str, $relative_path = FALSE)
	{
		$blacklist = $this->filename_bad_chars;

		if ( ! $relative_path)
		{
			// Path separators are only tolerated for relative paths
			array_push($blacklist, './', '/');
		}

		$current = remove_invisible_characters($str, FALSE);

		// Removing one bad string may splice a new one together, so keep
		// stripping until a pass leaves the name untouched
		while (TRUE)
		{
			$stripped = str_replace($blacklist, '', $current);
			$changed = ($stripped !== $current);
			$current = $stripped;

			if ( ! $changed)
			{
				break;
			}
		}

		return stripslashes($current);
	}
761
 
762
	// ----------------------------------------------------------------
763
 
764
	/**
	 * Strip Image Tags
	 *
	 * Replaces every <img> tag with the value of its src attribute,
	 * handling quoted values first and unquoted values second.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	public function strip_image_tags($str)
	{
		// src="..." or src='...'
		$quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';

		// src=value without quotes
		$unquoted_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

		$patterns = array($quoted_src, $unquoted_src);

		// In both patterns the second group holds the bare URL
		return preg_replace($patterns, '\\2', $str);
	}
781
 
782
	// ----------------------------------------------------------------
783
 
784
	/**
	 * URL-decode taking spaces into account
	 *
	 * Callback for preg_replace_callback(): when the matched percent
	 * sequence contains whitespace, the whitespace is removed and the
	 * result is URL-decoded; otherwise the match is returned untouched.
	 *
	 * @see		https://github.com/bcit-ci/CodeIgniter/issues/4877
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _urldecodespaces($matches)
	{
		$raw = $matches[0];
		$collapsed = preg_replace('#\s+#', '', $raw);

		if ($collapsed === $raw)
		{
			// No whitespace inside the sequence: leave it alone
			return $raw;
		}

		return rawurldecode($collapsed);
	}
799
 
800
	// ----------------------------------------------------------------
801
 
802
	/**
	 * Compact Exploded Words
	 *
	 * Callback method for xss_clean(): strips all whitespace from the
	 * first captured group (e.g. 'j a v a s c r i p t') and re-appends
	 * the second group unchanged.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _compact_exploded_words($matches)
	{
		$compacted = preg_replace('/\s+/s', '', $matches[1]);
		$trailer = $matches[2];

		return $compacted.$trailer;
	}
816
 
817
	// --------------------------------------------------------------------
818
 
819
	/**
	 * Sanitize Naughty HTML
	 *
	 * Callback method for xss_clean() to neutralise naughty HTML elements.
	 *
	 * - A tag without a closing '>' gets its '<' escaped.
	 * - A tag whose name is blacklisted gets both brackets escaped.
	 * - Any other tag is rebuilt from its attributes, with blacklisted
	 *   attributes (and attributes with an empty value) replaced by
	 *   'xss=removed'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _sanitize_naughty_html($matches)
	{
		static $naughty_tags    = array(
			'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
			'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
			'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
			'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
		);

		static $evil_attributes = array(
			'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
		);

		// First, escape unclosed tags
		if (empty($matches['closeTag']))
		{
			return '&lt;'.$matches[1];
		}

		// Is the element that we caught naughty? If so, escape it
		$tag_name = strtolower($matches['tagName']);
		if (in_array($tag_name, $naughty_tags, TRUE))
		{
			return '&lt;'.$matches[1].'&gt;';
		}

		// Without an attributes group there is nothing to filter
		if ( ! isset($matches['attributes']))
		{
			return $matches[0];
		}

		// For other tags, see if their attributes are "evil" and strip those.
		// The already filtered attributes are collected here.
		$kept = array();

		// Attribute-catching pattern
		$attributes_pattern = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// optional attribute-value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
			.'#i';

		// Blacklist pattern for evil attribute names
		$is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

		// Part of the attribute string that has not been processed yet
		$remaining = $matches['attributes'];

		// Each iteration filters a single attribute
		do
		{
			// Strip any non-alpha characters that may precede an attribute.
			// Browsers often parse these incorrectly and that has been a
			// source of numerous XSS issues we've had.
			$remaining = preg_replace('#^[^a-z]+#i', '', $remaining);

			if ( ! preg_match($attributes_pattern, $remaining, $attribute, PREG_OFFSET_CAPTURE))
			{
				// No (valid) attribute found? Discard everything else inside the tag
				break;
			}

			// Is it indeed an "evil" attribute?
			$is_evil = preg_match($is_evil_pattern, $attribute['name'][0])
				// Or does it have an equals sign, but no value and not quoted? Strip that too!
				OR (trim($attribute['value'][0]) === '');

			$kept[] = $is_evil
				? 'xss=removed'
				: $attribute[0][0];

			// Continue right behind the attribute that was just handled
			$consumed = $attribute[0][1] + strlen($attribute[0][0]);
			$remaining = substr($remaining, $consumed);
		}
		while ($remaining !== '');

		$attribute_html = '';
		if ( ! empty($kept))
		{
			$attribute_html = ' '.implode(' ', $kept);
		}

		return '<'.$matches['slash'].$matches['tagName'].$attribute_html.'>';
	}
907
 
908
	// --------------------------------------------------------------------
909
 
910
	/**
	 * JS Link Removal
	 *
	 * Callback method for xss_clean() to sanitize links: the tag's
	 * attribute string is replaced by its filtered attributes with any
	 * href leading up to a disallowed scripting keyword cut out.
	 *
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on link-heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _js_link_removal($match)
	{
		$attributes = $this->_filter_attributes($match[1]);

		// Drop the href up to and including the offending keyword
		$cleaned = preg_replace(
			'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
			'',
			$attributes
		);

		return str_replace($match[1], $cleaned, $match[0]);
	}
935
 
936
	// --------------------------------------------------------------------
937
 
938
	/**
	 * JS Image Removal
	 *
	 * Callback method for xss_clean() to sanitize image tags: the tag's
	 * attribute string is replaced by its filtered attributes with any
	 * src leading up to a disallowed scripting keyword cut out.
	 *
	 * This limits the PCRE backtracks, making it more performance friendly
	 * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	 * PHP 5.2+ on image tag heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _js_img_removal($match)
	{
		$attributes = $this->_filter_attributes($match[1]);

		// Drop the src up to and including the offending keyword
		$cleaned = preg_replace(
			'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
			'',
			$attributes
		);

		return str_replace($match[1], $cleaned, $match[0]);
	}
963
 
964
	// --------------------------------------------------------------------
965
 
966
	/**
	 * Attribute Conversion
	 *
	 * Callback for xss_clean(): inside the matched attribute, angle
	 * brackets become entities and backslashes are doubled.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _convert_attribute($match)
	{
		$search  = array('>', '<', '\\');
		$replace = array('&gt;', '&lt;', '\\\\');

		return str_replace($search, $replace, $match[0]);
	}
977
 
978
	// --------------------------------------------------------------------
979
 
980
	/**
	 * Filter Attributes
	 *
	 * Keeps only the quoted name="value" pairs found in the string,
	 * concatenated in order, with C-style comments removed from each.
	 *
	 * @used-by	CI_Security::_js_img_removal()
	 * @used-by	CI_Security::_js_link_removal()
	 * @param	string	$str
	 * @return	string
	 */
	protected function _filter_attributes($str)
	{
		$found = preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches);

		// No quoted attributes at all: nothing survives the filter
		if ( ! $found)
		{
			return '';
		}

		$filtered = '';
		foreach ($matches[0] as $attribute)
		{
			$filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
		}

		return $filtered;
	}
1003
 
1004
	// --------------------------------------------------------------------
1005
 
1006
	/**
	 * HTML Entity Decode Callback
	 *
	 * Decodes the entities in the matched text while shielding the
	 * ampersands of name=value pairs (GET variables in URLs): they are
	 * swapped for the XSS hash before decoding and restored afterwards.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _decode_entity($match)
	{
		$placeholder = $this->xss_hash();

		// Protect GET variables in URLs
		// 901119URL5918AMP18930PROTECT8198
		$shielded = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $placeholder.'\\1=\\2', $match[0]);

		// Decode, then un-protect URL GET vars
		$decoded = $this->entity_decode($shielded, $this->charset);

		return str_replace($placeholder, '&', $decoded);
	}
1026
 
1027
	// --------------------------------------------------------------------
1028
 
1029
	/**
	 * Do Never Allowed
	 *
	 * Applies the literal replacements from $_never_allowed_str, then
	 * replaces every match of each $_never_allowed_regex pattern
	 * (case-insensitive, dot matches newlines) with '[removed]'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param 	string
	 * @return 	string
	 */
	protected function _do_never_allowed($str)
	{
		$needles = array_keys($this->_never_allowed_str);
		$substitutes = array_values($this->_never_allowed_str);
		$str = str_replace($needles, $substitutes, $str);

		foreach ($this->_never_allowed_regex as $expression)
		{
			$pattern = '#'.$expression.'#is';
			$str = preg_replace($pattern, '[removed]', $str);
		}

		return $str;
	}
1047
 
1048
	// --------------------------------------------------------------------
1049
 
1050
	/**
	 * Set CSRF Hash
	 *
	 * Returns the current CSRF hash, creating it first when needed: a
	 * cookie value made of exactly 32 hex characters is reused, otherwise
	 * a fresh hash is built from 16 random bytes (or from md5/uniqid()
	 * when no random bytes are available).
	 *
	 * @return	string
	 */
	protected function _csrf_set_hash()
	{
		// Already known for this request
		if ($this->_csrf_hash !== NULL)
		{
			return $this->_csrf_hash;
		}

		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail
		$cookie_name = $this->_csrf_cookie_name;
		if (isset($_COOKIE[$cookie_name]) && is_string($_COOKIE[$cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1)
		{
			$this->_csrf_hash = $_COOKIE[$cookie_name];
			return $this->_csrf_hash;
		}

		$random = $this->get_random_bytes(16);

		if ($random === FALSE)
		{
			$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_csrf_hash = bin2hex($random);
		}

		return $this->_csrf_hash;
	}
1077
 
1078
}