Subversion-Projekte lars-tiefland.ci

Revision

Revision 1257 | Zur aktuellen Revision | Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
68 lars 1
<?php
2
/**
3
 * CodeIgniter
4
 *
5
 * An open source application development framework for PHP
6
 *
7
 * This content is released under the MIT License (MIT)
8
 *
9
 * Copyright (c) 2014 - 2016, British Columbia Institute of Technology
10
 *
11
 * Permission is hereby granted, free of charge, to any person obtaining a copy
12
 * of this software and associated documentation files (the "Software"), to deal
13
 * in the Software without restriction, including without limitation the rights
14
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
 * copies of the Software, and to permit persons to whom the Software is
16
 * furnished to do so, subject to the following conditions:
17
 *
18
 * The above copyright notice and this permission notice shall be included in
19
 * all copies or substantial portions of the Software.
20
 *
21
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
 * THE SOFTWARE.
28
 *
29
 * @package	CodeIgniter
30
 * @author	EllisLab Dev Team
31
 * @copyright	Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
32
 * @copyright	Copyright (c) 2014 - 2016, British Columbia Institute of Technology (http://bcit.ca/)
33
 * @license	http://opensource.org/licenses/MIT	MIT License
34
 * @link	https://codeigniter.com
35
 * @since	Version 1.0.0
36
 * @filesource
37
 */
38
defined('BASEPATH') OR exit('No direct script access allowed');
39
 
40
/**
41
 * Security Class
42
 *
43
 * @package		CodeIgniter
44
 * @subpackage	Libraries
45
 * @category	Security
46
 * @author		EllisLab Dev Team
47
 * @link		https://codeigniter.com/user_guide/libraries/security.html
48
 */
49
class CI_Security {
50
 
51
	/**
	 * Character sequences that sanitize_filename() strips from file names
	 *
	 * @var	array
	 */
	public $filename_bad_chars = array(
		'../',
		'<!--',
		'-->',
		'<',
		'>',
		"'",
		'"',
		'&',
		'$',
		'#',
		'{',
		'}',
		'[',
		']',
		'=',
		';',
		'?',
		'%20',		// URL-encoded space
		'%22',		// URL-encoded double quote
		'%3c',		// URL-encoded <
		'%253c',	// double URL-encoded <
		'%3e',		// URL-encoded >
		'%0e',		// URL-encoded 0x0E (Shift Out control character)
		'%28',		// URL-encoded (
		'%29',		// URL-encoded )
		'%2528',	// double URL-encoded (
		'%26',		// URL-encoded &
		'%24',		// URL-encoded $
		'%3f',		// URL-encoded ?
		'%3b',		// URL-encoded ;
		'%3d'		// URL-encoded =
	);

	/**
	 * Character set
	 *
	 * Default value only; the constructor replaces it with the upper-cased
	 * 'charset' configuration item.
	 *
	 * @var	string
	 */
	public $charset = 'UTF-8';

	/**
	 * XSS Hash
	 *
	 * Random marker used to protect URL query separators while entities
	 * are being decoded. Generated lazily by xss_hash().
	 *
	 * @var	string
	 */
	protected $_xss_hash;

	/**
	 * CSRF Hash
	 *
	 * Random value stored in the Cross Site Request Forgery protection cookie.
	 *
	 * @var	string
	 */
	protected $_csrf_hash;

	/**
	 * CSRF Expire time
	 *
	 * Lifetime, in seconds, of the CSRF protection cookie.
	 * The default is two hours.
	 *
	 * @var	int
	 */
	protected $_csrf_expire = 7200;

	/**
	 * CSRF Token name
	 *
	 * Name of the POST field that carries the CSRF token.
	 *
	 * @var	string
	 */
	protected $_csrf_token_name = 'ci_csrf_token';

	/**
	 * CSRF Cookie name
	 *
	 * Name of the cookie that carries the CSRF token.
	 *
	 * @var	string
	 */
	protected $_csrf_cookie_name = 'ci_csrf_token';

	/**
	 * Literal strings that are never allowed
	 *
	 * Keys are the strings searched for, values are their replacements.
	 *
	 * @var	array
	 */
	protected $_never_allowed_str = array(
		'document.cookie' => '[removed]',
		'document.write' => '[removed]',
		'.parentNode' => '[removed]',
		'.innerHTML' => '[removed]',
		'-moz-binding' => '[removed]',
		'<!--' => '&lt;!--',
		'-->' => '--&gt;',
		'<![CDATA[' => '&lt;![CDATA[',
		'<comment>' => '&lt;comment&gt;'
	);

	/**
	 * Regular expression fragments that are never allowed
	 *
	 * Each fragment is wrapped in delimiters by _do_never_allowed() and
	 * every match is replaced with '[removed]'.
	 *
	 * @var	array
	 */
	protected $_never_allowed_regex = array(
		'javascript\s*:',
		'(document|(document\.)?window)\.(location|on\w*)',
		'expression\s*(\(|&\#40;)',	// CSS and IE
		'vbscript\s*:',			// IE
		'wscript\s*:',			// IE
		'jscript\s*:',			// IE
		'vbs\s*:',			// IE
		'Redirect\s+30\d',
		"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
	);
163
 
164
	/**
	 * Class constructor
	 *
	 * When CSRF protection is enabled in the configuration, copies the CSRF
	 * settings into the matching properties, prepends the cookie prefix to
	 * the CSRF cookie name and prepares the CSRF hash. The charset property
	 * is always refreshed from the configuration.
	 *
	 * @return	void
	 */
	public function __construct()
	{
		$csrf_enabled = config_item('csrf_protection');

		if ($csrf_enabled)
		{
			// Configuration item => property that receives its value
			$settings = array(
				'csrf_expire' => '_csrf_expire',
				'csrf_token_name' => '_csrf_token_name',
				'csrf_cookie_name' => '_csrf_cookie_name'
			);

			foreach ($settings as $item => $property)
			{
				$configured = config_item($item);

				// Items that are not configured keep the class defaults
				if ($configured !== NULL)
				{
					$this->$property = $configured;
				}
			}

			// Application specific cookie prefix, if any
			$prefix = config_item('cookie_prefix');
			if ($prefix)
			{
				$this->_csrf_cookie_name = $prefix.$this->_csrf_cookie_name;
			}

			$this->_csrf_set_hash();
		}

		$this->charset = strtoupper(config_item('charset'));

		log_message('info', 'Security Class Initialized');
	}
197
 
198
	// --------------------------------------------------------------------
199
 
200
	/**
	 * CSRF Verify
	 *
	 * For non-POST requests only the CSRF cookie is (re)sent. For POST
	 * requests the submitted token must be present in both $_POST and
	 * $_COOKIE, both values must be strings, and they must be equal;
	 * otherwise csrf_show_error() is called. URIs matching one of the
	 * 'csrf_exclude_uris' patterns skip the check.
	 *
	 * @return	CI_Security|bool	Whatever csrf_set_cookie() returns for
	 *					non-POST requests, $this otherwise
	 */
	public function csrf_verify()
	{
		// REQUEST_METHOD may be absent (e.g. CLI); treat that as "not POST"
		$method = isset($_SERVER['REQUEST_METHOD'])
			? strtoupper($_SERVER['REQUEST_METHOD'])
			: '';

		// If it's not a POST request we only set the CSRF cookie
		if ($method !== 'POST')
		{
			return $this->csrf_set_cookie();
		}

		// Check if the URI has been whitelisted from CSRF checks
		if ($exclude_uris = config_item('csrf_exclude_uris'))
		{
			$uri = load_class('URI', 'core');
			foreach ($exclude_uris as $excluded)
			{
				if (preg_match('#^'.$excluded.'$#i'.(UTF8_ENABLED ? 'u' : ''), $uri->uri_string()))
				{
					return $this;
				}
			}
		}

		$token_name = $this->_csrf_token_name;
		$cookie_name = $this->_csrf_cookie_name;

		// Both tokens must exist and be plain strings; array payloads
		// (e.g. token[]=x) must never be accepted as a valid token.
		$valid = isset($_POST[$token_name], $_COOKIE[$cookie_name])
			&& is_string($_POST[$token_name])
			&& is_string($_COOKIE[$cookie_name]);

		if ($valid)
		{
			// Prefer a timing-safe comparison when the runtime offers one
			$valid = function_exists('hash_equals')
				? hash_equals($_COOKIE[$cookie_name], $_POST[$token_name])
				: ($_POST[$token_name] === $_COOKIE[$cookie_name]);
		}

		if ( ! $valid)
		{
			$this->csrf_show_error();
		}

		// We're done with the token, so don't let it pollute the _POST array
		unset($_POST[$token_name]);

		// Regenerate on every submission?
		if (config_item('csrf_regenerate'))
		{
			// Dropping the cookie value and the cached hash forces
			// _csrf_set_hash() below to create a fresh token
			unset($_COOKIE[$cookie_name]);
			$this->_csrf_hash = NULL;
		}

		$this->_csrf_set_hash();
		$this->csrf_set_cookie();

		log_message('info', 'CSRF token verified');
		return $this;
	}
250
 
251
	// --------------------------------------------------------------------
252
 
253
	/**
	 * CSRF Set Cookie
	 *
	 * Sends the CSRF cookie using the cookie settings from the
	 * configuration. Nothing is sent when 'cookie_secure' is enabled
	 * but the current request is not HTTPS.
	 *
	 * @codeCoverageIgnore
	 * @return	CI_Security|bool	$this once the cookie was sent, FALSE when
	 *					a secure cookie was requested over non-HTTPS
	 */
	public function csrf_set_cookie()
	{
		$expires_at = time() + $this->_csrf_expire;
		$secure_only = (bool) config_item('cookie_secure');

		// Secure-only cookie requested, but this request is not HTTPS
		if ($secure_only && ! is_https())
		{
			return FALSE;
		}

		$path = config_item('cookie_path');
		$domain = config_item('cookie_domain');
		$http_only = config_item('cookie_httponly');

		setcookie($this->_csrf_cookie_name, $this->_csrf_hash, $expires_at, $path, $domain, $secure_only, $http_only);
		log_message('info', 'CSRF cookie sent');

		return $this;
	}
282
 
283
	// --------------------------------------------------------------------
284
 
285
	/**
	 * Show CSRF Error
	 *
	 * Reports a failed CSRF check through show_error() with HTTP status 403.
	 *
	 * @return	void
	 */
	public function csrf_show_error()
	{
		$status_code = 403;
		$message = 'The action you have requested is not allowed.';

		show_error($message, $status_code);
	}
294
 
295
	// --------------------------------------------------------------------
296
 
297
	/**
	 * Get CSRF Hash
	 *
	 * Accessor for the current CSRF token value.
	 *
	 * @see		CI_Security::$_csrf_hash
	 * @return 	string	CSRF hash
	 */
	public function get_csrf_hash()
	{
		$hash = $this->_csrf_hash;

		return $hash;
	}
307
 
308
	// --------------------------------------------------------------------
309
 
310
	/**
	 * Get CSRF Token Name
	 *
	 * Accessor for the name of the POST field that carries the CSRF token.
	 *
	 * @see		CI_Security::$_csrf_token_name
	 * @return	string	CSRF token name
	 */
	public function get_csrf_token_name()
	{
		$token_name = $this->_csrf_token_name;

		return $token_name;
	}
320
 
321
	// --------------------------------------------------------------------
322
 
323
	/**
	 * XSS Clean
	 *
	 * Sanitizes data so that Cross Site Scripting attacks can be
	 * prevented. Arrays are processed element by element (recursively);
	 * strings go through URL decoding, entity decoding inside tags,
	 * removal of never-allowed strings, neutralisation of PHP tags,
	 * compaction of "exploded" keywords, link/image/script filtering,
	 * tag and attribute sanitizing, and escaping of the parentheses of
	 * disallowed function calls.
	 *
	 * Note: Should only be used to deal with data upon submission.
	 *	 It's not something that should be used for general
	 *	 runtime processing.
	 *
	 * @link	http://channel.bitflux.ch/wiki/XSS_Prevention
	 * 		Based in part on some code and ideas from Bitflux.
	 *
	 * @link	http://ha.ckers.org/xss.html
	 * 		List of vulnerabilities used while developing this filter.
	 *
	 * @param	string|string[]	$str		Input data
	 * @param 	bool		$is_image	Whether the input is an image
	 * @return	string|string[]|bool	The cleaned data; when $is_image is TRUE,
	 *					a boolean telling whether the filter left
	 *					the (decoded) input unchanged
	 */
	public function xss_clean($str, $is_image = FALSE)
	{
		// Arrays are cleaned element by element. foreach is used because
		// each() is deprecated as of PHP 7.2 and removed in PHP 8.0.
		// As before, $is_image is not forwarded to the nested calls.
		if (is_array($str))
		{
			foreach (array_keys($str) as $key)
			{
				$str[$key] = $this->xss_clean($str[$key]);
			}

			return $str;
		}

		// Remove Invisible Characters
		$str = remove_invisible_characters($str);

		/*
		 * URL Decode
		 *
		 * Just in case stuff like this is submitted:
		 *
		 * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		 *
		 * Note: Use rawurldecode() so it does not remove plus signs.
		 * The loop keeps decoding while encoded sequences remain, which
		 * also unwraps multiply-encoded input.
		 */
		do
		{
			$str = rawurldecode($str);
		}
		while (preg_match('/%[0-9a-f]{2,}/i', $str));

		/*
		 * Convert character entities to ASCII
		 *
		 * This permits our tests below to work reliably.
		 * We only convert entities that are within tags since
		 * these are the ones that will pose security problems.
		 */
		$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
		$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

		// Remove Invisible Characters Again!
		$str = remove_invisible_characters($str);

		/*
		 * Convert all tabs to spaces
		 *
		 * This prevents strings like this: ja	vascript
		 * NOTE: we deal with spaces between characters later.
		 * NOTE: preg_replace was found to be amazingly slow here on
		 * large blocks of data, so we use str_replace.
		 */
		$str = str_replace("\t", ' ', $str);

		// Capture converted string for later comparison (image mode)
		$converted_string = $str;

		// Remove Strings that are never allowed
		$str = $this->_do_never_allowed($str);

		/*
		 * Makes PHP tags safe
		 *
		 * Note: XML tags are inadvertently replaced too:
		 *
		 * <?xml
		 *
		 * But it doesn't seem to pose a problem.
		 */
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and
			// closing tags every so often so we skip those and only
			// do the long opening tags.
			$str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
		}
		else
		{
			$str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
		}

		/*
		 * Compact any exploded words
		 *
		 * This corrects words like:  j a v a s c r i p t
		 * These words are compacted back to their correct state.
		 */
		$words = array(
			'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
			'vbs', 'script', 'base64', 'applet', 'alert', 'document',
			'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
		);

		foreach ($words as $word)
		{
			// Allow optional whitespace after every letter of the word
			$word = implode('\s*', str_split($word)).'\s*';

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			// (substr() drops the trailing '\s*' appended above)
			$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		 * Remove disallowed Javascript in links or img tags
		 * We used to do some version comparisons and use of stripos(),
		 * but it is dog slow compared to these simplified non-capturing
		 * preg_match(), especially if the pattern exists in the string
		 *
		 * Note: It was reported that not only space characters, but all in
		 * the following pattern can be parsed as separators between a tag name
		 * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
		 * ... however, remove_invisible_characters() above already strips the
		 * hex-encoded ones, so we'll skip them below.
		 *
		 * The loop repeats until a pass leaves the string unchanged.
		 */
		do
		{
			$original = $str;

			if (preg_match('/<a/i', $str))
			{
				$str = preg_replace_callback('#<a[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
			}

			if (preg_match('/<img/i', $str))
			{
				$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
			}

			if (preg_match('/script|xss/i', $str))
			{
				$str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
			}
		}
		while ($original !== $str);
		unset($original);

		/*
		 * Sanitize naughty HTML elements
		 *
		 * If a tag containing any of the words in the list
		 * below is found, the tag gets converted to entities.
		 *
		 * So this: <blink>
		 * Becomes: &lt;blink&gt;
		 */
		$pattern = '#'
			.'<((?<slash>/*\s*)(?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)' // tag start and name, followed by a non-tag character
			.'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
			// optional attributes
			.'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
			.'[^\s\042\047>/=]+' // attribute characters
			// optional attribute-value
				.'(?:\s*=' // attribute-value separator
					.'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
				.')?' // end optional attribute-value group
			.')*)' // end optional attributes group
			.'[^>]*)(?<closeTag>\>)?#isS';

		// Note: It would be nice to optimize this for speed, BUT
		//       only matching the naughty elements here results in
		//       false positives and in turn - vulnerabilities!
		do
		{
			$old_str = $str;
			$str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
		}
		while ($old_str !== $str);
		unset($old_str);

		/*
		 * Sanitize naughty scripting elements
		 *
		 * Similar to above, only instead of looking for
		 * tags it looks for PHP and JavaScript commands
		 * that are disallowed. Rather than removing the
		 * code, it simply converts the parenthesis to entities
		 * rendering the code un-executable.
		 *
		 * For example:	eval('some code')
		 * Becomes:	eval&#40;'some code'&#41;
		 */
		$str = preg_replace(
			'#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
			'\\1\\2&#40;\\3&#41;',
			$str
		);

		// Final clean up
		// This adds a bit of extra precaution in case
		// something got through the above filters
		$str = $this->_do_never_allowed($str);

		/*
		 * Images are Handled in a Special Way
		 * - Essentially, we want to know that after all of the character
		 * conversion is done whether any unwanted, likely XSS, code was found.
		 * If not, we return TRUE, as the image is clean.
		 * However, if the string post-conversion does not match the
		 * string post-removal of XSS, then it fails, as there was unwanted XSS
		 * code found and removed/changed during processing.
		 */
		if ($is_image === TRUE)
		{
			return ($str === $converted_string);
		}

		return $str;
	}
557
 
558
	// --------------------------------------------------------------------
559
 
560
	/**
	 * XSS Hash
	 *
	 * Returns the XSS hash, creating it on first use. The hash is the hex
	 * form of 16 random bytes; if no random bytes could be obtained it is
	 * derived from md5(uniqid(mt_rand(), TRUE)) instead.
	 *
	 * @see		CI_Security::$_xss_hash
	 * @return	string	XSS hash
	 */
	public function xss_hash()
	{
		// Already generated during this request
		if ($this->_xss_hash !== NULL)
		{
			return $this->_xss_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_xss_hash = bin2hex($bytes);
		}

		return $this->_xss_hash;
	}
580
 
581
	// --------------------------------------------------------------------
582
 
583
	/**
	 * Get random bytes
	 *
	 * Tries, in order: random_bytes(), mcrypt_create_iv() with
	 * MCRYPT_DEV_URANDOM, reading /dev/urandom directly and finally
	 * openssl_random_pseudo_bytes().
	 *
	 * @param	int	$length	Output length
	 * @return	string|bool	Random bytes, or FALSE when $length is not a
	 *				positive integer or no source is available
	 */
	public function get_random_bytes($length)
	{
		// Reject 0, NULL, '' and the like
		if (empty($length))
		{
			return FALSE;
		}

		// Only plain digit sequences are accepted as a length
		if ( ! ctype_digit((string) $length))
		{
			return FALSE;
		}

		if (function_exists('random_bytes'))
		{
			try
			{
				// The cast is required to avoid TypeError
				$bytes = random_bytes((int) $length);
				return $bytes;
			}
			catch (Exception $e)
			{
				// If random_bytes() can't do the job, we can't either ...
				// There's no point in using fallbacks.
				log_message('error', $e->getMessage());
				return FALSE;
			}
		}

		// Unfortunately, none of the following PRNGs is guaranteed to exist ...
		if (defined('MCRYPT_DEV_URANDOM'))
		{
			$bytes = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM);
			if ($bytes !== FALSE)
			{
				return $bytes;
			}
		}

		if (is_readable('/dev/urandom'))
		{
			$handle = fopen('/dev/urandom', 'rb');
			if ($handle !== FALSE)
			{
				// Try not to waste entropy ...
				if (is_php('5.4'))
				{
					stream_set_chunk_size($handle, $length);
				}

				$bytes = fread($handle, $length);
				fclose($handle);

				if ($bytes !== FALSE)
				{
					return $bytes;
				}
			}
		}

		if (function_exists('openssl_random_pseudo_bytes'))
		{
			return openssl_random_pseudo_bytes($length);
		}

		return FALSE;
	}
638
 
639
	// --------------------------------------------------------------------
640
 
641
	/**
	 * HTML Entities Decode
	 *
	 * A replacement for html_entity_decode()
	 *
	 * The reason we are not using html_entity_decode() by itself is because
	 * while it is not technically correct to leave out the semicolon
	 * at the end of an entity most browsers will still interpret the entity
	 * correctly. html_entity_decode() does not convert entities without
	 * semicolons, so named entities lacking the semicolon are replaced
	 * manually and numeric ones get the semicolon appended before decoding.
	 * Decoding is repeated until the string no longer changes.
	 *
	 * @link	http://php.net/html-entity-decode
	 *
	 * @param	string	$str		Input
	 * @param	string	$charset	Character set; defaults to $this->charset
	 * @return	string
	 */
	public function entity_decode($str, $charset = NULL)
	{
		// Nothing to decode without an ampersand
		if (strpos($str, '&') === FALSE)
		{
			return $str;
		}

		// Translation table (character => lower-cased entity), built once.
		// NOTE(review): the table is cached from the first call, so a later
		// call with a different $charset reuses it - confirm this is intended.
		static $_entities;

		isset($charset) OR $charset = $this->charset;
		$flag = is_php('5.4')
			? ENT_COMPAT | ENT_HTML5
			: ENT_COMPAT;

		do
		{
			$str_compare = $str;

			// Decode standard entities, avoiding false positives
			// (only names NOT followed by a letter or a semicolon are matched)
			if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
			{
				if ( ! isset($_entities))
				{
					$_entities = array_map(
						'strtolower',
						is_php('5.3.4')
							? get_html_translation_table(HTML_ENTITIES, $flag, $charset)
							: get_html_translation_table(HTML_ENTITIES, $flag)
					);

					// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
					// entities to the array manually
					if ($flag === ENT_COMPAT)
					{
						$_entities[':'] = '&colon;';
						$_entities['('] = '&lpar;';
						$_entities[')'] = '&rpar;';
						$_entities["\n"] = '&newline;';
						$_entities["\t"] = '&tab;';
					}
				}

				$replace = array();
				$matches = array_unique(array_map('strtolower', $matches[0]));

				// Iterate by value: nothing is written back, and a by-reference
				// loop variable would be left dangling after the loop
				foreach ($matches as $match)
				{
					if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
					{
						$replace[$match] = $char;
					}
				}

				$str = str_ireplace(array_keys($replace), array_values($replace), $str);
			}

			// Decode numeric & UTF16 two byte entities
			// (the inner preg_replace() appends the missing semicolon first)
			$str = html_entity_decode(
				preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
				$flag,
				$charset
			);
		}
		while ($str_compare !== $str);
		return $str;
	}
723
 
724
	// --------------------------------------------------------------------
725
 
726
	/**
	 * Sanitize Filename
	 *
	 * Removes invisible characters and every sequence listed in
	 * $filename_bad_chars from the name, repeating until nothing more can
	 * be removed, then strips backslashes. Unless $relative_path is set,
	 * './' and '/' are removed as well.
	 *
	 * @param	string	$str		Input file name
	 * @param 	bool	$relative_path	Whether to preserve paths
	 * @return	string
	 */
	public function sanitize_filename($str, $relative_path = FALSE)
	{
		$forbidden = $this->filename_bad_chars;

		if ($relative_path)
		{
			// Path separators are kept on request
		}
		else
		{
			array_push($forbidden, './', '/');
		}

		$str = remove_invisible_characters($str, FALSE);

		// A removal can join the surrounding text into a new forbidden
		// sequence, so keep going until a pass changes nothing
		while (TRUE)
		{
			$stripped = str_replace($forbidden, '', $str);
			if ($stripped === $str)
			{
				break;
			}

			$str = $stripped;
		}

		return stripslashes($str);
	}
754
 
755
	// ----------------------------------------------------------------
756
 
757
	/**
	 * Strip Image Tags
	 *
	 * Replaces each <img> tag with the value of its src attribute.
	 *
	 * @param	string	$str
	 * @return	string
	 */
	public function strip_image_tags($str)
	{
		// src value wrapped in single or double quotes
		$quoted_src = '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i';

		// src value without quotes
		$bare_src = '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i';

		// In both patterns the second capture group holds the src value
		$patterns = array($quoted_src, $bare_src);

		return preg_replace($patterns, '\\2', $str);
	}
774
 
775
	// ----------------------------------------------------------------
776
 
777
	/**
	 * Compact Exploded Words
	 *
	 * Callback method for xss_clean() that removes the whitespace from
	 * things like 'j a v a s c r i p t'. The character captured after
	 * the word is appended unchanged.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches
	 * @return	string
	 */
	protected function _compact_exploded_words($matches)
	{
		$exploded_word = $matches[1];
		$following_char = $matches[2];

		$compacted = preg_replace('/\s+/s', '', $exploded_word);

		return $compacted.$following_char;
	}
791
 
792
	// --------------------------------------------------------------------
793
 
794
	/**
	 * Sanitize Naughty HTML
	 *
	 * Callback method for xss_clean(). Unclosed tags and blacklisted
	 * elements are escaped; for every other tag the attributes are
	 * filtered one by one and blacklisted ones are replaced with
	 * 'xss=removed'.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$matches	Match data using the named groups 'slash',
	 *					'tagName', 'attributes' and 'closeTag'
	 * @return	string
	 */
	protected function _sanitize_naughty_html($matches)
	{
		static $naughty_tags    = array(
			'alert', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
			'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
			'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
			'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
		);

		static $evil_attributes = array(
			'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
		);

		// Unclosed tag: escape its opening bracket
		if (empty($matches['closeTag']))
		{
			return '&lt;'.$matches[1];
		}

		// Blacklisted element: escape the whole tag
		if (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
		{
			return '&lt;'.$matches[1].'&gt;';
		}

		// No attributes group in the match data: return the match as it is
		if ( ! isset($matches['attributes']))
		{
			return $matches[0];
		}

		// Attributes that have already been filtered
		$kept = array();

		// Pattern that catches one attribute together with its value
		$attribute_regex = '#'
			.'(?<name>[^\s\042\047>/=]+)' // attribute characters
			// attribute-value separator followed by a single, double or non-quoted value
			.'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))'
			.'#i';

		// Blacklist pattern for evil attribute names
		$evil_regex = '#^('.implode('|', $evil_attributes).')$#i';

		// Not yet processed part of the attribute string
		$pending = $matches['attributes'];

		// Each iteration filters a single attribute
		do
		{
			// Strip any non-alpha characters that may precede an attribute.
			// Browsers often parse these incorrectly and that has been a
			// source of numerous XSS issues.
			$pending = preg_replace('#^[^a-z]+#i', '', $pending);

			if ( ! preg_match($attribute_regex, $pending, $found, PREG_OFFSET_CAPTURE))
			{
				// No (valid) attribute found? Discard everything else inside the tag
				break;
			}

			// Strip the attribute if its name is blacklisted, or if it has
			// an equals sign but an empty, unquoted value
			$strip = (preg_match($evil_regex, $found['name'][0]) OR trim($found['value'][0]) === '');

			$kept[] = $strip ? 'xss=removed' : $found[0][0];

			// Continue right after the attribute that was just handled
			$consumed = $found[0][1] + strlen($found[0][0]);
			$pending = substr($pending, $consumed);
		}
		while ($pending !== '');

		$rendered = empty($kept) ? '' : ' '.implode(' ', $kept);

		return '<'.$matches['slash'].$matches['tagName'].$rendered.'>';
	}
882
 
883
	// --------------------------------------------------------------------
884
 
885
	/**
	 * JS Link Removal
	 *
	 * Callback method for xss_clean() to sanitize links.
	 *
	 * Working on one tag at a time limits the PCRE backtracks, making it
	 * more performance friendly and preventing PREG_BACKTRACK_LIMIT_ERROR
	 * from being triggered in PHP 5.2+ on link-heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match	$match[0] is the whole tag, $match[1] its attribute string
	 * @return	string
	 */
	protected function _js_link_removal($match)
	{
		$tag = $match[0];
		$raw_attributes = $match[1];

		// Keep only well-formed, quoted attributes
		$filtered = $this->_filter_attributes($raw_attributes);

		// Cut everything from "href=" up to and including the first dangerous token
		$cleaned = preg_replace(
			'#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
			'',
			$filtered
		);

		// Swap the original attribute string for the cleaned one
		return str_replace($raw_attributes, $cleaned, $tag);
	}
910
 
911
	// --------------------------------------------------------------------
912
 
913
	/**
	 * JS Image Removal
	 *
	 * Callback method for xss_clean() to sanitize image tags.
	 *
	 * Working on one tag at a time limits the PCRE backtracks, making it
	 * more performance friendly and preventing PREG_BACKTRACK_LIMIT_ERROR
	 * from being triggered in PHP 5.2+ on image tag heavy strings.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match	$match[0] is the whole tag, $match[1] its attribute string
	 * @return	string
	 */
	protected function _js_img_removal($match)
	{
		$tag = $match[0];
		$raw_attributes = $match[1];

		// Keep only well-formed, quoted attributes
		$filtered = $this->_filter_attributes($raw_attributes);

		// Cut everything from "src=" up to and including the first dangerous token
		$cleaned = preg_replace(
			'#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
			'',
			$filtered
		);

		// Swap the original attribute string for the cleaned one
		return str_replace($raw_attributes, $cleaned, $tag);
	}
938
 
939
	// --------------------------------------------------------------------
940
 
941
	/**
	 * Attribute Conversion
	 *
	 * Escapes angle brackets and doubles backslashes inside the matched
	 * attribute.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _convert_attribute($match)
	{
		// None of the replacements contains a character that is searched
		// for, so a single-pass translation gives the same result
		$translation = array(
			'>' => '&gt;',
			'<' => '&lt;',
			'\\' => '\\\\'
		);

		return strtr($match[0], $translation);
	}
952
 
953
	// --------------------------------------------------------------------
954
 
955
	/**
	 * Filter Attributes
	 *
	 * Collects the quoted name="value" attributes found in the string,
	 * removes comment blocks from each of them and returns them
	 * concatenated. Everything else is dropped.
	 *
	 * @used-by	CI_Security::_js_img_removal()
	 * @used-by	CI_Security::_js_link_removal()
	 * @param	string	$str
	 * @return	string
	 */
	protected function _filter_attributes($str)
	{
		$found = preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches);

		if ( ! $found)
		{
			return '';
		}

		$filtered = '';
		foreach ($matches[0] as $attribute)
		{
			// Strip /* ... */ comment blocks from the attribute
			$filtered = $filtered.preg_replace('#/\*.*?\*/#s', '', $attribute);
		}

		return $filtered;
	}
978
 
979
	// --------------------------------------------------------------------
980
 
981
	/**
	 * HTML Entity Decode Callback
	 *
	 * Decodes the entities of the matched text while leaving the
	 * ampersands that separate URL GET variables untouched.
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param	array	$match
	 * @return	string
	 */
	protected function _decode_entity($match)
	{
		$marker = $this->xss_hash();

		// Protect GET variables in URLs: the ampersand in front of each
		// name=value pair is temporarily swapped for the XSS hash
		$protected = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $marker.'\\1=\\2', $match[0]);

		$decoded = $this->entity_decode($protected, $this->charset);

		// Un-protect the URL GET vars again
		return str_replace($marker, '&', $decoded);
	}
1001
 
1002
	// --------------------------------------------------------------------
1003
 
1004
	/**
	 * Do Never Allowed
	 *
	 * Applies the literal replacements from $_never_allowed_str, then
	 * replaces every match of the $_never_allowed_regex fragments with
	 * '[removed]' (case-insensitive, dot matches newlines).
	 *
	 * @used-by	CI_Security::xss_clean()
	 * @param 	string
	 * @return 	string
	 */
	protected function _do_never_allowed($str)
	{
		$needles = array_keys($this->_never_allowed_str);
		$str = str_replace($needles, $this->_never_allowed_str, $str);

		foreach ($this->_never_allowed_regex as $fragment)
		{
			$pattern = '#'.$fragment.'#is';
			$str = preg_replace($pattern, '[removed]', $str);
		}

		return $str;
	}
1022
 
1023
	// --------------------------------------------------------------------
1024
 
1025
	/**
	 * Set CSRF Hash
	 *
	 * Makes sure $_csrf_hash holds a value: a hash that is already set is
	 * kept, otherwise a valid-looking cookie value (32 hex characters) is
	 * adopted, otherwise a new random hash is generated.
	 *
	 * @return	string	The CSRF hash
	 */
	protected function _csrf_set_hash()
	{
		// Nothing to do if a hash has already been chosen
		if ($this->_csrf_hash !== NULL)
		{
			return $this->_csrf_hash;
		}

		$cookie_name = $this->_csrf_cookie_name;

		// If the cookie exists we will use its value.
		// We don't necessarily want to regenerate it with
		// each page load since a page could contain embedded
		// sub-pages causing this feature to fail
		if (isset($_COOKIE[$cookie_name])
			&& is_string($_COOKIE[$cookie_name])
			&& preg_match('#^[0-9a-f]{32}$#iS', $_COOKIE[$cookie_name]) === 1)
		{
			$this->_csrf_hash = $_COOKIE[$cookie_name];
			return $this->_csrf_hash;
		}

		$bytes = $this->get_random_bytes(16);

		if ($bytes === FALSE)
		{
			$this->_csrf_hash = md5(uniqid(mt_rand(), TRUE));
		}
		else
		{
			$this->_csrf_hash = bin2hex($bytes);
		}

		return $this->_csrf_hash;
	}
1052
 
1053
}