Subversion-Projekte lars-tiefland.codeigniter

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php  if ( ! defined('BASEPATH')) exit('No direct script access allowed');
2
/**
3
 * CodeIgniter
4
 *
5
 * An open source application development framework for PHP 4.3.2 or newer
6
 *
7
 * @package		CodeIgniter
8
 * @author		ExpressionEngine Dev Team
9
 * @copyright	Copyright (c) 2008, EllisLab, Inc.
10
 * @license		http://codeigniter.com/user_guide/license.html
11
 * @link		http://codeigniter.com
12
 * @since		Version 1.0
13
 * @filesource
14
 */
15
 
16
// ------------------------------------------------------------------------
17
 
18
/**
19
 * Input Class
20
 *
21
 * Pre-processes global input data for security
22
 *
23
 * @package		CodeIgniter
24
 * @subpackage	Libraries
25
 * @category	Input
26
 * @author		ExpressionEngine Dev Team
27
 * @link		http://codeigniter.com/user_guide/libraries/input.html
28
 */
29
class CI_Input {
30
	/* Run every cleaned GET/POST/COOKIE value through xss_clean(); set by the constructor */
	var $use_xss_clean		= FALSE;
	/* Cached token returned by xss_hash(); empty until first requested */
	var $xss_hash			= '';
	/* Cached result of ip_address(); FALSE until first resolved */
	var $ip_address			= FALSE;
	/* Cached result of user_agent(); FALSE until first resolved */
	var $user_agent			= FALSE;
	/* When FALSE, _sanitize_globals() replaces $_GET with an empty array */
	var $allow_get_array	= FALSE;

	/* Literal substrings xss_clean() always replaces (search => replacement) */
	var $never_allowed_str = array(
		'document.cookie'	=> '[removed]',
		'document.write'	=> '[removed]',
		'.parentNode'		=> '[removed]',
		'.innerHTML'		=> '[removed]',
		'window.location'	=> '[removed]',
		'-moz-binding'		=> '[removed]',
		'<!--'				=> '&lt;!--',
		'-->'				=> '--&gt;',
		'<![CDATA['			=> '&lt;![CDATA['
	);

	/* Regex fragments xss_clean() always replaces; each is wrapped as #...#i before use */
	var $never_allowed_regex = array(
		"javascript\s*:"			=> '[removed]',
		"expression\s*(\(|&\#40;)"	=> '[removed]', // CSS and IE
		"vbscript\s*:"				=> '[removed]', // IE, surprise!
		"Redirect\s+302"			=> '[removed]'
	);
55
 
56
	/**
	* Constructor
	*
	* Logs the initialisation, reads the 'global_xss_filtering' and
	* 'enable_query_strings' config items and then sanitizes the
	* request superglobals.
	*
	* @access	public
	*/
	function CI_Input()
	{
		log_message('debug', "Input Class Initialized");

		$config =& load_class('Config');

		// Each switch is only turned on by a strict boolean TRUE in the config
		$this->use_xss_clean	= ($config->item('global_xss_filtering') === TRUE);
		$this->allow_get_array	= ($config->item('enable_query_strings') === TRUE);

		$this->_sanitize_globals();
	}
73
 
74
	// --------------------------------------------------------------------
75
 
76
	/**
	* Sanitize Globals
	*
	* Performs the following steps, in order:
	*
	* 1. Unsets every entry of $GLOBALS whose name matches a key (or a
	*    second-level key) of the request arrays, except protected names.
	*    This mimics register_globals = off.
	* 2. Empties $_GET when query strings are disabled, otherwise cleans it.
	* 3. Cleans $_POST.
	* 4. Drops the RFC 2109 '$Version', '$Path' and '$Domain' cookies and
	*    cleans the remaining $_COOKIE data.
	*
	* Cleaning is delegated to _clean_input_data(), which also standardizes
	* newline characters to \n.
	*
	* @access	private
	* @return	void
	*/
	function _sanitize_globals()
	{
		// Names that must never be removed from $GLOBALS
		$keep = array('_SERVER', '_GET', '_POST', '_FILES', '_REQUEST', '_SESSION', '_ENV', 'GLOBALS', 'HTTP_RAW_POST_DATA',
							'system_folder', 'application_folder', 'BM', 'EXT', 'CFG', 'URI', 'RTR', 'OUT', 'IN');

		$session = (isset($_SESSION) && is_array($_SESSION)) ? $_SESSION : array();
		$sources = array($_GET, $_POST, $_COOKIE, $_SERVER, $_FILES, $_ENV, $session);

		// Unset globals for security.
		// This is effectively the same as register_globals = off
		foreach ($sources as $source)
		{
			if ( ! is_array($source))
			{
				if ( ! in_array($source, $keep))
				{
					unset($GLOBALS[$source]);
				}

				continue;
			}

			foreach ($source as $name => $value)
			{
				if ( ! in_array($name, $keep))
				{
					unset($GLOBALS[$name]);
				}

				if ( ! is_array($value))
				{
					continue;
				}

				// One level of nested keys is purged as well
				foreach (array_keys($value) as $inner_name)
				{
					if ( ! in_array($inner_name, $keep))
					{
						unset($GLOBALS[$inner_name]);
					}
				}
			}
		}

		// Is $_GET data allowed? If not, $_GET becomes an empty array
		$_GET = ($this->allow_get_array == FALSE) ? array() : $this->_clean_input_data($_GET);

		// Clean $_POST Data
		$_POST = $this->_clean_input_data($_POST);

		// Clean $_COOKIE Data
		// Specially treated cookies (http://www.ietf.org/rfc/rfc2109.txt) are of no
		// use to a CI application and would trip the 'Disallowed Key Characters'
		// alarm, so they are removed first. The key names are single quoted
		// strings, not PHP variables.
		unset($_COOKIE['$Version'], $_COOKIE['$Path'], $_COOKIE['$Domain']);
		$_COOKIE = $this->_clean_input_data($_COOKIE);

		log_message('debug', "Global POST and COOKIE data sanitized");
	}
156
 
157
	// --------------------------------------------------------------------
158
 
159
	/**
	* Clean Input Data
	*
	* Helper that recursively cleans a value taken from a request array:
	* array keys are validated with _clean_input_keys(), magic-quote
	* slashes are stripped, the value is optionally XSS filtered and
	* newline characters are standardized to \n.
	*
	* @access	private
	* @param	mixed	string, or (nested) array of strings
	* @return	mixed	cleaned value of the same shape as the input
	*/
	function _clean_input_data($str)
	{
		if (is_array($str))
		{
			$new_array = array();
			foreach ($str as $key => $val)
			{
				$new_array[$this->_clean_input_keys($key)] = $this->_clean_input_data($val);
			}
			return $new_array;
		}

		// We strip slashes if magic quotes is on to keep things consistent.
		// Magic quotes were removed in PHP 5.4 (the function always returns
		// FALSE from then on) and get_magic_quotes_gpc() itself no longer
		// exists in PHP 8, so it is only consulted on older versions.
		if (version_compare(phpversion(), '5.4.0', '<') && get_magic_quotes_gpc())
		{
			$str = stripslashes($str);
		}

		// Should we filter the input data?
		if ($this->use_xss_clean === TRUE)
		{
			$str = $this->xss_clean($str);
		}

		// Standardize newlines
		if (strpos($str, "\r") !== FALSE)
		{
			$str = str_replace(array("\r\n", "\r"), "\n", $str);
		}

		return $str;
	}
201
 
202
	// --------------------------------------------------------------------
203
 
204
	/**
	* Clean Keys
	*
	* Guards against malicious array keys: a key may only consist of
	* letters, digits, colon, underscore, slash and dash. Any other key
	* terminates the script with 'Disallowed Key Characters.'.
	*
	* @access	private
	* @param	string
	* @return	string	the unchanged key when it is acceptable
	*/
	function _clean_input_keys($str)
	{
		if (preg_match("/^[a-z0-9:_\/-]+$/i", $str))
		{
			return $str;
		}

		exit('Disallowed Key Characters.');
	}
224
 
225
	// --------------------------------------------------------------------
226
 
227
	/**
	* Fetch from array
	*
	* Helper used to read a single value out of one of the global arrays.
	*
	* @access	private
	* @param	array	array to read from (passed by reference)
	* @param	string	key to look up
	* @param	bool	TRUE to pass the value through xss_clean()
	* @return	mixed	the value, or FALSE when the key is not set
	*/
	function _fetch_from_array(&$array, $index = '', $xss_clean = FALSE)
	{
		if (isset($array[$index]))
		{
			return ($xss_clean === TRUE) ? $this->xss_clean($array[$index]) : $array[$index];
		}

		return FALSE;
	}
252
 
253
	// --------------------------------------------------------------------
254
 
255
	/**
	* Fetch an item from the GET array
	*
	* @access	public
	* @param	string	key to look up in $_GET
	* @param	bool	TRUE to pass the value through xss_clean()
	* @return	mixed	the value, or FALSE when the key is not set
	*/
	function get($index = '', $xss_clean = FALSE)
	{
		$value = $this->_fetch_from_array($_GET, $index, $xss_clean);

		return $value;
	}
267
 
268
	// --------------------------------------------------------------------
269
 
270
	/**
	* Fetch an item from the POST array
	*
	* @access	public
	* @param	string	key to look up in $_POST
	* @param	bool	TRUE to pass the value through xss_clean()
	* @return	mixed	the value, or FALSE when the key is not set
	*/
	function post($index = '', $xss_clean = FALSE)
	{
		$value = $this->_fetch_from_array($_POST, $index, $xss_clean);

		return $value;
	}
282
 
283
	// --------------------------------------------------------------------
284
 
285
	/**
	* Fetch an item from either the GET array or the POST
	*
	* $_POST takes precedence: $_GET is only consulted when the key is
	* not set in $_POST.
	*
	* @access	public
	* @param	string	The index key
	* @param	bool	XSS cleaning
	* @return	mixed	the value, or FALSE when the key is set in neither array
	*/
	function get_post($index = '', $xss_clean = FALSE)
	{
		return isset($_POST[$index])
			? $this->post($index, $xss_clean)
			: $this->get($index, $xss_clean);
	}
304
 
305
	// --------------------------------------------------------------------
306
 
307
	/**
	* Fetch an item from the COOKIE array
	*
	* @access	public
	* @param	string	key to look up in $_COOKIE
	* @param	bool	TRUE to pass the value through xss_clean()
	* @return	mixed	the value, or FALSE when the key is not set
	*/
	function cookie($index = '', $xss_clean = FALSE)
	{
		$value = $this->_fetch_from_array($_COOKIE, $index, $xss_clean);

		return $value;
	}
319
 
320
	// --------------------------------------------------------------------
321
 
322
	/**
	* Fetch an item from the SERVER array
	*
	* @access	public
	* @param	string	key to look up in $_SERVER
	* @param	bool	TRUE to pass the value through xss_clean()
	* @return	mixed	the value, or FALSE when the key is not set
	*/
	function server($index = '', $xss_clean = FALSE)
	{
		$value = $this->_fetch_from_array($_SERVER, $index, $xss_clean);

		return $value;
	}
334
 
335
	// --------------------------------------------------------------------
336
 
337
	/**
	* Fetch the IP Address
	*
	* Resolves the client address from REMOTE_ADDR, HTTP_CLIENT_IP and
	* HTTP_X_FORWARDED_FOR (honouring the 'proxy_ips' config item), caches
	* it on the instance and returns it. '0.0.0.0' is returned when no
	* address is available or the candidate fails valid_ip().
	*
	* @access	public
	* @return	string
	*/
	function ip_address()
	{
		// Already resolved during this request
		if ($this->ip_address !== FALSE)
		{
			return $this->ip_address;
		}

		$remote		= $this->server('REMOTE_ADDR');
		$client		= $this->server('HTTP_CLIENT_IP');
		$forwarded	= $this->server('HTTP_X_FORWARDED_FOR');

		if (config_item('proxy_ips') != '' && $forwarded && $remote)
		{
			$proxies = preg_split('/[\s,]/', config_item('proxy_ips'), -1, PREG_SPLIT_NO_EMPTY);
			$proxies = is_array($proxies) ? $proxies : array($proxies);

			// The forwarded header is only used when the request came through a listed proxy
			$this->ip_address = in_array($remote, $proxies) ? $forwarded : $remote;
		}
		elseif ($remote AND $client)
		{
			$this->ip_address = $client;
		}
		elseif ($remote)
		{
			$this->ip_address = $remote;
		}
		elseif ($client)
		{
			$this->ip_address = $client;
		}
		elseif ($forwarded)
		{
			$this->ip_address = $forwarded;
		}

		if ($this->ip_address === FALSE)
		{
			$this->ip_address = '0.0.0.0';
			return $this->ip_address;
		}

		// A comma separated list is reduced to its last entry
		if (strpos($this->ip_address, ',') !== FALSE)
		{
			$candidates = explode(',', $this->ip_address);
			$this->ip_address = trim(end($candidates));
		}

		if ( ! $this->valid_ip($this->ip_address))
		{
			$this->ip_address = '0.0.0.0';
		}

		return $this->ip_address;
	}
393
 
394
	// --------------------------------------------------------------------
395
 
396
	/**
	* Validate IP Address
	*
	* Accepts dotted-quad IPv4 addresses only: exactly four segments,
	* each made of one to three digits with a value of at most 255, and
	* a first segment that does not start with 0.
	*
	* Updated version suggested by Geert De Deckere
	*
	* @access	public
	* @param	string
	* @return	bool	TRUE when the address is valid, FALSE otherwise
	*/
	function valid_ip($ip)
	{
		$ip_segments = explode('.', $ip);

		// Always 4 segments needed
		if (count($ip_segments) != 4)
		{
			return FALSE;
		}

		// IP can not start with 0. An empty first segment (".1.2.3") is
		// rejected here so that no uninitialized string offset is read.
		if ($ip_segments[0] === '' OR $ip_segments[0][0] == '0')
		{
			return FALSE;
		}

		// Check each segment
		foreach ($ip_segments as $segment)
		{
			// IP segments must be digits and can not be
			// longer than 3 digits or greater than 255
			if ($segment == '' OR preg_match("/[^0-9]/", $segment) OR $segment > 255 OR strlen($segment) > 3)
			{
				return FALSE;
			}
		}

		return TRUE;
	}
432
 
433
	// --------------------------------------------------------------------
434
 
435
	/**
	* User Agent
	*
	* Returns $_SERVER['HTTP_USER_AGENT'] and caches it on the instance.
	*
	* @access	public
	* @return	mixed	the user agent string, or FALSE when the header is not set
	*/
	function user_agent()
	{
		if ($this->user_agent === FALSE)
		{
			$this->user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : FALSE;
		}

		return $this->user_agent;
	}
452
 
453
	// --------------------------------------------------------------------
454
 
455
	/**
	* Filename Security
	*
	* Strips directory traversal sequences, markup characters and a set
	* of URL-encoded characters from a file name, then removes backslash
	* escapes.
	*
	* The stripping is repeated until the string no longer changes, so
	* that sequences which only appear after a removal (for example
	* ".....///", which collapses to "../" after one pass) are removed
	* as well.
	*
	* @access	public
	* @param	string
	* @return	string
	*/
	function filename_security($str)
	{
		$bad = array(
						"../",
						"./",
						"<!--",
						"-->",
						"<",
						">",
						"'",
						'"',
						'&',
						'$',
						'#',
						'{',
						'}',
						'[',
						']',
						'=',
						';',
						'?',
						"%20",
						"%22",
						"%3c",		// <
						"%253c", 	// < (double encoded)
						"%3e", 		// >
						"%0e", 		// control character 0x0E
						"%28", 		// (
						"%29", 		// )
						"%2528", 	// ( (double encoded)
						"%26", 		// &
						"%24", 		// $
						"%3f", 		// ?
						"%3b", 		// ;
						"%3d"		// =
					);

		// Every changing pass shortens the string, so the loop terminates
		do
		{
			$previous = $str;
			$str = stripslashes(str_replace($bad, '', $str));
		}
		while ($previous !== $str);

		return $str;
	}
501
 
502
	// --------------------------------------------------------------------
503
 
504
	/**
	* XSS Clean
	*
	* Sanitizes data so that Cross Site Scripting Hacks can be
	* prevented.  This function does a fair amount of work but
	* it is extremely thorough, designed to prevent even the
	* most obscure XSS attempts.  Nothing is ever 100% foolproof,
	* of course, but I haven't been able to get anything past
	* the filter.
	*
	* Note: This function should only be used to deal with data
	* upon submission.  It's not something that should
	* be used for general runtime processing.
	*
	* This function was based in part on some code and ideas I
	* got from Bitflux: http://blog.bitflux.ch/wiki/XSS_Prevention
	*
	* To help develop this script I used this great list of
	* vulnerabilities along with a few other hacks I've
	* harvested from examining vulnerabilities in other programs:
	* http://ha.ckers.org/xss.html
	*
	* Arrays are cleaned element by element (recursively, without
	* forwarding $is_image) and returned as arrays.
	*
	* @access	public
	* @param	mixed	string, or array of strings, to clean
	* @param	bool	TRUE when the string is the content of an image file
	* @return	mixed	the cleaned string/array; when $is_image is TRUE a bool
	*					telling whether the content passed through unchanged
	*/
	function xss_clean($str, $is_image = FALSE)
	{
		/*
		* Is the string an array?
		*
		* foreach is used instead of each(), which no longer exists in PHP 8.
		*/
		if (is_array($str))
		{
			foreach (array_keys($str) as $key)
			{
				$str[$key] = $this->xss_clean($str[$key]);
			}

			return $str;
		}

		/*
		* Remove Invisible Characters
		*/
		$str = $this->_remove_invisible_characters($str);

		/*
		* Protect GET variables in URLs
		*
		* The ampersand of "&name=value" pairs is temporarily swapped for
		* the random xss_hash() token so the entity validation below does
		* not mistake it for the start of an entity.
		*/

		// 901119URL5918AMP18930PROTECT8198

		$str = preg_replace('|\&([a-z\_0-9]+)\=([a-z\_0-9]+)|i', $this->xss_hash()."\\1=\\2", $str);

		/*
		* Validate standard character entities
		*
		* Add a semicolon if missing.  We do this to enable
		* the conversion of entities to ASCII later.
		*
		*/
		$str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

		/*
		* Validate UTF16 two byte encoding (x00)
		*
		* Just as above, adds a semicolon if missing.
		*
		*/
		$str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

		/*
		* Un-Protect GET variables in URLs
		*/
		$str = str_replace($this->xss_hash(), '&', $str);

		/*
		* URL Decode
		*
		* Just in case stuff like this is submitted:
		*
		* <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
		*
		* Note: Use rawurldecode() so it does not remove plus signs
		*
		*/
		$str = rawurldecode($str);

		/*
		* Convert character entities to ASCII
		*
		* This permits our tests below to work reliably.
		* We only convert entities that are within tags since
		* these are the ones that will pose security problems.
		*
		*/

		$str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);

		$str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_html_entity_decode_callback'), $str);

		/*
		* Remove Invisible Characters Again!
		*/
		$str = $this->_remove_invisible_characters($str);

		/*
		* Convert all tabs to spaces
		*
		* This prevents strings like this: ja	vascript
		* NOTE: we deal with spaces between characters later.
		* NOTE: preg_replace was found to be amazingly slow here on large blocks of data,
		* so we use str_replace.
		*
		*/

		if (strpos($str, "\t") !== FALSE)
		{
			$str = str_replace("\t", ' ', $str);
		}

		/*
		* Capture converted string for later comparison
		*/
		$converted_string = $str;

		/*
		* Not Allowed Under Any Conditions
		*/

		foreach ($this->never_allowed_str as $key => $val)
		{
			$str = str_replace($key, $val, $str);
		}

		foreach ($this->never_allowed_regex as $key => $val)
		{
			$str = preg_replace("#".$key."#i", $val, $str);
		}

		/*
		* Makes PHP tags safe
		*
		*  Note: XML tags are inadvertently replaced too:
		*
		*	<?xml
		*
		* But it doesn't seem to pose a problem.
		*
		*/
		if ($is_image === TRUE)
		{
			// Images have a tendency to have the PHP short opening and closing tags every so often
			// so we skip those and only do the long opening tags.
			$str = str_replace(array('<?php', '<?PHP'),  array('&lt;?php', '&lt;?PHP'), $str);
		}
		else
		{
			$str = str_replace(array('<?php', '<?PHP', '<?', '?'.'>'),  array('&lt;?php', '&lt;?PHP', '&lt;?', '?&gt;'), $str);
		}

		/*
		* Compact any exploded words
		*
		* This corrects words like:  j a v a s c r i p t
		* These words are compacted back to their correct state.
		*
		*/
		$words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');
		foreach ($words as $word)
		{
			$temp = '';

			for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
			{
				$temp .= substr($word, $i, 1)."\s*";
			}

			// We only want to do this when it is followed by a non-word character
			// That way valid stuff like "dealer to" does not become "dealerto"
			// (the trailing "\s*" of $temp is cut off by substr(..., 0, -3))
			$str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
		}

		/*
		* Remove disallowed Javascript in links or img tags
		* We used to do some version comparisons and use of stripos for PHP5, but it is dog slow compared
		* to these simplified non-capturing preg_match(), especially if the pattern exists in the string
		*
		* The pass is repeated until it no longer changes the string.
		*/
		do
		{
			$original = $str;

			if (preg_match("/<a/i", $str))
			{
				$str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
			}

			if (preg_match("/<img/i", $str))
			{
				$str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
			}

			if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
			{
				$str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
			}
		}
		while($original != $str);

		unset($original);

		/*
		* Remove JavaScript Event Handlers
		*
		* Note: This code is a little blunt.  It removes
		* the event handler and anything up to the closing >,
		* but it's unlikely to be a problem.
		*
		*/
		$event_handlers = array('[^a-z_\-]on\w*','xmlns');

		if ($is_image === TRUE)
		{
			/*
			* Adobe Photoshop puts XML metadata into JFIF images, including namespacing,
			* so we have to allow this for images. -Paul
			*/
			unset($event_handlers[array_search('xmlns', $event_handlers)]);
		}

		$str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

		/*
		* Sanitize naughty HTML elements
		*
		* If a tag containing any of the words in the list
		* below is found, the tag gets converted to entities.
		*
		* So this: <blink>
		* Becomes: &lt;blink&gt;
		*
		*/
		$naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
		$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

		/*
		* Sanitize naughty scripting elements
		*
		* Similar to above, only instead of looking for
		* tags it looks for PHP and JavaScript commands
		* that are disallowed.  Rather than removing the
		* code, it simply converts the parenthesis to entities
		* rendering the code un-executable.
		*
		* For example:	eval('some code')
		* Becomes:		eval&#40;'some code'&#41;
		*
		*/
		$str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

		/*
		* Final clean up
		*
		* This adds a bit of extra precaution in case
		* something got through the above filters
		*
		*/
		foreach ($this->never_allowed_str as $key => $val)
		{
			$str = str_replace($key, $val, $str);
		}

		foreach ($this->never_allowed_regex as $key => $val)
		{
			$str = preg_replace("#".$key."#i", $val, $str);
		}

		/*
		*  Images are Handled in a Special Way
		*  - Essentially, we want to know that after all of the character conversion is done whether
		*  any unwanted, likely XSS, code was found.  If not, we return TRUE, as the image is clean.
		*  However, if the string post-conversion does not match the string post-removal of XSS,
		*  then it fails, as there was unwanted XSS code found and removed/changed during processing.
		*/

		if ($is_image === TRUE)
		{
			if ($str == $converted_string)
			{
				return TRUE;
			}
			else
			{
				return FALSE;
			}
		}

		log_message('debug', "XSS Filtering completed");
		return $str;
	}
805
 
806
	// --------------------------------------------------------------------
807
 
808
	/**
	* Random Hash for protecting URLs
	*
	* Lazily generates an MD5 token from the current time plus a random
	* number and caches it on the instance, so every call during one
	* request returns the same value.
	*
	* @access	public
	* @return	string	32 character hexadecimal token
	*/
	function xss_hash()
	{
		if ($this->xss_hash == '')
		{
			// version_compare() instead of comparing the version string with
			// a float, which depends on loose string-to-number conversion
			if (version_compare(phpversion(), '4.2.0', '>='))
			{
				mt_srand();
			}
			else
			{
				// Older versions need an explicit seed
				mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
			}

			$this->xss_hash = md5(time() + mt_rand(0, 1999999999));
		}

		return $this->xss_hash;
	}
828
 
829
	// --------------------------------------------------------------------
830
 
831
	/**
	* Remove Invisible Characters
	*
	* This prevents sandwiching null characters
	* between ascii characters, like Java\0script.
	*
	* Removes every control character except newline (dec 10), carriage
	* return (dec 13) and horizontal tab (dec 09), both in raw form and
	* in URL-encoded form. The URL-encoded patterns are matched case
	* insensitively so that "%0B" is treated like "%0b". Removal is
	* repeated until the string no longer changes.
	*
	* @access	public
	* @param	string
	* @return	string
	*/
	function _remove_invisible_characters($str)
	{
		static $non_displayables;

		if ( ! isset($non_displayables))
		{
			// every control character except newline (dec 10), carriage return (dec 13), and horizontal tab (dec 09),
			$non_displayables = array(
										'/%0[0-8bcef]/i',			// url encoded 00-08, 11, 12, 14, 15
										'/%1[0-9a-f]/i',			// url encoded 16-31
										'/[\x00-\x08]/',			// 00-08
										'/\x0b/', '/\x0c/',			// 11, 12
										'/[\x0e-\x1f]/'				// 14-31
									);
		}

		do
		{
			$cleaned = $str;
			$str = preg_replace($non_displayables, '', $str);
		}
		// strict comparison: loosely equal numeric strings must not end the loop early
		while ($cleaned !== $str);

		return $str;
	}
866
 
867
	// --------------------------------------------------------------------
868
 
869
	/**
	* Compact Exploded Words
	*
	* Callback function for xss_clean() to remove whitespace from
	* things like j a v a s c r i p t
	*
	* @access	public
	* @param	array	regex match: [1] the exploded word, [2] the following non-word character
	* @return	string	the word without whitespace, followed by the untouched character
	*/
	function _compact_exploded_words($matches)
	{
		$squeezed = preg_replace('/\s+/s', '', $matches[1]);

		return $squeezed.$matches[2];
	}
883
 
884
	// --------------------------------------------------------------------
885
 
886
	/**
	* Sanitize Naughty HTML
	*
	* Callback function for xss_clean() to neutralize naughty HTML elements
	* by turning their angle brackets into entities.
	*
	* @access	private
	* @param	array	regex match: [1] slashes/space, [2] tag name, [3] attributes, [4] trailing brackets
	* @return	string
	*/
	function _sanitize_naughty_html($matches)
	{
		// The opening bracket is always encoded
		$opening = '&lt;'.$matches[1].$matches[2].$matches[3];

		// Captured trailing brackets are encoded too, to prevent recursive vectors
		$trailing = strtr($matches[4], array('>' => '&gt;', '<' => '&lt;'));

		return $opening.$trailing;
	}
905
 
906
	// --------------------------------------------------------------------
907
 
908
	/**
	* JS Link Removal
	*
	* Callback function for xss_clean() to sanitize links
	* This limits the PCRE backtracks, making it more performance friendly
	* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	* PHP 5.2+ on link-heavy strings
	*
	* @access	private
	* @param	array	regex match: [0] the whole <a ...> tag, [1] its attribute text
	* @return	string	the tag with its attribute text replaced by the filtered version
	*/
	function _js_link_removal($match)
	{
		$raw_attributes = $match[1];

		// Keep only well-formed quoted attributes
		$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

		// Cut a href value up to and including the first disallowed token
		$filtered = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

		return str_replace($raw_attributes, $filtered, $match[0]);
	}
925
 
926
	/**
	* JS Image Removal
	*
	* Callback function for xss_clean() to sanitize image tags
	* This limits the PCRE backtracks, making it more performance friendly
	* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
	* PHP 5.2+ on image tag heavy strings
	*
	* @access	private
	* @param	array	regex match: [0] the whole <img ...> tag, [1] its attribute text
	* @return	string	the tag with its attribute text replaced by the filtered version
	*/
	function _js_img_removal($match)
	{
		$raw_attributes = $match[1];

		// Keep only well-formed quoted attributes
		$attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

		// Cut a src value up to and including the first disallowed token
		$filtered = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

		return str_replace($raw_attributes, $filtered, $match[0]);
	}
943
 
944
	// --------------------------------------------------------------------
945
 
946
	/**
	* Attribute Conversion
	*
	* Used as a callback for XSS Clean. Encodes angle brackets and doubles
	* backslashes inside a matched attribute.
	*
	* @access	public
	* @param	array	regex match; only element [0] is used
	* @return	string
	*/
	function _convert_attribute($match)
	{
		$replacements = array(
			'>'		=> '&gt;',
			'<'		=> '&lt;',
			'\\'	=> '\\\\'
		);

		return strtr($match[0], $replacements);
	}
959
 
960
	// --------------------------------------------------------------------
961
 
962
	/**
	* HTML Entity Decode Callback
	*
	* Used as a callback for XSS Clean. Decodes the entities of the matched
	* text using the upper-cased 'charset' config item.
	*
	* @access	public
	* @param	array	regex match; only element [0] is used
	* @return	string
	*/
	function _html_entity_decode_callback($match)
	{
		$config =& load_class('Config');

		return $this->_html_entity_decode($match[0], strtoupper($config->item('charset')));
	}
978
 
979
	// --------------------------------------------------------------------
980
 
981
	/**
	* HTML Entities Decode
	*
	* This function is a replacement for html_entity_decode()
	*
	* In some versions of PHP the native function does not work
	* when UTF-8 is the specified character set, so this gives us
	* a work-around.  More info here:
	* http://bugs.php.net/bug.php?id=25670
	*
	* NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
	* character set, and the PHP developers said they were not back porting the
	* fix to versions other than PHP 5.x.
	*
	* Numeric entities are decoded with preg_split() rather than the "e"
	* pattern modifier, which no longer exists in PHP 7. Decimal entities
	* are always read as base 10 ("&#065" is "A").
	*
	* @access	private
	* @param	string	text to decode
	* @param	string	character set handed to html_entity_decode()
	* @return	string
	*/
	function _html_entity_decode($str, $charset='UTF-8')
	{
		if (stristr($str, '&') === FALSE) return $str;

		// The reason we are not using html_entity_decode() by itself is because
		// while it is not technically correct to leave out the semicolon
		// at the end of an entity most browsers will still interpret the entity
		// correctly.  html_entity_decode() does not convert entities without
		// semicolons, so we are left with our own little solution here. Bummer.

		$use_native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

		if ($use_native)
		{
			// Terminated entities are handled natively; only unterminated
			// numeric entities are left for the patterns below
			$str = html_entity_decode($str, ENT_COMPAT, $charset);
			$terminator = '';
		}
		else
		{
			// Without the native function an optional semicolon is consumed too
			$terminator = ';{0,1}';
		}

		// Numeric Entities: hexadecimal first, then decimal
		$numeric = array(
							16 => '~&#x(0*[0-9a-f]{2,5})'.$terminator.'~i',
							10 => '~&#([0-9]{2,4})'.$terminator.'~'
						);

		foreach ($numeric as $base => $pattern)
		{
			// With DELIM_CAPTURE the odd elements are the captured numbers
			// and the even elements the untouched text between them
			$parts = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

			if ( ! is_array($parts))
			{
				continue;
			}

			for ($i = 1, $total = count($parts); $i < $total; $i += 2)
			{
				$code = ($base === 16) ? hexdec($parts[$i]) : (int) $parts[$i];
				$parts[$i] = chr($code % 256);
			}

			$str = implode('', $parts);
		}

		// Literal Entities - Slightly slow so we only do it when an
		// ampersand is still present
		if ( ! $use_native && stristr($str, '&') !== FALSE)
		{
			$str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
		}

		return $str;
	}
1034
 
1035
	// --------------------------------------------------------------------
1036
 
1037
	/**
	* Filter Attributes
	*
	* Filters tag attributes for consistency and safety: only quoted
	* name="value" / name='value' pairs are kept, and C-style comments
	* inside them are removed.
	*
	* @access	public
	* @param	string	raw attribute text of a tag
	* @return	string	the retained attributes concatenated, or '' when none match
	*/
	function _filter_attributes($str)
	{
		if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
		{
			return '';
		}

		// Strip /* ... */ comments from every matched attribute in one call
		return implode('', preg_replace("#/\*.*?\*/#s", '', $matches[0]));
	}
1060
 
1061
	// --------------------------------------------------------------------
1062
 
1063
}
1064
// END Input class
1065
 
1066
/* End of file Input.php */
1067
/* Location: ./system/libraries/Input.php */