WebSVN – lars-tiefland.codeigniter – Blame – /system/libraries/Typography.php

Revision	Autor	Zeilennr.	Zeile
1	lars	1	`<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');`
		2	`/**`
		3	`* CodeIgniter`
		4	`*`
		5	`* An open source application development framework for PHP 4.3.2 or newer`
		6	`*`
		7	`* @package CodeIgniter`
		8	`* @author ExpressionEngine Dev Team`
		9	`* @copyright Copyright (c) 2008, EllisLab, Inc.`
		10	`* @license http://codeigniter.com/user_guide/license.html`
		11	`* @link http://codeigniter.com`
		12	`* @since Version 1.0`
		13	`* @filesource`
		14	`*/`
		15
		16	`// ------------------------------------------------------------------------`
		17
		18	`/**`
		19	`* Typography Class`
		20	`*`
		21	`*`
		22	`* @access private`
		23	`* @category Helpers`
		24	`* @author ExpressionEngine Dev Team`
		25	`* @link http://codeigniter.com/user_guide/helpers/`
		26	`*/`
		27	`class CI_Typography {`
		28
		29	`// Block level elements that should not be wrapped inside <p> tags`
		30	`var $block_elements = 'address\|blockquote\|div\|dl\|fieldset\|form\|h\d\|hr\|noscript\|object\|ol\|p\|pre\|script\|table\|ul';`
		31
		32	`// Elements that should not have <p> and <br /> tags within them.`
		33	`var $skip_elements = 'p\|pre\|ol\|ul\|dl\|object\|table';`
		34
		35	`// Tags we want the parser to completely ignore when splitting the string.`
		36	`var $inline_elements = 'a\|abbr\|acronym\|b\|bdo\|big\|br\|button\|cite\|code\|del\|dfn\|em\|i\|img\|ins\|input\|label\|map\|kbd\|q\|samp\|select\|small\|span\|strong\|sub\|sup\|textarea\|tt\|var';`
		37
		38	`// array of block level elements that require inner content to be within another block level element`
		39	`var $inner_block_required = array('blockquote');`
		40
		41	`// the last block element parsed`
		42	`var $last_block_element = '';`
		43
		44	`// whether or not to protect quotes within { curly braces }`
		45	`var $protect_braced_quotes = FALSE;`
		46
		47	`/**`
		48	`* Nothing to do here...`
		49	`*`
		50	`*/`
		51	`function CI_Typography()`
		52	`{`
		53	`}`
		54
		55	`/**`
		56	`* Auto Typography`
		57	`*`
		58	`* This function converts text, making it typographically correct:`
		59	`* - Converts double spaces into paragraphs.`
		60	`* - Converts single line breaks into <br /> tags`
		61	`* - Converts single and double quotes into correctly facing curly quote entities.`
		62	`* - Converts three dots into ellipsis.`
		63	`* - Converts double dashes into em-dashes.`
		64	`* - Converts two spaces into entities`
		65	`*`
		66	`* @access public`
		67	`* @param string`
		68	`* @param bool whether to reduce more then two consecutive newlines to two`
		69	`* @return string`
		70	`*/`
		71	`function auto_typography($str, $reduce_linebreaks = FALSE)`
		72	`{`
		73	`if ($str == '')`
		74	`{`
		75	`return '';`
		76	`}`
		77
		78	`// Standardize Newlines to make matching easier`
		79	`if (strpos($str, "\r") !== FALSE)`
		80	`{`
		81	`$str = str_replace(array("\r\n", "\r"), "\n", $str);`
		82	`}`
		83
		84	`// Reduce line breaks. If there are more than two consecutive linebreaks`
		85	`// we'll compress them down to a maximum of two since there's no benefit to more.`
		86	`if ($reduce_linebreaks === TRUE)`
		87	`{`
		88	`$str = preg_replace("/\n\n+/", "\n\n", $str);`
		89	`}`
		90
		91	`// HTML comment tags don't conform to patterns of normal tags, so pull them out separately, only if needed`
		92	`$html_comments = array();`
		93	`if (strpos($str, '<!--') !== FALSE)`
		94	`{`
		95	`if (preg_match_all("#(<!\-\-.*?\-\->)#s", $str, $matches))`
		96	`{`
		97	`for ($i = 0, $total = count($matches[0]); $i < $total; $i++)`
		98	`{`
		99	`$html_comments[] = $matches[0][$i];`
		100	`$str = str_replace($matches[0][$i], '{@HC'.$i.'}', $str);`
		101	`}`
		102	`}`
		103	`}`
		104
		105	`// match and yank <pre> tags if they exist. It's cheaper to do this separately since most content will`
		106	`// not contain <pre> tags, and it keeps the PCRE patterns below simpler and faster`
		107	`if (strpos($str, '<pre') !== FALSE)`
		108	`{`
		109	`$str = preg_replace_callback("#<pre.?>.?</pre>#si", array($this, '_protect_characters'), $str);`
		110	`}`
		111
		112	`// Convert quotes within tags to temporary markers.`
		113	`$str = preg_replace_callback("#<.+?>#si", array($this, '_protect_characters'), $str);`
		114
		115	`// Do the same with braces if necessary`
		116	`if ($this->protect_braced_quotes === TRUE)`
		117	`{`
		118	`$str = preg_replace_callback("#\{.+?\}#si", array($this, '_protect_characters'), $str);`
		119	`}`
		120
		121	`// Convert "ignore" tags to temporary marker. The parser splits out the string at every tag`
		122	`// it encounters. Certain inline tags, like image tags, links, span tags, etc. will be`
		123	`// adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}`
		124	`$str = preg_replace("#<(/*)(".$this->inline_elements.")([ >])#i", "{@TAG}\\1\\2\\3", $str);`
		125
		126	`// Split the string at every tag. This expression creates an array with this prototype:`
		127	`//`
		128	`// [array]`
		129	`// {`
		130	`// [0] = <opening tag>`
		131	`// [1] = Content...`
		132	`// [2] = <closing tag>`
		133	`// Etc...`
		134	`// }`
		135	`$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);`
		136
		137	`// Build our finalized string. We cycle through the array, skipping tags, and processing the contained text`
		138	`$str = '';`
		139	`$process = TRUE;`
		140	`$paragraph = FALSE;`
		141	`$current_chunk = 0;`
		142	`$total_chunks = count($chunks);`
		143
		144	`foreach ($chunks as $chunk)`
		145	`{`
		146	`$current_chunk++;`
		147
		148	`// Are we dealing with a tag? If so, we'll skip the processing for this cycle.`
		149	`// Well also set the "process" flag which allows us to skip <pre> tags and a few other things.`
		150	`if (preg_match("#<(/)(".$this->block_elements.").?>#", $chunk, $match))`
		151	`{`
		152	`if (preg_match("#".$this->skip_elements."#", $match[2]))`
		153	`{`
		154	`$process = ($match[1] == '/') ? TRUE : FALSE;`
		155	`}`
		156
		157	`if ($match[1] == '')`
		158	`{`
		159	`$this->last_block_element = $match[2];`
		160	`}`
		161
		162	`$str .= $chunk;`
		163	`continue;`
		164	`}`
		165
		166	`if ($process == FALSE)`
		167	`{`
		168	`$str .= $chunk;`
		169	`continue;`
		170	`}`
		171
		172	`// Force a newline to make sure end tags get processed by _format_newlines()`
		173	`if ($current_chunk == $total_chunks)`
		174	`{`
		175	`$chunk .= "\n";`
		176	`}`
		177
		178	`// Convert Newlines into <p> and <br /> tags`
		179	`$str .= $this->_format_newlines($chunk);`
		180	`}`
		181
		182	`// No opening block level tag? Add it if needed.`
		183	`if ( ! preg_match("/^\s*<(?:".$this->block_elements.")/i", $str))`
		184	`{`
		185	`$str = preg_replace("/^(.*?)<(".$this->block_elements.")/i", '<p>$1</p><$2', $str);`
		186	`}`
		187
		188	`// Convert quotes, elipsis, em-dashes, non-breaking spaces, and ampersands`
		189	`$str = $this->format_characters($str);`
		190
		191	`// restore HTML comments`
		192	`for ($i = 0, $total = count($html_comments); $i < $total; $i++)`
		193	`{`
		194	`// remove surrounding paragraph tags, but only if there's an opening paragraph tag`
		195	`// otherwise HTML comments at the ends of paragraphs will have the closing tag removed`
		196	`// if '<p>{@HC1}' then replace <p>{@HC1}</p> with the comment, else replace only {@HC1} with the comment`
		197	`$str = preg_replace('#(?(?=<p>\{@HC'.$i.'\})<p>\{@HC'.$i.'\}(\s*</p>)\|\{@HC'.$i.'\})#s', $html_comments[$i], $str);`
		198	`}`
		199
		200	`// Final clean up`
		201	`$table = array(`
		202
		203	`// If the user submitted their own paragraph tags within the text`
		204	`// we will retain them instead of using our tags.`
		205	`'/(<p[^>*?]>)<p>/' => '$1', // <?php BBEdit syntax coloring bug fix`
		206
		207	`// Reduce multiple instances of opening/closing paragraph tags to a single one`
		208	`'#(</p>)+#' => '</p>',`
		209	`'/(<p>\W*<p>)+/' => '<p>',`
		210
		211	`// Clean up stray paragraph tags that appear before block level elements`
		212	`'#<p></p><('.$this->block_elements.')#' => '<$1',`
		213
		214	`// Clean up stray non-breaking spaces preceeding block elements`
		215	`'#( \s*)+<('.$this->block_elements.')#' => ' <$2',`
		216
		217	`// Replace the temporary markers we added earlier`
		218	`'/\{@TAG\}/' => '<',`
		219	`'/\{@DQ\}/' => '"',`
		220	`'/\{@SQ\}/' => "'",`
		221	`'/\{@DD\}/' => '--',`
		222	`'/\{@NBS\}/' => ' '`
		223
		224	`);`
		225
		226	`// Do we need to reduce empty lines?`
		227	`if ($reduce_linebreaks === TRUE)`
		228	`{`
		229	`$table['#<p>\n*</p>#'] = '';`
		230	`}`
		231	`else`
		232	`{`
		233	`// If we have empty paragraph tags we add a non-breaking space`
		234	`// otherwise most browsers won't treat them as true paragraphs`
		235	`$table['#<p></p>#'] = '<p> </p>';`
		236	`}`
		237
		238	`return preg_replace(array_keys($table), $table, $str);`
		239
		240	`}`
		241
		242	`// --------------------------------------------------------------------`
		243
		244	`/**`
		245	`* Format Characters`
		246	`*`
		247	`* This function mainly converts double and single quotes`
		248	`* to curly entities, but it also converts em-dashes,`
		249	`* double spaces, and ampersands`
		250	`*`
		251	`* @access public`
		252	`* @param string`
		253	`* @return string`
		254	`*/`
		255	`function format_characters($str)`
		256	`{`
		257	`static $table;`
		258
		259	`if ( ! isset($table))`
		260	`{`
		261	`$table = array(`
		262	`// nested smart quotes, opening and closing`
		263	`// note that rules for grammar (English) allow only for two levels deep`
		264	`// and that single quotes are _supposed_ to always be on the outside`
		265	`// but we'll accommodate both`
		266	`// Note that in all cases, whitespace is the primary determining factor`
		267	`// on which direction to curl, with non-word characters like punctuation`
		268	`// being a secondary factor only after whitespace is addressed.`
		269	`'/\'"(\s\|$)/' => '’”$1',`
		270	`'/(^\|\s\|<p>)\'"/' => '$1‘“',`
		271	`'/\'"(\W)/' => '’”$1',`
		272	`'/(\W)\'"/' => '$1‘“',`
		273	`'/"\'(\s\|$)/' => '”’$1',`
		274	`'/(^\|\s\|<p>)"\'/' => '$1“‘',`
		275	`'/"\'(\W)/' => '”’$1',`
		276	`'/(\W)"\'/' => '$1“‘',`
		277
		278	`// single quote smart quotes`
		279	`'/\'(\s\|$)/' => '’$1',`
		280	`'/(^\|\s\|<p>)\'/' => '$1‘',`
		281	`'/\'(\W)/' => '’$1',`
		282	`'/(\W)\'/' => '$1‘',`
		283
		284	`// double quote smart quotes`
		285	`'/"(\s\|$)/' => '”$1',`
		286	`'/(^\|\s\|<p>)"/' => '$1“',`
		287	`'/"(\W)/' => '”$1',`
		288	`'/(\W)"/' => '$1“',`
		289
		290	`// apostrophes`
		291	`"/(\w)'(\w)/" => '$1’$2',`
		292
		293	`// Em dash and ellipses dots`
		294	`'/\s?\-\-\s?/' => '—',`
		295	`'/(\w)\.{3}/' => '$1…',`
		296
		297	`// double space after sentences`
		298	`'/(\W) /' => '$1  ',`
		299
		300	`// ampersands, if not a character entity`
		301	`'/&(?!#?[a-zA-Z0-9]{2,};)/' => '&'`
		302	`);`
		303	`}`
		304
		305	`return preg_replace(array_keys($table), $table, $str);`
		306	`}`
		307
		308	`// --------------------------------------------------------------------`
		309
		310	`/**`
		311	`* Format Newlines`
		312	`*`
		313	`* Converts newline characters into either <p> tags or <br />`
		314	`*`
		315	`* @access public`
		316	`* @param string`
		317	`* @return string`
		318	`*/`
		319	`function _format_newlines($str)`
		320	`{`
		321	`if ($str == '')`
		322	`{`
		323	`return $str;`
		324	`}`
		325
		326	`if (strpos($str, "\n") === FALSE && ! in_array($this->last_block_element, $this->inner_block_required))`
		327	`{`
		328	`return $str;`
		329	`}`
		330
		331	`// Convert two consecutive newlines to paragraphs`
		332	`$str = str_replace("\n\n", "</p>\n\n<p>", $str);`
		333
		334	`// Convert single spaces to <br /> tags`
		335	`$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);`
		336
		337	`// Wrap the whole enchilada in enclosing paragraphs`
		338	`if ($str != "\n")`
		339	`{`
		340	`$str = '<p>'.$str.'</p>';`
		341	`}`
		342
		343	`// Remove empty paragraphs if they are on the first line, as this`
		344	`// is a potential unintended consequence of the previous code`
		345	`$str = preg_replace("/<p><\/p>(.*)/", "\\1", $str, 1);`
		346
		347	`return $str;`
		348	`}`
		349
		350	`// ------------------------------------------------------------------------`
		351
		352	`/**`
		353	`* Protect Characters`
		354	`*`
		355	`* Protects special characters from being formatted later`
		356	`* We don't want quotes converted within tags so we'll temporarily convert them to {@DQ} and {@SQ}`
		357	`* and we don't want double dashes converted to emdash entities, so they are marked with {@DD}`
		358	`* likewise double spaces are converted to {@NBS} to prevent entity conversion`
		359	`*`
		360	`* @access public`
		361	`* @param array`
		362	`* @return string`
		363	`*/`
		364	`function _protect_characters($match)`
		365	`{`
		366	`return str_replace(array("'",'"','--',' '), array('{@SQ}', '{@DQ}', '{@DD}', '{@NBS}'), $match[0]);`
		367	`}`
		368
		369	`// --------------------------------------------------------------------`
		370
		371	`/**`
		372	`* Convert newlines to HTML line breaks except within PRE tags`
		373	`*`
		374	`* @access public`
		375	`* @param string`
		376	`* @return string`
		377	`*/`
		378	`function nl2br_except_pre($str)`
		379	`{`
		380	`$ex = explode("pre>",$str);`
		381	`$ct = count($ex);`
		382
		383	`$newstr = "";`
		384	`for ($i = 0; $i < $ct; $i++)`
		385	`{`
		386	`if (($i % 2) == 0)`
		387	`{`
		388	`$newstr .= nl2br($ex[$i]);`
		389	`}`
		390	`else`
		391	`{`
		392	`$newstr .= $ex[$i];`
		393	`}`
		394
		395	`if ($ct - 1 != $i)`
		396	`$newstr .= "pre>";`
		397	`}`
		398
		399	`return $newstr;`
		400	`}`
		401
		402	`}`
		403	`// END Typography Class`
		404
		405	`/* End of file Typography.php */`
		406	`/* Location: ./system/libraries/Typography.php */`

Subversion-Projekte lars-tiefland.codeigniter

(root)/system/libraries/Typography.php – Revision 1