| 1 |
lars |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
#
|
|
|
4 |
# Markdown - A text-to-HTML conversion tool for web writers
|
|
|
5 |
#
|
|
|
6 |
# Copyright (c) 2004-2005 John Gruber
|
|
|
7 |
# <http://daringfireball.net/projects/markdown/>
|
|
|
8 |
#
|
|
|
9 |
# Copyright (c) 2004-2005 Michel Fortin - PHP Port
|
|
|
10 |
# <http://www.michelf.com/projects/php-markdown/>
|
|
|
11 |
#
|
|
|
12 |
|
|
|
13 |
/**
|
|
|
14 |
* PHP5 version of the markdown parser.
|
|
|
15 |
* Usage:
|
|
|
16 |
* <code>
|
|
|
17 |
* $markdown = new MarkdownParser;
|
|
|
18 |
* echo $markdown->parse($text);
|
|
|
19 |
* </code>
|
|
|
20 |
*/
|
|
|
21 |
class MarkdownParser
|
|
|
22 |
{
|
|
|
23 |
private static $md_nested_brackets;
|
|
|
24 |
private static $md_escape_table = array();
|
|
|
25 |
private static $md_backslash_escape_table = array();
|
|
|
26 |
private static $md_nested_brackets_depth = 6;
|
|
|
27 |
|
|
|
28 |
protected $md_empty_element_suffix = " />"; # Change to ">" for HTML output
|
|
|
29 |
protected $md_tab_width = 4;
|
|
|
30 |
|
|
|
31 |
private $md_list_level = 0;
|
|
|
32 |
private $md_urls = array();
|
|
|
33 |
private $md_titles = array();
|
|
|
34 |
private $md_html_blocks = array();
|
|
|
35 |
|
|
|
36 |
public function __construct()
{
    # The static lookup tables are shared by every instance. They are built
    # the first time a parser is created, i.e. while the nested-brackets
    # pattern is still unset.
    if (self::$md_nested_brackets === null) {
        $this->initialize();
    }
}
|
|
|
41 |
|
|
|
42 |
private function initialize()
{
    #
    # Builds the static tables shared by all parser instances:
    #  - the regex fragment used to match (nested) square brackets,
    #  - the map from each special character to its MD5 hash,
    #  - the same map keyed by the backslash-escaped character.
    #
    $depth = self::$md_nested_brackets_depth;
    self::$md_nested_brackets =
        str_repeat('(?>[^\[\]]+|\[', $depth) . str_repeat('\])*', $depth);

    # Characters that carry meaning in Markdown, in the order in which they
    # are stored in the escape table.
    $specials = array(
        "\\", "`", "*", "_", "{", "}", "[", "]",
        "(", ")", ">", "#", "+", "-", ".", "!",
    );

    $hashes = array();
    foreach ($specials as $special) {
        $hashes[$special] = md5($special);
    }
    self::$md_escape_table = $hashes;

    # Same hash values, but looked up by the escaped form (backslash + char).
    foreach ($hashes as $special => $hash) {
        self::$md_backslash_escape_table["\\" . $special] = $hash;
    }
}
|
|
|
72 |
|
|
|
73 |
public function parse($text)
{
    #
    # Entry point: converts Markdown source in $text to HTML and returns it.
    # The order of the steps below matters; each one relies on the
    # normalisation performed by the previous ones.
    #

    # Start from empty per-document state so link definitions and hashed
    # HTML blocks from a previous parse() call cannot leak into this one.
    $this->md_urls = array();
    $this->md_titles = array();
    $this->md_html_blocks = array();

    # Normalise DOS and Mac line endings to Unix, and guarantee that the
    # document ends with a blank line.
    $text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";

    # Expand tabs to spaces.
    $text = $this->_Detab($text);

    # Empty out lines holding only spaces/tabs so that later patterns can
    # match blank lines with a plain /\n+/.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Swap block-level HTML for hash placeholders.
    $text = $this->_HashHTMLBlocks($text);

    # Pull link definitions out of the text and remember them.
    $text = $this->_StripLinkDefinitions($text);

    $text = $this->_RunBlockGamut($text);

    return $this->_UnescapeSpecialChars($text) . "\n";
}
|
|
|
117 |
|
|
|
118 |
|
|
|
119 |
private function _StripLinkDefinitions($text) {
    #
    # Removes link definitions from $text. Each definition found is handed
    # to _StripLinkDefinitions_callback(), which records its URL and title.
    # Returns the text without the definitions.
    #
    $max_indent = $this->md_tab_width - 1;

    # A definition looks like:   ^[id]: url "optional title"
    return preg_replace_callback('{
                        ^[ ]{0,'.$max_indent.'}\[(.+)\]: # id = $1
                          [ \t]*
                          \n? # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>? # url = $2
                          [ \t]*
                          \n? # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s) # lookbehind for whitespace
                            ["(]
                            (.+?) # title = $3
                            [")]
                            [ \t]*
                        )? # title is optional
                        (?:\n+|\Z)
        }xm',
        array($this, '_StripLinkDefinitions_callback'),
        $text);
}
|
|
|
149 |
|
|
|
150 |
private function _StripLinkDefinitions_callback($matches) {
    #
    # Callback for _StripLinkDefinitions(): stores one link definition.
    #   $matches[1] = link id, $matches[2] = URL, $matches[3] = optional title.
    # Returns the empty string, which replaces the definition in the text.
    #
    $link_id = strtolower($matches[1]);
    $this->md_urls[$link_id] = $this->_EncodeAmpsAndAngles($matches[2]);
    if (isset($matches[3])) {
        # The title is later emitted inside a double-quoted HTML attribute,
        # so literal double quotes must be stored as entities.
        $this->md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
    }
    return ''; # String that will replace the block
}
|
|
|
157 |
|
|
|
158 |
|
|
|
159 |
private function _HashHTMLBlocks($text) {
    #
    # Replaces raw block-level HTML found in $text with placeholders produced
    # by _HashHTMLBlocks_callback(), and returns the resulting text.
    #
    # Only block-level tags (headers, lists, tables, ...) are handled here;
    # text wrapped in span-level tags must still end up inside <p> tags.
    #
    $max_indent = $this->md_tab_width - 1;

    # The tag lists are hard-coded. The first (nested) pass additionally
    # accepts <ins> and <del>.
    $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'script|noscript|form|fieldset|iframe|math';
    $block_tags_a = $block_tags_b.'|ins|del';

    $patterns = array();

    # Pass 1 - nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    # The outermost tags must start at the left margin and the inner ones
    # must be indented. This has to run before the more liberal pass 2,
    # which would otherwise stop at the first closing tag.
    $patterns[] = "{
                (                       # save in $1
                    ^                   # start of line (with /m)
                    <($block_tags_a)    # start tag = $2
                    \\b                 # word break
                    (.*\\n)*?           # any number of lines, minimally matching
                    </\\2>              # the matching end tag
                    [ \\t]*             # trailing spaces/tabs
                    (?=\\n+|\\Z)        # followed by a newline or end of document
                )
        }xm";

    # Pass 2 - more liberal: simply from `\n<tag>` to `</tag>\n`.
    $patterns[] = "{
                (                       # save in $1
                    ^                   # start of line (with /m)
                    <($block_tags_b)    # start tag = $2
                    \\b                 # word break
                    (.*\\n)*?           # any number of lines, minimally matching
                    .*</\\2>            # the matching end tag
                    [ \\t]*             # trailing spaces/tabs
                    (?=\\n+|\\Z)        # followed by a newline or end of document
                )
        }xm";

    # Pass 3 - <hr />, kept separate rather than complicating the patterns
    # above.
    $patterns[] = '{
                (?:
                    (?<=\n\n)       # Starting after a blank line
                    |               # or
                    \A\n?           # the beginning of the doc
                )
                (                   # save in $1
                    [ ]{0,'.$max_indent.'}
                    <(hr)           # start tag = $2
                    \b              # word break
                    ([^<>])*?       #
                    /?>             # the matching end tag
                    [ \t]*
                    (?=\n{2,}|\Z)   # followed by a blank line or end of document
                )
        }x';

    # Pass 4 - standalone HTML comments.
    $patterns[] = '{
                (?:
                    (?<=\n\n)       # Starting after a blank line
                    |               # or
                    \A\n?           # the beginning of the doc
                )
                (                   # save in $1
                    [ ]{0,'.$max_indent.'}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ \t]*
                    (?=\n{2,}|\Z)   # followed by a blank line or end of document
                )
        }x';

    # Apply the passes one after another, each on the output of the last.
    $hasher = array($this, '_HashHTMLBlocks_callback');
    foreach ($patterns as $pattern) {
        $text = preg_replace_callback($pattern, $hasher, $text);
    }

    return $text;
}
|
|
|
259 |
private function _HashHTMLBlocks_callback($matches) {
    #
    # Callback for _HashHTMLBlocks(): remembers the matched HTML block
    # ($matches[1]) under its MD5 hash and returns that hash, surrounded by
    # blank lines, as the replacement text.
    #
    $block = $matches[1];
    $hash = md5($block);
    $this->md_html_blocks[$hash] = $block;

    return "\n\n" . $hash . "\n\n";
}
|
|
|
265 |
|
|
|
266 |
|
|
|
267 |
private function _RunBlockGamut($text) {
    #
    # Applies, in order, every transformation that produces block-level
    # markup (headers, rules, lists, code blocks, block quotes, paragraphs)
    # and returns the transformed text.
    #
    $text = $this->_DoHeaders($text);

    # Horizontal rules: a line made of three or more `*`, `-` or `_`.
    $hr_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx',
    );
    $hr_tag = "\n<hr" . $this->md_empty_element_suffix . "\n";
    $text = preg_replace($hr_patterns, $hr_tag, $text);

    $text = $this->_DoLists($text);
    $text = $this->_DoCodeBlocks($text);
    $text = $this->_DoBlockQuotes($text);

    # Hash the block-level markup generated by the steps above, so that
    # _FormParagraphs() does not wrap it in <p> tags.
    $text = $this->_HashHTMLBlocks($text);

    return $this->_FormParagraphs($text);
}
|
|
|
295 |
|
|
|
296 |
|
|
|
297 |
private function _RunSpanGamut($text) {
    #
    # Applies, in order, every transformation that happens *inside*
    # block-level elements (paragraphs, headers, list items) and returns the
    # transformed text.
    #
    $text = $this->_DoCodeSpans($text);
    $text = $this->_EscapeSpecialChars($text);

    # Images go before anchors because ![foo][f] also looks like an anchor.
    $text = $this->_DoImages($text);
    $text = $this->_DoAnchors($text);

    # Auto-links such as `<http://example.com/>` come after _DoAnchors(),
    # since < and > may delimit the URL of an inline link: [this](<url>).
    $text = $this->_DoAutoLinks($text);
    $text = $this->_EncodeAmpsAndAngles($text);
    $text = $this->_DoItalicsAndBold($text);

    # Hard line breaks: two or more spaces at the end of a line.
    $line_break = "<br" . $this->md_empty_element_suffix . "\n";

    return preg_replace('/ {2,}\n/', $line_break, $text);
}
|
|
|
324 |
|
|
|
325 |
|
|
|
326 |
private function _EscapeSpecialChars($text) {
    #
    # Splits $text into tokens with _TokenizeHTML() and rebuilds it:
    #  - inside tag tokens, * and _ are replaced by their hash values so they
    #    cannot be taken for emphasis markers later on;
    #  - every other token goes through _EncodeBackslashEscapes().
    #
    $rebuilt = '';

    foreach ($this->_TokenizeHTML($text) as $token) {
        if ($token[0] == 'tag') {
            $rebuilt .= str_replace(
                array('*', '_'),
                array(self::$md_escape_table['*'], self::$md_escape_table['_']),
                $token[1]);
            continue;
        }

        $rebuilt .= $this->_EncodeBackslashEscapes($token[1]);
    }

    return $rebuilt;
}
|
|
|
353 |
|
|
|
354 |
|
|
|
355 |
private function _DoAnchors($text) {
    #
    # Converts Markdown link syntax in $text into <a> tags and returns the
    # result. Reference-style links are handled first, inline links second.
    #
    $nested = self::$md_nested_brackets;

    # Reference-style links: [link text] [id]
    $reference_style = "{
        (                   # wrap whole match in $1
          \\[
            ({$nested})     # link text = $2
          \\]

          [ ]?              # one optional space
          (?:\\n[ ]*)?      # one optional newline followed by spaces

          \\[
            (.*?)           # id = $3
          \\]
        )
        }xs";
    $text = preg_replace_callback($reference_style,
        array($this, '_DoAnchors_reference_callback'), $text);

    # Inline-style links: [link text](url "optional title")
    $inline_style = "{
        (               # wrap whole match in $1
          \\[
            ({$nested}) # link text = $2
          \\]
          \\(           # literal paren
            [ \\t]*
            <?(.*?)>?   # href = $3
            [ \\t]*
            (           # $4
              (['\"])   # quote char = $5
              (.*?)     # Title = $6
              \\5       # matching quote
            )?          # title is optional
          \\)
        )
        }xs";

    return preg_replace_callback($inline_style,
        array($this, '_DoAnchors_inline_callback'), $text);
}
|
|
|
403 |
private function _DoAnchors_reference_callback($matches) {
    #
    # Callback for reference-style links.
    #   $matches[1] = whole match, $matches[2] = link text, $matches[3] = id.
    # Returns the <a> tag, or the unchanged source text when no definition
    # is stored for the id.
    #
    $link_text = $matches[2];

    # An empty id ([this][]) means the link text itself is the id.
    $link_id = strtolower($matches[3]);
    if ($link_id == "") {
        $link_id = strtolower($link_text);
    }

    if (!isset($this->md_urls[$link_id])) {
        return $matches[1];
    }

    # * and _ are replaced by their hashes so they are not mistaken for
    # italics/bold markers later on.
    $em_chars = array('*', '_');
    $em_hashes = array(self::$md_escape_table['*'], self::$md_escape_table['_']);

    $href = str_replace($em_chars, $em_hashes, $this->md_urls[$link_id]);
    $anchor = '<a href="' . $href . '"';

    if (isset($this->md_titles[$link_id])) {
        $title = str_replace($em_chars, $em_hashes, $this->md_titles[$link_id]);
        $anchor .= ' title="' . $title . '"';
    }

    return $anchor . '>' . $link_text . '</a>';
}
|
|
|
433 |
private function _DoAnchors_inline_callback($matches) {
    #
    # Callback for inline-style links.
    #   $matches[2] = link text, $matches[3] = URL,
    #   $matches[6] = title (absent when the link has no title).
    # Returns the <a> tag.
    #
    $link_text = $matches[2];

    # * and _ are replaced by their hashes so they are not mistaken for
    # italics/bold markers later on.
    $em_chars = array('*', '_');
    $em_hashes = array(self::$md_escape_table['*'], self::$md_escape_table['_']);

    $url = str_replace($em_chars, $em_hashes, $matches[3]);
    $result = "<a href=\"$url\"";

    # Test the capture group directly instead of binding a reference to it,
    # which would create the array element as a side effect.
    if (isset($matches[6])) {
        # The title lands inside a double-quoted attribute: literal double
        # quotes have to become entities or they would end the attribute.
        $title = str_replace('"', '&quot;', $matches[6]);
        $title = str_replace($em_chars, $em_hashes, $title);
        $result .= " title=\"$title\"";
    }

    $result .= ">$link_text</a>";

    return $result;
}
|
|
|
456 |
|
|
|
457 |
|
|
|
458 |
private function _DoImages($text) {
    #
    # Converts Markdown image syntax in $text into <img> tags and returns
    # the result. Reference-style images are handled first, inline second.
    #

    # Reference-style labeled images: ![alt text][id]
    $reference_style = '{
        (               # wrap whole match in $1
          !\[
            ('.self::$md_nested_brackets.') # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback($reference_style,
        array($this, '_DoImages_reference_callback'), $text);

    # Inline images: ![alt text](url "optional title")
    # (* and _ get encoded by the callback.)
    $inline_style = '{
        (               # wrap whole match in $1
          !\[
            ('.self::$md_nested_brackets.') # alt text = $2
          \]
          \(            # literal paren
            [ \t]*
            <?(\S+?)>?  # src url = $3
            [ \t]*
            (           # $4
              ([\'"])   # quote char = $5
              (.*?)     # title = $6
              \5        # matching quote
              [ \t]*
            )?          # title is optional
          \)
        )
        }xs';

    return preg_replace_callback($inline_style,
        array($this, '_DoImages_inline_callback'), $text);
}
|
|
|
508 |
private function _DoImages_reference_callback($matches) {
    #
    # Callback for reference-style images.
    #   $matches[1] = whole match, $matches[2] = alt text, $matches[3] = id.
    # Returns the <img> tag, or the unchanged source text when no definition
    # is stored for the id.
    #
    $whole_match = $matches[1];
    $alt_text = $matches[2];
    $link_id = strtolower($matches[3]);

    if ($link_id == "") {
        $link_id = strtolower($alt_text); # for shortcut links like ![this][].
    }

    # If there's no such link ID, leave intact:
    if (!isset($this->md_urls[$link_id])) {
        return $whole_match;
    }

    # The alt text lands inside a double-quoted attribute: literal double
    # quotes have to become entities or they would end the attribute.
    $alt_text = str_replace('"', '&quot;', $alt_text);

    # * and _ are replaced by their hashes so they are not mistaken for
    # italics/bold markers later on.
    $em_chars = array('*', '_');
    $em_hashes = array(self::$md_escape_table['*'], self::$md_escape_table['_']);

    $url = str_replace($em_chars, $em_hashes, $this->md_urls[$link_id]);
    $result = "<img src=\"$url\" alt=\"$alt_text\"";

    if (isset($this->md_titles[$link_id])) {
        $title = str_replace($em_chars, $em_hashes, $this->md_titles[$link_id]);
        $result .= " title=\"$title\"";
    }

    $result .= $this->md_empty_element_suffix;

    return $result;
}
|
|
|
541 |
private function _DoImages_inline_callback($matches) {
    #
    # Callback for inline images.
    #   $matches[2] = alt text, $matches[3] = URL,
    #   $matches[6] = title (absent when the image has no title).
    # Returns the <img> tag. As before, a title attribute is always emitted;
    # it is empty when the source gives no title.
    #
    $alt_text = $matches[2];
    $url = $matches[3];
    $title = isset($matches[6]) ? $matches[6] : '';

    # alt and title land inside double-quoted attributes: literal double
    # quotes have to become entities or they would end the attribute.
    $alt_text = str_replace('"', '&quot;', $alt_text);
    $title = str_replace('"', '&quot;', $title);

    # * and _ are replaced by their hashes so they are not mistaken for
    # italics/bold markers later on.
    $em_chars = array('*', '_');
    $em_hashes = array(self::$md_escape_table['*'], self::$md_escape_table['_']);

    $url = str_replace($em_chars, $em_hashes, $url);
    $title = str_replace($em_chars, $em_hashes, $title);

    $result = "<img src=\"$url\" alt=\"$alt_text\"";
    $result .= " title=\"$title\"";
    $result .= $this->md_empty_element_suffix;

    return $result;
}
|
|
|
567 |
|
|
|
568 |
|
|
|
569 |
private function _DoHeaders($text) {
    #
    # Converts Setext-style and atx-style headers in $text into <h1>..<h6>
    # elements and returns the result.
    #
    # The header text is handed to callbacks instead of being spliced into
    # code evaluated by the /e pattern modifier: /e evaluates a string built
    # from document content and is no longer supported as of PHP 7. Because
    # callbacks receive the raw captures, the quote un-slashing that /e
    # required is not needed either.
    #

    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
        array($this, '_DoHeaders_callback_setext_h1'), $text);
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
        array($this, '_DoHeaders_callback_setext_h2'), $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text = preg_replace_callback("{
            ^(\\#{1,6}) # $1 = string of #'s
            [ \\t]*
            (.+?)       # $2 = Header text
            [ \\t]*
            \\#*        # optional closing #'s (not counted)
            \\n+
        }xm",
        array($this, '_DoHeaders_callback_atx'), $text);

    return $text;
}
# Builds an <h1> element from a Setext header match ($matches[1] = text).
private function _DoHeaders_callback_setext_h1($matches) {
    return '<h1>' . $this->_RunSpanGamut($matches[1]) . "</h1>\n\n";
}
# Builds an <h2> element from a Setext header match ($matches[1] = text).
private function _DoHeaders_callback_setext_h2($matches) {
    return '<h2>' . $this->_RunSpanGamut($matches[1]) . "</h2>\n\n";
}
# Builds an <hN> element from an atx header match; N is the number of
# leading #'s ($matches[1]) and $matches[2] is the header text.
private function _DoHeaders_callback_atx($matches) {
    $level = strlen($matches[1]);
    return "<h$level>" . $this->_RunSpanGamut($matches[2]) . "</h$level>\n\n";
}
|
|
|
604 |
|
|
|
605 |
|
|
|
606 |
private function _DoLists($text) {
    #
    # Turns Markdown lists in $text into HTML ordered (numbered) and
    # unordered (bulleted) lists and returns the result. Bulleted lists are
    # processed first, numbered lists second.
    #
    $max_indent = $this->md_tab_width - 1;

    # Marker patterns: bullets, then "digits followed by a period".
    foreach (array('[*+-]', '\d+[.]') as $marker) {
        # Pattern fragment matching one entire list of this marker type:
        $whole_list = '
            (                               # $1 = whole list
              (                             # $2
                [ ]{0,'.$max_indent.'}
                ('.$marker.')               # $3 = first list item marker
                [ \t]+
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ \t]*
                    '.$marker.'[ \t]+
                  )
              )
            )
        '; // mx

        # The anchor placed before the list depends on whether a list is
        # currently being processed (md_list_level is non-zero). Note the
        # pairing: the "_top" callback is the one used while inside a list.
        if ($this->md_list_level) {
            $anchor = '
                ^';
            $callback = '_DoLists_callback_top';
        }
        else {
            $anchor = '
                (?:(?<=\n\n)|\A\n?)';
            $callback = '_DoLists_callback_nested';
        }

        $text = preg_replace_callback('{' . $anchor . $whole_list . '}mx',
            array($this, $callback), $text);
    }

    return $text;
}
|
|
|
663 |
private function _DoLists_callback_top($matches) {
    #
    # Callback for _DoLists(): converts one matched list to HTML.
    #   $matches[1] = whole list, $matches[3] = first list item marker.
    # The closing tag is put directly after the last item (no newline in
    # between) to get it past the HTML block parser.
    #
    $is_bulleted = preg_match('/[*+-]/', $matches[3]);
    $list_type = $is_bulleted ? "ul" : "ol";

    # Only items carrying the same kind of marker as the first one are split.
    $item_marker = $is_bulleted ? '[*+-]' : '\d+[.]';

    # Widen double returns to triple returns so the last item of the list
    # can still become a paragraph when necessary.
    $list = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);

    $items = rtrim($this->_ProcessListItems($list, $item_marker));

    return "<$list_type>" . $items . "</$list_type>\n";
}
|
|
|
687 |
private function _DoLists_callback_nested($matches) {
    #
    # Callback for _DoLists(): converts one matched list to HTML.
    #   $matches[1] = whole list, $matches[3] = first list item marker.
    #
    $is_bulleted = preg_match('/[*+-]/', $matches[3]);
    $list_type = $is_bulleted ? "ul" : "ol";

    # Only items carrying the same kind of marker as the first one are split.
    $item_marker = $is_bulleted ? '[*+-]' : '\d+[.]';

    # Widen double returns to triple returns so the last item of the list
    # can still become a paragraph when necessary.
    $list = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);

    $items = $this->_ProcessListItems($list, $item_marker);

    return "<$list_type>\n" . $items . "</$list_type>\n";
}
|
|
|
705 |
|
|
|
706 |
|
|
|
707 |
private function _ProcessListItems($list_str, $marker_any) {
    #
    # Splits the body of a single list ($list_str) into its items and
    # converts each one with _ProcessListItems_callback(). $marker_any is
    # the regex fragment matching the list's item markers.
    #
    # md_list_level counts how deep inside lists the parser currently is:
    # it is raised on entry and lowered on exit, so zero means "not in a
    # list". _DoLists() needs this because outside a list a paragraph such
    # as
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # must stay one paragraph even though its second line starts with a
    # digit-period-space sequence, whereas inside a list that line starts a
    # sub-list.
    #
    $this->md_list_level += 1;

    # Drop trailing blank lines.
    $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $items = preg_replace_callback('{
        (\n)?                           # leading line = $1
        (^[ \t]*)                       # leading whitespace = $2
        ('.$marker_any.') [ \t]+        # list marker = $3
        ((?s:.+?)                       # list item text   = $4
        (\n{1,2}))
        (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
        }xm',
        array($this, '_ProcessListItems_callback'), $trimmed);

    $this->md_list_level -= 1;

    return $items;
}
|
|
|
752 |
private function _ProcessListItems_callback($matches) {
    #
    # Callback for _ProcessListItems(): wraps one list item in <li> tags.
    #   $matches[1] = blank line before the item (if any),
    #   $matches[4] = item text.
    #
    $item = $matches[4];

    # An item preceded by a blank line, or containing one, is processed as
    # block-level content.
    if ($matches[1] || preg_match('/\n{2,}/', $item)) {
        $body = $this->_RunBlockGamut($this->_Outdent($item));
        return "<li>" . $body . "</li>\n";
    }

    # Otherwise: recurse for sub-lists, drop trailing newlines, then apply
    # the span-level transformations.
    $body = $this->_DoLists($this->_Outdent($item));
    $body = preg_replace('/\n+$/', '', $body);
    $body = $this->_RunSpanGamut($body);

    return "<li>" . $body . "</li>\n";
}
|
|
|
769 |
|
|
|
770 |
|
|
|
771 |
private function _DoCodeBlocks($text) {
    #
    # Converts indented Markdown code blocks in $text into `<pre><code>`
    # blocks (via _DoCodeBlocks_callback()) and returns the result.
    #
    $tab_width = $this->md_tab_width;

    return preg_replace_callback('{
            (?:\n\n|\A)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{'.$tab_width.'} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,'.$tab_width.'}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
        }xm',
        array($this, '_DoCodeBlocks_callback'), $text);
}
|
|
|
789 |
private function _DoCodeBlocks_callback($matches) {
    #
    # Callback for _DoCodeBlocks(): outdents and encodes the matched code
    # block ($matches[1]) and wraps it in <pre><code> tags.
    #
    $code = $this->_Outdent($matches[1]);
    $code = $this->_EncodeCode($code);

    # Strip leading newlines and trailing whitespace.
    $code = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $code);

    return "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
}
|
|
|
801 |
|
|
|
802 |
|
|
|
803 |
private function _DoCodeSpans($text) {
    #
    # Converts backtick-delimited spans in $text into <code> elements (via
    # _DoCodeSpans_callback()) and returns the result.
    #
    #   * The delimiter is a run of one or more backticks; the span ends at
    #     the next run of exactly the same length. Longer delimiters allow
    #     literal backticks inside the span, so
    #
    #         Just type ``foo `bar` baz`` at the prompt.
    #
    #     becomes
    #
    #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    #
    #     Need three consecutive backticks in the code? Use four as the
    #     delimiter, and so on.
    #
    #   * Spaces next to the delimiters allow literal backticks at the
    #     edges:
    #
    #         ... type `` `bar` `` ...
    #
    #     becomes
    #
    #         ... type <code>`bar`</code> ...
    #
    return preg_replace_callback('@
            (?<!\\\)    # Character before opening ` can\'t be a backslash
            (`+)        # $1 = Opening run of `
            (.+?)       # $2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        @xs',
        array($this, '_DoCodeSpans_callback'), $text);
}
|
|
|
840 |
private function _DoCodeSpans_callback($matches) {
    #
    # Callback for _DoCodeSpans(): strips leading and trailing spaces/tabs
    # from the span content ($matches[2]), encodes it and wraps it in
    # <code> tags.
    #
    $edges = array(
        '/^[ \t]*/', # leading whitespace
        '/[ \t]*$/', # trailing whitespace
    );
    $code = preg_replace($edges, '', $matches[2]);

    return "<code>" . $this->_EncodeCode($code) . "</code>";
}
|
|
|
847 |
|
|
|
848 |
|
|
|
849 |
private function _EncodeCode($_) {
    #
    # Encode/escape certain characters inside Markdown code runs.
    # Inside code these characters are literals and must lose both their
    # HTML meaning and their special Markdown meaning.
    #
    # Parameter: the raw code text.
    # Returns:   the text with &, < and > turned into HTML entities and
    #            every key of the escape table replaced by its placeholder.
    #

    # Encode all ampersands; HTML entities are not entities within a
    # Markdown code span. This has to run before the angle brackets,
    # otherwise the "&" of "&lt;"/"&gt;" would be encoded a second time.
    $_ = str_replace('&', '&amp;', $_);

    # Angle brackets must not open or close tags inside code.
    $_ = str_replace(array('<', '>'), array('&lt;', '&gt;'), $_);

    # Finally hide the characters that are magic in Markdown behind the
    # placeholders stored in the escape table.
    $_ = str_replace(array_keys(self::$md_escape_table),
        array_values(self::$md_escape_table), $_);

    return $_;
}
|
|
|
869 |
|
|
|
870 |
|
|
|
871 |
private function _DoItalicsAndBold($text) {
    #
    # Convert emphasis markers to HTML: a pair of ** or __ becomes
    # <strong>, a pair of * or _ becomes <em>.
    #
    # The <strong> pass has to run first; otherwise the single-character
    # <em> pattern would consume the doubled markers.
    #
    $strong_pattern = '{
        (                                       # $1: Marker
            (?<!\*\*) \*\* |                    # (not preceded by two chars of
            (?<!__)   __                        # the same marker)
        )
        (?=\S)                                  # Not followed by whitespace
        (?!\1)                                  # or two others marker chars.
        (                                       # $2: Content
            (?:
                [^*_]+?                         # Anthing not em markers.
            |
                                                # Balence any regular emphasis inside.
                ([*_]) (?=\S) .+? (?<=\S) \3    # $3: em char (* or _)
            |
                (?! \1 ) .                      # Allow unbalenced * and _.
            )+?
        )
        (?<=\S) \1                              # End mark not preceded by whitespace.
        }sx';
    $em_pattern =
        '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx';

    $with_strong = preg_replace($strong_pattern, '<strong>\2</strong>', $text);

    return preg_replace($em_pattern, '<em>\2</em>', $with_strong);
}
|
|
|
900 |
|
|
|
901 |
|
|
|
902 |
private function _DoBlockQuotes($text) {
    #
    # Locate every block-quoted region (a line starting with ">" plus the
    # non-blank lines that follow it, repeated across blank lines) and
    # hand each region to _DoBlockQuotes_callback() for conversion.
    #
    $quote_pattern = '/
        (                       # Wrap whole match in $1
            (
                ^[ \t]*>[ \t]?  # ">" at the start of a line
                .+\n            # rest of the first line
                (.+\n)*         # subsequent consecutive lines
                \n*             # blanks
            )+
        )
        /xm';

    return preg_replace_callback(
        $quote_pattern, array($this, '_DoBlockQuotes_callback'), $text);
}
|
|
|
917 |
private function _DoBlockQuotes_callback($matches) {
    #
    # preg_replace_callback handler for _DoBlockQuotes().
    # $matches[1] is one complete quoted region, ">" markers included.
    #
    $strip = array(
        '/^[ \t]*>[ \t]?/m',    # one level of quoting
        '/^[ \t]+$/m',          # whitespace-only lines
    );
    $quote = preg_replace($strip, '', $matches[1]);

    # Recurse: the quoted text is itself block-level Markdown.
    $quote = $this->_RunBlockGamut($quote);

    # Indent the generated markup ...
    $quote = preg_replace('/^/m', " ", $quote);
    # ... but that indentation would show up inside <pre> content, so
    # take it out again for every <pre> element.
    $quote = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array($this, '_DoBlockQuotes_callback2'), $quote);

    return "<blockquote>\n" . $quote . "\n</blockquote>\n\n";
}
|
|
|
930 |
private function _DoBlockQuotes_callback2($matches) {
    #
    # Handler for the <pre> fix-up in _DoBlockQuotes_callback():
    # removes the leading space that was prepended to every line of the
    # captured <pre>...</pre> element ($matches[1]).
    #
    return preg_replace('/^ /m', '', $matches[1]);
}
|
|
|
935 |
|
|
|
936 |
|
|
|
937 |
private function _FormParagraphs($text) {
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # Returns the text with every blank-line-separated chunk either
    # wrapped in <p>...</p> or, when the chunk is a key of
    # $this->md_html_blocks, replaced by the stored HTML block.
    #

    # Strip leading and trailing lines, then split on blank lines.
    $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Pass 1: wrap <p> tags around everything that is not a hashed
    # HTML block.
    #
    foreach (array_keys($grafs) as $i) {
        if (isset($this->md_html_blocks[$grafs[$i]])) {
            continue;
        }
        $body = $this->_RunSpanGamut($grafs[$i]);
        # The opening tag replaces any leading spaces/tabs.
        $grafs[$i] = preg_replace('/^([ \t]*)/', '<p>', $body) . "</p>";
    }

    #
    # Pass 2: unhashify HTML blocks.
    #
    foreach (array_keys($grafs) as $i) {
        $candidate = $grafs[$i];
        if (isset($this->md_html_blocks[$candidate])) {
            $grafs[$i] = $this->md_html_blocks[$candidate];
        }
    }

    return implode("\n\n", $grafs);
}
|
|
|
970 |
|
|
|
971 |
|
|
|
972 |
private function _EncodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to be
    # encoded.
    #
    # Parameter: text that may contain raw "&" and "<".
    # Returns:   the text with every "&" that does not already start an
    #            entity turned into "&amp;", and every "<" that does not
    #            look like the start of a tag turned into "&lt;".
    #

    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    # The look-ahead skips ampersands that already begin a named, decimal
    # or hexadecimal entity.
    $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
        '&amp;', $text);

    # Encode naked <'s: a "<" followed by a letter, "/", "?", "$" or "!"
    # is left alone because it may open a tag, comment or instruction.
    $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);

    return $text;
}
|
|
|
985 |
|
|
|
986 |
|
|
|
987 |
private function _EncodeBackslashEscapes($text) {
    #
    # Parameter: String.
    # Returns:   The string after every key of the backslash escape table
    #            has been replaced by the corresponding table value.
    #
    # str_replace() applies the pairs in table order. The original note
    # here says escaped backslashes must be processed first, so the table
    # is presumably built with that entry leading -- confirm where the
    # table is populated.
    #
    $sequences    = array_keys(self::$md_backslash_escape_table);
    $replacements = array_values(self::$md_backslash_escape_table);

    return str_replace($sequences, $replacements, $text);
}
|
|
|
997 |
|
|
|
998 |
|
|
|
999 |
private function _DoAutoLinks($text) {
    #
    # Convert <http://...>, <https://...>, <ftp://...> and
    # <address@domain.foo> shortcuts into links.
    #
    # Parameter: text possibly containing angle-bracketed URLs or
    #            email addresses.
    # Returns:   the text with those shortcuts replaced by <a> elements.
    #
    $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
        '<a href="\1">\1</a>', $text);

    # Email addresses: <address@domain.foo>
    # A callback is used instead of the /e (eval) modifier, which was
    # removed in PHP 7.0; there preg_replace() with /e returns NULL.
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
        }xi',
        array($this, '_DoAutoLinks_email_callback'), $text);

    return $text;
}
private function _DoAutoLinks_email_callback($matches) {
    #
    # Handler for the email pattern in _DoAutoLinks(): $matches[1] is the
    # address without the angle brackets or an optional "mailto:" prefix.
    # Hidden special characters are restored before the address is
    # turned into an obfuscated mailto link.
    #
    return $this->_EncodeEmailAddress(
        $this->_UnescapeSpecialChars($matches[1]));
}
|
|
|
1019 |
|
|
|
1020 |
|
|
|
1021 |
private function _EncodeEmailAddress($addr) {
    #
    # Input:  an email address, e.g. "foo@example.com"
    #
    # Output: the email address as a mailto link in which the characters
    #   of the address are written as decimal or hexadecimal character
    #   references (a few are left as-is), in the hopes of foiling most
    #   address harvesting spam bots. The mix is random, so one possible
    #   result looks like:
    #
    #     <a href="&#x6d;&#97;i&#108;&#x74;&#111;:&#102;&#x6f;&#111;&#64;...">
    #        &#102;&#x6f;&#111;&#64;...</a>
    #
    #   Once a browser decodes the references this is an ordinary
    #   <a href="mailto:foo@example.com">foo@example.com</a>.
    #
    # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    # mailing list: <http://tinyurl.com/yu7ue>
    #
    $addr = "mailto:" . $addr;

    # Encode every character except ':' so the end of the "mailto:"
    # prefix can still be found below.
    $addr = preg_replace_callback('/([^\:])/',
        array($this, '_EncodeEmailAddress_callback'), $addr);

    $addr = "<a href=\"$addr\">$addr</a>";
    # Strip the (encoded) mailto: from the visible part.
    $addr = preg_replace('/">.+?:/', '">', $addr);

    return $addr;
}
|
|
|
1049 |
private function _EncodeEmailAddress_callback($matches) {
    #
    # Handler used by _EncodeEmailAddress(): encodes the single
    # character in $matches[1].
    #
    # Roughly 10% of characters stay raw, 45% become hexadecimal and 45%
    # decimal character references. '@' is never left raw.
    #
    $char = $matches[1];
    $roll = rand(0, 100);
    $code = ord($char);

    $keep_raw = ($char != '@') && ($roll > 90);
    if ($keep_raw) {
        return $char;
    }

    return ($roll < 45)
        ? '&#x' . dechex($code) . ';'
        : '&#' . $code . ';';
}
|
|
|
1058 |
|
|
|
1059 |
|
|
|
1060 |
private function _UnescapeSpecialChars($text) {
    #
    # Reverse of the escape-table substitution: every placeholder value
    # of the escape table found in $text is replaced by the character
    # (table key) it stands for.
    #
    $placeholders = array_values(self::$md_escape_table);
    $characters   = array_keys(self::$md_escape_table);

    return str_replace($placeholders, $characters, $text);
}
|
|
|
1067 |
|
|
|
1068 |
|
|
|
1069 |
# _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
|
|
|
1070 |
# We only define it if it is not already defined.
|
|
|
1071 |
|
|
|
1072 |
private function _TokenizeHTML($str) {
    #
    # Parameter: String containing HTML markup.
    # Returns:   An array of the tokens comprising the input string.
    #            Each token is either a tag (possibly with nested tags
    #            contained therein, such as <a href="<MTFoo>">) or a run
    #            of text between tags. Each element of the array is a
    #            two-element array; the first is either 'tag' or 'text',
    #            the second is the actual value.
    #            Empty text runs (for instance before a leading tag or
    #            between two adjacent tags) produce no token.
    #
    # Regular expression derived from the _tokenize() subroutine in
    # Brad Choate's MTRegex plugin.
    # <http://www.bradchoate.com/past/mtregex.php>
    #
    $tokens = array();

    $match = '(?s:<!(?:--.*?--\s*)+>)|'.    # comment
             '(?s:<\?.*?\?>)|'.             # processing instruction
                                            # regular tags
             '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

    # With PREG_SPLIT_DELIM_CAPTURE and a single capturing group the
    # result alternates: even offsets are text, odd offsets are tags.
    $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($parts as $offset => $part) {
        if ($offset % 2) {
            $tokens[] = array('tag', $part);
        } elseif ($part !== '') {
            # Skip empty text runs instead of mislabelling them as tags.
            $tokens[] = array('text', $part);
        }
    }

    return $tokens;
}
|
|
|
1106 |
|
|
|
1107 |
private function _Outdent($text) {
    #
    # Remove one level of indentation from the start of every line:
    # either a single tab or between one and $this->md_tab_width spaces.
    #
    $width = $this->md_tab_width;
    $indent_pattern = "/^(\\t|[ ]{1," . $width . "})/m";

    return preg_replace($indent_pattern, "", $text);
}
|
|
|
1113 |
|
|
|
1114 |
|
|
|
1115 |
private function _Detab($text) {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Every line is cut into the pieces delimited by tab characters and
    # rebuilt piece by piece, padding with spaces up to the next multiple
    # of $this->md_tab_width before each following piece.
    #
    # Note that every line, including the last, is emitted with a
    # trailing newline.
    #
    $width = $this->md_tab_width;
    $expanded = array();

    foreach (explode("\n", $text) as $raw_line) {
        $pieces = explode("\t", $raw_line);
        # Start from the text before the first tab ...
        $built = array_shift($pieces);
        # ... and pad out to the next tab stop before each later piece.
        foreach ($pieces as $piece) {
            $padding = $width - strlen($built) % $width;
            $built .= str_repeat(" ", $padding) . $piece;
        }
        $expanded[] = $built;
    }

    return implode("\n", $expanded) . "\n";
}
|
|
|
1141 |
|
|
|
1142 |
|
|
|
1143 |
private function _UnslashQuotes($text) {
    #
    # Undo the automatic slashing of double quotes that happens when a
    # preg_replace replacement is evaluated as an expression.
    #
    # Parameter: String.
    # Returns:   The string with every slash-double-quote (\") sequence
    #            replaced by a single double quote.
    #
    $slashed_quote = '\"';
    $plain_quote = '"';

    return str_replace($slashed_quote, $plain_quote, $text);
}
|
|
|
1153 |
}
|
|
|
1154 |
|
|
|
1155 |
/*
|
|
|
1156 |
|
|
|
1157 |
PHP Markdown
|
|
|
1158 |
============
|
|
|
1159 |
|
|
|
1160 |
Description
|
|
|
1161 |
-----------
|
|
|
1162 |
|
|
|
1163 |
This is a PHP translation of the original Markdown formatter written in
|
|
|
1164 |
Perl by John Gruber.
|
|
|
1165 |
|
|
|
1166 |
Markdown is a text-to-HTML filter; it translates an easy-to-read /
|
|
|
1167 |
easy-to-write structured text format into HTML. Markdown's text format
|
|
|
1168 |
is most similar to that of plain text email, and supports features such
|
|
|
1169 |
as headers, *emphasis*, code blocks, blockquotes, and links.
|
|
|
1170 |
|
|
|
1171 |
Markdown's syntax is designed not as a generic markup language, but
|
|
|
1172 |
specifically to serve as a front-end to (X)HTML. You can use span-level
|
|
|
1173 |
HTML tags anywhere in a Markdown document, and you can use block level
|
|
|
1174 |
HTML tags (like <div> and <table>) as well.
|
|
|
1175 |
|
|
|
1176 |
For more information about Markdown's syntax, see:
|
|
|
1177 |
|
|
|
1178 |
<http://daringfireball.net/projects/markdown/>
|
|
|
1179 |
|
|
|
1180 |
|
|
|
1181 |
Bugs
|
|
|
1182 |
----
|
|
|
1183 |
|
|
|
1184 |
To file bug reports please send email to:
|
|
|
1185 |
|
|
|
1186 |
<michel.fortin@michelf.com>
|
|
|
1187 |
|
|
|
1188 |
Please include with your report: (1) the example input; (2) the output you
|
|
|
1189 |
expected; (3) the output Markdown actually produced.
|
|
|
1190 |
|
|
|
1191 |
|
|
|
1192 |
Version History
|
|
|
1193 |
---------------
|
|
|
1194 |
|
|
|
1195 |
See the readme file for detailed release notes for this version.
|
|
|
1196 |
|
|
|
1197 |
1.0.1c - 9 Dec 2005
|
|
|
1198 |
|
|
|
1199 |
1.0.1b - 6 Jun 2005
|
|
|
1200 |
|
|
|
1201 |
1.0.1a - 15 Apr 2005
|
|
|
1202 |
|
|
|
1203 |
1.0.1 - 16 Dec 2004
|
|
|
1204 |
|
|
|
1205 |
1.0 - 21 Aug 2004
|
|
|
1206 |
|
|
|
1207 |
|
|
|
1208 |
Author & Contributors
|
|
|
1209 |
---------------------
|
|
|
1210 |
|
|
|
1211 |
Original Perl version by John Gruber
|
|
|
1212 |
<http://daringfireball.net/>
|
|
|
1213 |
|
|
|
1214 |
PHP port and other contributions by Michel Fortin
|
|
|
1215 |
<http://www.michelf.com/>
|
|
|
1216 |
|
|
|
1217 |
|
|
|
1218 |
Copyright and License
|
|
|
1219 |
---------------------
|
|
|
1220 |
|
|
|
1221 |
Copyright (c) 2004-2005 Michel Fortin
|
|
|
1222 |
<http://www.michelf.com/>
|
|
|
1223 |
All rights reserved.
|
|
|
1224 |
|
|
|
1225 |
Copyright (c) 2003-2004 John Gruber
|
|
|
1226 |
<http://daringfireball.net/>
|
|
|
1227 |
All rights reserved.
|
|
|
1228 |
|
|
|
1229 |
Redistribution and use in source and binary forms, with or without
|
|
|
1230 |
modification, are permitted provided that the following conditions are
|
|
|
1231 |
met:
|
|
|
1232 |
|
|
|
1233 |
* Redistributions of source code must retain the above copyright notice,
|
|
|
1234 |
this list of conditions and the following disclaimer.
|
|
|
1235 |
|
|
|
1236 |
* Redistributions in binary form must reproduce the above copyright
|
|
|
1237 |
notice, this list of conditions and the following disclaimer in the
|
|
|
1238 |
documentation and/or other materials provided with the distribution.
|
|
|
1239 |
|
|
|
1240 |
* Neither the name "Markdown" nor the names of its contributors may
|
|
|
1241 |
be used to endorse or promote products derived from this software
|
|
|
1242 |
without specific prior written permission.
|
|
|
1243 |
|
|
|
1244 |
This software is provided by the copyright holders and contributors "as
|
|
|
1245 |
is" and any express or implied warranties, including, but not limited
|
|
|
1246 |
to, the implied warranties of merchantability and fitness for a
|
|
|
1247 |
particular purpose are disclaimed. In no event shall the copyright owner
|
|
|
1248 |
or contributors be liable for any direct, indirect, incidental, special,
|
|
|
1249 |
exemplary, or consequential damages (including, but not limited to,
|
|
|
1250 |
procurement of substitute goods or services; loss of use, data, or
|
|
|
1251 |
profits; or business interruption) however caused and on any theory of
|
|
|
1252 |
liability, whether in contract, strict liability, or tort (including
|
|
|
1253 |
negligence or otherwise) arising in any way out of the use of this
|
|
|
1254 |
software, even if advised of the possibility of such damage.
|
|
|
1255 |
|
|
|
1256 |
*/
|
|
|
1257 |
?>
|