Repository: erusev/parsedown
Branch: master
Commit: 4e433a8d5707
Files: 77
Total size: 88.7 KB
Directory structure:
gitextract_8dv4ybdv/
├── .gitattributes
├── .github/
│ └── workflows/
│ └── unit-tests.yaml
├── .gitignore
├── LICENSE.txt
├── Parsedown.php
├── composer.json
├── phpunit.xml.dist
├── readme.md
└── test/
├── CommonMarkTestStrict.php
├── CommonMarkTestWeak.php
├── ParsedownTest.php
├── SampleExtensions.php
├── TestParsedown.php
└── data/
├── aesthetic_table.html
├── aligned_table.html
├── atx_heading.html
├── automatic_link.html
├── block-level_html.html
├── code_block.html
├── code_span.html
├── compound_blockquote.html
├── compound_emphasis.html
├── compound_list.html
├── deeply_nested_list.html
├── em_strong.html
├── email.html
├── emphasis.html
├── escaping.html
├── fenced_code_block.html
├── horizontal_rule.html
├── html_comment.html
├── html_entity.html
├── image_reference.html
├── image_title.html
├── implicit_reference.html
├── inline_link.html
├── inline_link_title.html
├── inline_title.html
├── lazy_blockquote.html
├── lazy_list.html
├── line_break.html
├── markup_consecutive_one.html
├── markup_consecutive_one_line.html
├── markup_consecutive_one_stripped.html
├── markup_consecutive_two.html
├── markup_consecutive_two_lines.html
├── markup_consecutive_two_stripped.html
├── multiline_list_paragraph.html
├── multiline_lists.html
├── nested_block-level_html.html
├── ordered_list.html
├── paragraph_list.html
├── reference_title.html
├── self-closing_html.html
├── separated_nested_list.html
├── setext_header.html
├── setext_header_spaces.html
├── simple_blockquote.html
├── simple_table.html
├── span-level_html.html
├── sparse_dense_list.html
├── sparse_html.html
├── sparse_list.html
├── special_characters.html
├── strict_atx_heading.html
├── strikethrough.html
├── strong_em.html
├── tab-indented_code_block.html
├── table_inline_markdown.html
├── text_reference.html
├── unordered_list.html
├── untidy_table.html
├── url_autolinking.html
├── whitespace.html
├── xss_attribute_encoding.html
├── xss_bad_url.html
└── xss_text_encoding.html
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
# Ignore all tests for archive
/test export-ignore
/.gitattributes export-ignore
/.gitignore export-ignore
/.travis.yml export-ignore
/phpunit.xml.dist export-ignore
================================================
FILE: .github/workflows/unit-tests.yaml
================================================
on:
- push
- pull_request
jobs:
phpunit:
runs-on: ubuntu-latest
strategy:
matrix:
php:
- '7.2'
- '7.3'
- '7.4'
- '8.0'
- '8.1'
- '8.2'
- '8.3'
- '8.4'
steps:
- name: Checkout the source code
uses: actions/checkout@v4
- name: Set up PHP
uses: shivammathur/setup-php@v2
with:
php-version: '${{ matrix.php }}'
- name: Install dependencies
run: composer install
- name: Run tests
run: |
vendor/bin/phpunit
vendor/bin/phpunit test/CommonMarkTestWeak.php || true
================================================
FILE: .gitignore
================================================
*.md
!readme.md
composer.lock
vendor/
.phpunit.result.cache
================================================
FILE: LICENSE.txt
================================================
The MIT License (MIT)
Copyright (c) 2013-2018 Emanuil Rusev, erusev.com
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: Parsedown.php
================================================
textElements($text);
# convert to markup
$markup = $this->elements($Elements);
# trim line breaks
$markup = trim($markup, "\n");
return $markup;
}
/**
 * Normalise raw Markdown text and parse it into block elements.
 *
 * @param string $text raw Markdown input
 * @return array whatever linesElements() returns for the split lines
 */
protected function textElements($text)
{
    # reference definitions collected by an earlier parse must not carry over
    $this->DefinitionData = array();

    # unify CRLF / CR to LF, then drop leading and trailing line breaks
    $normalised = trim(str_replace(array("\r\n", "\r"), "\n", $text), "\n");

    # hand the individual lines to the block parser
    return $this->linesElements(explode("\n", $normalised));
}
#
# Setters
#
/**
 * Store the line-break flag.
 *
 * @param mixed $breaksEnabled stored as given; inlineText() tests it for truthiness
 * @return $this for chaining
 */
public function setBreaksEnabled($breaksEnabled)
{
    $this->breaksEnabled = $breaksEnabled;

    return $this;
}

# when truthy, inlineText() replaces every line break (plus preceding spaces) with a <br>
protected $breaksEnabled;
/**
 * Store the markup-escaping flag.
 *
 * @param mixed $markupEscaped stored as given; the markup handlers test it for truthiness
 * @return $this for chaining
 */
public function setMarkupEscaped($markupEscaped)
{
    $this->markupEscaped = $markupEscaped;

    return $this;
}

# when truthy, blockComment(), blockMarkup() and inlineMarkup() decline to recognise raw HTML
protected $markupEscaped;
/**
 * Enable or disable automatic linking of bare URLs.
 *
 * The value is cast to bool because inlineUrl() compares the flag with
 * `!== true`; without the cast a truthy non-boolean such as 1 or "yes"
 * would silently switch autolinking off. This also matches setSafeMode()
 * and setStrictMode(), which cast their argument the same way.
 *
 * @param mixed $urlsLinked truthy to autolink URLs, falsy to leave them as text
 * @return $this for chaining
 */
public function setUrlsLinked($urlsLinked)
{
    $this->urlsLinked = (bool) $urlsLinked;

    return $this;
}

# autolinking is on by default
protected $urlsLinked = true;
/**
 * Switch safe mode on or off.
 *
 * @param mixed $safeMode cast to bool before being stored
 * @return $this for chaining
 */
public function setSafeMode($safeMode)
{
    $enabled = (bool) $safeMode;
    $this->safeMode = $enabled;

    return $this;
}

# when true, raw HTML is not recognised and element() sanitises every element it renders
protected $safeMode;
/**
 * Switch strict mode on or off.
 *
 * @param mixed $strictMode cast to bool before being stored
 * @return $this for chaining
 */
public function setStrictMode($strictMode)
{
    $enabled = (bool) $strictMode;
    $this->strictMode = $enabled;

    return $this;
}

# when true, blockHeader() requires a space between the leading #s and the heading text
protected $strictMode;
# URL prefixes regarded as safe link targets.
# NOTE(review): the code consuming this list is outside this chunk - presumably
# the safe-mode sanitiser; confirm before relying on that.
protected $safeLinksWhitelist = [
    'http://',
    'https://',
    'ftp://',
    'ftps://',
    'mailto:',
    'tel:',
    'data:image/png;base64,',
    'data:image/gif;base64,',
    'data:image/jpeg;base64,',
    'irc:',
    'ircs:',
    'git:',
    'ssh:',
    'news:',
    'steam:',
];
#
# Lines
#
# Maps the first non-space character of a line to the block types that may
# start with it. linesElements() tries the listed types in order, after the
# entries of $unmarkedBlockTypes, by calling "block<Type>".
protected $BlockTypes = [
    '#' => ['Header'],
    '*' => ['Rule', 'List'],
    '+' => ['List'],
    '-' => ['SetextHeader', 'Table', 'Rule', 'List'],
    '0' => ['List'],
    '1' => ['List'],
    '2' => ['List'],
    '3' => ['List'],
    '4' => ['List'],
    '5' => ['List'],
    '6' => ['List'],
    '7' => ['List'],
    '8' => ['List'],
    '9' => ['List'],
    ':' => ['Table'],
    '<' => ['Comment', 'Markup'],
    '=' => ['SetextHeader'],
    '>' => ['Quote'],
    '[' => ['Reference'],
    '_' => ['Rule'],
    '`' => ['FencedCode'],
    '|' => ['Table'],
    '~' => ['FencedCode'],
];
# Block types that are tried on every line, whatever its first character,
# and before any type listed in $BlockTypes.
protected $unmarkedBlockTypes = [
    'Code',
];
#
# Blocks
#
/**
 * Parse a list of lines into blocks and render them.
 *
 * @param array $lines individual lines of text
 * @return mixed whatever elements() produces for the parsed blocks
 */
protected function lines(array $lines)
{
    $Elements = $this->linesElements($lines);

    return $this->elements($Elements);
}
# Groups the given lines into blocks and returns one element per block.
#
# $lines is a list of single lines (textElements() builds it with explode("\n", ...)).
# The result is a list of element arrays, each obtained through extractElement().
#
# State carried between lines lives in $CurrentBlock: its 'type' names the
# block<Type> handlers, 'continuable' marks that a block<Type>Continue method
# exists, and 'interrupted' counts the blank lines seen since the block's last line.
protected function linesElements(array $lines)
{
$Elements = array();
$CurrentBlock = null;
foreach ($lines as $line)
{
# a blank line never starts a block; it is only counted on the open block
if (chop($line) === '')
{
if (isset($CurrentBlock))
{
$CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted'])
? $CurrentBlock['interrupted'] + 1 : 1
);
}
continue;
}
# expand each tab to spaces up to the next multiple of four characters
while (($beforeTab = strstr($line, "\t", true)) !== false)
{
$shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4;
$line = $beforeTab
. str_repeat(' ', $shortage)
. substr($line, strlen($beforeTab) + 1)
;
}
$indent = strspn($line, ' ');
$text = $indent > 0 ? substr($line, $indent) : $line;
# 'body' is the tab-expanded line, 'text' the same line without its leading spaces
$Line = array('body' => $line, 'indent' => $indent, 'text' => $text);
# first give the open block a chance to absorb the line
if (isset($CurrentBlock['continuable']))
{
$methodName = 'block' . $CurrentBlock['type'] . 'Continue';
$Block = $this->$methodName($Line, $CurrentBlock);
if (isset($Block))
{
$CurrentBlock = $Block;
continue;
}
else
{
# the block refused the line, so it ends here; let it finalise itself
if ($this->isBlockCompletable($CurrentBlock['type']))
{
$methodName = 'block' . $CurrentBlock['type'] . 'Complete';
$CurrentBlock = $this->$methodName($CurrentBlock);
}
}
}
# the first non-space character selects the candidate block types
$marker = $text[0];
# unmarked types are always candidates and are tried first
$blockTypes = $this->unmarkedBlockTypes;
if (isset($this->BlockTypes[$marker]))
{
foreach ($this->BlockTypes[$marker] as $blockType)
{
$blockTypes []= $blockType;
}
}
#
# the first candidate whose handler returns a block wins
foreach ($blockTypes as $blockType)
{
$Block = $this->{"block$blockType"}($Line, $CurrentBlock);
if (isset($Block))
{
$Block['type'] = $blockType;
# a handler that returns a block already flagged 'identified' has taken over
# the open block (blockTable() does this), so the open block is not emitted
if ( ! isset($Block['identified']))
{
if (isset($CurrentBlock))
{
$Elements[] = $this->extractElement($CurrentBlock);
}
$Block['identified'] = true;
}
if ($this->isBlockContinuable($blockType))
{
$Block['continuable'] = true;
}
$CurrentBlock = $Block;
continue 2;
}
}
# no block type claimed the line: it is paragraph text.
# $Block is null at this point unless it is reassigned just below, because
# the loop above only falls through after its last handler returned null.
if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph')
{
$Block = $this->paragraphContinue($Line, $CurrentBlock);
}
if (isset($Block))
{
$CurrentBlock = $Block;
}
else
{
# emit whatever was open and start a new paragraph with this line
if (isset($CurrentBlock))
{
$Elements[] = $this->extractElement($CurrentBlock);
}
$CurrentBlock = $this->paragraph($Line);
$CurrentBlock['identified'] = true;
}
}
# input exhausted: finalise the block that is still open
if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type']))
{
$methodName = 'block' . $CurrentBlock['type'] . 'Complete';
$CurrentBlock = $this->$methodName($CurrentBlock);
}
# ... and emit it
if (isset($CurrentBlock))
{
$Elements[] = $this->extractElement($CurrentBlock);
}
# one element per block, in document order
return $Elements;
}
/**
 * Return the element carried by a block or inline component.
 *
 * Components without an 'element' entry fall back to their 'markup'
 * entry (wrapped as raw HTML) or, when flagged 'hidden', to an empty element.
 *
 * @param array $Component block or inline description
 * @return array|null the element; null when none of the three entries is present
 */
protected function extractElement(array $Component)
{
    if (isset($Component['element'])) {
        return $Component['element'];
    }

    if (isset($Component['markup'])) {
        return array('rawHtml' => $Component['markup']);
    }

    if (isset($Component['hidden'])) {
        return array();
    }

    # nothing usable: same lookup the unconditional return has always performed
    return $Component['element'];
}
/**
 * Tell whether a block type can span several lines.
 *
 * @param string $Type block type name, e.g. "List"
 * @return bool true when this object has a "block<Type>Continue" method
 */
protected function isBlockContinuable($Type)
{
    $handler = 'block' . $Type . 'Continue';

    return method_exists($this, $handler);
}
/**
 * Tell whether a block type has a finalisation step.
 *
 * @param string $Type block type name, e.g. "List"
 * @return bool true when this object has a "block<Type>Complete" method
 */
protected function isBlockCompletable($Type)
{
    $handler = 'block' . $Type . 'Complete';

    return method_exists($this, $handler);
}
#
# Code
/**
 * Start an indented code block.
 *
 * @param array      $Line  line description with 'body', 'indent' and 'text'
 * @param array|null $Block the block that is currently open, if any
 * @return array|null a pre/code block, or null when the line does not start one
 */
protected function blockCode($Line, $Block = null)
{
    # an indented line directly after paragraph text belongs to that paragraph
    $insideParagraph = isset($Block)
        && $Block['type'] === 'Paragraph'
        && ! isset($Block['interrupted']);

    if ($insideParagraph || $Line['indent'] < 4) {
        return;
    }

    return array(
        'element' => array(
            'name' => 'pre',
            'element' => array(
                'name' => 'code',
                # the first four columns are the indent, not code
                'text' => substr($Line['body'], 4),
            ),
        ),
    );
}
/**
 * Append a further indented line to an open indented code block.
 *
 * @param array $Line  line description with 'body' and 'indent'
 * @param array $Block the open code block
 * @return array|null the extended block, or null when the line is indented less than four spaces
 */
protected function blockCodeContinue($Line, $Block)
{
    if ($Line['indent'] < 4) {
        return;
    }

    $code = $Block['element']['element']['text'];

    # blank lines skipped since the previous code line are part of the code
    if (isset($Block['interrupted'])) {
        $code .= str_repeat("\n", $Block['interrupted']);

        unset($Block['interrupted']);
    }

    $Block['element']['element']['text'] = $code . "\n" . substr($Line['body'], 4);

    return $Block;
}
/**
 * Finalise an indented code block; nothing needs adjusting.
 *
 * @param array $Block the finished code block
 * @return array the same block, unchanged
 */
protected function blockCodeComplete($Block)
{
    return $Block;
}
#
# Comment
/**
 * Start an HTML comment block.
 *
 * The method body in this copy of the file was damaged (the "<!--" and
 * "-->" literals and the code between them were lost, leaving an empty
 * strpos() needle, an undefined $Block and an unbalanced brace). It is
 * restored here to the shape blockCommentContinue() relies on: the raw
 * text lives in ['element']['rawHtml'] and 'closed' is set once "-->"
 * has been seen.
 *
 * @param array $Line line description with 'body' and 'text'
 * @return array|null a raw-HTML block, or null when the line does not open a comment
 *                    or raw markup is disabled
 */
protected function blockComment($Line)
{
    if ($this->markupEscaped or $this->safeMode) {
        return;
    }

    if (strpos($Line['text'], '<!--') !== 0) {
        return;
    }

    $Block = array(
        'element' => array(
            'rawHtml' => $Line['body'],
            'autobreak' => true,
        ),
    );

    # a comment may open and close on the same line
    if (strpos($Line['text'], '-->') !== false) {
        $Block['closed'] = true;
    }

    return $Block;
}
/**
 * Append a line to an open HTML comment block.
 *
 * @param array $Line  line description with 'body' and 'text'
 * @param array $Block the open comment block
 * @return array|null the extended block, or null once the comment has been closed
 */
protected function blockCommentContinue($Line, array $Block)
{
    if (isset($Block['closed'])) {
        return;
    }

    $Block['element']['rawHtml'] = $Block['element']['rawHtml'] . "\n" . $Line['body'];

    $closesComment = strpos($Line['text'], '-->') !== false;

    if ($closesComment) {
        $Block['closed'] = true;
    }

    return $Block;
}
#
# Fenced Code
/**
 * Start a fenced code block (``` or ~~~).
 *
 * The info string after the fence may not contain a backtick when the
 * fence itself is made of backticks, because such a line is an inline
 * code span. Tilde fences carry no such restriction (CommonMark, "Fenced
 * code blocks"), so the check is applied to backtick fences only.
 *
 * @param array $Line line description with 'text'
 * @return array|null a pre/code block remembering the fence character and
 *                    length, or null when the line is not an opening fence
 */
protected function blockFencedCode($Line)
{
    $marker = $Line['text'][0];

    $openerLength = strspn($Line['text'], $marker);

    if ($openerLength < 3) {
        return;
    }

    $infostring = trim(substr($Line['text'], $openerLength), "\t ");

    if ($marker === '`' and strpos($infostring, '`') !== false) {
        return;
    }

    $Element = array(
        'name' => 'code',
        'text' => '',
    );

    if ($infostring !== '') {
        /**
         * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
         * Every HTML element may have a class attribute specified.
         * The attribute, if specified, must have a value that is a set
         * of space-separated tokens representing the various classes
         * that the element belongs to.
         * [...]
         * The space characters, for the purposes of this specification,
         * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
         * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
         * U+000D CARRIAGE RETURN (CR).
         */
        # only the first token of the info string becomes the language class
        $language = substr($infostring, 0, strcspn($infostring, " \t\n\f\r"));

        $Element['attributes'] = array('class' => "language-$language");
    }

    return array(
        'char' => $marker,
        'openerLength' => $openerLength,
        'element' => array(
            'name' => 'pre',
            'element' => $Element,
        ),
    );
}
/**
 * Feed a line to an open fenced code block.
 *
 * A line made of at least as many fence characters as the opener (with
 * only trailing spaces) closes the block; any other line is appended to
 * the code, preceded by a line break.
 *
 * @param array $Line  line description with 'body' and 'text'
 * @param array $Block the open fenced block ('char', 'openerLength', 'element')
 * @return array|null the updated block, or null when the block was already closed
 */
protected function blockFencedCodeContinue($Line, $Block)
{
    if (isset($Block['complete'])) {
        return;
    }

    # blank lines skipped since the previous line are part of the code
    if (isset($Block['interrupted'])) {
        $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']);

        unset($Block['interrupted']);
    }

    $fenceLength = strspn($Line['text'], $Block['char']);

    if ($fenceLength >= $Block['openerLength']
        and chop(substr($Line['text'], $fenceLength), ' ') === ''
    ) {
        # every appended line was prefixed with "\n"; drop the very first one.
        # The cast keeps the text a string on PHP < 8, where substr() yields
        # false for an empty or single-character buffer.
        $Block['element']['element']['text'] = (string) substr($Block['element']['element']['text'], 1);

        $Block['complete'] = true;

        return $Block;
    }

    $Block['element']['element']['text'] .= "\n" . $Line['body'];

    return $Block;
}
/**
 * Finalise a fenced code block; nothing needs adjusting.
 *
 * @param array $Block the finished fenced block
 * @return array the same block, unchanged
 */
protected function blockFencedCodeComplete($Block)
{
    return $Block;
}
#
# Header
/**
 * Start an ATX heading ("# Title").
 *
 * @param array $Line line description with 'text'
 * @return array|null an h1-h6 block, or null for more than six #s or, in
 *                    strict mode, when no space follows the #s
 */
protected function blockHeader($Line)
{
    $level = strspn($Line['text'], '#');

    if ($level > 6) {
        return;
    }

    # strips the opening run of #s and any #s at the end of the line
    $heading = trim($Line['text'], '#');

    if ($this->strictMode && isset($heading[0]) && $heading[0] !== ' ') {
        return;
    }

    return array(
        'element' => array(
            'name' => 'h' . $level,
            'handler' => array(
                'function' => 'lineElements',
                'argument' => trim($heading, ' '),
                'destination' => 'elements',
            )
        ),
    );
}
#
# List
/**
 * Start a bullet or ordered list.
 *
 * @param array      $Line         line description with 'indent' and 'text'
 * @param array|null $CurrentBlock the block that is currently open, if any
 * @return array|null a ul/ol block whose 'li' entry is the item being filled
 *                    (it is also referenced from ['element']['elements']),
 *                    or null when the line is not a list item
 */
protected function blockList($Line, ?array $CurrentBlock = null)
{
    # every bullet character sorts at or before '-', every digit after it
    list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]');

    if ( ! preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) {
        return;
    }

    $contentIndent = strlen($matches[2]);

    if ($contentIndent >= 5) {
        # five or more spaces: only one belongs to the marker, the rest is content
        $contentIndent -= 1;
        $matches[1] = substr($matches[1], 0, -$contentIndent);
        $matches[3] = str_repeat(' ', $contentIndent) . $matches[3];
    } elseif ($contentIndent === 0) {
        # a bare marker at the end of the line counts as marker plus one space
        $matches[1] .= ' ';
    }

    $markerWithoutWhitespace = strstr($matches[1], ' ', true);

    $Block = array(
        'indent' => $Line['indent'],
        'pattern' => $pattern,
        'data' => array(
            'type' => $name,
            'marker' => $matches[1],
            'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)),
        ),
        'element' => array(
            'name' => $name,
            'elements' => array(),
        ),
    );

    $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/');

    if ($name === 'ol') {
        # the number before the delimiter, without leading zeros ("0" when nothing is left)
        $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0';

        if ($listStart !== '1') {
            # a list not starting at 1 must not cut into a running paragraph
            if (
                isset($CurrentBlock)
                and $CurrentBlock['type'] === 'Paragraph'
                and ! isset($CurrentBlock['interrupted'])
            ) {
                return;
            }

            $Block['element']['attributes'] = array('start' => $listStart);
        }
    }

    $Block['li'] = array(
        'name' => 'li',
        'handler' => array(
            'function' => 'li',
            # compare with '' rather than empty(): empty() also treats the
            # string "0" as empty, which used to drop the content of "- 0"
            'argument' => $matches[3] !== '' ? array($matches[3]) : array(),
            'destination' => 'elements'
        )
    );

    # by reference, so later changes to $Block['li'] show up in the element tree
    $Block['element']['elements'] []= & $Block['li'];

    return $Block;
}
# Feeds a line to an open list block.
#
# $Line is the line description ('body', 'indent', 'text'); $Block is the open
# list as built by blockList(). Returns the updated block, or null when the
# line ends the list (the function also falls off the end, yielding null, for
# an under-indented line that follows a blank line).
protected function blockListContinue($Line, array $Block)
{
# a blank line after an item that has no content yet ends the list
if (isset($Block['interrupted']) and empty($Block['li']['handler']['argument']))
{
return null;
}
# column at which the current item's content starts
$requiredIndent = ($Block['indent'] + strlen($Block['data']['marker']));
# case 1: a sibling item - indented less than the content and carrying a
# marker of the same kind (same delimiter for ol, same bullet for ul)
if ($Line['indent'] < $requiredIndent
and (
(
$Block['data']['type'] === 'ol'
and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches)
) or (
$Block['data']['type'] === 'ul'
and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches)
)
)
) {
# a blank line between items makes the list loose
if (isset($Block['interrupted']))
{
$Block['li']['handler']['argument'] []= '';
$Block['loose'] = true;
unset($Block['interrupted']);
}
# detach 'li' from the item already stored in the element tree before
# starting a new one, otherwise the assignment below would overwrite that item
unset($Block['li']);
$text = isset($matches[1]) ? $matches[1] : '';
$Block['indent'] = $Line['indent'];
$Block['li'] = array(
'name' => 'li',
'handler' => array(
'function' => 'li',
'argument' => array($text),
'destination' => 'elements'
)
);
$Block['element']['elements'] []= & $Block['li'];
return $Block;
}
# case 2: an under-indented item of a different list kind ends this list
elseif ($Line['indent'] < $requiredIndent and $this->blockList($Line))
{
return null;
}
# case 3: a reference definition is consumed without becoming item content
# (blockReference() records it as a side effect)
if ($Line['text'][0] === '[' and $this->blockReference($Line))
{
return $Block;
}
# case 4: a line indented to the content column belongs to the current item
if ($Line['indent'] >= $requiredIndent)
{
if (isset($Block['interrupted']))
{
$Block['li']['handler']['argument'] []= '';
$Block['loose'] = true;
unset($Block['interrupted']);
}
$text = substr($Line['body'], $requiredIndent);
$Block['li']['handler']['argument'] []= $text;
return $Block;
}
# case 5: an under-indented line with no blank line before it is a lazy
# continuation of the current item
if ( ! isset($Block['interrupted']))
{
$text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']);
$Block['li']['handler']['argument'] []= $text;
return $Block;
}
}
/**
 * Finalise a list block.
 *
 * In a loose list every item's line list is made to end with an empty
 * string, so all items are handed to the 'li' handler in the same shape.
 *
 * @param array $Block the finished list block
 * @return array the block, with item arguments padded when the list is loose
 */
protected function blockListComplete(array $Block)
{
    if (isset($Block['loose'])) {
        foreach ($Block['element']['elements'] as &$li) {
            if (end($li['handler']['argument']) !== '') {
                $li['handler']['argument'] []= '';
            }
        }

        # break the reference so $li cannot alias the last item any longer
        unset($li);
    }

    return $Block;
}
#
# Quote
/**
 * Start a block quote ("> text").
 *
 * @param array $Line line description with 'text'
 * @return array|null a blockquote block whose handler argument is the list
 *                    of quoted lines, or null when the line is not a quote
 */
protected function blockQuote($Line)
{
    if ( ! preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) {
        return;
    }

    $quotedLines = (array) $matches[1];

    return array(
        'element' => array(
            'name' => 'blockquote',
            'handler' => array(
                'function' => 'linesElements',
                'argument' => $quotedLines,
                'destination' => 'elements',
            )
        ),
    );
}
/**
 * Append a line to an open block quote.
 *
 * @param array $Line  line description with 'text'
 * @param array $Block the open quote block
 * @return array|null the extended block, or null when a blank line preceded this line
 */
protected function blockQuoteContinue($Line, array $Block)
{
    if (isset($Block['interrupted'])) {
        return;
    }

    $isQuoted = $Line['text'][0] === '>'
        && preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches);

    # an unmarked line directly after quoted text is taken as lazy continuation
    $Block['element']['handler']['argument'] []= $isQuoted ? $matches[1] : $Line['text'];

    return $Block;
}
#
# Rule
/**
 * Recognise a horizontal rule ("***", "- - -", "___", ...).
 *
 * @param array $Line line description with 'text'
 * @return array|null an hr block, or null when the line is not a rule
 */
protected function blockRule($Line)
{
    $marker = $Line['text'][0];

    $hasEnoughMarkers = substr_count($Line['text'], $marker) >= 3;

    # besides the marker only spaces may appear on the line
    if ($hasEnoughMarkers && chop($Line['text'], " $marker") === '') {
        return array(
            'element' => array(
                'name' => 'hr',
            ),
        );
    }
}
#
# Setext
/**
 * Turn the open paragraph into a setext heading when the line underlines it.
 *
 * @param array      $Line  line description with 'indent' and 'text'
 * @param array|null $Block the block that is currently open, if any
 * @return array|null the paragraph block renamed to h1 ("=") or h2 (any
 *                    other underline character), or null when the line
 *                    is not an underline for a running paragraph
 */
protected function blockSetextHeader($Line, ?array $Block = null)
{
    $underlinesParagraph = isset($Block)
        && $Block['type'] === 'Paragraph'
        && ! isset($Block['interrupted']);

    if ( ! $underlinesParagraph) {
        return;
    }

    $marker = $Line['text'][0];

    # the line must consist of the marker character plus optional trailing spaces
    if ($Line['indent'] < 4 && chop(chop($Line['text'], ' '), $marker) === '') {
        $Block['element']['name'] = $marker === '=' ? 'h1' : 'h2';

        return $Block;
    }
}
#
# Markup
/**
 * Start a raw HTML block.
 *
 * @param array $Line line description with 'text'
 * @return array|null a raw-HTML block named after the tag, or null when
 *                    markup is disabled, the line does not open with a
 *                    tag, or the tag is a text-level element
 */
protected function blockMarkup($Line)
{
    if ($this->markupEscaped || $this->safeMode) {
        return;
    }

    $tagPattern = '/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/';

    if ( ! preg_match($tagPattern, $Line['text'], $matches)) {
        return;
    }

    # text-level elements are left to the inline parser
    if (in_array(strtolower($matches[1]), $this->textLevelElements)) {
        return;
    }

    return array(
        'name' => $matches[1],
        'element' => array(
            'rawHtml' => $Line['text'],
            'autobreak' => true,
        ),
    );
}
/**
 * Append a line to an open raw HTML block.
 *
 * @param array $Line  line description with 'body'
 * @param array $Block the open markup block
 * @return array|null the extended block, or null when the block is closed
 *                    or a blank line preceded this line
 */
protected function blockMarkupContinue($Line, array $Block)
{
    $finished = isset($Block['closed']) || isset($Block['interrupted']);

    if ($finished) {
        return;
    }

    $Block['element']['rawHtml'] = $Block['element']['rawHtml'] . "\n" . $Line['body'];

    return $Block;
}
#
# Reference
/**
 * Record a link reference definition ("[id]: url "title"").
 *
 * The destination may be wrapped in angle brackets ("[id]: <url>"). The
 * pattern already dropped a trailing ">" but captured the leading "<" as
 * part of the URL; an optional "<" is now consumed before the capture so
 * both brackets are stripped.
 *
 * @param array $Line line description with 'text'
 * @return array|null a block with an empty element (the definition itself
 *                    renders nothing), or null when the line is not a definition
 */
protected function blockReference($Line)
{
    if (strpos($Line['text'], ']') === false) {
        return;
    }

    if ( ! preg_match('/^\[(.+?)\]:[ ]*+<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches)) {
        return;
    }

    # ids are stored lower-cased; inlineLink() lower-cases the label it looks up
    $id = strtolower($matches[1]);

    $this->DefinitionData['Reference'][$id] = array(
        'url' => $matches[2],
        'title' => isset($matches[3]) ? $matches[3] : null,
    );

    return array(
        'element' => array(),
    );
}
#
# Table
/**
 * Turn the open one-line paragraph into a table when the line is a divider row.
 *
 * @param array      $Line  line description with 'text' (the candidate divider, e.g. "--- | :-:")
 * @param array|null $Block the block that is currently open, if any
 * @return array|null a table block holding the header row and an empty
 *                    body, flagged 'identified' so that it replaces the
 *                    paragraph; null when the line is not a valid divider
 */
protected function blockTable($Line, ?array $Block = null)
{
    if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) {
        return;
    }

    $headerLine = $Block['element']['handler']['argument'];
    $dividerLine = $Line['text'];

    # neither line hints at a table
    $looksPlain = strpos($headerLine, '|') === false
        && strpos($dividerLine, '|') === false
        && strpos($dividerLine, ':') === false;

    # the header must be a single line
    if ($looksPlain || strpos($headerLine, "\n") !== false) {
        return;
    }

    if (chop($dividerLine, ' -:|') !== '') {
        return;
    }

    # one alignment per divider cell: left, right, center or null
    $alignments = array();

    foreach (explode('|', trim(trim($dividerLine), '|')) as $dividerCell) {
        $dividerCell = trim($dividerCell);

        if ($dividerCell === '') {
            return;
        }

        $colonAtStart = $dividerCell[0] === ':';
        $colonAtEnd = substr($dividerCell, - 1) === ':';

        if ($colonAtEnd) {
            $alignments []= $colonAtStart ? 'center' : 'right';
        } elseif ($colonAtStart) {
            $alignments []= 'left';
        } else {
            $alignments []= null;
        }
    }

    $headerCells = explode('|', trim(trim($headerLine), '|'));

    if (count($headerCells) !== count($alignments)) {
        return;
    }

    $HeaderElements = array();

    foreach ($headerCells as $index => $headerCell) {
        $HeaderElement = array(
            'name' => 'th',
            'handler' => array(
                'function' => 'lineElements',
                'argument' => trim($headerCell),
                'destination' => 'elements',
            )
        );

        if (isset($alignments[$index])) {
            $HeaderElement['attributes'] = array(
                'style' => 'text-align: ' . $alignments[$index] . ';',
            );
        }

        $HeaderElements []= $HeaderElement;
    }

    return array(
        'alignments' => $alignments,
        'identified' => true,
        'element' => array(
            'name' => 'table',
            'elements' => array(
                array(
                    'name' => 'thead',
                    'elements' => array(
                        array(
                            'name' => 'tr',
                            'elements' => $HeaderElements,
                        ),
                    ),
                ),
                # rows are appended here by blockTableContinue()
                array(
                    'name' => 'tbody',
                    'elements' => array(),
                ),
            ),
        ),
    );
}
/**
 * Append a body row to an open table.
 *
 * @param array $Line  line description with 'text'
 * @param array $Block the open table block ('alignments', 'element')
 * @return array|null the table with one more row, or null when a blank
 *                    line preceded this line or the line is not a row
 */
protected function blockTableContinue($Line, array $Block)
{
    if (isset($Block['interrupted'])) {
        return;
    }

    $columnCount = count($Block['alignments']);

    # single-column tables accept any line; otherwise the line must contain a pipe
    $isRow = $columnCount === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|');

    if ( ! ($columnCount === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|'))) {
        return;
    }

    $row = trim(trim($Line['text']), '|');

    # split on pipes, keeping escaped pipes and pipes inside code spans within a cell
    preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches);

    # cells beyond the number of header columns are dropped
    $cells = array_slice($matches[0], 0, $columnCount);

    $Elements = array();

    foreach ($cells as $index => $cell) {
        $Element = array(
            'name' => 'td',
            'handler' => array(
                'function' => 'lineElements',
                'argument' => trim($cell),
                'destination' => 'elements',
            )
        );

        if (isset($Block['alignments'][$index])) {
            $Element['attributes'] = array(
                'style' => 'text-align: ' . $Block['alignments'][$index] . ';',
            );
        }

        $Elements []= $Element;
    }

    # index 1 is the tbody created by blockTable()
    $Block['element']['elements'][1]['elements'] []= array(
        'name' => 'tr',
        'elements' => $Elements,
    );

    return $Block;
}
#
# ~
#
/**
 * Start a paragraph from a line that no block type claimed.
 *
 * @param array $Line line description with 'text'
 * @return array a Paragraph block whose text is parsed later by lineElements()
 */
protected function paragraph($Line)
{
    $handler = array(
        'function' => 'lineElements',
        'argument' => $Line['text'],
        'destination' => 'elements',
    );

    return array(
        'type' => 'Paragraph',
        'element' => array(
            'name' => 'p',
            'handler' => $handler,
        ),
    );
}
/**
 * Append a line to the open paragraph.
 *
 * @param array $Line  line description with 'text'
 * @param array $Block the open paragraph block
 * @return array|null the extended paragraph, or null when a blank line preceded this line
 */
protected function paragraphContinue($Line, array $Block)
{
    if (isset($Block['interrupted'])) {
        return;
    }

    $paragraphText = $Block['element']['handler']['argument'];

    $Block['element']['handler']['argument'] = $paragraphText . "\n" . $Line['text'];

    return $Block;
}
#
# Inline Elements
#
# Maps a marker character to the inline types that may start with it.
# lineElements() tries the listed types in order by calling "inline<Type>".
protected $InlineTypes = [
    '!' => ['Image'],
    '&' => ['SpecialCharacter'],
    '*' => ['Emphasis'],
    ':' => ['Url'],
    '<' => ['UrlTag', 'EmailTag', 'Markup'],
    '[' => ['Link'],
    '_' => ['Emphasis'],
    '`' => ['Code'],
    '~' => ['Strikethrough'],
    '\\' => ['EscapeSequence'],
];
# every character that can open an inline element; lineElements() passes this
# list to strpbrk() to find the next marker, then looks the marker up in $InlineTypes
protected $inlineMarkerList = '!*_&[:<`~\\';
#
# ~
#
/**
 * Parse a piece of inline Markdown and render it.
 *
 * @param string $text          inline Markdown
 * @param array  $nonNestables  inline type names that must not be recognised
 * @return mixed whatever elements() produces for the parsed inlines
 */
public function line($text, $nonNestables = array())
{
    $Elements = $this->lineElements($text, $nonNestables);

    return $this->elements($Elements);
}
# Splits inline text into a list of elements.
#
# $text is the inline Markdown; $nonNestables lists inline type names (the
# "<Type>" part of "inline<Type>") that must be skipped in this context.
# Returns a flat list of element arrays: plain-text runs produced by
# inlineText() alternate with the elements of recognised inlines, and every
# entry carries an 'autobreak' key (false unless the inline set it).
protected function lineElements($text, $nonNestables = array())
{
# standardize line breaks
$text = str_replace(array("\r\n", "\r"), "\n", $text);
$Elements = array();
# turn the list into a name => name map so membership is an isset() lookup
$nonNestables = (empty($nonNestables)
? array()
: array_combine($nonNestables, $nonNestables)
);
# $excerpt is based on the first occurrence of a marker
while ($excerpt = strpbrk($text, $this->inlineMarkerList))
{
$marker = $excerpt[0];
$markerPosition = strlen($text) - strlen($excerpt);
# 'text' starts at the marker, 'context' is everything not yet consumed
$Excerpt = array('text' => $excerpt, 'context' => $text);
foreach ($this->InlineTypes[$marker] as $inlineType)
{
# check to see if the current inline type is nestable in the current context
if (isset($nonNestables[$inlineType]))
{
continue;
}
$Inline = $this->{"inline$inlineType"}($Excerpt);
if ( ! isset($Inline))
{
continue;
}
# makes sure that the inline belongs to "our" marker
if (isset($Inline['position']) and $Inline['position'] > $markerPosition)
{
continue;
}
# sets a default inline position
if ( ! isset($Inline['position']))
{
$Inline['position'] = $markerPosition;
}
# cause the new element to 'inherit' our non nestables
$Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables'])
? array_merge($Inline['element']['nonNestables'], $nonNestables)
: $nonNestables
;
# the text that comes before the inline
$unmarkedText = substr($text, 0, $Inline['position']);
# compile the unmarked text
$InlineText = $this->inlineText($unmarkedText);
$Elements[] = $InlineText['element'];
# compile the inline
$Elements[] = $this->extractElement($Inline);
# remove the examined text
# (an inline must report an 'extent' greater than zero, otherwise $text
# never shrinks and this loop does not terminate)
$text = substr($text, $Inline['position'] + $Inline['extent']);
continue 2;
}
# the marker does not belong to an inline
$unmarkedText = substr($text, 0, $markerPosition + 1);
$InlineText = $this->inlineText($unmarkedText);
$Elements[] = $InlineText['element'];
$text = substr($text, $markerPosition + 1);
}
# whatever is left contains no marker at all
$InlineText = $this->inlineText($text);
$Elements[] = $InlineText['element'];
# give every element an explicit 'autobreak' value, defaulting to false
foreach ($Elements as &$Element)
{
if ( ! isset($Element['autobreak']))
{
$Element['autobreak'] = false;
}
}
return $Elements;
}
#
# ~
#
/**
 * Wrap a run of plain text as an inline, converting hard line breaks to <br>.
 *
 * With breaks enabled every line break (plus preceding spaces) becomes a
 * <br>; otherwise only a line break preceded by a backslash or by two or
 * more spaces does.
 *
 * @param string $text plain text without inline markup
 * @return array inline with 'extent' (byte length of $text) and 'element'
 */
protected function inlineText($text)
{
    $breakPattern = $this->breaksEnabled
        ? '/[ ]*+\n/'
        : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/';

    # each match is replaced by a <br> followed by a literal line break
    $replacement = array(
        array('name' => 'br'),
        array('text' => "\n"),
    );

    return array(
        'extent' => strlen($text),
        'element' => array(
            'elements' => self::pregReplaceElements($breakPattern, $replacement, $text),
        ),
    );
}
/**
 * Recognise an inline code span (`code`, ``co`de``, ...).
 *
 * The pattern and the first half of the body were truncated in this copy
 * of the file (everything between "(?" and "strlen($matches[0])" was
 * lost). They are restored here: the span is closed by a backtick run of
 * the same length as the opener that is neither preceded nor followed by
 * another backtick, and line breaks inside the span collapse to a space.
 *
 * @param array $Excerpt excerpt with 'text' starting at the marker
 * @return array|null inline with 'extent' and a code element, or null
 *                    when no closing run is found
 */
protected function inlineCode($Excerpt)
{
    $marker = $Excerpt['text'][0];

    if (preg_match('/^(['.$marker.']++)[ ]*+(.+?)[ ]*+(?<!['.$marker.'])\1(?!'.$marker.')/s', $Excerpt['text'], $matches)) {
        $text = $matches[2];

        # a line break inside a code span is rendered as a single space
        $text = preg_replace('/[ ]*+\n/', ' ', $text);

        return array(
            'extent' => strlen($matches[0]),
            'element' => array(
                'name' => 'code',
                'text' => $text,
            ),
        );
    }
}
/**
 * Recognise an e-mail autolink ("<user@example.com>" or "<mailto:...>").
 *
 * @param array $Excerpt excerpt with 'text' starting at "<"
 * @return array|null inline with 'extent' and a link element whose href
 *                    always carries the "mailto:" scheme, or null when
 *                    the text is not an e-mail autolink
 */
protected function inlineEmailTag($Excerpt)
{
    # cheap rejection before running the regular expression
    if (strpos($Excerpt['text'], '>') === false) {
        return;
    }

    $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';

    $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@'
        . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*';

    if ( ! preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches)) {
        return;
    }

    $address = $matches[1];

    # group 2 is only present when the address was written with the scheme
    $href = isset($matches[2]) ? $address : "mailto:$address";

    return array(
        'extent' => strlen($matches[0]),
        'element' => array(
            'name' => 'a',
            'text' => $address,
            'attributes' => array(
                'href' => $href,
            ),
        ),
    );
}
/**
 * Recognise emphasis (*em*, _em_) and strong emphasis (**strong**, __strong__).
 *
 * @param array $Excerpt excerpt with 'text' starting at the marker
 * @return array|null inline with 'extent' and an em/strong element whose
 *                    content is parsed again by lineElements(), or null
 */
protected function inlineEmphasis($Excerpt)
{
    $text = $Excerpt['text'];

    # a marker at the very end of the text cannot open anything
    if ( ! isset($text[1])) {
        return;
    }

    $marker = $text[0];

    # a doubled marker is tried as strong first, then as plain emphasis
    if ($text[1] === $marker && preg_match($this->StrongRegex[$marker], $text, $matches)) {
        $tag = 'strong';
    } elseif (preg_match($this->EmRegex[$marker], $text, $matches)) {
        $tag = 'em';
    } else {
        return;
    }

    return array(
        'extent' => strlen($matches[0]),
        'element' => array(
            'name' => $tag,
            'handler' => array(
                'function' => 'lineElements',
                'argument' => $matches[1],
                'destination' => 'elements',
            )
        ),
    );
}
/**
 * Recognise a backslash escape of one of the special characters.
 *
 * @param array $Excerpt excerpt with 'text' starting at the backslash
 * @return array|null inline of extent 2 whose element is the escaped
 *                    character itself, or null when the next character
 *                    is not in $this->specialCharacters
 */
protected function inlineEscapeSequence($Excerpt)
{
    if ( ! isset($Excerpt['text'][1])) {
        return;
    }

    $escaped = $Excerpt['text'][1];

    if ( ! in_array($escaped, $this->specialCharacters)) {
        return;
    }

    return array(
        'element' => array('rawHtml' => $escaped),
        'extent' => 2,
    );
}
/**
 * Recognise an image ("![alt](src "title")" or a reference-style image).
 *
 * The part after "!" is parsed as a link by inlineLink(); the link's href
 * becomes the src, its label the alt text, and its remaining attributes
 * are carried over.
 *
 * @param array $Excerpt excerpt with 'text' starting at "!"
 * @return array|null inline with 'extent' and an img element, or null
 *                    when "!" is not followed by a valid link
 */
protected function inlineImage($Excerpt)
{
    $text = $Excerpt['text'];

    if ( ! isset($text[1]) or $text[1] !== '[') {
        return;
    }

    # drop the "!" and let the link parser do the work
    $Excerpt['text'] = substr($text, 1);

    $Link = $this->inlineLink($Excerpt);

    if ($Link === null) {
        return;
    }

    $linkAttributes = $Link['element']['attributes'];

    # src and alt come first; "+" keeps them and adds the link's other attributes
    $attributes = array(
        'src' => $linkAttributes['href'],
        'alt' => $Link['element']['handler']['argument'],
    ) + $linkAttributes;

    unset($attributes['href']);

    return array(
        # one more than the link: the leading "!"
        'extent' => $Link['extent'] + 1,
        'element' => array(
            'name' => 'img',
            'attributes' => $attributes,
            'autobreak' => true,
        ),
    );
}
/**
 * Recognise an inline link ("[text](url "title")") or a reference link
 * ("[text][id]", "[text][]", "[text]").
 *
 * @param array $Excerpt excerpt with 'text' starting at "["
 * @return array|null inline with 'extent' and an "a" element whose label
 *                    is parsed again by lineElements(); null when there
 *                    is no bracketed label or the reference is undefined
 */
protected function inlineLink($Excerpt)
{
    $remainder = $Excerpt['text'];

    # the label: a bracket pair, possibly containing nested pairs
    if ( ! preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) {
        return;
    }

    $label = $matches[1];
    $extent = strlen($matches[0]);
    $remainder = substr($remainder, $extent);

    $href = null;
    $title = null;

    if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) {
        # inline form: (url "title")
        $href = $matches[1];

        if (isset($matches[2])) {
            # strip the surrounding quotes
            $title = substr($matches[2], 1, - 1);
        }

        $extent += strlen($matches[0]);
    } else {
        # reference form: an explicit [id], or the label itself as the id
        if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) {
            $definition = strtolower(strlen($matches[1]) ? $matches[1] : $label);

            $extent += strlen($matches[0]);
        } else {
            $definition = strtolower($label);
        }

        if ( ! isset($this->DefinitionData['Reference'][$definition])) {
            return;
        }

        $Definition = $this->DefinitionData['Reference'][$definition];

        $href = $Definition['url'];
        $title = $Definition['title'];
    }

    return array(
        'extent' => $extent,
        'element' => array(
            'name' => 'a',
            'handler' => array(
                'function' => 'lineElements',
                'argument' => $label,
                'destination' => 'elements',
            ),
            # no links or autolinked URLs inside a link label
            'nonNestables' => array('Url', 'Link'),
            'attributes' => array(
                'href' => $href,
                'title' => $title,
            ),
        ),
    );
}
/**
 * Recognise a piece of inline raw HTML: a closing tag, a comment or an
 * opening / self-closing tag.
 *
 * The comment pattern had been reduced to '/^/s' in this copy of the
 * file. That pattern matches the empty string, so any "<!" followed by a
 * ">" somewhere later produced an inline of extent 0 and lineElements()
 * never advanced past it. The full comment pattern is restored.
 *
 * @param array $Excerpt excerpt with 'text' starting at "<"
 * @return array|null inline whose element is the matched markup as raw
 *                    HTML, or null when markup is disabled or nothing matches
 */
protected function inlineMarkup($Excerpt)
{
    if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) {
        return;
    }

    $text = $Excerpt['text'];

    # a ">" exists after the leading "<", so index 1 is always set
    $next = $text[1];

    $matched = false;

    if ($next === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $text, $matches)) {
        # closing tag
        $matched = true;
    } elseif ($next === '!' and preg_match('/^<!---?[^>-](?:-?+[^-])*-->/s', $text, $matches)) {
        # comment
        $matched = true;
    } elseif ($next !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $text, $matches)) {
        # opening or self-closing tag, with optional attributes
        $matched = true;
    }

    if ( ! $matched) {
        return;
    }

    return array(
        'element' => array('rawHtml' => $matches[0]),
        'extent' => strlen($matches[0]),
    );
}
/**
 * Recognise an HTML entity ("&amp;", "&#35;", ...) so it is passed through unescaped.
 *
 * @param array $Excerpt excerpt with 'text' starting at "&"
 * @return array|null inline whose element is the entity as raw HTML, or
 *                    null when the text is not an entity
 */
protected function inlineSpecialCharacter($Excerpt)
{
    $text = $Excerpt['text'];

    # cheap rejections before running the regular expression
    if (substr($text, 1, 1) === ' ' || strpos($text, ';') === false) {
        return;
    }

    if ( ! preg_match('/^&(#?+[0-9a-zA-Z]++);/', $text, $matches)) {
        return;
    }

    return array(
        'element' => array('rawHtml' => '&' . $matches[1] . ';'),
        'extent' => strlen($matches[0]),
    );
}
/**
 * Recognise strikethrough text ("~~text~~").
 *
 * @param array $Excerpt excerpt with 'text' starting at "~"
 * @return array|null inline with 'extent' and a del element whose content
 *                    is parsed again by lineElements(), or null
 */
protected function inlineStrikethrough($Excerpt)
{
    $text = $Excerpt['text'];

    if ( ! isset($text[1]) || $text[1] !== '~') {
        return;
    }

    # the struck text must neither start nor end with whitespace
    if ( ! preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $text, $matches)) {
        return;
    }

    return array(
        'extent' => strlen($matches[0]),
        'element' => array(
            'name' => 'del',
            'handler' => array(
                'function' => 'lineElements',
                'argument' => $matches[1],
                'destination' => 'elements',
            )
        ),
    );
}
/**
 * Autolink a bare http(s) URL.
 *
 * The excerpt starts at ":" (the marker), so the URL is searched for in
 * the whole context and its offset is reported as 'position'.
 *
 * The pre-check now uses stripos(): the regular expression carries the
 * "i" flag and therefore accepts "HTTP://...", but the former
 * case-sensitive strpos() test rejected such text before the expression
 * ever ran.
 *
 * @param array $Excerpt excerpt with 'text' starting at ":" and 'context'
 *                       holding the text being scanned
 * @return array|null inline with 'extent', 'position' and a link element,
 *                    or null when autolinking is off or no URL is found
 */
protected function inlineUrl($Excerpt)
{
    # the marker must be the ":" of "://"
    if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/') {
        return;
    }

    # cheap rejection before running the regular expression
    if (stripos($Excerpt['context'], 'http') === false) {
        return;
    }

    if ( ! preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) {
        return;
    }

    # with PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair
    $url = $matches[0][0];

    return array(
        'extent' => strlen($url),
        'position' => $matches[0][1],
        'element' => array(
            'name' => 'a',
            'text' => $url,
            'attributes' => array(
                'href' => $url,
            ),
        ),
    );
}
/**
 * Recognise a URL autolink ("<scheme://...>").
 *
 * @param array $Excerpt excerpt with 'text' starting at "<"
 * @return array|null inline with 'extent' and a link element whose text
 *                    and href are the URL, or null
 */
protected function inlineUrlTag($Excerpt)
{
    # cheap rejection before running the regular expression
    if (strpos($Excerpt['text'], '>') === false) {
        return;
    }

    if ( ! preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches)) {
        return;
    }

    list($tag, $url) = $matches;

    return array(
        'extent' => strlen($tag),
        'element' => array(
            'name' => 'a',
            'text' => $url,
            'attributes' => array(
                'href' => $url,
            ),
        ),
    );
}
/**
 * Render a run of plain text (no inline markup) to markup.
 *
 * @param string $text plain text
 * @return mixed whatever element() produces for the text element
 */
protected function unmarkedText($text)
{
    $plain = $this->inlineText($text);

    return $this->element($plain['element']);
}
#
# Handlers
#
/**
 * Run an element's deferred handler, if it has one.
 *
 * A handler is either a method name (string form: the method receives the
 * element's 'text' and its result is stored as 'rawHtml') or an array
 * with 'function', 'argument' and 'destination'. The named method is
 * called with the argument and the element's nonNestables, and its result
 * is stored under the destination key.
 *
 * @param array $Element element, possibly carrying a 'handler'
 * @return array the element with the handler applied and removed; the
 *               element unchanged when it has no handler
 */
protected function handle(array $Element)
{
    if ( ! isset($Element['handler'])) {
        return $Element;
    }

    if ( ! isset($Element['nonNestables'])) {
        $Element['nonNestables'] = array();
    }

    $handler = $Element['handler'];

    if (is_string($handler)) {
        $function = $handler;
        $argument = $Element['text'];
        unset($Element['text']);
        $destination = 'rawHtml';
    } else {
        $function = $handler['function'];
        $argument = $handler['argument'];
        $destination = $handler['destination'];
    }

    $Element[$destination] = $this->{$function}($argument, $Element['nonNestables']);

    # a handler may produce another handler; resolve that one as well
    if ($destination === 'handler') {
        $Element = $this->handle($Element);
    }

    unset($Element['handler']);

    return $Element;
}
/**
 * Run handle() over an element and everything nested inside it.
 *
 * @param array $Element
 * @return array
 */
protected function handleElementRecursive(array $Element)
{
    $resolveHandler = array($this, 'handle');

    return $this->elementApplyRecursive($resolveHandler, $Element);
}
/**
 * Run handle() over every element of a list and everything nested inside
 * each of them.
 *
 * @param array $Elements
 * @return array
 */
protected function handleElementsRecursive(array $Elements)
{
    $resolveHandler = array($this, 'handle');

    return $this->elementsApplyRecursive($resolveHandler, $Elements);
}
/**
 * Apply a callback to an element and then to its descendants (parent
 * before children).
 *
 * Children are looked up on the element returned by the callback;
 * 'elements' takes precedence over 'element'.
 *
 * @param callable $closure receives an element array, returns an element array
 * @param array $Element
 * @return array
 */
protected function elementApplyRecursive($closure, array $Element)
{
    $visited = call_user_func($closure, $Element);

    if (isset($visited['elements'])) {
        $visited['elements'] = $this->elementsApplyRecursive($closure, $visited['elements']);

        return $visited;
    }

    if (isset($visited['element'])) {
        $visited['element'] = $this->elementApplyRecursive($closure, $visited['element']);
    }

    return $visited;
}
/**
 * Apply a callback to an element's descendants and then to the element
 * itself (children before parent).
 *
 * 'elements' takes precedence over 'element' when both are present.
 *
 * @param callable $closure receives an element array, returns an element array
 * @param array $Element
 * @return array
 */
protected function elementApplyRecursiveDepthFirst($closure, array $Element)
{
    if (isset($Element['elements']))
    {
        $Element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['elements']);
    }
    elseif (isset($Element['element']))
    {
        # 'element' holds ONE element, so it must go through the singular
        # variant; the plural one would iterate over the element's own
        # fields and pass scalars to an array-typed parameter
        $Element['element'] = $this->elementApplyRecursiveDepthFirst($closure, $Element['element']);
    }

    $Element = call_user_func($closure, $Element);

    return $Element;
}
/**
 * Apply elementApplyRecursive() to every element of a list, keeping the
 * list's keys and order.
 *
 * @param callable $closure
 * @param array $Elements
 * @return array
 */
protected function elementsApplyRecursive($closure, array $Elements)
{
    $processed = array();

    foreach ($Elements as $key => $Element) {
        $processed[$key] = $this->elementApplyRecursive($closure, $Element);
    }

    return $processed;
}
/**
 * Apply elementApplyRecursiveDepthFirst() to every element of a list,
 * keeping the list's keys and order.
 *
 * @param callable $closure
 * @param array $Elements
 * @return array
 */
protected function elementsApplyRecursiveDepthFirst($closure, array $Elements)
{
    $processed = array();

    foreach ($Elements as $key => $Element) {
        $processed[$key] = $this->elementApplyRecursiveDepthFirst($closure, $Element);
    }

    return $processed;
}
/**
 * Render one element array as markup.
 *
 * In safe mode the element is first passed through sanitiseElement(); any
 * pending handler is then resolved by handle(). Keys read afterwards:
 * 'name', 'attributes', 'text', 'rawHtml', 'allowRawHtmlInSafeMode',
 * 'elements' and 'element'.
 *
 * - An element without a 'name' renders only its content.
 * - A named element without any content renders as a self-closed tag.
 * - 'text' is always escaped; 'rawHtml' is emitted verbatim unless safe
 *   mode is on and the element does not set 'allowRawHtmlInSafeMode'.
 *
 * @param array $Element
 * @return string
 */
protected function element(array $Element)
{
    if ($this->safeMode)
    {
        $Element = $this->sanitiseElement($Element);
    }

    # identity map if element has no handler
    $Element = $this->handle($Element);

    $hasName = isset($Element['name']);

    $markup = '';

    if ($hasName)
    {
        $markup .= '<' . $Element['name'];

        if (isset($Element['attributes']))
        {
            foreach ($Element['attributes'] as $name => $value)
            {
                # a null value means the attribute is left out
                if ($value === null)
                {
                    continue;
                }

                $markup .= " $name=\"".self::escape($value).'"';
            }
        }
    }

    $permitRawHtml = false;

    if (isset($Element['text']))
    {
        $text = $Element['text'];
    }
    # very strongly consider an alternative if you're writing an
    # extension
    elseif (isset($Element['rawHtml']))
    {
        $text = $Element['rawHtml'];

        $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
        $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
    }

    $hasContent = isset($text) || isset($Element['element']) || isset($Element['elements']);

    if ($hasContent)
    {
        $markup .= $hasName ? '>' : '';

        if (isset($Element['elements']))
        {
            $markup .= $this->elements($Element['elements']);
        }
        elseif (isset($Element['element']))
        {
            $markup .= $this->element($Element['element']);
        }
        else
        {
            if (!$permitRawHtml)
            {
                $markup .= self::escape($text, true);
            }
            else
            {
                $markup .= $text;
            }
        }

        # closing tag: "</name>" - the "</" prefix is required, otherwise
        # the output would read "<p>text" followed by "p>"
        $markup .= $hasName ? '</' . $Element['name'] . '>' : '';
    }
    elseif ($hasName)
    {
        $markup .= ' />';
    }

    return $markup;
}
/**
 * Render a list of elements, placing newlines between and around them.
 *
 * Every element states whether it wants line breaks around it: its
 * 'autobreak' value when set, otherwise whether it has a 'name'. A newline
 * is written before an element only when both it and its predecessor want
 * one (the start of the list counts as wanting one), and after the last
 * rendered element when that element wants one. Empty elements are skipped.
 *
 * @param array $Elements
 * @return string
 */
protected function elements(array $Elements)
{
    $html = '';

    # the start of the list behaves like a predecessor that wants a break
    $previousWantsBreak = true;

    foreach ($Elements as $Element) {
        if (empty($Element)) {
            continue;
        }

        if (isset($Element['autobreak'])) {
            $currentWantsBreak = $Element['autobreak'];
        } else {
            $currentWantsBreak = isset($Element['name']);
        }

        # (autobreak === false) covers both sides of an element
        if ($previousWantsBreak and $currentWantsBreak) {
            $html .= "\n";
        }

        $html .= $this->element($Element);

        $previousWantsBreak = $currentWantsBreak;
    }

    if ($previousWantsBreak) {
        $html .= "\n";
    }

    return $html;
}
# ~
/**
 * Build the elements of a list item from its lines.
 *
 * When none of the lines is empty and the first resulting element is a
 * 'p', that element's 'name' is removed so its content renders without the
 * paragraph wrapper.
 *
 * @param array $lines
 * @return array
 */
protected function li($lines)
{
    $Elements = $this->linesElements($lines);

    $hasBlankLine = in_array('', $lines);

    if ($hasBlankLine) {
        return $Elements;
    }

    $startsWithParagraph = isset($Elements[0])
        && isset($Elements[0]['name'])
        && $Elements[0]['name'] === 'p';

    if ($startsWithParagraph) {
        unset($Elements[0]['name']);
    }

    return $Elements;
}
#
# AST Convenience
#
/**
 * Split $text at every match of $regexp and put a copy of $Elements in
 * place of each match.
 *
 * The result alternates between array('text' => ...) entries for the
 * unmatched stretches and the given replacement elements; it always ends
 * with a 'text' entry holding whatever follows the last match.
 *
 * @param string $regexp
 * @param array $Elements elements inserted for every match
 * @param string $text
 * @return array
 */
protected static function pregReplaceElements($regexp, $Elements, $text)
{
    $pieces = array();
    $remaining = $text;

    while (preg_match($regexp, $remaining, $hit, PREG_OFFSET_CAPTURE)) {
        # with PREG_OFFSET_CAPTURE the whole match is a (string, offset) pair
        list($matched, $start) = $hit[0];

        $pieces[] = array('text' => substr($remaining, 0, $start));

        foreach ($Elements as $Replacement) {
            $pieces[] = $Replacement;
        }

        # continue searching after the match
        $remaining = substr($remaining, $start + strlen($matched));
    }

    $pieces[] = array('text' => $remaining);

    return $pieces;
}
#
# Deprecated Methods
#
/**
 * Alias of text(), kept for backwards compatibility.
 *
 * @deprecated use text() instead
 */
function parse($text)
{
    return $this->text($text);
}
/**
 * Remove unsafe parts of an element.
 *
 * - Elements without a 'name' lose their 'attributes' altogether.
 * - For 'a' and 'img' the URL attribute ('href' / 'src') is passed through
 *   filterUnsafeUrlInAttribute().
 * - Attributes whose name does not match the allowed pattern, or whose
 *   name starts with "on" (any case), are dropped.
 *
 * @param array $Element
 * @return array
 */
protected function sanitiseElement(array $Element)
{
    static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
    static $safeUrlNameToAtt = array(
        'a' => 'href',
        'img' => 'src',
    );

    if ( ! isset($Element['name'])) {
        unset($Element['attributes']);

        return $Element;
    }

    $tag = $Element['name'];

    if (isset($safeUrlNameToAtt[$tag])) {
        $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$tag]);
    }

    if (empty($Element['attributes'])) {
        return $Element;
    }

    foreach ($Element['attributes'] as $att => $val) {
        # badly parsed attribute names and onevent attributes are both dumped
        $isUnsafe = ! preg_match($goodAttribute, $att) || self::striAtStart($att, 'on');

        if ($isUnsafe) {
            unset($Element['attributes'][$att]);
        }
    }

    return $Element;
}
/**
 * Neutralise a URL attribute unless it starts with a whitelisted prefix.
 *
 * The attribute value is compared, case-insensitively, against each entry
 * of $this->safeLinksWhitelist. If none matches, every ':' in the value is
 * replaced with '%3A'.
 *
 * @param array $Element
 * @param string $attribute name of the attribute holding the URL
 * @return array
 */
protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
{
    $allowedPrefixes = $this->safeLinksWhitelist;

    foreach ($allowedPrefixes as $allowedPrefix) {
        $isWhitelisted = self::striAtStart($Element['attributes'][$attribute], $allowedPrefix);

        if ($isWhitelisted) {
            return $Element;
        }
    }

    $neutralised = str_replace(':', '%3A', $Element['attributes'][$attribute]);
    $Element['attributes'][$attribute] = $neutralised;

    return $Element;
}
#
# Static Methods
#
/**
 * Escape text for inclusion in HTML, treating it as UTF-8.
 *
 * @param string $text
 * @param bool $allowQuotes when true, single and double quotes are left
 *                          as they are; otherwise both are escaped
 * @return string
 */
protected static function escape($text, $allowQuotes = false)
{
    if ($allowQuotes) {
        $quoteFlag = ENT_NOQUOTES;
    } else {
        $quoteFlag = ENT_QUOTES;
    }

    return htmlspecialchars($text, $quoteFlag, 'UTF-8');
}
/**
 * Case-insensitive "starts with" test.
 *
 * @param string $string
 * @param string $needle
 * @return bool true when $string begins with $needle, ignoring case
 */
protected static function striAtStart($string, $needle)
{
    $needleLength = strlen($needle);

    # a needle longer than the string can never be a prefix of it
    if ($needleLength > strlen($string)) {
        return false;
    }

    $prefix = substr($string, 0, $needleLength);

    return strtolower($prefix) === strtolower($needle);
}
/**
 * Return the shared instance registered under $name, creating it on first
 * use.
 *
 * The instance is created with `new static()`, i.e. as an object of the
 * class the method was called on. The registry is a single static array,
 * so a name that is already taken returns the existing object whichever
 * class the call is made on.
 *
 * @param string $name
 * @return static
 */
static function instance($name = 'default')
{
    if ( ! isset(self::$instances[$name])) {
        self::$instances[$name] = new static();
    }

    return self::$instances[$name];
}

# registry of named instances used by instance()
private static $instances = array();
#
# Fields
#
# Not assigned anywhere in this part of the file.
# NOTE(review): presumably holds definitions collected while parsing - confirm
# against the code that writes it.
protected $DefinitionData;
#
# Read-Only
# List of single characters: backslash, backtick, asterisk, underscore,
# braces, brackets, parentheses, '>', '#', '+', '-', '.', '!', '|' and '~'.
# NOTE(review): presumably the characters that can be backslash-escaped -
# confirm against the code that reads this list.
protected $specialCharacters = array(
'\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', '~'
);
# Patterns keyed by marker character ('*' or '_'). Each is anchored at the
# start of the subject, requires a doubled marker on both sides, captures
# the text in between and rejects a further marker right after the closing
# pair.
protected $StrongRegex = array(
'*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s',
'_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us',
);
# Same layout as $StrongRegex but with a single marker on each side. The
# '_' pattern additionally requires a word boundary after the closing
# marker.
protected $EmRegex = array(
'*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
'_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
);
# Pattern fragment (no delimiters): an attribute name, optionally followed
# by '=' and an unquoted, double-quoted or single-quoted value.
protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';
# List of element names.
# NOTE(review): presumably the elements that take no closing tag - confirm
# against the code that reads this list.
protected $voidElements = array(
'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
);
# List of element names.
# NOTE(review): presumably the elements treated as inline (text-level)
# rather than block-level - confirm against the code that reads this list.
protected $textLevelElements = array(
'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
'i', 'rp', 'del', 'code', 'strike', 'marquee',
'q', 'rt', 'ins', 'font', 'strong',
's', 'tt', 'kbd', 'mark',
'u', 'xm', 'sub', 'nobr',
'sup', 'ruby',
'var', 'span',
'wbr', 'time',
);
}
================================================
FILE: composer.json
================================================
{
"name": "erusev/parsedown",
"description": "Parser for Markdown.",
"keywords": ["markdown", "parser"],
"homepage": "http://parsedown.org",
"type": "library",
"license": "MIT",
"authors": [
{
"name": "Emanuil Rusev",
"email": "hello@erusev.com",
"homepage": "http://erusev.com"
}
],
"require": {
"php": ">=7.2",
"ext-mbstring": "*"
},
"require-dev": {
"phpunit/phpunit": "^8.5.52|^9.6.33"
},
"autoload": {
"psr-0": { "Parsedown": "" }
},
"autoload-dev": {
"psr-0": {
"TestParsedown": "test/",
"ParsedownTest": "test/",
"CommonMarkTest": "test/",
"CommonMarkTestWeak": "test/"
}
}
}
================================================
FILE: phpunit.xml.dist
================================================
Hello Parsedown!
``` You can also parse inline markdown only: ```php echo $Parsedown->line('Hello _Parsedown_!'); # prints: Hello Parsedown! ``` More examples in [the wiki](https://github.com/erusev/parsedown/wiki/) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). ## Security Parsedown is capable of escaping user-input within the HTML that it generates. Additionally Parsedown will apply sanitisation to additional scripting vectors (such as scripting link destinations) that are introduced by the markdown syntax itself. To tell Parsedown that it is processing untrusted user-input, use the following: ```php $Parsedown->setSafeMode(true); ``` If instead, you wish to allow HTML within untrusted user-input, but still want output to be free from XSS it is recommended that you make use of a HTML sanitiser that allows HTML tags to be whitelisted, like [HTML Purifier](http://htmlpurifier.org/). In both cases you should strongly consider employing defence-in-depth measures, like [deploying a Content-Security-Policy](https://scotthelme.co.uk/content-security-policy-an-introduction/) (a browser security feature) so that your page is likely to be safe even if an attacker finds a vulnerability in one of the first lines of defence above. Safe mode does not necessarily yield safe results when using extensions to Parsedown. Extensions should be evaluated on their own to determine their specific safety against XSS. ## Escaping HTML > WARNING: This method is not safe from XSS! If you wish to escape HTML in trusted input, you can use the following: ```php $Parsedown->setMarkupEscaped(true); ``` Beware that this still allows users to insert unsafe scripting vectors, ex: `[xss](javascript:alert%281%29)`. ## Questions **How does Parsedown work?** It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then perhaps it belongs to a list. 
Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. **Is it compliant with CommonMark?** It passes most of the CommonMark tests. Most of the tests that don't pass deal with cases that are quite uncommon. Still, as CommonMark matures, compliance should improve. **Who uses it?** [Laravel Framework](https://laravel.com/), [Bolt CMS](http://bolt.cm/), [Grav CMS](http://getgrav.org/), [Herbie CMS](http://www.getherbie.org/), [Kirby CMS](http://getkirby.com/), [October CMS](http://octobercms.com/), [Pico CMS](http://picocms.org), [Statamic CMS](http://www.statamic.com/), [phpDocumentor](http://www.phpdoc.org/), [RaspberryPi.org](http://www.raspberrypi.org/), [Symfony Demo](https://github.com/symfony/demo) and [more](https://packagist.org/packages/erusev/parsedown/dependents). **How can I help?** Use it, star it, share it and if you feel generous, [sponsor me](https://github.com/sponsors/erusev). **What else should I know?** I also make [Nota](https://nota.md/) — a notes app designed for local Markdown files. 
================================================ FILE: test/CommonMarkTestStrict.php ================================================ parsedown = new TestParsedown(); $this->parsedown->setUrlsLinked(false); } /** * @dataProvider data * @param $id * @param $section * @param $markdown * @param $expectedHtml */ public function testExample($id, $section, $markdown, $expectedHtml) { $actualHtml = $this->parsedown->text($markdown); $this->assertEquals($expectedHtml, $actualHtml); } /** * @return array */ public function data() { $spec = file_get_contents(self::SPEC_URL); if ($spec === false) { $this->fail('Unable to load CommonMark spec from ' . self::SPEC_URL); } $spec = str_replace("\r\n", "\n", $spec); $spec = strstr($spec, '', true); $matches = array(); preg_match_all('/^`{32} example\n((?s).*?)\n\.\n(?:|((?s).*?)\n)`{32}$|^#{1,6} *(.*?)$/m', $spec, $matches, PREG_SET_ORDER); $data = array(); $currentId = 0; $currentSection = ''; foreach ($matches as $match) { if (isset($match[3])) { $currentSection = $match[3]; } else { $currentId++; $markdown = str_replace('→', "\t", $match[1]); $expectedHtml = isset($match[2]) ? str_replace('→', "\t", $match[2]) : ''; $data[$currentId] = array( 'id' => $currentId, 'section' => $currentSection, 'markdown' => $markdown, 'expectedHtml' => $expectedHtml ); } } return $data; } } ================================================ FILE: test/CommonMarkTestWeak.php ================================================ parsedown->getTextLevelElements(); array_walk($textLevelElements, function (&$element) { $element = preg_quote($element, '/'); }); $this->textLevelElementRegex = '\b(?:' . implode('|', $textLevelElements) . 
')\b'; } /** * @dataProvider data * @param $id * @param $section * @param $markdown * @param $expectedHtml */ public function testExample($id, $section, $markdown, $expectedHtml) { $expectedHtml = $this->cleanupHtml($expectedHtml); $actualHtml = $this->parsedown->text($markdown); $actualHtml = $this->cleanupHtml($actualHtml); $this->assertEquals($expectedHtml, $actualHtml); } protected function cleanupHtml($markup) { // invisible whitespaces at the beginning and end of block elements // however, whitespaces at the beginning of elements do matter
$markup = preg_replace(
array(
'/(<(?!(?:' . $this->textLevelElementRegex . '|\bpre\b))\w+\b[^>]*>(?:<' . $this->textLevelElementRegex . '[^>]*>)*)\s+/s',
'/\s+((?:<\/' . $this->textLevelElementRegex . '>)*<\/(?!' . $this->textLevelElementRegex . ')\w+\b>)/s'
),
'$1',
$markup
);
return $markup;
}
}
================================================
FILE: test/ParsedownTest.php
================================================
dirs = $this->initDirs();
$this->Parsedown = $this->initParsedown();
parent::__construct($name, $data, $dataName);
}
# fixture directories; read by data() when collecting test cases
private $dirs;
# parser instance exercised by the fixture tests (see test_())
protected $Parsedown;
/**
 * List the directories that hold the fixture files: a single entry, the
 * "data" directory next to this file.
 *
 * @return array
 */
protected function initDirs()
{
    $fixtureDir = dirname(__FILE__).'/data/';

    return array($fixtureDir);
}
/**
 * Create the parser instance used by the tests.
 *
 * @return Parsedown
 */
protected function initParsedown()
{
    return new TestParsedown();
}
/**
 * Render one markdown fixture and compare the result with the fixture's
 * expected HTML.
 *
 * Safe mode is switched on for fixtures whose name starts with "xss" and
 * strict mode for those whose name starts with "strict".
 *
 * @dataProvider data
 * @param $test
 * @param $dir
 */
function test_($test, $dir)
{
    $source = file_get_contents($dir . $test . '.md');

    # normalise "\r\n" first and any remaining "\r" second
    $expected = str_replace(
        array("\r\n", "\r"),
        "\n",
        file_get_contents($dir . $test . '.html')
    );

    $isXssFixture = substr($test, 0, 3) === 'xss';
    $isStrictFixture = substr($test, 0, 6) === 'strict';

    $this->Parsedown->setSafeMode($isXssFixture);
    $this->Parsedown->setStrictMode($isStrictFixture);

    $rendered = $this->Parsedown->text($source);

    $this->assertEquals($expected, $rendered);
}
/**
 * Render the same fenced code block with UnsafeExtension twice - once with
 * default settings and once with safe mode enabled - and compare each
 * result with its own expected string.
 *
 * NOTE(review): the expected string literals below span a line break and
 * look as if they lost markup in this copy of the file - confirm them
 * against the repository before relying on them.
 */
function testRawHtml()
{
$markdown = "```php\nfoobar\n```";
$expectedMarkup = 'foobar
';
$expectedSafeMarkup = '<p>foobar</p>
';
$unsafeExtension = new UnsafeExtension;
$actualMarkup = $unsafeExtension->text($markdown);
$this->assertEquals($expectedMarkup, $actualMarkup);
# same input again, now with safe mode switched on
$unsafeExtension->setSafeMode(true);
$actualSafeMarkup = $unsafeExtension->text($markdown);
$this->assertEquals($expectedSafeMarkup, $actualSafeMarkup);
}
/**
 * Render a fenced code block with TrustDelegatedExtension with and without
 * safe mode. The safe-mode expectation is set equal to the normal one, so
 * both renderings must produce the same output.
 *
 * NOTE(review): the expected string literal below spans a line break and
 * looks as if it lost markup in this copy of the file - confirm it against
 * the repository before relying on it.
 */
function testTrustDelegatedRawHtml()
{
$markdown = "```php\nfoobar\n```";
$expectedMarkup = 'foobar
';
$expectedSafeMarkup = $expectedMarkup;
$unsafeExtension = new TrustDelegatedExtension;
$actualMarkup = $unsafeExtension->text($markdown);
$this->assertEquals($expectedMarkup, $actualMarkup);
# same input again, now with safe mode switched on
$unsafeExtension->setSafeMode(true);
$actualSafeMarkup = $unsafeExtension->text($markdown);
$this->assertEquals($expectedSafeMarkup, $actualSafeMarkup);
}
/**
 * Collect the fixture test cases.
 *
 * Every regular file with the extension "md" found directly inside one of
 * $this->dirs yields a case, provided a file with the same base name and
 * the extension "html" exists beside it.
 *
 * @return array list of array($basename, $dir) pairs
 */
function data()
{
    $cases = array();

    foreach ($this->dirs as $dir) {
        foreach (new DirectoryIterator($dir) as $entry) {
            /** @var $entry DirectoryIterator */
            $isMarkdownFile = $entry->isFile()
                && pathinfo($entry->getFilename(), PATHINFO_EXTENSION) === 'md';

            if ( ! $isMarkdownFile) {
                continue;
            }

            $basename = $entry->getBasename('.md');

            # a fixture only counts when its expected output exists
            if ( ! file_exists($dir . $basename . '.html')) {
                continue;
            }

            $cases[] = array($basename, $dir);
        }
    }

    return $cases;
}
/**
 * Render $markdownWithHtml with setMarkupEscaped(true) and assert that the
 * output equals $expectedHtml.
 *
 * NOTE(review): the heredoc openers and bodies below look damaged in this
 * copy of the file (markup appears to have been stripped) - confirm them
 * against the repository before relying on them.
 */
public function test_no_markup()
{
$markdownWithHtml = <<_content_
sparse:
_content_
paragraph
comment
MARKDOWN_WITH_MARKUP;
$expectedHtml = <<<div>content</div>
sparse:
<div>
<div class="inner">
content
</div>
</div>
paragraph
<style type="text/css">
p {
color: red;
}
</style>
comment
<!-- html comment -->
EXPECTED_HTML;
$parsedownWithNoMarkup = new TestParsedown();
$parsedownWithNoMarkup->setMarkupEscaped(true);
$this->assertEquals($expectedHtml, $parsedownWithNoMarkup->text($markdownWithHtml));
}
/**
 * Check how instance() behaves when called on Parsedown and on its
 * subclass TestParsedown.
 */
public function testLateStaticBinding()
{
    $first = Parsedown::instance();
    $this->assertInstanceOf('Parsedown', $first);

    # once instance() has been called on Parsedown, calling it with the
    # same arguments on the subclass returns that very same object
    $viaSubclass = TestParsedown::instance();
    $this->assertInstanceOf('Parsedown', $viaSubclass);
    $this->assertSame($first, $viaSubclass);

    # a name that has not been used yet yields an object of the calling class
    $named = TestParsedown::instance('test late static binding');
    $this->assertInstanceOf('TestParsedown', $named);

    $namedAgain = TestParsedown::instance('test late static binding');
    $this->assertSame($named, $namedAgain);
}
}
================================================
FILE: test/SampleExtensions.php
================================================
$text";
return $Block;
}
}
/**
 * Sample extension that overrides blockFencedCodeComplete() to move the
 * code block's text into 'rawHtml' and to set 'allowRawHtmlInSafeMode' on
 * the same element.
 *
 * NOTE(review): the 'rawHtml' string literal below spans a line break and
 * looks as if it lost markup in this copy of the file - confirm it against
 * the repository.
 */
class TrustDelegatedExtension extends Parsedown
{
protected function blockFencedCodeComplete($Block)
{
# take the text out of the inner element; it is re-added as rawHtml below
$text = $Block['element']['element']['text'];
unset($Block['element']['element']['text']);
// WARNING: There is almost always a better way of doing things!
//
// This behaviour is NOT needed in the demonstrated case.
// Only use this if you are sure that the result being added into
// rawHtml is safe.
// (e.g. using an external parser with escaping capabilities).
$Block['element']['element']['rawHtml'] = "$text
";
$Block['element']['element']['allowRawHtmlInSafeMode'] = true;
return $Block;
}
}
================================================
FILE: test/TestParsedown.php
================================================
textLevelElements;
}
}
================================================
FILE: test/data/aesthetic_table.html
================================================
header 1
header 2
cell 1.1
cell 1.2
cell 2.1
cell 2.2
================================================
FILE: test/data/aligned_table.html
================================================
header 1
header 2
header 2
cell 1.1
cell 1.2
cell 1.3
cell 2.1
cell 2.2
cell 2.3
================================================
FILE: test/data/atx_heading.html
================================================
h1
h2
h3
h4
h5
h6
####### not a heading
closed h1
# of levels
# of levels #
heading
================================================
FILE: test/data/automatic_link.html
================================================
================================================
FILE: test/data/block-level_html.html
================================================
_content_
paragraph
_content_
================================================
FILE: test/data/code_block.html
================================================
<?php
$message = 'Hello World!';
echo $message;
> not a quote
- not a list item
[not a reference]: http://foo.com
foo
bar
================================================
FILE: test/data/code_span.html
================================================
a code span
this is also a codespan trailing text
and look at this one!
single backtick in a code span: `
backtick-delimited string in a code span: `foo`
sth `` sth
================================================
FILE: test/data/compound_blockquote.html
================================================
header
paragraph
- li
paragraph
================================================
FILE: test/data/compound_emphasis.html
================================================
code code
codecodecode
================================================
FILE: test/data/compound_list.html
================================================
-
paragraph
paragraph
-
paragraph
quote
================================================
FILE: test/data/deeply_nested_list.html
================================================
- li
- li
- li
- li
- li
- li
- level 1
- level 2
- level 3
- level 4
- level 5
- a
- b
- c
- d
- e
- f
- g
- h
- i
================================================
FILE: test/data/em_strong.html
================================================
em strong
em strong strong
strong em strong
strong em strong strong
em strong
em strong strong
strong em strong
strong em strong strong
================================================
FILE: test/data/email.html
================================================
my email is me@example.com
html tags shouldn't start an email autolink first.last@example.com
================================================
FILE: test/data/emphasis.html
================================================
underscore, asterisk, one two, three four, a, b
strong and em and strong and em
line
line
line
this_is_not_an_emphasis
an empty emphasis __ ** is not an emphasis
*mixed *double and single asterisk** spans
================================================
FILE: test/data/escaping.html
================================================
escaped *emphasis*.
escaped \*emphasis\* in a code span
escaped \*emphasis\* in a code block
\ ` * _ { } [ ] ( ) > # + - . !
one_two one_two
one*two one*two
================================================
FILE: test/data/fenced_code_block.html
================================================
<?php
$message = 'fenced code block';
echo $message;
tilde
echo 'language identifier';
echo 'language identifier with non words';
<?php
echo "Hello World";
?>
<a href="http://auraphp.com" >Aura Project</a>
the following isn't quite enough to close
```
still a fenced code block
foo
bar
<?php
echo "Hello World";
================================================
FILE: test/data/horizontal_rule.html
================================================
================================================
FILE: test/data/html_comment.html
================================================
paragraph
paragraph
abc
- abcd
- bbbb
- cccc
================================================
FILE: test/data/html_entity.html
================================================
& © {
================================================
FILE: test/data/image_reference.html
================================================

![missing reference]
================================================
FILE: test/data/image_title.html
================================================


================================================
FILE: test/data/implicit_reference.html
================================================
an implicit reference link
an implicit reference link with an empty link definition
an implicit reference link followed by another
an explicit reference link with a title
================================================
FILE: test/data/inline_link.html
================================================
link with parentheses in URL
(link) in parentheses
================================================
FILE: test/data/inline_link_title.html
================================================
================================================
FILE: test/data/inline_title.html
================================================
single quotes and double quotes
================================================
FILE: test/data/lazy_blockquote.html
================================================
quote
the rest of it
another paragraph
the rest of it
================================================
FILE: test/data/lazy_list.html
================================================
- li
the rest of it
================================================
FILE: test/data/line_break.html
================================================
line
line
================================================
FILE: test/data/markup_consecutive_one.html
================================================
Markup
_No markdown_ without blank line for **strict** compliance with CommonMark.
Markdown
================================================
FILE: test/data/markup_consecutive_one_line.html
================================================
One markup on
two lines
_No markdown_
Markdown
================================================
FILE: test/data/markup_consecutive_one_stripped.html
================================================
Stripped markup
_No markdown_
Markdown
================================================
FILE: test/data/markup_consecutive_two.html
================================================
First markupand second markup on the same line.
_No markdown_
Markdown
================================================
FILE: test/data/markup_consecutive_two_lines.html
================================================
First markupand partial markup
on two lines.
_No markdown_
Markdown
================================================
FILE: test/data/markup_consecutive_two_stripped.html
================================================
Stripped markup
on two lines
_No markdown_
Markdown
================================================
FILE: test/data/multiline_list_paragraph.html
================================================
-
li
line
line
================================================
FILE: test/data/multiline_lists.html
================================================
-
One
First body copy
-
Two
Last body copy
================================================
FILE: test/data/nested_block-level_html.html
================================================
_parent_
_child_
_adopted child_
outside
================================================
FILE: test/data/ordered_list.html
================================================
- one
- two
repeating numbers:
- one
- two
large numbers:
- one
foo 1. the following should not start a list
100.
200.
================================================
FILE: test/data/paragraph_list.html
================================================
paragraph
- li
- li
paragraph
-
li
-
li
================================================
FILE: test/data/reference_title.html
================================================
double quotes and single quotes and parentheses
[invalid title]: http://example.com example title
================================================
FILE: test/data/self-closing_html.html
================================================
paragraph
paragraph
paragraph
paragraph
paragraph
paragraph
================================================
FILE: test/data/separated_nested_list.html
================================================
-
li
- li
- li
================================================
FILE: test/data/setext_header.html
================================================
h1
h2
single character
not a header
================================================
FILE: test/data/setext_header_spaces.html
================================================
trailing space
trailing space
leading and trailing space
leading and trailing space
1 leading space
1 leading space
3 leading spaces
3 leading spaces
too many leading spaces
==
too many leading spaces
--
================================================
FILE: test/data/simple_blockquote.html
================================================
quote
indented:
quote
no space after >:
quote
Info 1 text
Info 2 text
================================================
FILE: test/data/simple_table.html
================================================
header 1
header 2
cell 1.1
cell 1.2
cell 2.1
cell 2.2
header 1
header 2
cell 1.1
cell 1.2
cell 2.1
cell 2.2
header 1
cell 1.1
cell 2.1
header 1
cell 1.1
cell 2.1
Not a table, we haven't ended the paragraph:
header 1 | header 2
-------- | --------
cell 1.1 | cell 1.2
cell 2.1 | cell 2.2
================================================
FILE: test/data/span-level_html.html
================================================
an important link
broken
line
inline tag at the beginning
================================================
FILE: test/data/sparse_dense_list.html
================================================
-
li
-
li
-
li
================================================
FILE: test/data/sparse_html.html
================================================
line 1
line 2
line 3
line 4
================================================
FILE: test/data/sparse_list.html
================================================
-
li
-
li
-
li
- indented li
================================================
FILE: test/data/special_characters.html
================================================
AT&T has an ampersand in their name
this & that
4 < 5 and 6 > 5
http://example.com/autolink?a=1&b=2
================================================
FILE: test/data/strict_atx_heading.html
================================================
h1
h2
h3
h4
h5
h6
####### not a heading
#not a heading
closed h1
# of levels
# of levels #
================================================
FILE: test/data/strikethrough.html
================================================
strikethrough
here's one followed by another one
~~ this ~~ is not one neither is ~this~
escaped ~~this~~
================================================
FILE: test/data/strong_em.html
================================================
em strong em
strong em em
em strong em em
em strong em
strong em em
em strong em em
================================================
FILE: test/data/tab-indented_code_block.html
================================================
<?php
$message = 'Hello World!';
echo $message;
echo "following a blank line";
================================================
FILE: test/data/table_inline_markdown.html
================================================
header 1
header 2
cell 1.1
cell 1.2
| 2.1
| 2.2
\| 2.1
link
================================================
FILE: test/data/text_reference.html
================================================
one with a semantic name
[one][404] with no definition
multiline
one defined on 2 lines
one with a mixed case label and an upper case definition
one with the a label on the next line
================================================
FILE: test/data/unordered_list.html
================================================
- li
- li
mixed unordered markers:
- li
- li
- li
mixed ordered markers:
- starting at 1, list one
- number 2, list one
- starting at 3, list two
================================================
FILE: test/data/untidy_table.html
================================================
header 1
header 2
cell 1.1
cell 1.2
cell 2.1
cell 2.2
================================================
FILE: test/data/url_autolinking.html
================================================
an autolink http://example.com
inside of brackets [http://example.com], inside of braces {http://example.com}, inside of parentheses (http://example.com)
trailing slash http://example.com/ and http://example.com/path/
================================================
FILE: test/data/whitespace.html
================================================
code
================================================
FILE: test/data/xss_attribute_encoding.html
================================================




================================================
FILE: test/data/xss_bad_url.html
================================================
)
)
)
)




================================================
FILE: test/data/xss_text_encoding.html
================================================
<script>alert(1)</script>
<script>
alert(1)
</script>
<script>
alert(1)
</script>