Repository: erusev/parsedown Branch: master Commit: 4e433a8d5707 Files: 77 Total size: 88.7 KB Directory structure: gitextract_8dv4ybdv/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── unit-tests.yaml ├── .gitignore ├── LICENSE.txt ├── Parsedown.php ├── composer.json ├── phpunit.xml.dist ├── readme.md └── test/ ├── CommonMarkTestStrict.php ├── CommonMarkTestWeak.php ├── ParsedownTest.php ├── SampleExtensions.php ├── TestParsedown.php └── data/ ├── aesthetic_table.html ├── aligned_table.html ├── atx_heading.html ├── automatic_link.html ├── block-level_html.html ├── code_block.html ├── code_span.html ├── compound_blockquote.html ├── compound_emphasis.html ├── compound_list.html ├── deeply_nested_list.html ├── em_strong.html ├── email.html ├── emphasis.html ├── escaping.html ├── fenced_code_block.html ├── horizontal_rule.html ├── html_comment.html ├── html_entity.html ├── image_reference.html ├── image_title.html ├── implicit_reference.html ├── inline_link.html ├── inline_link_title.html ├── inline_title.html ├── lazy_blockquote.html ├── lazy_list.html ├── line_break.html ├── markup_consecutive_one.html ├── markup_consecutive_one_line.html ├── markup_consecutive_one_stripped.html ├── markup_consecutive_two.html ├── markup_consecutive_two_lines.html ├── markup_consecutive_two_stripped.html ├── multiline_list_paragraph.html ├── multiline_lists.html ├── nested_block-level_html.html ├── ordered_list.html ├── paragraph_list.html ├── reference_title.html ├── self-closing_html.html ├── separated_nested_list.html ├── setext_header.html ├── setext_header_spaces.html ├── simple_blockquote.html ├── simple_table.html ├── span-level_html.html ├── sparse_dense_list.html ├── sparse_html.html ├── sparse_list.html ├── special_characters.html ├── strict_atx_heading.html ├── strikethrough.html ├── strong_em.html ├── tab-indented_code_block.html ├── table_inline_markdown.html ├── text_reference.html ├── unordered_list.html ├── untidy_table.html ├── url_autolinking.html ├── 
whitespace.html ├── xss_attribute_encoding.html ├── xss_bad_url.html └── xss_text_encoding.html ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ # Ignore all tests for archive /test export-ignore /.gitattributes export-ignore /.gitignore export-ignore /.travis.yml export-ignore /phpunit.xml.dist export-ignore ================================================ FILE: .github/workflows/unit-tests.yaml ================================================ on: - push - pull_request jobs: phpunit: runs-on: ubuntu-latest strategy: matrix: php: - '7.2' - '7.3' - '7.4' - '8.0' - '8.1' - '8.2' - '8.3' - '8.4' steps: - name: Checkout the source code uses: actions/checkout@v4 - name: Set up PHP uses: shivammathur/setup-php@v2 with: php-version: '${{ matrix.php }}' - name: Install dependencies run: composer install - name: Run tests run: | vendor/bin/phpunit vendor/bin/phpunit test/CommonMarkTestWeak.php || true ================================================ FILE: .gitignore ================================================ *.md !readme.md composer.lock vendor/ .phpunit.result.cache ================================================ FILE: LICENSE.txt ================================================ The MIT License (MIT) Copyright (c) 2013-2018 Emanuil Rusev, erusev.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of 
the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: Parsedown.php ================================================ textElements($text); # convert to markup $markup = $this->elements($Elements); # trim line breaks $markup = trim($markup, "\n"); return $markup; } protected function textElements($text) { # make sure no definitions are set $this->DefinitionData = array(); # standardize line breaks $text = str_replace(array("\r\n", "\r"), "\n", $text); # remove surrounding line breaks $text = trim($text, "\n"); # split text into lines $lines = explode("\n", $text); # iterate through lines to identify blocks return $this->linesElements($lines); } # # Setters # function setBreaksEnabled($breaksEnabled) { $this->breaksEnabled = $breaksEnabled; return $this; } protected $breaksEnabled; function setMarkupEscaped($markupEscaped) { $this->markupEscaped = $markupEscaped; return $this; } protected $markupEscaped; function setUrlsLinked($urlsLinked) { $this->urlsLinked = $urlsLinked; return $this; } protected $urlsLinked = true; function setSafeMode($safeMode) { $this->safeMode = (bool) $safeMode; return $this; } protected $safeMode; function setStrictMode($strictMode) { $this->strictMode = (bool) $strictMode; return $this; } protected $strictMode; protected $safeLinksWhitelist = array( 'http://', 'https://', 'ftp://', 'ftps://', 'mailto:', 'tel:', 'data:image/png;base64,', 'data:image/gif;base64,', 'data:image/jpeg;base64,', 'irc:', 'ircs:', 'git:', 'ssh:', 'news:', 'steam:', ); # # Lines # protected 
$BlockTypes = array( '#' => array('Header'), '*' => array('Rule', 'List'), '+' => array('List'), '-' => array('SetextHeader', 'Table', 'Rule', 'List'), '0' => array('List'), '1' => array('List'), '2' => array('List'), '3' => array('List'), '4' => array('List'), '5' => array('List'), '6' => array('List'), '7' => array('List'), '8' => array('List'), '9' => array('List'), ':' => array('Table'), '<' => array('Comment', 'Markup'), '=' => array('SetextHeader'), '>' => array('Quote'), '[' => array('Reference'), '_' => array('Rule'), '`' => array('FencedCode'), '|' => array('Table'), '~' => array('FencedCode'), ); # ~ protected $unmarkedBlockTypes = array( 'Code', ); # # Blocks # protected function lines(array $lines) { return $this->elements($this->linesElements($lines)); } protected function linesElements(array $lines) { $Elements = array(); $CurrentBlock = null; foreach ($lines as $line) { if (chop($line) === '') { if (isset($CurrentBlock)) { $CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted']) ? $CurrentBlock['interrupted'] + 1 : 1 ); } continue; } while (($beforeTab = strstr($line, "\t", true)) !== false) { $shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4; $line = $beforeTab . str_repeat(' ', $shortage) . substr($line, strlen($beforeTab) + 1) ; } $indent = strspn($line, ' '); $text = $indent > 0 ? substr($line, $indent) : $line; # ~ $Line = array('body' => $line, 'indent' => $indent, 'text' => $text); # ~ if (isset($CurrentBlock['continuable'])) { $methodName = 'block' . $CurrentBlock['type'] . 'Continue'; $Block = $this->$methodName($Line, $CurrentBlock); if (isset($Block)) { $CurrentBlock = $Block; continue; } else { if ($this->isBlockCompletable($CurrentBlock['type'])) { $methodName = 'block' . $CurrentBlock['type'] . 
'Complete'; $CurrentBlock = $this->$methodName($CurrentBlock); } } } # ~ $marker = $text[0]; # ~ $blockTypes = $this->unmarkedBlockTypes; if (isset($this->BlockTypes[$marker])) { foreach ($this->BlockTypes[$marker] as $blockType) { $blockTypes []= $blockType; } } # # ~ foreach ($blockTypes as $blockType) { $Block = $this->{"block$blockType"}($Line, $CurrentBlock); if (isset($Block)) { $Block['type'] = $blockType; if ( ! isset($Block['identified'])) { if (isset($CurrentBlock)) { $Elements[] = $this->extractElement($CurrentBlock); } $Block['identified'] = true; } if ($this->isBlockContinuable($blockType)) { $Block['continuable'] = true; } $CurrentBlock = $Block; continue 2; } } # ~ if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph') { $Block = $this->paragraphContinue($Line, $CurrentBlock); } if (isset($Block)) { $CurrentBlock = $Block; } else { if (isset($CurrentBlock)) { $Elements[] = $this->extractElement($CurrentBlock); } $CurrentBlock = $this->paragraph($Line); $CurrentBlock['identified'] = true; } } # ~ if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) { $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; $CurrentBlock = $this->$methodName($CurrentBlock); } # ~ if (isset($CurrentBlock)) { $Elements[] = $this->extractElement($CurrentBlock); } # ~ return $Elements; } protected function extractElement(array $Component) { if ( ! isset($Component['element'])) { if (isset($Component['markup'])) { $Component['element'] = array('rawHtml' => $Component['markup']); } elseif (isset($Component['hidden'])) { $Component['element'] = array(); } } return $Component['element']; } protected function isBlockContinuable($Type) { return method_exists($this, 'block' . $Type . 'Continue'); } protected function isBlockCompletable($Type) { return method_exists($this, 'block' . $Type . 'Complete'); } # # Code protected function blockCode($Line, $Block = null) { if (isset($Block) and $Block['type'] === 'Paragraph' and ! 
isset($Block['interrupted'])) {
            return;
        }

        if ($Line['indent'] >= 4) {
            $text = substr($Line['body'], 4);

            $Block = array(
                'element' => array(
                    'name' => 'pre',
                    'element' => array(
                        'name' => 'code',
                        'text' => $text,
                    ),
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends a further line to an indented code block.
     *
     * Only lines indented by at least four columns continue the block. Blank
     * lines counted in $Block['interrupted'] are re-inserted as newlines first.
     *
     * @param array $Line  line record with 'body', 'indent' and 'text' keys
     * @param array $Block the code block built so far
     * @return array|null the extended block, or null when the line does not continue it
     */
    protected function blockCodeContinue($Line, $Block)
    {
        if ($Line['indent'] >= 4) {
            if (isset($Block['interrupted'])) {
                $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']);

                unset($Block['interrupted']);
            }

            $Block['element']['element']['text'] .= "\n";

            $text = substr($Line['body'], 4);

            $Block['element']['element']['text'] .= $text;

            return $Block;
        }
    }

    /**
     * Completion hook for indented code blocks; returns the block unchanged.
     *
     * @param array $Block
     * @return array
     */
    protected function blockCodeComplete($Block)
    {
        return $Block;
    }

    #
    # Comment

    /**
     * Starts a raw HTML comment block when the line begins with "<!--".
     *
     * Returns nothing when markup is escaped or safe mode is on. The block is
     * flagged 'closed' straight away if the same line also contains "-->".
     *
     * @param array $Line line record with 'body' and 'text' keys
     * @return array|null the new block, or null when the line does not open a comment
     */
    protected function blockComment($Line)
    {
        if ($this->markupEscaped or $this->safeMode) {
            return;
        }

        if (strpos($Line['text'], '<!--') === 0) {
            // blockCommentContinue() appends to ['element']['rawHtml'], so it must exist from the first line
            $Block = array(
                'element' => array(
                    'rawHtml' => $Line['body'],
                    'autobreak' => true,
                ),
            );

            if (strpos($Line['text'], '-->') !== false) {
                $Block['closed'] = true;
            }

            return $Block;
        }
    }

    /**
     * Appends a line to an open HTML comment block.
     *
     * Stops (returns nothing) once the block is flagged 'closed'; flags it
     * 'closed' when the appended line contains "-->".
     *
     * @param array $Line  line record with 'body' and 'text' keys
     * @param array $Block the comment block built so far
     * @return array|null
     */
    protected function blockCommentContinue($Line, array $Block)
    {
        if (isset($Block['closed'])) {
            return;
        }

        $Block['element']['rawHtml'] .= "\n" . $Line['body'];

        if (strpos($Line['text'], '-->') !== false) {
            $Block['closed'] = true;
        }

        return $Block;
    }

    #
    # Fenced Code

    protected function blockFencedCode($Line)
    {
        $marker = $Line['text'][0];

        $openerLength = strspn($Line['text'], $marker);

        if ($openerLength < 3) {
            return;
        }

        $infostring = trim(substr($Line['text'], $openerLength), "\t ");

        if (strpos($infostring, '`') !== false) {
            return;
        }

        $Element = array(
            'name' => 'code',
            'text' => '',
        );

        if ($infostring !== '') {
            /**
             * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes
             * Every HTML element may have a class attribute specified.
             * The attribute, if specified, must have a value that is a set
             * of space-separated tokens representing the various classes
             * that the element belongs to.
             * [...]
             * The space characters, for the purposes of this specification,
             * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab),
             * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and
             * U+000D CARRIAGE RETURN (CR).
*/ $language = substr($infostring, 0, strcspn($infostring, " \t\n\f\r")); $Element['attributes'] = array('class' => "language-$language"); } $Block = array( 'char' => $marker, 'openerLength' => $openerLength, 'element' => array( 'name' => 'pre', 'element' => $Element, ), ); return $Block; } protected function blockFencedCodeContinue($Line, $Block) { if (isset($Block['complete'])) { return; } if (isset($Block['interrupted'])) { $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } if (($len = strspn($Line['text'], $Block['char'])) >= $Block['openerLength'] and chop(substr($Line['text'], $len), ' ') === '' ) { $Block['element']['element']['text'] = substr($Block['element']['element']['text'], 1); $Block['complete'] = true; return $Block; } $Block['element']['element']['text'] .= "\n" . $Line['body']; return $Block; } protected function blockFencedCodeComplete($Block) { return $Block; } # # Header protected function blockHeader($Line) { $level = strspn($Line['text'], '#'); if ($level > 6) { return; } $text = trim($Line['text'], '#'); if ($this->strictMode and isset($text[0]) and $text[0] !== ' ') { return; } $text = trim($text, ' '); $Block = array( 'element' => array( 'name' => 'h' . $level, 'handler' => array( 'function' => 'lineElements', 'argument' => $text, 'destination' => 'elements', ) ), ); return $Block; } # # List protected function blockList($Line, ?array $CurrentBlock = null) { list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]'); if (preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) { $contentIndent = strlen($matches[2]); if ($contentIndent >= 5) { $contentIndent -= 1; $matches[1] = substr($matches[1], 0, -$contentIndent); $matches[3] = str_repeat(' ', $contentIndent) . 
$matches[3];
            } elseif ($contentIndent === 0) {
                $matches[1] .= ' ';
            }

            $markerWithoutWhitespace = strstr($matches[1], ' ', true);

            $Block = array(
                'indent' => $Line['indent'],
                'pattern' => $pattern,
                'data' => array(
                    'type' => $name,
                    'marker' => $matches[1],
                    'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)),
                ),
                'element' => array(
                    'name' => $name,
                    'elements' => array(),
                ),
            );
            $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/');

            if ($name === 'ol') {
                // strip leading zeros from the number; an all-zero number becomes '0'
                $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0';

                if ($listStart !== '1') {
                    // an ordered list not starting at 1 does not cut into an uninterrupted paragraph
                    if (
                        isset($CurrentBlock)
                        and $CurrentBlock['type'] === 'Paragraph'
                        and ! isset($CurrentBlock['interrupted'])
                    ) {
                        return;
                    }

                    $Block['element']['attributes'] = array('start' => $listStart);
                }
            }

            $Block['li'] = array(
                'name' => 'li',
                'handler' => array(
                    'function' => 'li',
                    // compare with '' explicitly: empty() is also true for the string "0",
                    // which would drop the text of a list item that reads just "0"
                    'argument' => $matches[3] !== '' ? array($matches[3]) : array(),
                    'destination' => 'elements'
                )
            );

            $Block['element']['elements'] []= & $Block['li'];

            return $Block;
        }
    }

    protected function blockListContinue($Line, array $Block)
    {
        if (isset($Block['interrupted']) and empty($Block['li']['handler']['argument'])) {
            return null;
        }

        $requiredIndent = ($Block['indent'] + strlen($Block['data']['marker']));

        if ($Line['indent'] < $requiredIndent
            and (
                (
                    $Block['data']['type'] === 'ol'
                    and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches)
                ) or (
                    $Block['data']['type'] === 'ul'
                    and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches)
                )
            )
        ) {
            if (isset($Block['interrupted'])) {
                $Block['li']['handler']['argument'] []= '';

                $Block['loose'] = true;

                unset($Block['interrupted']);
            }

            unset($Block['li']);

            $text = isset($matches[1]) ?
$matches[1] : ''; $Block['indent'] = $Line['indent']; $Block['li'] = array( 'name' => 'li', 'handler' => array( 'function' => 'li', 'argument' => array($text), 'destination' => 'elements' ) ); $Block['element']['elements'] []= & $Block['li']; return $Block; } elseif ($Line['indent'] < $requiredIndent and $this->blockList($Line)) { return null; } if ($Line['text'][0] === '[' and $this->blockReference($Line)) { return $Block; } if ($Line['indent'] >= $requiredIndent) { if (isset($Block['interrupted'])) { $Block['li']['handler']['argument'] []= ''; $Block['loose'] = true; unset($Block['interrupted']); } $text = substr($Line['body'], $requiredIndent); $Block['li']['handler']['argument'] []= $text; return $Block; } if ( ! isset($Block['interrupted'])) { $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']); $Block['li']['handler']['argument'] []= $text; return $Block; } } protected function blockListComplete(array $Block) { if (isset($Block['loose'])) { foreach ($Block['element']['elements'] as &$li) { if (end($li['handler']['argument']) !== '') { $li['handler']['argument'] []= ''; } } } return $Block; } # # Quote protected function blockQuote($Line) { if (preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block = array( 'element' => array( 'name' => 'blockquote', 'handler' => array( 'function' => 'linesElements', 'argument' => (array) $matches[1], 'destination' => 'elements', ) ), ); return $Block; } } protected function blockQuoteContinue($Line, array $Block) { if (isset($Block['interrupted'])) { return; } if ($Line['text'][0] === '>' and preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block['element']['handler']['argument'] []= $matches[1]; return $Block; } if ( ! 
isset($Block['interrupted'])) {
            $Block['element']['handler']['argument'] []= $Line['text'];

            return $Block;
        }
    }

    #
    # Rule

    /**
     * Recognises a horizontal rule.
     *
     * The first character of the line is the marker. The line qualifies when
     * it contains the marker at least three times and nothing is left after
     * stripping trailing markers and spaces.
     *
     * @param array $Line line record with a 'text' key
     * @return array|null an 'hr' block, or null when the line is not a rule
     */
    protected function blockRule($Line)
    {
        $marker = $Line['text'][0];

        if (substr_count($Line['text'], $marker) >= 3 and chop($Line['text'], " $marker") === '') {
            $Block = array(
                'element' => array(
                    'name' => 'hr',
                ),
            );

            return $Block;
        }
    }

    #
    # Setext

    /**
     * Turns the preceding paragraph into a setext heading.
     *
     * Applies only when the current block is an uninterrupted paragraph and
     * the line, indented by fewer than four columns, consists solely of its
     * first character (ignoring trailing spaces). '=' yields h1, anything
     * else h2.
     *
     * @param array      $Line  line record with 'indent' and 'text' keys
     * @param array|null $Block the current block, if any
     * @return array|null the paragraph block renamed to a heading, or null
     */
    protected function blockSetextHeader($Line, ?array $Block = null)
    {
        if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) {
            return;
        }

        if ($Line['indent'] < 4 and chop(chop($Line['text'], ' '), $Line['text'][0]) === '') {
            $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';

            return $Block;
        }
    }

    #
    # Markup

    /**
     * Starts a raw block-level HTML block.
     *
     * Returns nothing when markup is escaped or safe mode is on, or when the
     * tag name is listed in $this->textLevelElements.
     *
     * @param array $Line line record with a 'text' key
     * @return array|null
     */
    protected function blockMarkup($Line)
    {
        if ($this->markupEscaped or $this->safeMode) {
            return;
        }

        if (preg_match('/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/', $Line['text'], $matches)) {
            $element = strtolower($matches[1]);

            if (in_array($element, $this->textLevelElements)) {
                return;
            }

            $Block = array(
                'name' => $matches[1],
                'element' => array(
                    'rawHtml' => $Line['text'],
                    'autobreak' => true,
                ),
            );

            return $Block;
        }
    }

    /**
     * Appends a line to a raw HTML block until it is closed or interrupted.
     *
     * @param array $Line  line record with a 'body' key
     * @param array $Block the markup block built so far
     * @return array|null
     */
    protected function blockMarkupContinue($Line, array $Block)
    {
        if (isset($Block['closed']) or isset($Block['interrupted'])) {
            return;
        }

        $Block['element']['rawHtml'] .= "\n" . $Line['body'];

        return $Block;
    }

    #
    # Reference

    /**
     * Records a link reference definition of the form `[id]: url "title"`.
     *
     * The definition is stored under the lower-cased id in
     * $this->DefinitionData['Reference']; the returned block has an empty
     * element, so the line itself produces no output.
     *
     * @param array $Line line record with a 'text' key
     * @return array|null
     */
    protected function blockReference($Line)
    {
        // group 1: id, group 2: url (optionally wrapped in <>), group 3: optional title
        if (strpos($Line['text'], ']') !== false
            and preg_match('/^\[(.+?)\]:[ ]*+<?(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches)
        ) {
            $id = strtolower($matches[1]);

            $Data = array(
                'url' => $matches[2],
                'title' => isset($matches[3]) ? $matches[3] : null,
            );

            $this->DefinitionData['Reference'][$id] = $Data;

            $Block = array(
                'element' => array(),
            );

            return $Block;
        }
    }

    #
    # Table

    protected function blockTable($Line, ?array $Block = null)
    {
        if ( !
isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } if ( strpos($Block['element']['handler']['argument'], '|') === false and strpos($Line['text'], '|') === false and strpos($Line['text'], ':') === false or strpos($Block['element']['handler']['argument'], "\n") !== false ) { return; } if (chop($Line['text'], ' -:|') !== '') { return; } $alignments = array(); $divider = $Line['text']; $divider = trim($divider); $divider = trim($divider, '|'); $dividerCells = explode('|', $divider); foreach ($dividerCells as $dividerCell) { $dividerCell = trim($dividerCell); if ($dividerCell === '') { return; } $alignment = null; if ($dividerCell[0] === ':') { $alignment = 'left'; } if (substr($dividerCell, - 1) === ':') { $alignment = $alignment === 'left' ? 'center' : 'right'; } $alignments []= $alignment; } # ~ $HeaderElements = array(); $header = $Block['element']['handler']['argument']; $header = trim($header); $header = trim($header, '|'); $headerCells = explode('|', $header); if (count($headerCells) !== count($alignments)) { return; } foreach ($headerCells as $index => $headerCell) { $headerCell = trim($headerCell); $HeaderElement = array( 'name' => 'th', 'handler' => array( 'function' => 'lineElements', 'argument' => $headerCell, 'destination' => 'elements', ) ); if (isset($alignments[$index])) { $alignment = $alignments[$index]; $HeaderElement['attributes'] = array( 'style' => "text-align: $alignment;", ); } $HeaderElements []= $HeaderElement; } # ~ $Block = array( 'alignments' => $alignments, 'identified' => true, 'element' => array( 'name' => 'table', 'elements' => array(), ), ); $Block['element']['elements'] []= array( 'name' => 'thead', ); $Block['element']['elements'] []= array( 'name' => 'tbody', 'elements' => array(), ); $Block['element']['elements'][0]['elements'] []= array( 'name' => 'tr', 'elements' => $HeaderElements, ); return $Block; } protected function blockTableContinue($Line, array $Block) { if 
(isset($Block['interrupted'])) { return; } if (count($Block['alignments']) === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|')) { $Elements = array(); $row = $Line['text']; $row = trim($row); $row = trim($row, '|'); preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); $cells = array_slice($matches[0], 0, count($Block['alignments'])); foreach ($cells as $index => $cell) { $cell = trim($cell); $Element = array( 'name' => 'td', 'handler' => array( 'function' => 'lineElements', 'argument' => $cell, 'destination' => 'elements', ) ); if (isset($Block['alignments'][$index])) { $Element['attributes'] = array( 'style' => 'text-align: ' . $Block['alignments'][$index] . ';', ); } $Elements []= $Element; } $Element = array( 'name' => 'tr', 'elements' => $Elements, ); $Block['element']['elements'][1]['elements'] []= $Element; return $Block; } } # # ~ # protected function paragraph($Line) { return array( 'type' => 'Paragraph', 'element' => array( 'name' => 'p', 'handler' => array( 'function' => 'lineElements', 'argument' => $Line['text'], 'destination' => 'elements', ), ), ); } protected function paragraphContinue($Line, array $Block) { if (isset($Block['interrupted'])) { return; } $Block['element']['handler']['argument'] .= "\n".$Line['text']; return $Block; } # # Inline Elements # protected $InlineTypes = array( '!' 
=> array('Image'), '&' => array('SpecialCharacter'), '*' => array('Emphasis'), ':' => array('Url'), '<' => array('UrlTag', 'EmailTag', 'Markup'), '[' => array('Link'), '_' => array('Emphasis'), '`' => array('Code'), '~' => array('Strikethrough'), '\\' => array('EscapeSequence'), ); # ~ protected $inlineMarkerList = '!*_&[:<`~\\'; # # ~ # public function line($text, $nonNestables = array()) { return $this->elements($this->lineElements($text, $nonNestables)); } protected function lineElements($text, $nonNestables = array()) { # standardize line breaks $text = str_replace(array("\r\n", "\r"), "\n", $text); $Elements = array(); $nonNestables = (empty($nonNestables) ? array() : array_combine($nonNestables, $nonNestables) ); # $excerpt is based on the first occurrence of a marker while ($excerpt = strpbrk($text, $this->inlineMarkerList)) { $marker = $excerpt[0]; $markerPosition = strlen($text) - strlen($excerpt); $Excerpt = array('text' => $excerpt, 'context' => $text); foreach ($this->InlineTypes[$marker] as $inlineType) { # check to see if the current inline type is nestable in the current context if (isset($nonNestables[$inlineType])) { continue; } $Inline = $this->{"inline$inlineType"}($Excerpt); if ( ! isset($Inline)) { continue; } # makes sure that the inline belongs to "our" marker if (isset($Inline['position']) and $Inline['position'] > $markerPosition) { continue; } # sets a default inline position if ( ! isset($Inline['position'])) { $Inline['position'] = $markerPosition; } # cause the new element to 'inherit' our non nestables $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) ? 
array_merge($Inline['element']['nonNestables'], $nonNestables)
                    : $nonNestables
                ;

                # the text that comes before the inline
                $unmarkedText = substr($text, 0, $Inline['position']);

                # compile the unmarked text
                $InlineText = $this->inlineText($unmarkedText);
                $Elements[] = $InlineText['element'];

                # compile the inline
                $Elements[] = $this->extractElement($Inline);

                # remove the examined text
                $text = substr($text, $Inline['position'] + $Inline['extent']);

                continue 2;
            }

            # the marker does not belong to an inline

            $unmarkedText = substr($text, 0, $markerPosition + 1);

            $InlineText = $this->inlineText($unmarkedText);
            $Elements[] = $InlineText['element'];

            $text = substr($text, $markerPosition + 1);
        }

        $InlineText = $this->inlineText($text);
        $Elements[] = $InlineText['element'];

        foreach ($Elements as &$Element) {
            if ( ! isset($Element['autobreak'])) {
                $Element['autobreak'] = false;
            }
        }

        return $Elements;
    }

    #
    # ~
    #

    /**
     * Wraps plain text as an inline, converting hard line breaks.
     *
     * With breaks enabled every newline (plus spaces before it) becomes a
     * `br` element followed by a newline; otherwise only newlines preceded
     * by a backslash or by two or more spaces do.
     *
     * @param string $text
     * @return array inline record with 'extent' and 'element' keys
     */
    protected function inlineText($text)
    {
        $Inline = array(
            'extent' => strlen($text),
            'element' => array(),
        );

        $Inline['element']['elements'] = self::pregReplaceElements(
            $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/',
            array(
                array('name' => 'br'),
                array('text' => "\n"),
            ),
            $text
        );

        return $Inline;
    }

    /**
     * Recognises an inline code span delimited by a run of the marker character.
     *
     * NOTE(review): the closing part of the pattern and the newline
     * normalisation below were reconstructed after being lost from this copy
     * of the file; confirm against the upstream source.
     *
     * @param array $Excerpt excerpt record whose 'text' starts at the marker
     * @return array|null inline record with 'extent' and a 'code' element, or null
     */
    protected function inlineCode($Excerpt)
    {
        $marker = $Excerpt['text'][0];

        // the closing run must equal the opening run (\1) and must not be part of a longer run
        if (preg_match('/^(['.$marker.']++)[ ]*+(.+?)[ ]*+(?<!['.$marker.'])\1(?!'.$marker.')/s', $Excerpt['text'], $matches)) {
            $text = $matches[2];
            $text = preg_replace('/[ ]*+\n/', ' ', $text);

            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'code',
                    'text' => $text,
                ),
            );
        }
    }

    protected function inlineEmailTag($Excerpt)
    {
        $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';

        $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@'
            . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*';

        if (strpos($Excerpt['text'], '>') !== false
            and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches)
        ){
            $url = $matches[1];

            if ( !
isset($matches[2])) { $url = "mailto:$url"; } return array( 'extent' => strlen($matches[0]), 'element' => array( 'name' => 'a', 'text' => $matches[1], 'attributes' => array( 'href' => $url, ), ), ); } } protected function inlineEmphasis($Excerpt) { if ( ! isset($Excerpt['text'][1])) { return; } $marker = $Excerpt['text'][0]; if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches)) { $emphasis = 'strong'; } elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches)) { $emphasis = 'em'; } else { return; } return array( 'extent' => strlen($matches[0]), 'element' => array( 'name' => $emphasis, 'handler' => array( 'function' => 'lineElements', 'argument' => $matches[1], 'destination' => 'elements', ) ), ); } protected function inlineEscapeSequence($Excerpt) { if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) { return array( 'element' => array('rawHtml' => $Excerpt['text'][1]), 'extent' => 2, ); } } protected function inlineImage($Excerpt) { if ( ! 
isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[') { return; } $Excerpt['text']= substr($Excerpt['text'], 1); $Link = $this->inlineLink($Excerpt); if ($Link === null) { return; } $Inline = array( 'extent' => $Link['extent'] + 1, 'element' => array( 'name' => 'img', 'attributes' => array( 'src' => $Link['element']['attributes']['href'], 'alt' => $Link['element']['handler']['argument'], ), 'autobreak' => true, ), ); $Inline['element']['attributes'] += $Link['element']['attributes']; unset($Inline['element']['attributes']['href']); return $Inline; } protected function inlineLink($Excerpt) { $Element = array( 'name' => 'a', 'handler' => array( 'function' => 'lineElements', 'argument' => null, 'destination' => 'elements', ), 'nonNestables' => array('Url', 'Link'), 'attributes' => array( 'href' => null, 'title' => null, ), ); $extent = 0; $remainder = $Excerpt['text']; if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { $Element['handler']['argument'] = $matches[1]; $extent += strlen($matches[0]); $remainder = substr($remainder, $extent); } else { return; } if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) { $Element['attributes']['href'] = $matches[1]; if (isset($matches[2])) { $Element['attributes']['title'] = substr($matches[2], 1, - 1); } $extent += strlen($matches[0]); } else { if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) { $definition = strlen($matches[1]) ? $matches[1] : $Element['handler']['argument']; $definition = strtolower($definition); $extent += strlen($matches[0]); } else { $definition = strtolower($Element['handler']['argument']); } if ( ! 
isset($this->DefinitionData['Reference'][$definition])) {
                return;
            }

            $Definition = $this->DefinitionData['Reference'][$definition];

            $Element['attributes']['href'] = $Definition['url'];
            $Element['attributes']['title'] = $Definition['title'];
        }

        return array(
            'extent' => $extent,
            'element' => $Element,
        );
    }

    /**
     * Recognises inline raw HTML: a closing tag, an HTML comment, or an
     * opening/self-closing tag.
     *
     * Returns nothing when markup is escaped, safe mode is on, or the
     * excerpt contains no '>'. The matched text is passed through as rawHtml.
     *
     * @param array $Excerpt excerpt record whose 'text' starts at '<'
     * @return array|null inline record with 'element' and 'extent' keys, or null
     */
    protected function inlineMarkup($Excerpt)
    {
        if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) {
            return;
        }

        if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches)) {
            return array(
                'element' => array('rawHtml' => $matches[0]),
                'extent' => strlen($matches[0]),
            );
        }

        // the pattern has to consume the whole "<!-- ... -->" span: the caller advances by
        // 'extent', so a pattern able to match the empty string would never move forward
        if ($Excerpt['text'][1] === '!' and preg_match('/^<!---?[^>-](?:-?+[^-])*-->/s', $Excerpt['text'], $matches)) {
            return array(
                'element' => array('rawHtml' => $matches[0]),
                'extent' => strlen($matches[0]),
            );
        }

        if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches)) {
            return array(
                'element' => array('rawHtml' => $matches[0]),
                'extent' => strlen($matches[0]),
            );
        }
    }

    /**
     * Passes an HTML entity such as `&amp;` or `&#35;` through unescaped.
     *
     * @param array $Excerpt excerpt record whose 'text' starts at '&'
     * @return array|null inline record with 'element' and 'extent' keys, or null
     */
    protected function inlineSpecialCharacter($Excerpt)
    {
        if (substr($Excerpt['text'], 1, 1) !== ' ' and strpos($Excerpt['text'], ';') !== false
            and preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches)
        ) {
            return array(
                'element' => array('rawHtml' => '&' . $matches[1] . ';'),
                'extent' => strlen($matches[0]),
            );
        }
    }

    /**
     * Recognises `~~text~~` and wraps the inner text in a `del` element.
     *
     * The inner text must neither start nor end with whitespace; it is
     * parsed for further inlines through the lineElements handler.
     *
     * @param array $Excerpt excerpt record whose 'text' starts at '~'
     * @return array|null inline record with 'extent' and 'element' keys, or null
     */
    protected function inlineStrikethrough($Excerpt)
    {
        if ( ! isset($Excerpt['text'][1])) {
            return;
        }

        if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches)) {
            return array(
                'extent' => strlen($matches[0]),
                'element' => array(
                    'name' => 'del',
                    'handler' => array(
                        'function' => 'lineElements',
                        'argument' => $matches[1],
                        'destination' => 'elements',
                    )
                ),
            );
        }
    }

    protected function inlineUrl($Excerpt)
    {
        if ($this->urlsLinked !== true or !
isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/') { return; } if (strpos($Excerpt['context'], 'http') !== false and preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE) ) { $url = $matches[0][0]; $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', 'text' => $url, 'attributes' => array( 'href' => $url, ), ), ); return $Inline; } } protected function inlineUrlTag($Excerpt) { if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches)) { $url = $matches[1]; return array( 'extent' => strlen($matches[0]), 'element' => array( 'name' => 'a', 'text' => $url, 'attributes' => array( 'href' => $url, ), ), ); } } # ~ protected function unmarkedText($text) { $Inline = $this->inlineText($text); return $this->element($Inline['element']); } # # Handlers # protected function handle(array $Element) { if (isset($Element['handler'])) { if (!isset($Element['nonNestables'])) { $Element['nonNestables'] = array(); } if (is_string($Element['handler'])) { $function = $Element['handler']; $argument = $Element['text']; unset($Element['text']); $destination = 'rawHtml'; } else { $function = $Element['handler']['function']; $argument = $Element['handler']['argument']; $destination = $Element['handler']['destination']; } $Element[$destination] = $this->{$function}($argument, $Element['nonNestables']); if ($destination === 'handler') { $Element = $this->handle($Element); } unset($Element['handler']); } return $Element; } protected function handleElementRecursive(array $Element) { return $this->elementApplyRecursive(array($this, 'handle'), $Element); } protected function handleElementsRecursive(array $Elements) { return $this->elementsApplyRecursive(array($this, 'handle'), $Elements); } protected function elementApplyRecursive($closure, array $Element) { $Element = call_user_func($closure, $Element); if 
(isset($Element['elements']))
        {
            $Element['elements'] = $this->elementsApplyRecursive($closure, $Element['elements']);
        }
        elseif (isset($Element['element']))
        {
            $Element['element'] = $this->elementApplyRecursive($closure, $Element['element']);
        }

        return $Element;
    }

    /**
     * Apply $closure to an element after it has been applied to everything
     * nested below it (children first, then the element itself).
     *
     * @param callable $closure receives an element array, returns an element array
     * @param array    $Element
     *
     * @return array the transformed element
     */
    protected function elementApplyRecursiveDepthFirst($closure, array $Element)
    {
        if (isset($Element['elements']))
        {
            $Element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['elements']);
        }
        elseif (isset($Element['element']))
        {
            # 'element' holds one element, not a list: it must go through the
            # singular variant, the plural one would iterate over its fields
            $Element['element'] = $this->elementApplyRecursiveDepthFirst($closure, $Element['element']);
        }

        $Element = call_user_func($closure, $Element);

        return $Element;
    }

    /**
     * Apply $closure to every element of a list, each element before its
     * children (see elementApplyRecursive()).
     *
     * @param callable $closure
     * @param array    $Elements list of element arrays
     *
     * @return array the transformed list, keys preserved
     */
    protected function elementsApplyRecursive($closure, array $Elements)
    {
        foreach ($Elements as &$Element)
        {
            $Element = $this->elementApplyRecursive($closure, $Element);
        }

        # drop the loop reference before the array is handed back
        unset($Element);

        return $Elements;
    }

    /**
     * Apply $closure to every element of a list, children before the element
     * itself (see elementApplyRecursiveDepthFirst()).
     *
     * @param callable $closure
     * @param array    $Elements list of element arrays
     *
     * @return array the transformed list, keys preserved
     */
    protected function elementsApplyRecursiveDepthFirst($closure, array $Elements)
    {
        foreach ($Elements as &$Element)
        {
            $Element = $this->elementApplyRecursiveDepthFirst($closure, $Element);
        }

        # drop the loop reference before the array is handed back
        unset($Element);

        return $Elements;
    }

    protected function element(array $Element)
    {
        if ($this->safeMode)
        {
            $Element = $this->sanitiseElement($Element);
        }

        # identity map if element has no handler
        $Element = $this->handle($Element);

        $hasName = isset($Element['name']);

        $markup = '';

        if ($hasName)
        {
            $markup .= '<' .
$Element['name'];

            if (isset($Element['attributes']))
            {
                foreach ($Element['attributes'] as $name => $value)
                {
                    # a null value means "do not emit this attribute"
                    if ($value === null)
                    {
                        continue;
                    }

                    $markup .= " $name=\"".self::escape($value).'"';
                }
            }
        }

        $permitRawHtml = false;

        if (isset($Element['text']))
        {
            $text = $Element['text'];
        }
        // very strongly consider an alternative if you're writing an
        // extension
        elseif (isset($Element['rawHtml']))
        {
            $text = $Element['rawHtml'];

            # raw HTML is emitted verbatim outside safe mode, or when the
            # element explicitly opts in
            $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
            $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
        }

        $hasContent = isset($text) || isset($Element['element']) || isset($Element['elements']);

        if ($hasContent)
        {
            $markup .= $hasName ? '>' : '';

            if (isset($Element['elements']))
            {
                $markup .= $this->elements($Element['elements']);
            }
            elseif (isset($Element['element']))
            {
                $markup .= $this->element($Element['element']);
            }
            else
            {
                if (!$permitRawHtml)
                {
                    $markup .= self::escape($text, true);
                }
                else
                {
                    $markup .= $text;
                }
            }

            # a named element with content needs its closing tag
            $markup .= $hasName ? '</' . $Element['name'] . '>' : '';
        }
        elseif ($hasName)
        {
            # named element without content: self-closing form
            $markup .= ' />';
        }

        return $markup;
    }

    /**
     * Render a list of elements to markup.
     *
     * Empty entries are skipped. A "\n" is placed before an element only
     * when both that element and the one before it allow a break; an
     * element allows it through its 'autobreak' flag or, when the flag is
     * absent, by having a 'name'. A trailing "\n" follows when the last
     * rendered element allows a break (or when nothing was rendered).
     *
     * @param array $Elements list of element arrays
     *
     * @return string
     */
    protected function elements(array $Elements)
    {
        $markup = '';

        $autoBreak = true;

        foreach ($Elements as $Element)
        {
            if (empty($Element))
            {
                continue;
            }

            $autoBreakNext = (isset($Element['autobreak'])
                ? $Element['autobreak'] : isset($Element['name'])
            );
            // (autobreak === false) covers both sides of an element
            $autoBreak = !$autoBreak ? $autoBreak : $autoBreakNext;

            $markup .= ($autoBreak ? "\n" : '') . $this->element($Element);
            $autoBreak = $autoBreakNext;
        }

        $markup .= $autoBreak ? "\n" : '';

        return $markup;
    }

    # ~

    /**
     * Build the elements of a list item from its lines.
     *
     * When none of the lines is empty and the first resulting element is a
     * "p", that element loses its name, so its content is rendered without
     * the surrounding paragraph tag.
     *
     * @param array $lines
     *
     * @return array list of element arrays
     */
    protected function li($lines)
    {
        $Elements = $this->linesElements($lines);

        if ( ! in_array('', $lines)
            and isset($Elements[0]) and isset($Elements[0]['name'])
            and $Elements[0]['name'] === 'p'
        ) {
            unset($Elements[0]['name']);
        }

        return $Elements;
    }

    #
    # AST Convenience
    #

    /**
     * Replace occurrences $regexp with $Elements in $text.
Return an array of
     * elements representing the replacement.
     *
     * Every stretch of text between matches becomes a 'text' element and
     * every match is replaced by a copy of $Elements. Scanning stops at a
     * zero-length match, because such a match consumes nothing and would
     * otherwise be found again forever; the remaining text is then kept as
     * the final 'text' element.
     */
    protected static function pregReplaceElements($regexp, $Elements, $text)
    {
        $newElements = array();

        while (preg_match($regexp, $text, $matches, PREG_OFFSET_CAPTURE))
        {
            $offset = $matches[0][1];
            $length = strlen($matches[0][0]);

            if ($length === 0)
            {
                # no progress is possible on an empty match
                break;
            }

            $before = substr($text, 0, $offset);
            $after = substr($text, $offset + $length);

            $newElements[] = array('text' => $before);

            foreach ($Elements as $Element)
            {
                $newElements[] = $Element;
            }

            $text = $after;
        }

        $newElements[] = array('text' => $text);

        return $newElements;
    }

    #
    # Deprecated Methods
    #

    /**
     * @deprecated use text() instead
     */
    function parse($text)
    {
        $markup = $this->text($text);

        return $markup;
    }

    /**
     * Strip the parts of an element that are unsafe to emit.
     *
     * Elements without a name lose all attributes. For "a" and "img" the URL
     * attribute (href / src) is passed through filterUnsafeUrlInAttribute().
     * Of the remaining attributes, those whose name does not look like a
     * plain attribute name, or starts with "on" in any letter case, are removed.
     *
     * @param array $Element
     *
     * @return array the sanitised element
     */
    protected function sanitiseElement(array $Element)
    {
        static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
        static $safeUrlNameToAtt  = array(
            'a'   => 'href',
            'img' => 'src',
        );

        if ( ! isset($Element['name']))
        {
            unset($Element['attributes']);
            return $Element;
        }

        if (isset($safeUrlNameToAtt[$Element['name']]))
        {
            $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
        }

        if ( ! empty($Element['attributes']))
        {
            foreach ($Element['attributes'] as $att => $val)
            {
                # filter out badly parsed attribute
                if ( ! preg_match($goodAttribute, $att))
                {
                    unset($Element['attributes'][$att]);
                }
                # dump onevent attribute
                elseif (self::striAtStart($att, 'on'))
                {
                    unset($Element['attributes'][$att]);
                }
            }
        }

        return $Element;
    }

    /**
     * Neutralise a URL attribute unless it starts with a whitelisted scheme.
     *
     * The value is kept as is when it begins (case-insensitively) with one
     * of the entries of $this->safeLinksWhitelist; otherwise every ':' in it
     * is replaced with '%3A'. An element that does not carry the attribute
     * is returned unchanged.
     *
     * @param array  $Element
     * @param string $attribute name of the attribute holding the URL
     *
     * @return array
     */
    protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
    {
        # nothing to filter; also avoids reading an undefined index
        if ( ! isset($Element['attributes'][$attribute]))
        {
            return $Element;
        }

        foreach ($this->safeLinksWhitelist as $scheme)
        {
            if (self::striAtStart($Element['attributes'][$attribute], $scheme))
            {
                return $Element;
            }
        }

        $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]);

        return $Element;
    }

    #
    # Static Methods
    #

    protected static function escape($text, $allowQuotes = false)
    {
        return htmlspecialchars($text, $allowQuotes ?
ENT_NOQUOTES : ENT_QUOTES, 'UTF-8');
    }

    /**
     * Tell whether $string begins with $needle, ignoring letter case.
     *
     * @return bool false as well when $needle is longer than $string
     */
    protected static function striAtStart($string, $needle)
    {
        $needleLength = strlen($needle);

        return $needleLength <= strlen($string)
            && strtolower(substr($string, 0, $needleLength)) === strtolower($needle);
    }

    /**
     * Return the shared instance registered under $name, creating it with
     * `new static()` on first use.
     */
    static function instance($name = 'default')
    {
        if ( ! isset(self::$instances[$name]))
        {
            self::$instances[$name] = new static();
        }

        return self::$instances[$name];
    }

    private static $instances = array();

    #
    # Fields
    #

    protected $DefinitionData;

    #
    # Read-Only

    protected $specialCharacters = array(
        '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', '~'
    );

    protected $StrongRegex = array(
        '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s',
        '_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us',
    );

    protected $EmRegex = array(
        '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s',
        '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us',
    );

    protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+';

    protected $voidElements = array(
        'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source',
    );

    protected $textLevelElements = array(
        'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont',
        'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
        'i', 'rp', 'del', 'code',          'strike', 'marquee',
        'q', 'rt', 'ins', 'font',          'strong',
        's', 'tt', 'kbd', 'mark',
        'u', 'xm', 'sub', 'nobr',
                   'sup', 'ruby',
                   'var', 'span',
                   'wbr', 'time',
    );
}
================================================ FILE: composer.json ================================================ { "name": "erusev/parsedown", "description": "Parser for Markdown.", "keywords": ["markdown", "parser"], "homepage": "http://parsedown.org", "type": "library", "license": "MIT", "authors": [ { "name": "Emanuil Rusev", "email": "hello@erusev.com", "homepage": "http://erusev.com" } ], "require": { "php":
">=7.2", "ext-mbstring": "*" }, "require-dev": { "phpunit/phpunit": "^8.5.52|^9.6.33" }, "autoload": { "psr-0": { "Parsedown": "" } }, "autoload-dev": { "psr-0": { "TestParsedown": "test/", "ParsedownTest": "test/", "CommonMarkTest": "test/", "CommonMarkTestWeak": "test/" } } } ================================================ FILE: phpunit.xml.dist ================================================ test/ParsedownTest.php ================================================ FILE: readme.md ================================================ # Parsedown [![Total Downloads](https://poser.pugx.org/erusev/parsedown/d/total.svg)](https://packagist.org/packages/erusev/parsedown) [![Version](https://poser.pugx.org/erusev/parsedown/v/stable.svg)](https://packagist.org/packages/erusev/parsedown) [![License](https://poser.pugx.org/erusev/parsedown/license.svg)](https://packagist.org/packages/erusev/parsedown) Better Markdown Parser in PHP — demo ## Features - One file - No dependencies - [Super fast](http://parsedown.org/speed) - Extensible - [GitHub flavored](https://github.github.com/gfm) - [Tested](http://parsedown.org/tests/) in PHP 7.1+ - [Markdown Extra extension](https://github.com/erusev/parsedown-extra) ## Installation Install the [composer package]: ```sh composer require erusev/parsedown ``` Or download the [latest release] and include `Parsedown.php` [composer package]: https://packagist.org/packages/erusev/parsedown "The Parsedown package on packagist.org" [latest release]: https://github.com/erusev/parsedown/releases/latest "The latest release of Parsedown" ## Example ```php $Parsedown = new Parsedown(); echo $Parsedown->text('Hello _Parsedown_!'); # prints:

Hello Parsedown!

``` You can also parse inline markdown only: ```php echo $Parsedown->line('Hello _Parsedown_!'); # prints: Hello Parsedown! ``` More examples in [the wiki](https://github.com/erusev/parsedown/wiki/) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). ## Security Parsedown is capable of escaping user-input within the HTML that it generates. Additionally, Parsedown will apply sanitisation to additional scripting vectors (such as scripting link destinations) that are introduced by the markdown syntax itself. To tell Parsedown that it is processing untrusted user-input, use the following: ```php $Parsedown->setSafeMode(true); ``` If, instead, you wish to allow HTML within untrusted user-input, but still want output to be free from XSS, it is recommended that you make use of an HTML sanitiser that allows HTML tags to be whitelisted, like [HTML Purifier](http://htmlpurifier.org/). In both cases you should strongly consider employing defence-in-depth measures, like [deploying a Content-Security-Policy](https://scotthelme.co.uk/content-security-policy-an-introduction/) (a browser security feature) so that your page is likely to be safe even if an attacker finds a vulnerability in one of the first lines of defence above. Safe mode does not necessarily yield safe results when using extensions to Parsedown. Extensions should be evaluated on their own to determine their specific safety against XSS. ## Escaping HTML > WARNING: This method is not safe from XSS! If you wish to escape HTML in trusted input, you can use the following: ```php $Parsedown->setMarkupEscaped(true); ``` Beware that this still allows users to insert unsafe scripting vectors, e.g. `[xss](javascript:alert%281%29)`. ## Questions **How does Parsedown work?** It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then perhaps it belongs to a list.
Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. **Is it compliant with CommonMark?** It passes most of the CommonMark tests. Most of the tests that don't pass deal with cases that are quite uncommon. Still, as CommonMark matures, compliance should improve. **Who uses it?** [Laravel Framework](https://laravel.com/), [Bolt CMS](http://bolt.cm/), [Grav CMS](http://getgrav.org/), [Herbie CMS](http://www.getherbie.org/), [Kirby CMS](http://getkirby.com/), [October CMS](http://octobercms.com/), [Pico CMS](http://picocms.org), [Statamic CMS](http://www.statamic.com/), [phpDocumentor](http://www.phpdoc.org/), [RaspberryPi.org](http://www.raspberrypi.org/), [Symfony Demo](https://github.com/symfony/demo) and [more](https://packagist.org/packages/erusev/parsedown/dependents). **How can I help?** Use it, star it, share it and if you feel generous, [sponsor me](https://github.com/sponsors/erusev). **What else should I know?** I also make [Nota](https://nota.md/) — a notes app designed for local Markdown files. 
================================================ FILE: test/CommonMarkTestStrict.php ================================================ parsedown = new TestParsedown(); $this->parsedown->setUrlsLinked(false); } /** * @dataProvider data * @param $id * @param $section * @param $markdown * @param $expectedHtml */ public function testExample($id, $section, $markdown, $expectedHtml) { $actualHtml = $this->parsedown->text($markdown); $this->assertEquals($expectedHtml, $actualHtml); } /** * @return array */ public function data() { $spec = file_get_contents(self::SPEC_URL); if ($spec === false) { $this->fail('Unable to load CommonMark spec from ' . self::SPEC_URL); } $spec = str_replace("\r\n", "\n", $spec); $spec = strstr($spec, '', true); $matches = array(); preg_match_all('/^`{32} example\n((?s).*?)\n\.\n(?:|((?s).*?)\n)`{32}$|^#{1,6} *(.*?)$/m', $spec, $matches, PREG_SET_ORDER); $data = array(); $currentId = 0; $currentSection = ''; foreach ($matches as $match) { if (isset($match[3])) { $currentSection = $match[3]; } else { $currentId++; $markdown = str_replace('→', "\t", $match[1]); $expectedHtml = isset($match[2]) ? str_replace('→', "\t", $match[2]) : ''; $data[$currentId] = array( 'id' => $currentId, 'section' => $currentSection, 'markdown' => $markdown, 'expectedHtml' => $expectedHtml ); } } return $data; } } ================================================ FILE: test/CommonMarkTestWeak.php ================================================ parsedown->getTextLevelElements(); array_walk($textLevelElements, function (&$element) { $element = preg_quote($element, '/'); }); $this->textLevelElementRegex = '\b(?:' . implode('|', $textLevelElements) . 
')\b';
    }

    /**
     * Compare Parsedown's output with the expected HTML after both have been
     * normalised by cleanupHtml().
     *
     * @dataProvider data
     * @param $id
     * @param $section
     * @param $markdown
     * @param $expectedHtml
     */
    public function testExample($id, $section, $markdown, $expectedHtml)
    {
        $expectedHtml = $this->cleanupHtml($expectedHtml);

        $actualHtml = $this->parsedown->text($markdown);
        $actualHtml = $this->cleanupHtml($actualHtml);

        $this->assertEquals($expectedHtml, $actualHtml);
    }

    /**
     * Remove whitespace that directly follows the opening tag, or directly
     * precedes the closing tag, of an element that is not matched by
     * $this->textLevelElementRegex. Opening "pre" tags are left alone.
     *
     * @param string $markup
     *
     * @return string
     */
    protected function cleanupHtml($markup)
    {
        // invisible whitespaces at the beginning and end of block elements
        // however, whitespaces at the beginning of <pre> elements do matter
        $markup = preg_replace(
            array(
                '/(<(?!(?:' . $this->textLevelElementRegex . '|\bpre\b))\w+\b[^>]*>(?:<' . $this->textLevelElementRegex . '[^>]*>)*)\s+/s',
                '/\s+((?:<\/' . $this->textLevelElementRegex . '>)*<\/(?!' . $this->textLevelElementRegex . ')\w+\b>)/s'
            ),
            '$1',
            $markup
        );

        return $markup;
    }
}


================================================
FILE: test/ParsedownTest.php
================================================
dirs = $this->initDirs();
        $this->Parsedown = $this->initParsedown();

        parent::__construct($name, $data, $dataName);
    }

    private $dirs;
    protected $Parsedown;

    /**
     * List the directories that are scanned for fixtures; here only the
     * "data" directory next to this file.
     *
     * @return array
     */
    protected function initDirs()
    {
        return array(dirname(__FILE__).'/data/');
    }

    /**
     * Create the parser instance the tests run against.
     *
     * @return Parsedown
     */
    protected function initParsedown()
    {
        return new TestParsedown();
    }

    /**
     * Render one ".md" fixture and compare the result with its ".html"
     * counterpart (line endings of the expectation normalised to "\n").
     * Fixtures named "xss..." run in safe mode, those named "strict..." in
     * strict mode.
     *
     * @dataProvider data
     * @param $test
     * @param $dir
     */
    function test_($test, $dir)
    {
        $fixture = $dir . $test;

        $source = file_get_contents($fixture . '.md');

        # "\r\n" is replaced first, then any remaining lone "\r"
        $expected = str_replace(array("\r\n", "\r"), "\n", file_get_contents($fixture . '.html'));

        $isXssFixture = strncmp($test, 'xss', 3) === 0;
        $isStrictFixture = strncmp($test, 'strict', 6) === 0;

        $this->Parsedown->setSafeMode($isXssFixture);
        $this->Parsedown->setStrictMode($isStrictFixture);

        $this->assertEquals($expected, $this->Parsedown->text($source));
    }

    function testRawHtml()
    {
        $markdown = "```php\nfoobar\n```";
        $expectedMarkup = '

foobar

'; $expectedSafeMarkup = '
<p>foobar</p>
'; $unsafeExtension = new UnsafeExtension; $actualMarkup = $unsafeExtension->text($markdown); $this->assertEquals($expectedMarkup, $actualMarkup); $unsafeExtension->setSafeMode(true); $actualSafeMarkup = $unsafeExtension->text($markdown); $this->assertEquals($expectedSafeMarkup, $actualSafeMarkup); } function testTrustDelegatedRawHtml() { $markdown = "```php\nfoobar\n```"; $expectedMarkup = '

foobar

'; $expectedSafeMarkup = $expectedMarkup; $unsafeExtension = new TrustDelegatedExtension; $actualMarkup = $unsafeExtension->text($markdown); $this->assertEquals($expectedMarkup, $actualMarkup); $unsafeExtension->setSafeMode(true); $actualSafeMarkup = $unsafeExtension->text($markdown); $this->assertEquals($expectedSafeMarkup, $actualSafeMarkup); } function data() { $data = array(); foreach ($this->dirs as $dir) { $Folder = new DirectoryIterator($dir); foreach ($Folder as $File) { /** @var $File DirectoryIterator */ if ( ! $File->isFile()) { continue; } $filename = $File->getFilename(); $extension = pathinfo($filename, PATHINFO_EXTENSION); if ($extension !== 'md') { continue; } $basename = $File->getBasename('.md'); if (file_exists($dir . $basename . '.html')) { $data []= array($basename, $dir); } } } return $data; } public function test_no_markup() { $markdownWithHtml = <<_content_ sparse:
_content_
paragraph comment MARKDOWN_WITH_MARKUP; $expectedHtml = <<<div>content</div>

sparse:

<div> <div class="inner"> content </div> </div>

paragraph

<style type="text/css"> p { color: red; } </style>

comment

<!-- html comment -->

EXPECTED_HTML; $parsedownWithNoMarkup = new TestParsedown(); $parsedownWithNoMarkup->setMarkupEscaped(true); $this->assertEquals($expectedHtml, $parsedownWithNoMarkup->text($markdownWithHtml)); } public function testLateStaticBinding() { $parsedown = Parsedown::instance(); $this->assertInstanceOf('Parsedown', $parsedown); // After instance is already called on Parsedown // subsequent calls with the same arguments return the same instance $sameParsedown = TestParsedown::instance(); $this->assertInstanceOf('Parsedown', $sameParsedown); $this->assertSame($parsedown, $sameParsedown); $testParsedown = TestParsedown::instance('test late static binding'); $this->assertInstanceOf('TestParsedown', $testParsedown); $sameInstanceAgain = TestParsedown::instance('test late static binding'); $this->assertSame($testParsedown, $sameInstanceAgain); } } ================================================ FILE: test/SampleExtensions.php ================================================ $text

"; return $Block; } } class TrustDelegatedExtension extends Parsedown { protected function blockFencedCodeComplete($Block) { $text = $Block['element']['element']['text']; unset($Block['element']['element']['text']); // WARNING: There is almost always a better way of doing things! // // This behaviour is NOT needed in the demonstrated case. // Only use this if you are sure that the result being added into // rawHtml is safe. // (e.g. using an external parser with escaping capabilities). $Block['element']['element']['rawHtml'] = "

$text

"; $Block['element']['element']['allowRawHtmlInSafeMode'] = true; return $Block; } } ================================================ FILE: test/TestParsedown.php ================================================ textLevelElements; } } ================================================ FILE: test/data/aesthetic_table.html ================================================
header 1 header 2
cell 1.1 cell 1.2
cell 2.1 cell 2.2
================================================ FILE: test/data/aligned_table.html ================================================
header 1 header 2 header 2
cell 1.1 cell 1.2 cell 1.3
cell 2.1 cell 2.2 cell 2.3
================================================ FILE: test/data/atx_heading.html ================================================

h1

h2

h3

h4

h5
h6

####### not a heading

closed h1

# of levels

# of levels #

heading

================================================ FILE: test/data/automatic_link.html ================================================

http://example.com

================================================ FILE: test/data/block-level_html.html ================================================
_content_

paragraph

_content_
home
================================================ FILE: test/data/code_block.html ================================================
<?php

$message = 'Hello World!';
echo $message;

> not a quote
- not a list item
[not a reference]: http://foo.com

foo


bar
================================================ FILE: test/data/code_span.html ================================================

a code span

this is also a codespan trailing text

and look at this one!

single backtick in a code span: `

backtick-delimited string in a code span: `foo`

sth `` sth

================================================ FILE: test/data/compound_blockquote.html ================================================

header

paragraph

  • li

paragraph

================================================ FILE: test/data/compound_emphasis.html ================================================

code code

codecodecode

================================================ FILE: test/data/compound_list.html ================================================
  • paragraph

    paragraph

  • paragraph

    quote

================================================ FILE: test/data/deeply_nested_list.html ================================================
  • li
    • li
      • li
      • li
    • li
  • li

  • level 1
    • level 2
      • level 3
        • level 4
          • level 5

  • a
  • b
  • c
  • d
  • e
  • f
  • g
  • h
  • i
================================================ FILE: test/data/em_strong.html ================================================

em strong

em strong strong

strong em strong

strong em strong strong

em strong

em strong strong

strong em strong

strong em strong strong

================================================ FILE: test/data/email.html ================================================

my email is me@example.com

html tags shouldn't start an email autolink first.last@example.com

================================================ FILE: test/data/emphasis.html ================================================

underscore, asterisk, one two, three four, a, b

strong and em and strong and em

line line line

this_is_not_an_emphasis

an empty emphasis __ ** is not an emphasis

*mixed *double and single asterisk** spans

================================================ FILE: test/data/escaping.html ================================================

escaped *emphasis*.

escaped \*emphasis\* in a code span

escaped \*emphasis\* in a code block

\ ` * _ { } [ ] ( ) > # + - . !

one_two one_two

one*two one*two

================================================ FILE: test/data/fenced_code_block.html ================================================
<?php

$message = 'fenced code block';
echo $message;
tilde
echo 'language identifier';
echo 'language identifier with non words';
<?php
echo "Hello World";
?>
<a href="http://auraphp.com" >Aura Project</a>
the following isn't quite enough to close
```
still a fenced code block
foo


bar
<?php
echo "Hello World";
================================================ FILE: test/data/horizontal_rule.html ================================================




================================================ FILE: test/data/html_comment.html ================================================

paragraph

paragraph

abc
  • abcd
  • bbbb
  • cccc
================================================ FILE: test/data/html_entity.html ================================================

& © {

================================================ FILE: test/data/image_reference.html ================================================

Markdown Logo

![missing reference]

================================================ FILE: test/data/image_title.html ================================================

alt

blank title

================================================ FILE: test/data/implicit_reference.html ================================================

an implicit reference link

an implicit reference link with an empty link definition

an implicit reference link followed by another

an explicit reference link with a title

================================================ FILE: test/data/inline_link.html ================================================

link

link with parentheses in URL

(link) in parentheses

link

MD Logo

MD Logo and text

MD Logo and text

================================================ FILE: test/data/inline_link_title.html ================================================

single quotes

double quotes

single quotes blank

double quotes blank

space

parentheses

================================================ FILE: test/data/inline_title.html ================================================

single quotes and double quotes

================================================ FILE: test/data/lazy_blockquote.html ================================================

quote the rest of it

another paragraph the rest of it

================================================ FILE: test/data/lazy_list.html ================================================
  • li the rest of it
================================================ FILE: test/data/line_break.html ================================================

line
line

================================================ FILE: test/data/markup_consecutive_one.html ================================================
Markup
_No markdown_ without blank line for **strict** compliance with CommonMark.

Markdown

================================================ FILE: test/data/markup_consecutive_one_line.html ================================================
One markup on two lines
_No markdown_

Markdown

================================================ FILE: test/data/markup_consecutive_one_stripped.html ================================================

Stripped markup

_No markdown_

Markdown

================================================ FILE: test/data/markup_consecutive_two.html ================================================
First markup

and second markup on the same line.

_No markdown_

Markdown

================================================ FILE: test/data/markup_consecutive_two_lines.html ================================================
First markup

and partial markup on two lines.

_No markdown_

Markdown

================================================ FILE: test/data/markup_consecutive_two_stripped.html ================================================

Stripped markup on two lines

_No markdown_

Markdown

================================================ FILE: test/data/multiline_list_paragraph.html ================================================
  • li

    line line

================================================ FILE: test/data/multiline_lists.html ================================================
  1. One First body copy

  2. Two Last body copy

================================================ FILE: test/data/nested_block-level_html.html ================================================
_parent_
_child_
_adopted child_

outside

================================================ FILE: test/data/ordered_list.html ================================================
  1. one
  2. two

repeating numbers:

  1. one
  2. two

large numbers:

  1. one

foo 1. the following should not start a list 100.
200.

================================================ FILE: test/data/paragraph_list.html ================================================

paragraph

  • li
  • li

paragraph

  • li

  • li

================================================ FILE: test/data/reference_title.html ================================================

double quotes and single quotes and parentheses

[invalid title]: http://example.com example title

================================================ FILE: test/data/self-closing_html.html ================================================

paragraph


paragraph


paragraph


paragraph


paragraph


paragraph

================================================ FILE: test/data/separated_nested_list.html ================================================
  • li

    • li
    • li
================================================ FILE: test/data/setext_header.html ================================================

h1

h2

single character

not a header


================================================ FILE: test/data/setext_header_spaces.html ================================================

trailing space

trailing space

leading and trailing space

leading and trailing space

1 leading space

1 leading space

3 leading spaces

3 leading spaces

too many leading spaces ==

too many leading spaces --

================================================ FILE: test/data/simple_blockquote.html ================================================

quote

indented:

quote

no space after >:

quote


Info 1 text

Info 2 text

================================================ FILE: test/data/simple_table.html ================================================
header 1 header 2
cell 1.1 cell 1.2
cell 2.1 cell 2.2

header 1 header 2
cell 1.1 cell 1.2
cell 2.1 cell 2.2

header 1
cell 1.1
cell 2.1

header 1
cell 1.1
cell 2.1

Not a table, we haven't ended the paragraph: header 1 | header 2 -------- | -------- cell 1.1 | cell 1.2 cell 2.1 | cell 2.2

================================================ FILE: test/data/span-level_html.html ================================================

an important link

broken
line

inline tag at the beginning

http://example.com

================================================ FILE: test/data/sparse_dense_list.html ================================================
  • li

  • li

  • li

================================================ FILE: test/data/sparse_html.html ================================================
line 1

line 2 line 3

line 4

================================================ FILE: test/data/sparse_list.html ================================================
  • li

  • li


  • li

    • indented li
================================================ FILE: test/data/special_characters.html ================================================

AT&T has an ampersand in their name

this & that

4 < 5 and 6 > 5

http://example.com/autolink?a=1&b=2

inline link

reference link

================================================ FILE: test/data/strict_atx_heading.html ================================================

h1

h2

h3

h4

h5
h6

####### not a heading

#not a heading

closed h1

# of levels

# of levels #

================================================ FILE: test/data/strikethrough.html ================================================

strikethrough

here's one followed by another one

~~ this ~~ is not one neither is ~this~

escaped ~~this~~

================================================ FILE: test/data/strong_em.html ================================================

em strong em

strong em em

em strong em em

em strong em

strong em em

em strong em em

================================================ FILE: test/data/tab-indented_code_block.html ================================================
<?php

$message = 'Hello World!';
echo $message;

echo "following a blank line";
================================================ FILE: test/data/table_inline_markdown.html ================================================
header 1 header 2
cell 1.1 cell 1.2
| 2.1 | 2.2
\| 2.1 link
================================================ FILE: test/data/text_reference.html ================================================

reference link

one with a semantic name

[one][404] with no definition

multiline one defined on 2 lines

one with a mixed case label and an upper case definition

one with the a label on the next line

link

================================================ FILE: test/data/unordered_list.html ================================================
  • li
  • li

mixed unordered markers:

  • li
  • li
  • li

mixed ordered markers:

  1. starting at 1, list one
  2. number 2, list one
  1. starting at 3, list two
================================================ FILE: test/data/untidy_table.html ================================================
header 1 header 2
cell 1.1 cell 1.2
cell 2.1 cell 2.2
================================================ FILE: test/data/url_autolinking.html ================================================

an autolink http://example.com

inside of brackets [http://example.com], inside of braces {http://example.com}, inside of parentheses (http://example.com)

trailing slash http://example.com/ and http://example.com/path/

================================================ FILE: test/data/whitespace.html ================================================
code
================================================ FILE: test/data/xss_attribute_encoding.html ================================================

xss

xss

xss

xss

xss"

xss'

================================================ FILE: test/data/xss_bad_url.html ================================================

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

xss

================================================ FILE: test/data/xss_text_encoding.html ================================================

<script>alert(1)</script>

<script>

alert(1)

</script>

<script> alert(1) </script>