CoreTagHooks.php 0000666 00000017463 15133501634 0007625 0 ustar 00 <?php
/**
* Tag hooks provided by MediaWiki core
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Parser
*/
use MediaWiki\Config\ServiceOptions;
use MediaWiki\Html\Html;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\Sanitizer;
/**
 * Various tag hooks, registered in every Parser
 * @ingroup Parser
 */
class CoreTagHooks {

	/**
	 * Config options that register() requires in the ServiceOptions it is given.
	 *
	 * @internal
	 */
	public const REGISTER_OPTIONS = [
		// See documentation for the corresponding config options
		MainConfigNames::RawHtml,
	];

	/**
	 * Register the core tag hooks on the given parser.
	 *
	 * The 'html' hook is only registered when the RawHtml option is enabled.
	 *
	 * @param Parser $parser
	 * @param ServiceOptions $options Must provide everything in self::REGISTER_OPTIONS
	 *
	 * @return void
	 * @internal
	 */
	public static function register( Parser $parser, ServiceOptions $options ) {
		$options->assertRequiredOptions( self::REGISTER_OPTIONS );
		$rawHtml = $options->get( MainConfigNames::RawHtml );
		$parser->setHook( 'pre', [ __CLASS__, 'pre' ] );
		$parser->setHook( 'nowiki', [ __CLASS__, 'nowiki' ] );
		$parser->setHook( 'gallery', [ __CLASS__, 'gallery' ] );
		$parser->setHook( 'indicator', [ __CLASS__, 'indicator' ] );
		$parser->setHook( 'langconvert', [ __CLASS__, 'langconvert' ] );
		if ( $rawHtml ) {
			$parser->setHook( 'html', [ __CLASS__, 'html' ] );
		}
	}

	/**
	 * Core parser tag hook function for 'pre'.
	 * Text is treated roughly as 'nowiki' wrapped in an HTML 'pre' tag;
	 * valid HTML attributes are passed on.
	 *
	 * @param ?string $content Tag body; null is treated as an empty string
	 * @param array $attribs Attributes given on the tag; sanitized before output
	 * @param Parser $parser
	 * @return string HTML
	 * @internal
	 */
	public static function pre( ?string $content, array $attribs, Parser $parser ): string {
		// Backwards-compatibility hack: unwrap <nowiki> pairs found inside the body.
		$content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $content ?? '', 'i' );

		$attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );

		// Escape the angle brackets so the body cannot introduce markup.
		// We need to let both '"' and '&' through,
		// for strip markers and entities respectively.
		$content = str_replace(
			[ '>', '<' ],
			[ '&gt;', '&lt;' ],
			$content
		);

		// @phan-suppress-next-line SecurityCheck-XSS Ad-hoc escaping above.
		return Html::rawElement( 'pre', $attribs, $content );
	}

	/**
	 * Core parser tag hook function for 'html', used only when
	 * $wgRawHtml is enabled.
	 *
	 * This is potentially unsafe and should be used only in very careful
	 * circumstances, as the contents are emitted as raw HTML.
	 *
	 * Uses undocumented extended tag hook return values, introduced in r61913.
	 *
	 * @suppress SecurityCheck-XSS
	 * @param ?string $content Tag body; null is treated as an empty string
	 * @param array $attributes
	 * @param Parser $parser
	 * @return array|string Output of tag hook
	 * @throws UnexpectedValueException When the RawHtml config option is disabled
	 * @internal
	 */
	public static function html( ?string $content, array $attributes, Parser $parser ) {
		$rawHtml = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::RawHtml );
		if ( !$rawHtml ) {
			throw new UnexpectedValueException( '<html> extension tag encountered unexpectedly' );
		}

		if ( $parser->getOptions()->getAllowUnsafeRawHtml() ) {
			return [ $content ?? '', 'markerType' => 'nowiki' ];
		}

		// In a system message where raw html is
		// not allowed (but it is allowed in other
		// contexts).
		return Html::rawElement(
			'span',
			[ 'class' => 'error' ],
			// Using ->escaped() not ->parse() as
			// a paranoia measure against a loop.
			$parser->msg( 'rawhtml-notallowed' )->escaped()
		);
	}

	/**
	 * Core parser tag hook function for 'nowiki'. Text within this section
	 * gets interpreted as a string of text with HTML-compatible character
	 * references, and wiki markup within it will not be expanded.
	 *
	 * Uses undocumented extended tag hook return values, introduced in r61913.
	 *
	 * Uses custom html escaping which phan-taint-check won't recognize
	 * hence we suppress the error.
	 * @suppress SecurityCheck-XSS
	 *
	 * @param ?string $content Tag body; null is treated as an empty string
	 * @param array $attributes
	 * @param Parser $parser
	 * @return array Escaped content at index 0, plus 'markerType' => 'nowiki'
	 * @internal
	 */
	public static function nowiki( ?string $content, array $attributes, Parser $parser ): array {
		$content = strtr( $content ?? '', [
			// lang converter
			'-{' => '-&#123;',
			'}-' => '&#125;-',
			// html tags
			'<' => '&lt;',
			'>' => '&gt;'
			// Note: Both '"' and '&' are not converted.
			// This allows strip markers and entities through.
		] );
		return [ $content, 'markerType' => 'nowiki' ];
	}

	/**
	 * Core parser tag hook function for 'gallery'.
	 *
	 * Renders a thumbnail list of the given images, with optional captions.
	 * Full syntax documented on the wiki:
	 *
	 *   https://www.mediawiki.org/wiki/Help:Images#Gallery_syntax
	 *
	 * @todo break Parser::renderImageGallery out here too.
	 *
	 * @param ?string $content Tag body; null is treated as an empty string
	 * @param array $attributes
	 * @param Parser $parser
	 * @return string HTML
	 * @internal
	 */
	public static function gallery( ?string $content, array $attributes, Parser $parser ): string {
		return $parser->renderImageGallery( $content ?? '', $attributes );
	}

	/**
	 * XML-style tag for page status indicators: icons (or short text snippets) usually displayed in
	 * the top-right corner of the page, outside of the main content.
	 *
	 * @param ?string $content Tag body; null is treated as an empty string
	 * @param array $attributes Must contain a non-blank 'name'
	 * @param Parser $parser
	 * @param PPFrame $frame
	 * @return string An error span when the name is missing or blank, otherwise an empty
	 *  string (the indicator is stored on the parser output instead)
	 * @since 1.25
	 * @internal
	 */
	public static function indicator( ?string $content, array $attributes, Parser $parser, PPFrame $frame ): string {
		if ( !isset( $attributes['name'] ) || trim( $attributes['name'] ) === '' ) {
			// Built with Html::rawElement for consistency with the other hooks'
			// error output; the resulting markup is unchanged.
			return Html::rawElement(
				'span',
				[ 'class' => 'error' ],
				$parser->msg( 'invalid-indicator-name' )->parse()
			);
		}

		$parser->getOutput()->setIndicator(
			trim( $attributes['name'] ),
			Parser::stripOuterParagraph( $parser->recursiveTagParseFully( $content ?? '', $frame ) )
		);

		return '';
	}

	/**
	 * Returns content converted into the requested language variant, using LanguageConverter.
	 *
	 * Both the 'from' and 'to' attributes are required and must share the same
	 * base language code (the part before the first '-'). Any failed check
	 * falls through to the error span returned at the end.
	 *
	 * @param ?string $content Tag body; null is treated as an empty string
	 * @param array $attributes
	 * @param Parser $parser
	 * @param PPFrame $frame
	 * @return string Converted content, or an error span for invalid attributes
	 * @since 1.36
	 * @internal
	 */
	public static function langconvert( ?string $content, array $attributes, Parser $parser, PPFrame $frame ): string {
		if ( isset( $attributes['from'] ) && isset( $attributes['to'] ) ) {
			$fromArg = trim( $attributes['from'] );
			$toArg = trim( $attributes['to'] );
			$fromLangCode = explode( '-', $fromArg )[0];
			if ( $fromLangCode && $fromLangCode === explode( '-', $toArg )[0] ) {
				$services = MediaWikiServices::getInstance();
				$lang = $services->getLanguageFactory()
					->getLanguage( $fromLangCode );
				$converter = $services->getLanguageConverterFactory()
					->getLanguageConverter( $lang );

				# ensure that variants are available,
				# and the variants are valid BCP 47 codes
				if ( $converter->hasVariants()
					&& strcasecmp( $fromArg, LanguageCode::bcp47( $fromArg ) ) === 0
					&& strcasecmp( $toArg, LanguageCode::bcp47( $toArg ) ) === 0
				) {
					$toVariant = $converter->validateVariant( $toArg );

					if ( $toVariant ) {
						return $converter->autoConvert(
							$parser->recursiveTagParse( $content ?? '', $frame ),
							$toVariant
						);
					}
				}
			}
		}

		return Html::rawElement(
			'span',
			[ 'class' => 'error' ],
			$parser->msg( 'invalid-langconvert-attrs' )->parse()
		);
	}
}
RemexRemoveTagHandler.php 0000666 00000012330 15133501634 0011451 0 ustar 00 <?php
namespace MediaWiki\Parser;
use Wikimedia\RemexHtml\Tokenizer\Attributes;
use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
use Wikimedia\RemexHtml\Tokenizer\TokenHandler;
/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Relays allowed tags to the next token handler and re-emits everything
 * else as plain text taken from the original source string.
 *
 * @internal
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * @var string Original HTML input; rejected tags are re-emitted as text
	 *   by pointing back into this string.
	 */
	private $source;

	/**
	 * @var array<string,true> Set of HTML tags which can be self-closed.
	 */
	private $htmlsingle;

	/**
	 * @var array<string,true> Self-closed tags which are on $htmlsingle
	 *   but not on $htmlsingleonly will be emitted as an empty element.
	 */
	private $htmlsingleonly;

	/**
	 * @var array<string,true> Set of allowed HTML open/close tags.
	 */
	private $htmlelements;

	/**
	 * @var ?callable(Attributes,mixed...):Attributes Callback to mutate or
	 *   sanitize attributes.
	 */
	private $attrCallback;

	/**
	 * @var ?array $args Extra arguments handed to $attrCallback after the
	 *   attributes themselves.
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler to relay accepted tokens.
	 * @param string $source Input source string.
	 * @param array $tagData Information about allowed/rejected tags; the keys
	 *   'htmlsingle', 'htmlsingleonly' and 'htmlelements' are read.
	 * @param ?callable $attrCallback Attribute handler callback.
	 *   The full signature is ?callable(Attributes,mixed...):Attributes
	 * @param ?array $callbackArgs Optional arguments to attribute handler;
	 *   null is stored as an empty list.
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs
	) {
		parent::__construct( $nextHandler );

		$this->source = $source;

		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];
		$this->htmlelements = $tagData['htmlelements'];

		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];
	}

	/**
	 * @inheritDoc
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Comments are dropped on purpose: nothing is relayed.
	}

	/**
	 * Decide whether a tag may be present at all, given its attributes.
	 *
	 * This DOES NOT validate the attributes, nor does it validate the
	 * tags themselves. It only covers the special cases where a tag is
	 * acceptable in content ONLY when specific attributes are set.
	 *
	 * @param string $element
	 * @param Attributes $attrs
	 * @return bool
	 *
	 * @see Sanitizer::validateTag()
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		$isMeta = $element === 'meta';
		$isLink = $element === 'link';
		if ( !$isMeta && !$isLink ) {
			// Only <meta> and <link> carry attribute requirements.
			return true;
		}

		$params = $attrs->getValues();
		if ( !isset( $params['itemprop'] ) ) {
			// <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
			return false;
		}

		// <meta> must have a content="" for the itemprop;
		// <link> must have an associated href="".
		$requiredAttr = $isMeta ? 'content' : 'href';
		return isset( $params[$requiredAttr] );
	}

	/**
	 * Re-emit a span of the original source as a text node.
	 *
	 * @param int $sourceStart
	 * @param int $sourceLength
	 */
	private function emitSourceAsText( $sourceStart, $sourceLength ) {
		$this->nextHandler->characters(
			$this->source, $sourceStart, $sourceLength, $sourceStart, $sourceLength
		);
	}

	/**
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		// Handle a start tag from the tokenizer: either relay it to the
		// next stage, or re-emit it as raw text.
		$tag = strtolower( $name );

		if ( !isset( $this->htmlelements[$tag] ) ) {
			// Not an allowed element at all.
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $this->attrCallback ) {
			$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
		}

		$canSelfClose = isset( $this->htmlsingle[$tag] ) || isset( $this->htmlsingleonly[$tag] );
		if ( $selfClose && !$canSelfClose ) {
			// Remove the self-closing slash, to be consistent with
			// HTML5 semantics. T134423
			$selfClose = false;
		}

		// The presence check runs on the attributes as they stand before
		// sanitization; sanitization then happens regardless of its outcome.
		$tagAccepted = self::validateTag( $tag, $attrs );
		$attrs = new PlainAttributes(
			Sanitizer::validateTagAttributes( $attrs->getValues(), $tag )
		);

		if ( !$tagAccepted ) {
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $selfClose && !isset( $this->htmlsingleonly[$tag] ) ) {
			// Interpret self-closing tags as empty tags even when
			// HTML5 would interpret them as start tags. Such input
			// is commonly seen on Wikimedia wikis with this intention.
			$this->nextHandler->startTag( $name, $attrs, false, $sourceStart, $sourceLength );
			$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
			return;
		}

		$this->nextHandler->startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
	}

	/**
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		// Handle an end tag from the tokenizer: either relay it to the
		// next stage, or re-emit it as raw text.
		if ( !isset( $this->htmlelements[strtolower( $name )] ) ) {
			$this->emitSourceAsText( $sourceStart, $sourceLength );
			return;
		}

		// This is a good tag, relay it.
		$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
	}
}
RemexStripTagHandler.php 0000666 00000006453 15133501634 0011326 0 ustar 00 <?php
namespace MediaWiki\Parser;
use Wikimedia\RemexHtml\Tokenizer\Attributes;
use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;
/**
 * Helper class for Sanitizer::stripAllTags().
 *
 * Collects character data into a plain string, skipping text that appears
 * inside non-visible tags and padding block-level tag boundaries with a space.
 *
 * @internal
 */
class RemexStripTagHandler extends NullTokenHandler {
	// Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	// retrieved on sept 12, 2018. <br> is not block level but was added anyways.
	// The following is a complete list of all HTML block level elements
	// (although "block-level" is not technically defined for elements that are
	// new in HTML5).
	// Structured as tag => true to allow O(1) membership test.
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	// Tags whose text content is left out of the result.
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/** @var bool True between the start and end of a non-visible tag */
	private $insideNonVisibleTag = false;

	/** @var string Text accumulated so far */
	private $text = '';

	/**
	 * @return string The text collected so far
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append character data, unless it sits inside a non-visible tag.
	 *
	 * @inheritDoc
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			return;
		}
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = true;
		}
		$this->padIfBlockLevel( $name );
	}

	/**
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = false;
		}
		$this->padIfBlockLevel( $name );
	}

	/**
	 * Inject whitespace for typical block-level tags to
	 * prevent merging unrelated<br>words.
	 *
	 * @param string $tagName HTML tag name
	 */
	private function padIfBlockLevel( $tagName ) {
		if ( $this->isBlockLevelTag( $tagName ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Detect block level tags. Of course css can make anything a block
	 * level tag, but this is still better than nothing.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when tag is an html block level element
	 */
	private function isBlockLevelTag( $tagName ) {
		$normalized = strtolower( trim( $tagName ) );
		return self::BLOCK_LEVEL_TAGS[$normalized] ?? false;
	}

	/**
	 * Detect block tags which by default are non-visible items.
	 * Of course css can make anything non-visible,
	 * but this is still better than nothing.
	 *
	 * We use this primarily to hide TemplateStyles
	 * from output in notifications/emails etc.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when tag is a html element which should be filtered out
	 */
	private function isNonVisibleTag( $tagName ) {
		$normalized = strtolower( trim( $tagName ) );
		return self::NON_VISIBLE_TAGS[$normalized] ?? false;
	}
}
preTags-standalone-knownFailures.json 0000666 00000004305 15133510051 0014017 0 ustar 00 {
"<nowiki> inside <pre> (T15238)": {
"html2html": "<pre data-parsoid='{\"dsr\":[0,15,1,0]}'><span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&lt;\",\"srcContent\":\"<\",\"dsr\":[1,5,null,null]}'><</span>nowiki<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&gt;\",\"srcContent\":\">\",\"dsr\":[11,15,null,null]}'>></span></pre>\n<p data-parsoid='{\"dsr\":[16,18,0,0]}'>\n <br data-parsoid='{\"dsr\":[18,18,0,0]}'/></p>\n\n<pre data-parsoid='{\"dsr\":[20,53,1,0]}'><span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&lt;\",\"srcContent\":\"<\",\"dsr\":[21,25,null,null]}'><</span>nowiki<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&gt;\",\"srcContent\":\">\",\"dsr\":[31,35,null,null]}'>></span>Foo<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&lt;\",\"srcContent\":\"<\",\"dsr\":[38,42,null,null]}'><</span>/nowiki<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&gt;\",\"srcContent\":\">\",\"dsr\":[49,53,null,null]}'>></span></pre>\n",
"html2wt": " <nowiki>\n\n \n\n <nowiki>Foo</nowiki>\n"
},
"<pre> with attributes (T5202)": {
"html2html": "<pre data-parsoid='{\"dsr\":[0,24,1,0]}'>Bluescreen of WikiDeath</pre>\n",
"html2wt": " Bluescreen of WikiDeath\n"
},
"<pre> with forbidden attribute (T5202)": {
"html2html": "<pre data-parsoid='{\"dsr\":[0,22,1,0]}'>Narrow screen goodies</pre>\n",
"html2wt": " Narrow screen goodies\n"
},
"<pre> with forbidden attribute values (T5202)": {
"html2html": "<pre data-parsoid='{\"dsr\":[0,22,1,0]}'>Narrow screen goodies</pre>\n",
"html2wt": " Narrow screen goodies\n"
},
"<pre> with width attribute (T5202)": {
"html2html": "<pre data-parsoid='{\"dsr\":[0,22,1,0]}'>Narrow screen goodies</pre>\n",
"html2wt": " Narrow screen goodies\n"
},
"Entities inside <pre>": {
"html2wt": " <\n"
},
"HTML pre followed by indent-pre": {
"html2wt": " foo\n\n bar\n"
},
"HTML-pre: 2: indented text": {
"html2wt": " foo\n"
},
"Less than in attribute position": {
"selser [0,2,[2]]": "<pre <pre>123</pre>6njtnz\n\n\n\n<div <div>zghs00123</div>"
}
}
preTags.txt 0000666 00000020430 15133510051 0006707 0 ustar 00 # The parsoid-compatible option below is only relevant when we are running
# parser tests in integrated mode with Parsoid. This option is ignored
# when this test file is run with Parsoid in standalone mode.
!! options
parsoid-compatible=wt2html,wt2wt
version=2
!! end
!! article
Template:1x
!! text
{{{1}}}
!! endarticle
!! test
<pre> with attributes (T5202)
!! wikitext
<pre style="background: blue; color:white">Bluescreen of WikiDeath</pre>
!! html
<pre style="background: blue; color:white">Bluescreen of WikiDeath</pre>
!! end
!! test
<pre> with width attribute (T5202)
!! wikitext
<pre width="8">Narrow screen goodies</pre>
!! html
<pre width="8">Narrow screen goodies</pre>
!! end
!! test
<pre> with forbidden attribute (T5202)
!! wikitext
<pre width="8" onmouseover="alert(document.cookie)">Narrow screen goodies</pre>
!! html
<pre width="8">Narrow screen goodies</pre>
!! end
!! test
Entities inside <pre>
!! wikitext
<pre>&lt;</pre>
!! html
<pre>&lt;</pre>
!! end
!! test
<pre> with forbidden attribute values (T5202)
!! wikitext
<pre width="8" style="border-width: expression(alert(document.cookie))">Narrow screen goodies</pre>
!! html
<pre width="8" style="/* insecure input */">Narrow screen goodies</pre>
!! end
!! test
<nowiki> inside <pre> (T15238)
!! wikitext
<pre>
<nowiki>
</pre>
<pre>
<nowiki></nowiki>
</pre>
<pre><nowiki><nowiki></nowiki>Foo<nowiki></nowiki></nowiki></pre>
!! html
<pre>&lt;nowiki&gt;
</pre>
<pre>
</pre>
<pre>&lt;nowiki&gt;Foo&lt;/nowiki&gt;</pre>
!! end
!! test
<nowiki> inside of #tag:pre
!! wikitext
{{#tag:pre|Foo <nowiki>→bar</nowiki>}}
!! html/php
<pre>Foo →bar</pre>
!! html/parsoid+standalone
<pre about="#mwt1" typeof="mw:Transclusion" data-parsoid='{"pi":[[{"k":"1"}]]}' data-mw='{"parts":[{"template":{"target":{"wt":"#tag:pre","function":"tag"},"params":{"1":{"wt":"Foo <nowiki>&rarr;bar</nowiki>"}},"i":0}}]}'>Foo <span typeof="mw:Entity">→</span>bar</pre>
!! html/parsoid+integrated
<pre about="#mwt1" typeof="mw:Extension/pre mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#tag:pre","function":"tag"},"params":{"1":{"wt":"Foo <nowiki>&rarr;bar</nowiki>"}},"i":0}}]}'>Foo →bar</pre>
!! end
## Don't expect this to rt, Parsoid drops the unmatched closing pre tags that
## aren't enclosed in nowikis.
!! test
<nowiki> and <pre> preference (first one wins)
!! options
parsoid=wt2html
!! wikitext
<pre>
<nowiki>
</pre>
</nowiki>
</pre>
<nowiki>
<pre>
<nowiki>
</pre>
</nowiki>
</pre>
!! html/php
<pre><nowiki>
</pre>
<p></nowiki>
</p>
<p>
<pre>
<nowiki>
</pre>
</p>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n<nowiki>\n"}}'><nowiki>
</pre>
<p></nowiki></p>
<p><span typeof="mw:Nowiki">
<pre>
<nowiki>
</pre>
</span></p>
!! end
!! test
</pre> inside nowiki
!! wikitext
<nowiki></pre></nowiki>
!! html
<p>&lt;/pre&gt;
</p>
!! end
!! test
Empty pre; pre inside other HTML tags (T56946)
!! wikitext
a
<div><pre>
foo
</pre></div>
<pre></pre>
!! html/php
<p>a
</p>
<div><pre>foo
</pre></div>
<pre></pre>
!! html/parsoid
<p>a</p>
<div data-parsoid='{"stx":"html"}'><pre typeof="mw:Extension/pre" about="#mwt2" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\nfoo\n"}}'>foo
</pre></div>
<pre typeof="mw:Extension/pre" about="#mwt4" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":""}}'></pre>
!! end
!! test
HTML pre followed by indent-pre
!! wikitext
<pre>foo</pre>
bar
!! html
<pre>foo</pre>
<pre>bar
</pre>
!! end
!! test
Block tag pre
!! wikitext
<p><pre>foo</pre></p>
!! html/php
<p class="mw-empty-elt"></p><pre>foo</pre><p class="mw-empty-elt"></p>
!! html/parsoid
<p class='mw-empty-elt' data-parsoid='{"stx":"html","autoInsertedEnd":true}'></p><pre typeof="mw:Extension/pre" about="#mwt2" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"foo"}}'>foo</pre><p class='mw-empty-elt' data-parsoid='{"autoInsertedStart":true,"stx":"html"}'></p>
!! end
## Hmm, should Parsoid rt this?
!! test
Pres with newline attributes
!! options
parsoid=wt2html,html2html
!! wikitext
<pre class="one
two">hi</pre>
!! html/php
<pre class="one two">hi</pre>
!! html/parsoid
<pre class="one two" typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{"class":"one two"},"body":{"extsrc":"hi"}}'>hi</pre>
!! end
!! test
Things that look like <pre> tags aren't treated as such
!! wikitext
Barack Obama <President> of the United States
<President></President>
!! html
<p>Barack Obama &lt;President&gt; of the United States
&lt;President&gt;&lt;/President&gt;
</p>
!! end
## Remex doesn't account for fostered content.
## The difference between Parsoid and the PHP parser can be attributed to core
## commit 674e8388cba and 710618f89af in Parsoid's repo. Parsoid doesn't
## tokenize unmatched extension tags that shadow html tags as strings to ease
## an escaping mechanism. See the comment in `maybeExtensionTag`.
!! test
Handle broken pre-like tags (T66025)
!! options
parsoid=wt2html
!! wikitext
{{1x|<pre <pre>x</pre>}}
<table><pre </table>
!! html/php
<pre>x</pre>
<pre <table></table>
!! html/parsoid
<pre typeof="mw:Extension/pre mw:Transclusion" about="#mwt2" data-parsoid='{"stx":"html","a":{"<pre":null},"sa":{"<pre":""},"pi":[[{"k":"1"}]]}' data-mw='{"parts":[{"template":{"target":{"wt":"1x","href":"./Template:1x"},"params":{"1":{"wt":"<pre <pre>x</pre>"}},"i":0}}]}'>x</pre>
<pre data-parsoid='{"stx":"html","src":"<pre </table>","tagWidths":[13,0],"a":{"<":null,"table":null},"sa":{"<":"","table":""},"fostered":true,"autoInsertedEnd":true}'></pre><table data-parsoid='{"stx":"html","autoInsertedEnd":true}'></table>
!! end
## Similar to the above, but shows the difference between extension and html tags
!! test
Less than in attribute position
!! wikitext
<pre <pre>123</pre>
<div <div>123</div>
!! html/php
<pre>123</pre><p>
<div </p><div>123</div>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{"<pre":""},"body":{"extsrc":"123"}}'>123</pre><p>
<div </p><div>123</div>
!! end
!! test
Parsoid: handle pre with space after attribute
!! options
parsoid=wt2html
!! wikitext
<pre style="width:50%;" >{{1x|foo}}</pre>
!! html/php
<pre style="width:50%;">{{1x|foo}}</pre>
!! html/parsoid
<pre style="width:50%;" typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{"style":"width:50%;"},"body":{"extsrc":"{{1x|foo}}"}}'>{{1x|foo}}</pre>
!! end
!! test
Self-closed pre
!! wikitext
<pre />
!! html/php
<pre></pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{}}'></pre>
!! end
###
### HTML-pre (some to spec PHP parser behavior and some Parsoid-RT-centric)
###
!!test
HTML-pre: 1. embedded newlines
!! wikitext
<pre>foo</pre>
<pre>
foo
</pre>
<pre>
foo
</pre>
<pre>
foo
</pre>
!! html/php
<pre>foo</pre>
<pre>foo
</pre>
<pre>
foo
</pre>
<pre>
foo
</pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"foo"}}'>foo</pre>
<pre typeof="mw:Extension/pre" about="#mwt4" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\nfoo\n"}}'>foo
</pre>
<pre typeof="mw:Extension/pre" about="#mwt6" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n\nfoo\n"}}'>
foo
</pre>
<pre typeof="mw:Extension/pre" about="#mwt8" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n\n\nfoo\n"}}'>
foo
</pre>
!!end
!! test
HTML-pre: big spaces
!! wikitext
<pre>
haha
haha
</pre>
!! html/php
<pre>
haha
haha
</pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n\n\n\n\nhaha\n\n\n\n\nhaha\n\n\n\n\n"}}'>
haha
haha
</pre>
!! end
!!test
HTML-pre: 2: indented text
!! wikitext
<pre>
foo
</pre>
!! html
<pre> foo
</pre>
!!end
!!test
HTML-pre: 3: other wikitext
!! wikitext
<pre>
* foo
# bar
= no-h =
'' no-italic ''
[[ NoLink ]]
</pre>
!! html/php
<pre>* foo
# bar
= no-h =
'' no-italic ''
[[ NoLink ]]
</pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n* foo\n# bar\n= no-h =\n'' no-italic ''\n[[ NoLink ]]\n"}}'>* foo
# bar
= no-h =
'' no-italic ''
[[ NoLink ]]
</pre>
!!end
redirects.txt 0000666 00000017577 15133510051 0007310 0 ustar 00 # The parsoid-compatible option below is only relevant when we are running
# parser tests in integrated mode with Parsoid. This option is ignored
# when this test file is run with Parsoid in standalone mode.
!! options
parsoid-compatible=wt2html,wt2wt
version=2
!! end
!! article
Main Page
!! text
blah blah
!! endarticle
!! article
Template:1x
!! text
{{{1}}}
!! endarticle
### Redirects, Parsoid-only
!! test
1. Simple redirect to page
!! wikitext
#REDIRECT [[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end
!! test
2. Other redirect variants
!! wikitext
#REDIRECT [[Main_Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page" data-parsoid='{"src":"#REDIRECT ","a":{"href":"./Main_Page"},"sa":{"href":"Main_Page"}}'/>
!! end
# Not a valid redirect in PHP (although perhaps it was, once upon a time)
# This tests the Parsoid bail-out code.
!! test
3. Other redirect variants
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[<nowiki>[[Bar]]</nowiki>]]
!! html/parsoid
<ol><li>REDIRECT [[<span typeof="mw:Nowiki">[[Bar]]</span>]]</li></ol>
!! end
!! test
4. Redirect to a templated destination
!! wikitext
#REDIRECT [[{{1x|Foo}}bar]]
!! html/parsoid
<link about="#mwt2" typeof="mw:ExpandedAttrs" rel="mw:PageProp/redirect" href="./Foobar" data-parsoid='{"a":{"href":"./Foobar"},"sa":{"href":"{{1x|Foo}}bar"}}' data-mw='{"attribs":[[{"txt":"href"},{"html":"<span about=\"#mwt1\" typeof=\"mw:Transclusion\" data-parsoid='{\"pi\":[[{\"k\":\"1\"}]]}' data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"1x\",\"href\":\"./Template:1x\"},\"params\":{\"1\":{\"wt\":\"Foo\"}},\"i\":0}}]}'>Foo</span>bar"}]]}'/>
!! end
!! test
Empty redirect
!! options
parsoid=wt2html,wt2wt
!! wikitext
#REDIRECT [[]]
!! html/parsoid
<ol>
<li>REDIRECT [[]]</li></ol>
!! end
!! test
Optional colon in #REDIRECT
!! options
# the colon is archaic syntax. we support it for wt2html, but we
# don't care that it roundtrips back to the modern syntax.
parsoid=wt2html,html2html
!! wikitext
#REDIRECT:[[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end
!! test
Whitespace in #REDIRECT with optional colon
!! options
# the colon and gratuitous whitespace is archaic syntax. we support
# it for wt2html, but we don't care that it roundtrips back to the
# modern syntax (without extra whitespace)
parsoid=wt2html,html2html
!! wikitext
#REDIRECT
:
[[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end
!! test
Piped link in #REDIRECT
!! options
# content after piped link is ignored. we support this syntax,
# but don't care that the piped link is lost when we roundtrip this.
parsoid=wt2html
!! wikitext
#REDIRECT [[Main Page|bar]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end
!! test
Redirect to category (T104502)
!! options
parsoid=wt2html,wt2wt
!! wikitext
#REDIRECT [[Category:Foo]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Category:Foo"/>
!! end
!! test
Redirect to category with URL encoding (T104502)
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[Category%3AFoo]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Category:Foo"/>
!! end
!! test
Redirect to category page
!! wikitext
#REDIRECT [[:Category:Foo]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Category:Foo"/>
!! end
!! test
Redirect to image page (1)
!! wikitext
#REDIRECT [[File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./File:Wiki.png"/>
!! end
!! test
Redirect to image page (2)
!! wikitext
#REDIRECT [[Image:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./File:Wiki.png" data-parsoid='{"src":"#REDIRECT ","a":{"href":"./File:Wiki.png"},"sa":{"href":"Image:Wiki.png"}}'/>
!! end
# html2wt disabled because wts serializes as "#REDIRECT [[:en:File:Wiki.png]]"
# Next test confirms this.
!! test
Redirect to language (1) (T104918)
!! options
parsoid=wt2html,wt2wt,html2html
!! wikitext
#REDIRECT [[en:File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="//en.wikipedia.org/wiki/File:Wiki.png"/>
!! end
!! test
Redirect to language (2) (T104918)
!! wikitext
#REDIRECT [[:en:File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="//en.wikipedia.org/wiki/File:Wiki.png"/>
!! end
!! test
Redirect to interwiki (T104918)
!! wikitext
#REDIRECT [[meatball:File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="http://www.usemod.com/cgi-bin/mb.pl?File:Wiki.png"/>
!! end
!! test
Non-English #REDIRECT
!! options
language=is
!! wikitext
#TILVÍSUN [[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page" data-parsoid='{"src":"#TILVÍSUN ","a":{"href":"./Main_Page"},"sa":{"href":"Main Page"}}'/>
!! end
!! test
Redirect syntax under text isn't considered a redirect
!! wikitext
some text
#redirect [[Main Page]]
!! html/parsoid
<p>some text</p>
<ol data-parsoid='{}'><li data-parsoid='{}'>redirect <a rel="mw:WikiLink" href="./Main_Page" title="Main Page" data-parsoid='{"stx":"simple","a":{"href":"./Main_Page"},"sa":{"href":"Main Page"}}'>Main Page</a></li></ol>
!! end
!! test
New redirect
!! options
parsoid=html2wt
!! html/parsoid
<p>Foo<link rel="mw:PageProp/redirect" href="./Foo"/></p>
!! wikitext
#REDIRECT [[Foo]]
Foo
!! end
## WikiContent::getRedirectTargetAndText() strips the redirect from text so,
## in practice, the legacy parser never sees it in the source. Hence, the
## difference in output.
!! test
Redirect followed by block on the same line
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[Main Page]]<!-- haha -->==hi==
!! html/php
<ol><li>REDIRECT <a href="/wiki/Main_Page" title="Main Page">Main Page</a>==hi==</li></ol>
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/><!-- haha --><h2 id="hi">hi</h2>
!! end
## WikiContent::getRedirectTargetAndText() strips the redirect from text so,
## in practice, the legacy parse never sees it from the source. Hence, the
## difference in output.
!! test
Redirect followed by horizontal rule on the same line
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[Main Page]]----
!! html/php
<ol><li>REDIRECT <a href="/wiki/Main_Page" title="Main Page">Main Page</a>----</li></ol>
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/><hr/>
!! end
!! test
Redirect followed by a newline
!! wikitext
#REDIRECT [[Main Page]]
A newline
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
<p>A newline</p>
!! end
!! test
Redirect followed by multiple newlines
!! wikitext
#REDIRECT [[Main Page]]
A newline
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
<p><br/>
A newline</p>
!! end
!! test
Drop duplicate redirects
!! options
parsoid=html2wt
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Foo"/>
<link rel="mw:PageProp/redirect" href="./Bar"/>
<link rel="mw:PageProp/redirect" href="./Baz"/>
!! wikitext
#REDIRECT [[Foo]]
!! end
!! test
Redirect containing double quotes and spaces
!! wikitext
#REDIRECT [[Cool "Gator"]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Cool_%22Gator%22" data-parsoid='{"src":"#REDIRECT ","a":{"href":"./Cool_%22Gator%22"},"sa":{"href":"Cool \"Gator\""}}'/>
!! end
!! test
Broken redirect shouldn't crash parsoid (T332242)
!! wikitext
<!-- Not a valid redirect nor a valid parser function -->
{{#REDIRECT [[]]}}
!! html/php
<p>{{#REDIRECT [[]]}}
</p>
!! html/parsoid+integrated
<!-- Not a valid redirect nor a valid parser function -->
<p><span typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#REDIRECT [[]]","function":"REDIRECT [[]]"},"params":{},"i":0}}]}'>{{#REDIRECT [[]]}}</span></p>
!! html/parsoid+standalone
<!-- Not a valid redirect nor a valid parser function -->
<p><span typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#REDIRECT [[]]","function":"REDIRECT [[]]"},"params":{},"i":0}}]}'>Parser function implementation for pf_REDIRECT [[]] missing in Parsoid.</span></p>
!! end
preTags-knownFailures.json 0000666 00000000003 15133510051 0011660 0 ustar 00 {}
redirects-knownFailures.json 0000666 00000000003 15133510051 0012237 0 ustar 00 {}
redirects-standalone-knownFailures.json 0000666 00000000210 15133510051 0014365 0 ustar 00 {
"Redirect followed by multiple newlines": {
"selser [0,2,0]": "#REDIRECT [[Main Page]]\n1kgt7nw\n\n\n\nA newline"
}
}
AnsiTermColorer.php 0000666 00000002615 15133510371 0010334 0 ustar 00 <?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Tests;
/**
 * Colorer for terminals that understand ANSI escape sequences.
 *
 * @ingroup Testing
 */
class AnsiTermColorer {
	/**
	 * Build the ANSI escape sequence that switches text attributes/color.
	 *
	 * The sequence is ESC "[" followed by an attribute prefix, the given
	 * code(s) and a closing "m". The prefix is "1;" when the global
	 * $wgCommandLineDarkBg is truthy and "0;" otherwise.
	 *
	 * @param string|int $color Semicolon-separated list of attribute/color codes
	 * @return string
	 */
	public function color( $color ) {
		global $wgCommandLineDarkBg;

		// Default prefix; replaced below when a dark background is configured.
		$attrPrefix = '0;';
		if ( $wgCommandLineDarkBg ) {
			$attrPrefix = '1;';
		}

		return "\x1b[" . $attrPrefix . $color . 'm';
	}

	/**
	 * Build the ANSI escape sequence that restores the default text
	 * attributes. This is simply color() invoked with code '0'.
	 *
	 * @return string
	 */
	public function reset() {
		$resetCode = '0';
		return $this->color( $resetCode );
	}
}
DummyTermColorer.php 0000666 00000002011 15133510371 0010523 0 ustar 00 <?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Tests;
/**
 * Colorer for terminals without color support; exposes the same methods as
 * AnsiTermColorer but never emits any escape sequence.
 *
 * @ingroup Testing
 */
class DummyTermColorer {
	/** Value returned wherever an escape sequence would otherwise be produced. */
	private const NO_ESCAPE = '';

	/**
	 * Accept a color request and ignore it.
	 *
	 * @param string|int $color Attribute/color codes (unused)
	 * @return string Always the empty string
	 */
	public function color( $color ) {
		return self::NO_ESCAPE;
	}

	/**
	 * Accept a reset request and ignore it.
	 *
	 * @return string Always the empty string
	 */
	public function reset() {
		return self::NO_ESCAPE;
	}
}
TagHooksTest.php 0000666 00000003420 15133511315 0007635 0 ustar 00 <?php
use MediaWiki\Title\Title;
use MediaWiki\User\User;
/**
* @group Database
* @group Parser
*
* @covers Parser
* @covers BlockLevelPass
* @covers StripState
*
* @covers Preprocessor_Hash
* @covers PPDStack_Hash
* @covers PPDStackElement_Hash
* @covers PPDPart_Hash
* @covers PPFrame_Hash
* @covers PPTemplateFrame_Hash
* @covers PPCustomFrame_Hash
* @covers PPNode_Hash_Tree
* @covers PPNode_Hash_Text
* @covers PPNode_Hash_Array
* @covers PPNode_Hash_Attr
*/
class TagHooksTest extends MediaWikiIntegrationTestCase {
	/**
	 * Tag names that testTagHooks() registers and then parses.
	 *
	 * @return array[] One single-element argument list per tag name
	 */
	public static function provideValidNames() {
		return [
			[ 'foo' ],
			[ 'foo-bar' ],
			[ 'foo_bar' ],
			[ 'FOO-BAR' ],
			[ 'foo bar' ],
		];
	}

	/**
	 * Tag names for which testBadTagHooks() expects setHook() to throw.
	 *
	 * @return array[] One single-element argument list per tag name
	 */
	public static function provideBadNames() {
		return [
			[ "foo<bar" ],
			[ "foo>bar" ],
			[ "foo\nbar" ],
			[ "foo\rbar" ],
		];
	}

	/**
	 * Build parser options from a fresh User object and the content language
	 * taken from the service container.
	 *
	 * @return ParserOptions
	 */
	private function getParserOptions() {
		$user = new User;
		$contentLanguage = $this->getServiceContainer()->getContentLanguage();

		return ParserOptions::newFromUserAndLang( $user, $contentLanguage );
	}

	/**
	 * Registers tagCallback() for the given tag, parses "Bar" wrapped in
	 * that tag and checks that the ROT13 form ("One") appears in the output.
	 *
	 * @dataProvider provideValidNames
	 */
	public function testTagHooks( $tag ) {
		$parser = $this->getServiceContainer()->getParserFactory()->create();
		$parser->setHook( $tag, [ $this, 'tagCallback' ] );

		$wikitext = "Foo<$tag>Bar</$tag>Baz";
		$title = Title::makeTitle( NS_MAIN, 'Test' );
		$parserOutput = $parser->parse( $wikitext, $title, $this->getParserOptions() );

		$actualHtml = $parserOutput->getText( [ 'unwrap' => true ] );
		$this->assertEquals( "<p>FooOneBaz\n</p>", $actualHtml );
	}

	/**
	 * Expects setHook() to throw InvalidArgumentException for the given name.
	 *
	 * @dataProvider provideBadNames
	 */
	public function testBadTagHooks( $tag ) {
		$parser = $this->getServiceContainer()->getParserFactory()->create();

		// The expectation must be declared before the call that should throw.
		$this->expectException( InvalidArgumentException::class );
		$parser->setHook( $tag, [ $this, 'tagCallback' ] );
	}

	/**
	 * Tag hook used by the tests: returns the tag content ROT13-encoded.
	 * The $params and $parser arguments are accepted but not used.
	 *
	 * @param string $text Content passed to the hook
	 * @param array $params Unused
	 * @param Parser $parser Unused
	 * @return string
	 */
	public function tagCallback( $text, $params, $parser ) {
		$encoded = str_rot13( $text );
		return $encoded;
	}
}