Viewing File: /home/omtekel/www/wp-content/upgrade/backup/parser.tar

CoreTagHooks.php000066600000017463151335016340007625 0ustar00<?php
/**
 * Tag hooks provided by MediaWiki core
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

use MediaWiki\Config\ServiceOptions;
use MediaWiki\Html\Html;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\Sanitizer;

/**
 * Various tag hooks, registered in every Parser
 *
 * Every hook receives the inner content of the tag (which may be null),
 * the tag's attributes and the Parser; some hooks additionally receive the
 * current PPFrame.
 *
 * @ingroup Parser
 */
class CoreTagHooks {

	/**
	 * Options that must be present in the ServiceOptions passed to register().
	 *
	 * @internal
	 */
	public const REGISTER_OPTIONS = [
		// See documentation for the corresponding config options
		MainConfigNames::RawHtml,
	];

	/**
	 * Register the core tag hooks on the given parser.
	 *
	 * The 'html' hook is only registered when the RawHtml option is enabled.
	 *
	 * @param Parser $parser
	 * @param ServiceOptions $options Must provide self::REGISTER_OPTIONS
	 *
	 * @return void
	 * @internal
	 */
	public static function register( Parser $parser, ServiceOptions $options ) {
		$options->assertRequiredOptions( self::REGISTER_OPTIONS );

		$parser->setHook( 'pre', [ __CLASS__, 'pre' ] );
		$parser->setHook( 'nowiki', [ __CLASS__, 'nowiki' ] );
		$parser->setHook( 'gallery', [ __CLASS__, 'gallery' ] );
		$parser->setHook( 'indicator', [ __CLASS__, 'indicator' ] );
		$parser->setHook( 'langconvert', [ __CLASS__, 'langconvert' ] );
		if ( $options->get( MainConfigNames::RawHtml ) ) {
			$parser->setHook( 'html', [ __CLASS__, 'html' ] );
		}
	}

	/**
	 * Core parser tag hook function for 'pre'.
	 * Text is treated roughly as 'nowiki' wrapped in an HTML 'pre' tag;
	 * valid HTML attributes are passed on.
	 *
	 * @param ?string $content Inner text of the tag; null is treated as ''
	 * @param array $attribs Raw tag attributes, validated for 'pre' before output
	 * @param Parser $parser
	 * @return string HTML
	 * @internal
	 */
	public static function pre( ?string $content, array $attribs, Parser $parser ): string {
		// Backwards-compatibility hack: unwrap <nowiki>...</nowiki> pairs,
		// keeping only their inner text.
		$content = StringUtils::delimiterReplace( '<nowiki>', '</nowiki>', '$1', $content ?? '', 'i' );

		$attribs = Sanitizer::validateTagAttributes( $attribs, 'pre' );
		// We need to let both '"' and '&' through,
		// for strip markers and entities respectively.
		$content = str_replace(
			[ '>', '<' ],
			[ '&gt;', '&lt;' ],
			$content
		);
		// @phan-suppress-next-line SecurityCheck-XSS Ad-hoc escaping above.
		return Html::rawElement( 'pre', $attribs, $content );
	}

	/**
	 * Core parser tag hook function for 'html', used only when
	 * $wgRawHtml is enabled.
	 *
	 * This is potentially unsafe and should be used only in very careful
	 * circumstances, as the contents are emitted as raw HTML.
	 *
	 * Uses undocumented extended tag hook return values, introduced in r61913.
	 *
	 * @suppress SecurityCheck-XSS
	 * @param ?string $content Inner text of the tag; null is treated as ''
	 * @param array $attributes
	 * @param Parser $parser
	 * @return array|string Output of tag hook: the raw content with a
	 *   'nowiki' marker type when unsafe raw HTML is allowed by the parser
	 *   options, otherwise an error <span>
	 * @throws UnexpectedValueException If the RawHtml config option is disabled
	 * @internal
	 */
	public static function html( ?string $content, array $attributes, Parser $parser ) {
		$rawHtml = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::RawHtml );
		if ( !$rawHtml ) {
			// The hook is only registered when RawHtml is enabled, so
			// getting here means something is inconsistent.
			throw new UnexpectedValueException( '<html> extension tag encountered unexpectedly' );
		}

		if ( $parser->getOptions()->getAllowUnsafeRawHtml() ) {
			return [ $content ?? '', 'markerType' => 'nowiki' ];
		}

		// In a system message where raw html is
		// not allowed (but it is allowed in other
		// contexts).
		return Html::rawElement(
			'span',
			[ 'class' => 'error' ],
			// Using ->escaped() not ->parse() as
			// a paranoia measure against a loop.
			$parser->msg( 'rawhtml-notallowed' )->escaped()
		);
	}

	/**
	 * Core parser tag hook function for 'nowiki'. Text within this section
	 * gets interpreted as a string of text with HTML-compatible character
	 * references, and wiki markup within it will not be expanded.
	 *
	 * Uses undocumented extended tag hook return values, introduced in r61913.
	 *
	 * Uses custom html escaping which phan-taint-check won't recognize
	 * hence we suppress the error.
	 * @suppress SecurityCheck-XSS
	 *
	 * @param ?string $content Inner text of the tag; null is treated as ''
	 * @param array $attributes
	 * @param Parser $parser
	 * @return array Escaped content plus a 'markerType' of 'nowiki'
	 * @internal
	 */
	public static function nowiki( ?string $content, array $attributes, Parser $parser ): array {
		$content = strtr( $content ?? '', [
			// lang converter
			'-{' => '-&#123;',
			'}-' => '&#125;-',
			// html tags
			'<' => '&lt;',
			'>' => '&gt;'
			// Note: Both '"' and '&' are not converted.
			// This allows strip markers and entities through.
		] );
		return [ $content, 'markerType' => 'nowiki' ];
	}

	/**
	 * Core parser tag hook function for 'gallery'.
	 *
	 * Renders a thumbnail list of the given images, with optional captions.
	 * Full syntax documented on the wiki:
	 *
	 *   https://www.mediawiki.org/wiki/Help:Images#Gallery_syntax
	 *
	 * @todo break Parser::renderImageGallery out here too.
	 *
	 * @param ?string $content Inner text of the tag; null is treated as ''
	 * @param array $attributes
	 * @param Parser $parser
	 * @return string HTML
	 * @internal
	 */
	public static function gallery( ?string $content, array $attributes, Parser $parser ): string {
		return $parser->renderImageGallery( $content ?? '', $attributes );
	}

	/**
	 * XML-style tag for page status indicators: icons (or short text snippets) usually displayed in
	 * the top-right corner of the page, outside of the main content.
	 *
	 * The parsed content is stored on the ParserOutput under the (trimmed)
	 * 'name' attribute; nothing is emitted inline on success.
	 *
	 * @param ?string $content Inner text of the tag; null is treated as ''
	 * @param array $attributes Must contain a non-blank 'name'
	 * @param Parser $parser
	 * @param PPFrame $frame
	 * @return string Empty string on success, or an error <span> when the
	 *   'name' attribute is missing or blank
	 * @since 1.25
	 * @internal
	 */
	public static function indicator( ?string $content, array $attributes, Parser $parser, PPFrame $frame ): string {
		$name = trim( $attributes['name'] ?? '' );
		if ( $name === '' ) {
			return Html::rawElement(
				'span',
				[ 'class' => 'error' ],
				$parser->msg( 'invalid-indicator-name' )->parse()
			);
		}

		$parser->getOutput()->setIndicator(
			$name,
			Parser::stripOuterParagraph( $parser->recursiveTagParseFully( $content ?? '', $frame ) )
		);

		return '';
	}

	/**
	 * Returns content converted into the requested language variant, using LanguageConverter.
	 *
	 * @param ?string $content Inner text of the tag; null is treated as ''
	 * @param array $attributes Expected to carry 'from' and 'to' variant codes
	 * @param Parser $parser
	 * @param PPFrame $frame
	 * @return string Converted HTML, or an error <span> when the attributes
	 *   do not describe a usable conversion
	 * @since 1.36
	 * @internal
	 */
	public static function langconvert( ?string $content, array $attributes, Parser $parser, PPFrame $frame ): string {
		$target = self::resolveConversionTarget( $attributes );
		if ( $target !== null ) {
			[ $converter, $toVariant ] = $target;
			return $converter->autoConvert(
				$parser->recursiveTagParse( $content ?? '', $frame ),
				$toVariant
			);
		}

		return Html::rawElement(
			'span',
			[ 'class' => 'error' ],
			$parser->msg( 'invalid-langconvert-attrs' )->parse()
		);
	}

	/**
	 * Work out the language converter and target variant requested by the
	 * 'from' and 'to' attributes of a langconvert tag.
	 *
	 * @param array $attributes Tag attributes
	 * @return ?array Two-element list [ converter, target variant ], or null
	 *   when the attributes are missing or do not name a valid conversion
	 */
	private static function resolveConversionTarget( array $attributes ): ?array {
		if ( !isset( $attributes['from'] ) || !isset( $attributes['to'] ) ) {
			return null;
		}

		$fromArg = trim( $attributes['from'] );
		$toArg = trim( $attributes['to'] );

		// Both codes must share the same base language, i.e. the part
		// before the first '-'.
		$fromLangCode = explode( '-', $fromArg )[0];
		if ( !$fromLangCode || $fromLangCode !== explode( '-', $toArg )[0] ) {
			return null;
		}

		$services = MediaWikiServices::getInstance();
		$lang = $services->getLanguageFactory()->getLanguage( $fromLangCode );
		$converter = $services->getLanguageConverterFactory()->getLanguageConverter( $lang );

		# ensure that variants are available,
		# and the variants are valid BCP 47 codes
		if ( !$converter->hasVariants()
			|| strcasecmp( $fromArg, LanguageCode::bcp47( $fromArg ) ) !== 0
			|| strcasecmp( $toArg, LanguageCode::bcp47( $toArg ) ) !== 0
		) {
			return null;
		}

		$toVariant = $converter->validateVariant( $toArg );
		if ( !$toVariant ) {
			return null;
		}

		return [ $converter, $toVariant ];
	}

}
RemexRemoveTagHandler.php000066600000012330151335016340011451 0ustar00<?php

namespace MediaWiki\Parser;

use Wikimedia\RemexHtml\Tokenizer\Attributes;
use Wikimedia\RemexHtml\Tokenizer\PlainAttributes;
use Wikimedia\RemexHtml\Tokenizer\RelayTokenHandler;
use Wikimedia\RemexHtml\Tokenizer\TokenHandler;

/**
 * Helper class for Sanitizer::removeSomeTags().
 *
 * Sits between the tokenizer and the next token handler: allowed tags are
 * relayed (with sanitized attributes), anything else is passed on as plain
 * text taken from the original source, and comments are dropped.
 *
 * @internal
 */
class RemexRemoveTagHandler extends RelayTokenHandler {
	/**
	 * @var string The original HTML source string (used for fallback text
	 * when rejecting an HTML tag).
	 */
	private $source;

	/**
	 * @var array<string,true> Set of HTML tags which can be self-closed.
	 */
	private $htmlsingle;

	/**
	 * @var array<string,true> Self-closed tags which are on $htmlsingle
	 * but not on $htmlsingleonly will be emitted as an empty element.
	 */
	private $htmlsingleonly;

	/**
	 * @var array<string,true> Set of allowed HTML open/close tags.
	 */
	private $htmlelements;

	/**
	 * @var ?callable(Attributes,mixed...):Attributes Callback to mutate or
	 * sanitize attributes.
	 */
	private $attrCallback;

	/**
	 * @var ?array $args Optional extra arguments to provide to the
	 * $attrCallback.
	 */
	private $callbackArgs;

	/**
	 * @param TokenHandler $nextHandler Handler to relay accepted tokens.
	 * @param string $source Input source string.
	 * @param array $tagData Information about allowed/rejected tags; the
	 *   'htmlsingle', 'htmlsingleonly' and 'htmlelements' keys are read.
	 * @param ?callable $attrCallback Attribute handler callback.
	 *   The full signature is ?callable(Attributes,mixed...):Attributes
	 * @param ?array $callbackArgs Optional arguments to attribute handler;
	 *   null is stored as an empty list.
	 */
	public function __construct(
		TokenHandler $nextHandler,
		string $source,
		array $tagData,
		?callable $attrCallback,
		?array $callbackArgs
	) {
		parent::__construct( $nextHandler );
		$this->source = $source;
		$this->attrCallback = $attrCallback;
		$this->callbackArgs = $callbackArgs ?? [];
		$this->htmlelements = $tagData['htmlelements'];
		$this->htmlsingle = $tagData['htmlsingle'];
		$this->htmlsingleonly = $tagData['htmlsingleonly'];
	}

	/**
	 * @inheritDoc
	 */
	public function comment( $text, $sourceStart, $sourceLength ) {
		// Comments are intentionally swallowed: nothing is relayed.
	}

	/**
	 * Decide whether a tag may be present given the attributes it carries.
	 *
	 * This does NOT validate the attributes, nor the tag itself. It only
	 * covers the special case of tags that are acceptable in content solely
	 * when specific attributes are set: <meta> and <link>.
	 *
	 * @param string $element Lower-cased tag name
	 * @param Attributes $attrs
	 * @return bool False when a <meta>/<link> lacks its required attributes
	 *
	 * @see Sanitizer::validateTag()
	 */
	private static function validateTag( string $element, Attributes $attrs ): bool {
		$isMeta = $element == 'meta';
		$isLink = $element == 'link';
		if ( !$isMeta && !$isLink ) {
			// No attribute-dependent restrictions for any other tag.
			return true;
		}

		$values = $attrs->getValues();
		if ( !isset( $values['itemprop'] ) ) {
			// <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
			return false;
		}

		// <meta> must have a content="" for the itemprop,
		// <link> must have an associated href="".
		$companion = $isMeta ? 'content' : 'href';
		return isset( $values[$companion] );
	}

	/**
	 * Pass a slice of the original source on to the next handler as text.
	 *
	 * @param int $sourceStart Offset of the slice in the source string
	 * @param int $sourceLength Length of the slice
	 */
	private function relayAsText( $sourceStart, $sourceLength ) {
		$this->nextHandler->characters(
			$this->source, $sourceStart, $sourceLength, $sourceStart, $sourceLength
		);
	}

	/**
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		// A start tag is either relayed to the next stage (with sanitized
		// attributes) or re-emitted as raw text.
		$tag = strtolower( $name );

		if ( !isset( $this->htmlelements[$tag] ) ) {
			// Not an allowed element at all.
			$this->relayAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $this->attrCallback ) {
			$attrs = ( $this->attrCallback )( $attrs, ...$this->callbackArgs );
		}

		$maySelfClose = isset( $this->htmlsingle[$tag] ) || isset( $this->htmlsingleonly[$tag] );
		if ( $selfClose && !$maySelfClose ) {
			// Remove the self-closing slash, to be consistent with
			// HTML5 semantics. T134423
			$selfClose = false;
		}

		// The tag check looks at the attributes as returned by the callback,
		// before they are run through the attribute sanitizer.
		$accepted = self::validateTag( $tag, $attrs );
		$cleanAttrs = new PlainAttributes(
			Sanitizer::validateTagAttributes( $attrs->getValues(), $tag )
		);

		if ( !$accepted ) {
			$this->relayAsText( $sourceStart, $sourceLength );
			return;
		}

		if ( $selfClose && !isset( $this->htmlsingleonly[$tag] ) ) {
			// Interpret self-closing tags as empty tags even when
			// HTML5 would interpret them as start tags.  Such input
			// is commonly seen on Wikimedia wikis with this intention.
			$this->nextHandler->startTag( $name, $cleanAttrs, false, $sourceStart, $sourceLength );
			$this->nextHandler->endTag( $name, $sourceStart + $sourceLength, 0 );
			return;
		}

		$this->nextHandler->startTag( $name, $cleanAttrs, $selfClose, $sourceStart, $sourceLength );
	}

	/**
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		// An end tag is relayed when its element is allowed, otherwise it
		// is re-emitted as raw text.
		if ( !isset( $this->htmlelements[strtolower( $name )] ) ) {
			$this->relayAsText( $sourceStart, $sourceLength );
			return;
		}

		$this->nextHandler->endTag( $name, $sourceStart, $sourceLength );
	}

}
RemexStripTagHandler.php000066600000006453151335016340011326 0ustar00<?php

namespace MediaWiki\Parser;

use Wikimedia\RemexHtml\Tokenizer\Attributes;
use Wikimedia\RemexHtml\Tokenizer\NullTokenHandler;

/**
 * Helper class for Sanitizer::stripAllTags().
 *
 * Accumulates the text passed to characters() into a single string, skipping
 * text that appears inside <style> or <script>, and inserting a space at the
 * start and end of block-level tags.
 *
 * @internal
 */
class RemexStripTagHandler extends NullTokenHandler {
	// Per https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
	// retrieved on sept 12, 2018. <br> is not block level but was added anyways.
	// The following is a complete list of all HTML block level elements
	// (although "block-level" is not technically defined for elements that are
	// new in HTML5).
	// Structured as tag => true to allow O(1) membership test.
	private const BLOCK_LEVEL_TAGS = [
		'address' => true,
		'article' => true,
		'aside' => true,
		'blockquote' => true,
		'br' => true,
		'canvas' => true,
		'dd' => true,
		'div' => true,
		'dl' => true,
		'dt' => true,
		'fieldset' => true,
		'figcaption' => true,
		'figure' => true,
		'footer' => true,
		'form' => true,
		'h1' => true,
		'h2' => true,
		'h3' => true,
		'h4' => true,
		'h5' => true,
		'h6' => true,
		'header' => true,
		'hgroup' => true,
		'hr' => true,
		'li' => true,
		'main' => true,
		'nav' => true,
		'noscript' => true,
		'ol' => true,
		'output' => true,
		'p' => true,
		'pre' => true,
		'section' => true,
		'table' => true,
		'td' => true,
		'tfoot' => true,
		'th' => true,
		'tr' => true,
		'ul' => true,
		'video' => true,
	];

	// Tags whose text content is left out of the result.
	// Structured as tag => true, like BLOCK_LEVEL_TAGS.
	private const NON_VISIBLE_TAGS = [
		'style' => true,
		'script' => true,
	];

	/** @var bool True between the start and end of a non-visible tag */
	private $insideNonVisibleTag = false;

	/** @var string Text collected so far */
	private $text = '';

	/**
	 * @return string The text accumulated from all tokens handled so far
	 */
	public function getResult() {
		return $this->text;
	}

	/**
	 * Append a run of character data, unless it sits inside a non-visible tag.
	 *
	 * @inheritDoc
	 */
	public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
		if ( $this->insideNonVisibleTag ) {
			return;
		}
		$this->text .= substr( $text, $start, $length );
	}

	/**
	 * @inheritDoc
	 */
	public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
		$this->handleTagBoundary( $name, true );
	}

	/**
	 * @inheritDoc
	 */
	public function endTag( $name, $sourceStart, $sourceLength ) {
		$this->handleTagBoundary( $name, false );
	}

	/**
	 * Shared bookkeeping for start and end tags.
	 *
	 * @param string $name Tag name as received from the tokenizer
	 * @param bool $opening True for a start tag, false for an end tag
	 */
	private function handleTagBoundary( $name, bool $opening ) {
		if ( $this->isNonVisibleTag( $name ) ) {
			$this->insideNonVisibleTag = $opening;
		}
		// Inject whitespace for typical block-level tags to
		// prevent merging unrelated<br>words.
		if ( $this->isBlockLevelTag( $name ) ) {
			$this->text .= ' ';
		}
	}

	/**
	 * Detect block level tags. Of course css can make anything a block
	 * level tag, but this is still better than nothing.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when tag is an html block level element
	 */
	private function isBlockLevelTag( $tagName ) {
		$normalized = strtolower( trim( $tagName ) );
		return isset( self::BLOCK_LEVEL_TAGS[$normalized] );
	}

	/**
	 * Detect block tags which by default are non-visible items.
	 * Of course css can make anything non-visible,
	 * but this is still better than nothing.
	 *
	 * We use this primarily to hide TemplateStyles
	 * from output in notifications/emails etc.
	 *
	 * @param string $tagName HTML tag name
	 * @return bool True when tag is a html element which should be filtered out
	 */
	private function isNonVisibleTag( $tagName ) {
		$normalized = strtolower( trim( $tagName ) );
		return isset( self::NON_VISIBLE_TAGS[$normalized] );
	}

}
preTags-standalone-knownFailures.json000066600000004305151335100510014017 0ustar00{
    "<nowiki> inside <pre> (T15238)": {
        "html2html": "<pre data-parsoid='{\"dsr\":[0,15,1,0]}'><span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&amp;lt;\",\"srcContent\":\"&lt;\",\"dsr\":[1,5,null,null]}'>&lt;</span>nowiki<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&amp;gt;\",\"srcContent\":\">\",\"dsr\":[11,15,null,null]}'>></span></pre>\n<p data-parsoid='{\"dsr\":[16,18,0,0]}'>\n <br data-parsoid='{\"dsr\":[18,18,0,0]}'/></p>\n\n<pre data-parsoid='{\"dsr\":[20,53,1,0]}'><span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&amp;lt;\",\"srcContent\":\"&lt;\",\"dsr\":[21,25,null,null]}'>&lt;</span>nowiki<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&amp;gt;\",\"srcContent\":\">\",\"dsr\":[31,35,null,null]}'>></span>Foo<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&amp;lt;\",\"srcContent\":\"&lt;\",\"dsr\":[38,42,null,null]}'>&lt;</span>/nowiki<span typeof=\"mw:Entity\" data-parsoid='{\"src\":\"&amp;gt;\",\"srcContent\":\">\",\"dsr\":[49,53,null,null]}'>></span></pre>\n",
        "html2wt": " &lt;nowiki&gt;\n\n \n\n &lt;nowiki&gt;Foo&lt;/nowiki&gt;\n"
    },
    "<pre> with attributes (T5202)": {
        "html2html": "<pre data-parsoid='{\"dsr\":[0,24,1,0]}'>Bluescreen of WikiDeath</pre>\n",
        "html2wt": " Bluescreen of WikiDeath\n"
    },
    "<pre> with forbidden attribute (T5202)": {
        "html2html": "<pre data-parsoid='{\"dsr\":[0,22,1,0]}'>Narrow screen goodies</pre>\n",
        "html2wt": " Narrow screen goodies\n"
    },
    "<pre> with forbidden attribute values (T5202)": {
        "html2html": "<pre data-parsoid='{\"dsr\":[0,22,1,0]}'>Narrow screen goodies</pre>\n",
        "html2wt": " Narrow screen goodies\n"
    },
    "<pre> with width attribute (T5202)": {
        "html2html": "<pre data-parsoid='{\"dsr\":[0,22,1,0]}'>Narrow screen goodies</pre>\n",
        "html2wt": " Narrow screen goodies\n"
    },
    "Entities inside <pre>": {
        "html2wt": " <\n"
    },
    "HTML pre followed by indent-pre": {
        "html2wt": " foo\n\n bar\n"
    },
    "HTML-pre: 2: indented text": {
        "html2wt": "  foo\n"
    },
    "Less than in attribute position": {
        "selser [0,2,[2]]": "<pre <pre>123</pre>6njtnz\n\n\n\n<div <div>zghs00123</div>"
    }
}
preTags.txt000066600000020430151335100510006707 0ustar00# The parsoid-compatible option below is only relevant when we are running
# parser tests in integrated mode with Parsoid. This option is ignored
# when this test file is run with Parsoid in standalone mode.
!! options
parsoid-compatible=wt2html,wt2wt
version=2
!! end

!! article
Template:1x
!! text
{{{1}}}
!! endarticle

!! test
<pre> with attributes (T5202)
!! wikitext
<pre style="background: blue; color:white">Bluescreen of WikiDeath</pre>
!! html
<pre style="background: blue; color:white">Bluescreen of WikiDeath</pre>
!! end

!! test
<pre> with width attribute (T5202)
!! wikitext
<pre width="8">Narrow screen goodies</pre>
!! html
<pre width="8">Narrow screen goodies</pre>
!! end

!! test
<pre> with forbidden attribute (T5202)
!! wikitext
<pre width="8" onmouseover="alert(document.cookie)">Narrow screen goodies</pre>
!! html
<pre width="8">Narrow screen goodies</pre>
!! end

!! test
Entities inside <pre>
!! wikitext
<pre>&lt;</pre>
!! html
<pre>&lt;</pre>
!! end

!! test
<pre> with forbidden attribute values (T5202)
!! wikitext
<pre width="8" style="border-width: expression(alert(document.cookie))">Narrow screen goodies</pre>
!! html
<pre width="8" style="/* insecure input */">Narrow screen goodies</pre>
!! end

!! test
<nowiki> inside <pre> (T15238)
!! wikitext
<pre>
<nowiki>
</pre>
<pre>
<nowiki></nowiki>
</pre>
<pre><nowiki><nowiki></nowiki>Foo<nowiki></nowiki></nowiki></pre>
!! html
<pre>&lt;nowiki&gt;
</pre>
<pre>

</pre>
<pre>&lt;nowiki&gt;Foo&lt;/nowiki&gt;</pre>
!! end

!! test
<nowiki> inside of #tag:pre
!! wikitext
{{#tag:pre|Foo <nowiki>&rarr;bar</nowiki>}}
!! html/php
<pre>Foo &#8594;bar</pre>
!! html/parsoid+standalone
<pre about="#mwt1" typeof="mw:Transclusion" data-parsoid='{"pi":[[{"k":"1"}]]}' data-mw='{"parts":[{"template":{"target":{"wt":"#tag:pre","function":"tag"},"params":{"1":{"wt":"Foo &lt;nowiki>&amp;rarr;bar&lt;/nowiki>"}},"i":0}}]}'>Foo <span typeof="mw:Entity">→</span>bar</pre>
!! html/parsoid+integrated
<pre about="#mwt1" typeof="mw:Extension/pre mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#tag:pre","function":"tag"},"params":{"1":{"wt":"Foo &lt;nowiki>&amp;rarr;bar&lt;/nowiki>"}},"i":0}}]}'>Foo →bar</pre>
!! end

## Don't expect this to rt, Parsoid drops the unmatched closing pre tags that
## aren't enclosed in nowikis.
!! test
<nowiki> and <pre> preference (first one wins)
!! options
parsoid=wt2html
!! wikitext
<pre>
<nowiki>
</pre>
</nowiki>
</pre>

<nowiki>
<pre>
<nowiki>
</pre>
</nowiki>
</pre>

!! html/php
<pre>&lt;nowiki&gt;
</pre>
<p>&lt;/nowiki&gt;
</p>

<p>
&lt;pre&gt;
&lt;nowiki&gt;
&lt;/pre&gt;

</p>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n&lt;nowiki>\n"}}'>&lt;nowiki>
</pre>
<p>&lt;/nowiki></p>


<p><span typeof="mw:Nowiki">
&lt;pre>
&lt;nowiki>
&lt;/pre>
</span></p>
!! end

!! test
</pre> inside nowiki
!! wikitext
<nowiki></pre></nowiki>
!! html
<p>&lt;/pre&gt;
</p>
!! end

!! test
Empty pre; pre inside other HTML tags (T56946)
!! wikitext
a

<div><pre>
foo
</pre></div>
<pre></pre>
!! html/php
<p>a
</p>
<div><pre>foo
</pre></div>
<pre></pre>
!! html/parsoid
<p>a</p>

<div data-parsoid='{"stx":"html"}'><pre typeof="mw:Extension/pre" about="#mwt2" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\nfoo\n"}}'>foo
</pre></div>
<pre typeof="mw:Extension/pre" about="#mwt4" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":""}}'></pre>
!! end

!! test
HTML pre followed by indent-pre
!! wikitext
<pre>foo</pre>
 bar
!! html
<pre>foo</pre>
<pre>bar
</pre>
!! end

!! test
Block tag pre
!! wikitext
<p><pre>foo</pre></p>
!! html/php
<p class="mw-empty-elt"></p><pre>foo</pre><p class="mw-empty-elt"></p>
!! html/parsoid
<p class='mw-empty-elt' data-parsoid='{"stx":"html","autoInsertedEnd":true}'></p><pre typeof="mw:Extension/pre" about="#mwt2" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"foo"}}'>foo</pre><p class='mw-empty-elt' data-parsoid='{"autoInsertedStart":true,"stx":"html"}'></p>
!! end

## Hmm, should Parsoid rt this?
!! test
Pres with newline attributes
!! options
parsoid=wt2html,html2html
!! wikitext
<pre class="one
two">hi</pre>
!! html/php
<pre class="one two">hi</pre>
!! html/parsoid
<pre class="one two" typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{"class":"one two"},"body":{"extsrc":"hi"}}'>hi</pre>
!! end

!! test
Things that look like <pre> tags aren't treated as such
!! wikitext
Barack Obama <President> of the United States
<President></President>
!! html
<p>Barack Obama &lt;President&gt; of the United States
&lt;President&gt;&lt;/President&gt;
</p>
!! end

## Remex doesn't account for fostered content.
## The difference between Parsoid and the PHP parser can be attributed to core
## commit 674e8388cba and 710618f89af in Parsoid's repo.  Parsoid doesn't
## tokenize unmatched extension tags that shadow html tags as strings to ease
## an escaping mechanism.  See the comment in `maybeExtensionTag`.
!! test
Handle broken pre-like tags (T66025)
!! options
parsoid=wt2html
!! wikitext
{{1x|<pre <pre>x</pre>}}

<table><pre </table>
!! html/php
<pre>x</pre>
&lt;pre <table></table>
!! html/parsoid
<pre typeof="mw:Extension/pre mw:Transclusion" about="#mwt2" data-parsoid='{"stx":"html","a":{"&lt;pre":null},"sa":{"&lt;pre":""},"pi":[[{"k":"1"}]]}' data-mw='{"parts":[{"template":{"target":{"wt":"1x","href":"./Template:1x"},"params":{"1":{"wt":"&lt;pre &lt;pre>x&lt;/pre>"}},"i":0}}]}'>x</pre>

<pre data-parsoid='{"stx":"html","src":"&lt;pre &lt;/table>","tagWidths":[13,0],"a":{"&lt;":null,"table":null},"sa":{"&lt;":"","table":""},"fostered":true,"autoInsertedEnd":true}'></pre><table data-parsoid='{"stx":"html","autoInsertedEnd":true}'></table>
!! end

## Similar to the above, but shows the difference between extension and html tags
!! test
Less than in attribute position
!! wikitext
<pre <pre>123</pre>

<div <div>123</div>
!! html/php
<pre>123</pre><p>
&lt;div </p><div>123</div>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{"&lt;pre":""},"body":{"extsrc":"123"}}'>123</pre><p>

&lt;div </p><div>123</div>
!! end

!! test
Parsoid: handle pre with space after attribute
!! options
parsoid=wt2html
!! wikitext
<pre style="width:50%;" >{{1x|foo}}</pre>
!! html/php
<pre style="width:50%;">{{1x|foo}}</pre>
!! html/parsoid
<pre style="width:50%;" typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{"style":"width:50%;"},"body":{"extsrc":"{{1x|foo}}"}}'>{{1x|foo}}</pre>
!! end

!! test
Self-closed pre
!! wikitext
<pre />
!! html/php
<pre></pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{}}'></pre>
!! end

###
### HTML-pre (some to spec PHP parser behavior and some Parsoid-RT-centric)
###

!!test
HTML-pre: 1. embedded newlines
!! wikitext
<pre>foo</pre>

<pre>
foo
</pre>

<pre>

foo
</pre>

<pre>


foo
</pre>
!! html/php
<pre>foo</pre>
<pre>foo
</pre>
<pre>

foo
</pre>
<pre>


foo
</pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"foo"}}'>foo</pre>

<pre typeof="mw:Extension/pre" about="#mwt4" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\nfoo\n"}}'>foo
</pre>

<pre typeof="mw:Extension/pre" about="#mwt6" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n\nfoo\n"}}'>

foo
</pre>

<pre typeof="mw:Extension/pre" about="#mwt8" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n\n\nfoo\n"}}'>


foo
</pre>
!!end

!! test
HTML-pre: big spaces
!! wikitext
<pre>




haha




haha




</pre>
!! html/php
<pre>




haha




haha




</pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-parsoid='{"stx":"html"}' data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n\n\n\n\nhaha\n\n\n\n\nhaha\n\n\n\n\n"}}'>




haha




haha




</pre>
!! end

!!test
HTML-pre: 2: indented text
!! wikitext
<pre>
 foo
</pre>
!! html
<pre> foo
</pre>
!!end

!!test
HTML-pre: 3: other wikitext
!! wikitext
<pre>
* foo
# bar
= no-h =
'' no-italic ''
[[ NoLink ]]
</pre>
!! html/php
<pre>* foo
# bar
= no-h =
'' no-italic ''
[[ NoLink ]]
</pre>
!! html/parsoid
<pre typeof="mw:Extension/pre" about="#mwt2" data-mw='{"name":"pre","attrs":{},"body":{"extsrc":"\n* foo\n# bar\n= no-h =\n&#39;&#39; no-italic &#39;&#39;\n[[ NoLink ]]\n"}}'>* foo
# bar
= no-h =
'' no-italic ''
[[ NoLink ]]
</pre>
!!end
redirects.txt000066600000017577151335100510007310 0ustar00# The parsoid-compatible option below is only relevant when we are running
# parser tests in integrated mode with Parsoid. This option is ignored
# when this test file is run with Parsoid in standalone mode.
!! options
parsoid-compatible=wt2html,wt2wt
version=2
!! end

!! article
Main Page
!! text
blah blah
!! endarticle

!! article
Template:1x
!! text
{{{1}}}
!! endarticle

### Redirects, Parsoid-only

!! test
1. Simple redirect to page
!! wikitext
#REDIRECT [[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end

!! test
2. Other redirect variants
!! wikitext
#REDIRECT [[Main_Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page" data-parsoid='{"src":"#REDIRECT ","a":{"href":"./Main_Page"},"sa":{"href":"Main_Page"}}'/>
!! end

# Not a valid redirect in PHP (although perhaps it was, once upon a time)
# This tests the Parsoid bail-out code.
!! test
3. Other redirect variants
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[<nowiki>[[Bar]]</nowiki>]]
!! html/parsoid
<ol><li>REDIRECT [[<span typeof="mw:Nowiki">[[Bar]]</span>]]</li></ol>
!! end

!! test
4. Redirect to a templated destination
!! wikitext
#REDIRECT [[{{1x|Foo}}bar]]
!! html/parsoid
<link about="#mwt2" typeof="mw:ExpandedAttrs" rel="mw:PageProp/redirect" href="./Foobar" data-parsoid='{"a":{"href":"./Foobar"},"sa":{"href":"{{1x|Foo}}bar"}}' data-mw='{"attribs":[[{"txt":"href"},{"html":"&lt;span about=\"#mwt1\" typeof=\"mw:Transclusion\" data-parsoid=&#39;{\"pi\":[[{\"k\":\"1\"}]]}&#39; data-mw=&#39;{\"parts\":[{\"template\":{\"target\":{\"wt\":\"1x\",\"href\":\"./Template:1x\"},\"params\":{\"1\":{\"wt\":\"Foo\"}},\"i\":0}}]}&#39;>Foo&lt;/span>bar"}]]}'/>
!! end

!! test
Empty redirect
!! options
parsoid=wt2html,wt2wt
!! wikitext
#REDIRECT [[]]
!! html/parsoid
<ol>
<li>REDIRECT [[]]</li></ol>
!! end

!! test
Optional colon in #REDIRECT
!! options
# the colon is archaic syntax.  we support it for wt2html, but we
# don't care that it roundtrips back to the modern syntax.
parsoid=wt2html,html2html
!! wikitext
#REDIRECT:[[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end

!! test
Whitespace in #REDIRECT with optional colon
!! options
# the colon and gratuitous whitespace is archaic syntax.  we support
# it for wt2html, but we don't care that it roundtrips back to the
# modern syntax (without extra whitespace)
parsoid=wt2html,html2html
!! wikitext
 
 #REDIRECT 
: 
[[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end

!! test
Piped link in #REDIRECT
!! options
# content after piped link is ignored.  we support this syntax,
# but don't care that the piped link is lost when we roundtrip this.
parsoid=wt2html
!! wikitext
#REDIRECT [[Main Page|bar]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
!! end

!! test
Redirect to category (T104502)
!! options
parsoid=wt2html,wt2wt
!! wikitext
#REDIRECT [[Category:Foo]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Category:Foo"/>
!! end

!! test
Redirect to category with URL encoding (T104502)
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[Category%3AFoo]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Category:Foo"/>
!! end

!! test
Redirect to category page
!! wikitext
#REDIRECT [[:Category:Foo]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Category:Foo"/>
!! end

!! test
Redirect to image page (1)
!! wikitext
#REDIRECT [[File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./File:Wiki.png"/>
!! end

!! test
Redirect to image page (2)
!! wikitext
#REDIRECT [[Image:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./File:Wiki.png"  data-parsoid='{"src":"#REDIRECT ","a":{"href":"./File:Wiki.png"},"sa":{"href":"Image:Wiki.png"}}'/>
!! end

# html2wt disabled because wts serializes as "#REDIRECT [[:en:File:Wiki.png]]"
# Next test confirms this.
!! test
Redirect to language (1) (T104918)
!! options
parsoid=wt2html,wt2wt,html2html
!! wikitext
#REDIRECT [[en:File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="//en.wikipedia.org/wiki/File:Wiki.png"/>
!! end

!! test
Redirect to language (2) (T104918)
!! wikitext
#REDIRECT [[:en:File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="//en.wikipedia.org/wiki/File:Wiki.png"/>
!! end

!! test
Redirect to interwiki (T104918)
!! wikitext
#REDIRECT [[meatball:File:Wiki.png]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="http://www.usemod.com/cgi-bin/mb.pl?File:Wiki.png"/>
!! end

!! test
Non-English #REDIRECT
!! options
language=is
!! wikitext
#TILVÍSUN [[Main Page]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page" data-parsoid='{"src":"#TILVÍSUN ","a":{"href":"./Main_Page"},"sa":{"href":"Main Page"}}'/>
!! end

!! test
Redirect syntax under text isn't considered a redirect
!! wikitext
some text

#redirect [[Main Page]]
!! html/parsoid
<p>some text</p>

<ol data-parsoid='{}'><li data-parsoid='{}'>redirect <a rel="mw:WikiLink" href="./Main_Page" title="Main Page" data-parsoid='{"stx":"simple","a":{"href":"./Main_Page"},"sa":{"href":"Main Page"}}'>Main Page</a></li></ol>
!! end

!! test
New redirect
!! options
parsoid=html2wt
!! html/parsoid
<p>Foo<link rel="mw:PageProp/redirect" href="./Foo"/></p>
!! wikitext
#REDIRECT [[Foo]]
Foo
!! end

## WikiContent::getRedirectTargetAndText() strips the redirect from text so,
## in practice, the legacy parser never sees it from the source.  Hence, the
## difference in output.
!! test
Redirect followed by block on the same line
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[Main Page]]<!-- haha -->==hi==
!! html/php
<ol><li>REDIRECT <a href="/wiki/Main_Page" title="Main Page">Main Page</a>==hi==</li></ol>
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/><!-- haha --><h2 id="hi">hi</h2>
!! end

## WikiContent::getRedirectTargetAndText() strips the redirect from text so,
## in practice, the legacy parser never sees it from the source.  Hence, the
## difference in output.
!! test
Redirect followed by horizontal rule on the same line
!! options
parsoid=wt2html
!! wikitext
#REDIRECT [[Main Page]]----
!! html/php
<ol><li>REDIRECT <a href="/wiki/Main_Page" title="Main Page">Main Page</a>----</li></ol>
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/><hr/>
!! end

!! test
Redirect followed by a newline
!! wikitext
#REDIRECT [[Main Page]]
A newline
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>
<p>A newline</p>
!! end

!! test
Redirect followed by multiple newlines
!! wikitext
#REDIRECT [[Main Page]]


A newline
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Main_Page"/>

<p><br/>
A newline</p>
!! end

!! test
Drop duplicate redirects
!! options
parsoid=html2wt
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Foo"/>
<link rel="mw:PageProp/redirect" href="./Bar"/>
<link rel="mw:PageProp/redirect" href="./Baz"/>
!! wikitext
#REDIRECT [[Foo]]
!! end

!! test
Redirect containing double quotes and spaces
!! wikitext
#REDIRECT [[Cool "Gator"]]
!! html/parsoid
<link rel="mw:PageProp/redirect" href="./Cool_%22Gator%22" data-parsoid='{"src":"#REDIRECT ","a":{"href":"./Cool_%22Gator%22"},"sa":{"href":"Cool \"Gator\""}}'/>
!! end

!! test
Broken redirect shouldn't crash parsoid (T332242)
!! wikitext
<!-- Not a valid redirect nor a valid parser function -->
{{#REDIRECT [[]]}}
!! html/php
<p>{{#REDIRECT [[]]}}
</p>
!! html/parsoid+integrated
<!-- Not a valid redirect nor a valid parser function -->
<p><span typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#REDIRECT [[]]","function":"REDIRECT [[]]"},"params":{},"i":0}}]}'>{{#REDIRECT [[]]}}</span></p>
!! html/parsoid+standalone
<!-- Not a valid redirect nor a valid parser function -->
<p><span typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"#REDIRECT [[]]","function":"REDIRECT [[]]"},"params":{},"i":0}}]}'>Parser function implementation for pf_REDIRECT [[]] missing in Parsoid.</span></p>
!! end
preTags-knownFailures.json000066600000000003151335100510011660 0ustar00{}
redirects-knownFailures.json000066600000000003151335100510012237 0ustar00{}
redirects-standalone-knownFailures.json000066600000000210151335100510014365 0ustar00{
    "Redirect followed by multiple newlines": {
        "selser [0,2,0]": "#REDIRECT [[Main Page]]\n1kgt7nw\n\n\n\nA newline"
    }
}
AnsiTermColorer.php000066600000002615151335103710010334 0ustar00<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\Tests;

/**
 * Terminal that supports ANSI escape sequences.
 *
 * @ingroup Testing
 */
class AnsiTermColorer {
	/**
	 * Build the ANSI escape sequence that switches text attributes/colour.
	 *
	 * The requested codes are prefixed with "1;" when the global
	 * $wgCommandLineDarkBg is truthy and with "0;" otherwise.
	 *
	 * @param string|int $color Semicolon-separated list of attribute/color codes
	 * @return string Escape sequence of the form ESC [ <prefix> <codes> m
	 */
	public function color( $color ) {
		global $wgCommandLineDarkBg;

		// Default to the "0;" prefix; only a dark background switches to "1;".
		$intensity = '0;';
		if ( $wgCommandLineDarkBg ) {
			$intensity = '1;';
		}

		return "\x1b[" . $intensity . $color . 'm';
	}

	/**
	 * Build the ANSI escape sequence that restores the default attributes.
	 *
	 * Delegates to color() with code '0', so the result carries the same
	 * background-dependent prefix as any other sequence.
	 *
	 * @return string
	 */
	public function reset() {
		$resetCode = '0';

		return $this->color( $resetCode );
	}
}
DummyTermColorer.php000066600000002011151335103710010523 0ustar00<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\Tests;

/**
 * A colour-less terminal, drop-in replacement for AnsiTermColorer.
 *
 * @ingroup Testing
 */
class DummyTermColorer {
	/** The (empty) escape sequence returned by every method of this class. */
	private const NO_ESCAPE = '';

	/**
	 * Accept a colour request and ignore it.
	 *
	 * @param string|int $color Attribute/color codes; not used
	 * @return string Always the empty string
	 */
	public function color( $color ) {
		return self::NO_ESCAPE;
	}

	/**
	 * Accept a reset request and ignore it.
	 *
	 * @return string Always the empty string
	 */
	public function reset() {
		return self::NO_ESCAPE;
	}
}
TagHooksTest.php000066600000003420151335113150007635 0ustar00<?php

use MediaWiki\Title\Title;
use MediaWiki\User\User;

/**
 * @group Database
 * @group Parser
 *
 * @covers Parser
 * @covers BlockLevelPass
 * @covers StripState
 *
 * @covers Preprocessor_Hash
 * @covers PPDStack_Hash
 * @covers PPDStackElement_Hash
 * @covers PPDPart_Hash
 * @covers PPFrame_Hash
 * @covers PPTemplateFrame_Hash
 * @covers PPCustomFrame_Hash
 * @covers PPNode_Hash_Tree
 * @covers PPNode_Hash_Text
 * @covers PPNode_Hash_Array
 * @covers PPNode_Hash_Attr
 */
class TagHooksTest extends MediaWikiIntegrationTestCase {
	/**
	 * Tag names that testTagHooks() registers and expects to work.
	 *
	 * @return array[] One single-element argument list per tag name
	 */
	public static function provideValidNames() {
		$cases = [];
		foreach ( [ 'foo', 'foo-bar', 'foo_bar', 'FOO-BAR', 'foo bar' ] as $name ) {
			$cases[] = [ $name ];
		}

		return $cases;
	}

	/**
	 * Tag names that testBadTagHooks() expects setHook() to reject.
	 *
	 * @return array[] One single-element argument list per tag name
	 */
	public static function provideBadNames() {
		return [
			[ "foo<bar" ],
			[ "foo>bar" ],
			[ "foo\nbar" ],
			[ "foo\rbar" ],
		];
	}

	/**
	 * Parser options built from a fresh User and the content language.
	 *
	 * @return ParserOptions
	 */
	private function getParserOptions() {
		return ParserOptions::newFromUserAndLang(
			new User(),
			$this->getServiceContainer()->getContentLanguage()
		);
	}

	/**
	 * A hook registered under a valid name has its output substituted for
	 * the tag in the parsed text.
	 *
	 * @dataProvider provideValidNames
	 * @param string $tag Tag name to register
	 */
	public function testTagHooks( $tag ) {
		$parser = $this->getServiceContainer()->getParserFactory()->create();
		$parser->setHook( $tag, [ $this, 'tagCallback' ] );

		$wikitext = "Foo<{$tag}>Bar</{$tag}>Baz";
		$title = Title::makeTitle( NS_MAIN, 'Test' );
		$parserOutput = $parser->parse( $wikitext, $title, $this->getParserOptions() );

		// tagCallback() applies ROT13 to the tag content, turning "Bar" into "One".
		$expected = "<p>FooOneBaz\n</p>";
		$this->assertEquals( $expected, $parserOutput->getText( [ 'unwrap' => true ] ) );
	}

	/**
	 * Registering a hook under an invalid name throws InvalidArgumentException.
	 *
	 * @dataProvider provideBadNames
	 * @param string $tag Tag name to register
	 */
	public function testBadTagHooks( $tag ) {
		$parser = $this->getServiceContainer()->getParserFactory()->create();

		$this->expectException( InvalidArgumentException::class );
		$parser->setHook( $tag, [ $this, 'tagCallback' ] );
	}

	/**
	 * Tag hook used by the tests above: returns the ROT13 of the tag content.
	 *
	 * @param string $text Content between the opening and closing tag
	 * @param array $params Tag attributes; not used
	 * @param Parser $parser Calling parser; not used
	 * @return string
	 */
	public function tagCallback( $text, $params, $parser ) {
		$rotated = str_rot13( $text );

		return $rotated;
	}
}
Back to Directory File Manager