<?php
/**
* Cache for outputs of the PHP parser
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Cache Parser
*/
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\HookContainer\HookRunner;
use MediaWiki\Json\JsonCodec;
use MediaWiki\Page\PageRecord;
use MediaWiki\Page\WikiPageFactory;
use MediaWiki\Parser\ParserCacheFilter;
use MediaWiki\Parser\ParserCacheMetadata;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Title\TitleFactory;
use Psr\Log\LoggerInterface;
use Wikimedia\ObjectCache\BagOStuff;
use Wikimedia\ObjectCache\EmptyBagOStuff;
use Wikimedia\ObjectCache\HashBagOStuff;
use Wikimedia\Stats\StatsFactory;
use Wikimedia\UUID\GlobalIdGenerator;
/**
* Cache for ParserOutput objects corresponding to the latest page revisions.
*
* The ParserCache is a two-tiered cache backed by BagOStuff which supports
* varying the stored content on the values of ParserOptions used during
* a page parse.
*
* First tier is keyed by the page ID and stores ParserCacheMetadata, which
* contains information about cache expiration and the list of ParserOptions
* used during the parse of the page. For example, if only 'dateformat' and
* 'userlang' options were accessed by the parser when producing output for the
* page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
* This means none of the other existing options had any effect on the output.
*
* The second tier of the cache contains ParserOutput objects. The key for the
* second tier is constructed from the page ID and values of those ParserOptions
* used during a page parse which affected the output. Upon cache lookup, the list
* of used option names is retrieved from tier 1 cache, and only the values of
* those options are hashed together with the page ID to produce a key, while
* the rest of the options are ignored. Following the example above where
* only [ 'dateformat', 'userlang' ] options changed the parser output for a
* page, the key will look like 'page_id!dateformat=default:userlang=ru'.
* Thus any cache lookup with dateformat=default and userlang=ru will hit the
* same cache entry regardless of the values of the rest of the options, since they
* were not accessed during a parse and thus did not change the output.
*
* @see ParserOutput::recordOption()
* @see ParserOutput::getUsedOptions()
* @see ParserOptions::allCacheVaryingOptions()
* @ingroup Cache Parser
*/
class ParserCache {
/**
* Staleness constraints accepted as the $staleConstraint argument of
* self::getMetadata(). The values are ordered: checkExpired() and
* checkOutdated() compare the constraint against them with "<", so a
* higher value tolerates more kinds of stale data.
* @since 1.30
* @since 1.36 the constants were made public
*/
/** Use only current data */
public const USE_CURRENT_ONLY = 0;
/** Use expired data if current data is unavailable */
public const USE_EXPIRED = 1;
/** Use expired data or data from different revisions if current data is unavailable */
public const USE_OUTDATED = 2;
/**
* Use expired data and data from different revisions, and if all else
* fails vary on all variable options.
*
* This is the default $staleConstraint of self::getMetadata().
*/
private const USE_ANYTHING = 3;
/** @var string The name of this ParserCache. Used as a root of the cache key. */
private $name;
/** @var BagOStuff Backend holding both the metadata entries and the ParserOutput entries */
private $cache;
/**
* Anything cached prior to this is invalidated
*
* NOTE(review): within this class the value is only stored by the constructor
* and added to the debug log in checkExpired(); the expiry decision itself is
* delegated to CacheTime::expired(). Confirm where the epoch is enforced.
*
* @var string
*/
private $cacheEpoch;
/** @var HookRunner Runs the RejectParserCacheValue and ParserCacheSaveComplete hooks */
private $hookRunner;
/** @var JsonCodec Serializes cache entries to JSON and restores them on read */
private $jsonCodec;
/** @var StatsFactory Source of the ParserCache_operation_total and ParserCache_render_total counters */
private $stats;
/** @var LoggerInterface */
private $logger;
/** @var TitleFactory Builds the Title used for the options hash and for the save-complete hook */
private $titleFactory;
/** @var WikiPageFactory Builds the WikiPage used for stats labels and for the reject hook */
private $wikiPageFactory;
/** Optional filter consulted by save(); null means every output is eligible. */
private ?ParserCacheFilter $filter = null;
/** Used by save() to assign a render ID when the ParserOutput has none. */
private GlobalIdGenerator $globalIdGenerator;
/**
* @var BagOStuff small in-process cache to store metadata.
* It's needed multiple times during the request, for example
* to build a PoolWorkArticleView key, and then to fetch the
* actual ParserCache entry.
*
* It is written by save(), read by getMetadata() and cleared by
* deleteOptionsKey(). The constructor caps it at 2 keys.
*/
private $metadataProcCache;
/**
 * Set up a cache pathway on top of the given back-end storage mechanism.
 *
 * The invalidation strategy used by this class is compatible with
 * MultiWriteBagOStuff in async replication mode.
 *
 * @param string $name Name of this cache, used as the root of every key it builds
 * @param BagOStuff $cache Backend that stores metadata and ParserOutput entries
 * @param string $cacheEpoch Anything before this timestamp is invalidated
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param JsonCodec $jsonCodec
 * @param StatsFactory $stats
 * @param LoggerInterface $logger
 * @param TitleFactory $titleFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param GlobalIdGenerator $globalIdGenerator
 */
public function __construct(
	string $name,
	BagOStuff $cache,
	string $cacheEpoch,
	HookContainer $hookContainer,
	JsonCodec $jsonCodec,
	StatsFactory $stats,
	LoggerInterface $logger,
	TitleFactory $titleFactory,
	WikiPageFactory $wikiPageFactory,
	GlobalIdGenerator $globalIdGenerator
) {
	// Identity and storage
	$this->name = $name;
	$this->cacheEpoch = $cacheEpoch;
	$this->cache = $cache;
	// Tiny per-process cache for metadata entries (at most two keys).
	$this->metadataProcCache = new HashBagOStuff( [ 'maxKeys' => 2 ] );

	// Collaborating services
	$this->jsonCodec = $jsonCodec;
	$this->globalIdGenerator = $globalIdGenerator;
	$this->titleFactory = $titleFactory;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->hookRunner = new HookRunner( $hookContainer );

	// Observability
	$this->logger = $logger;
	$this->stats = $stats;
}
/**
* Install the filter that save() consults before storing a ParserOutput.
*
* When a filter is set and its shouldCache() call returns a falsy value,
* save() skips the write and records the attempt as "filtered".
*
* @since 1.41
* @param ParserCacheFilter $filter
*/
public function setFilter( ParserCacheFilter $filter ): void {
$this->filter = $filter;
}
/**
 * Drop the metadata (first tier) entry of a page, both from the
 * in-process cache and from the backend.
 *
 * @param PageRecord $page Must belong to the local wiki
 * @since 1.28
 */
public function deleteOptionsKey( PageRecord $page ) {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );

	// In-process copy first, then the backend.
	$this->metadataProcCache->delete( $metadataKey );
	$this->cache->delete( $metadataKey );
}
/**
 * Fetch the ParserOutput for a page from the cache, accepting an
 * outdated entry (delegates to get() with $useOutdated = true).
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param ParserOptions $popts
 * @return ParserOutput|false False when get() yields anything other than an object
 */
public function getDirty( PageRecord $page, $popts ) {
	$page->assertWiki( PageRecord::LOCAL );

	$cached = $this->get( $page, $popts, true );
	if ( is_object( $cached ) ) {
		return $cached;
	}

	return false;
}
/**
 * Look up the content model of a page, with every "." replaced by "_".
 *
 * @param PageRecord $page
 * @return string
 */
private function getContentModelFromPage( PageRecord $page ) {
	$model = $this->wikiPageFactory->newFromTitle( $page )->getContentModel();

	return str_replace( '.', '_', $model );
}
/**
 * Bump the ParserCache_operation_total counter for one cache operation.
 *
 * The counter is labelled with the cache name, the page's content model,
 * the status and the reason ('n/a' when no reason is given), and is also
 * copied to a statsd key built from the same parts.
 *
 * @param PageRecord $page
 * @param string $status
 * @param string|null $reason
 */
private function incrementStats( PageRecord $page, $status, $reason = null ) {
	$model = $this->getContentModelFromPage( $page );

	// A falsy reason is treated as "no reason" for both the label and the statsd key.
	if ( $reason ) {
		$statsdSuffix = "{$status}_{$reason}";
		$reasonLabel = $reason;
	} else {
		$statsdSuffix = $status;
		$reasonLabel = 'n/a';
	}

	$counter = $this->stats->getCounter( 'ParserCache_operation_total' );
	$counter
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $model )
		->setLabel( 'status', $status )
		->setLabel( 'reason', $reasonLabel )
		->copyToStatsdAt( "{$this->name}.{$model}.{$statsdSuffix}" )
		->increment();
}
/**
 * Bump the ParserCache_render_total counter for a render reason.
 *
 * Runs of non-word characters in the reason are collapsed to a single
 * underscore before it is used as a label and in the statsd key.
 *
 * @param PageRecord $page
 * @param string $renderReason
 */
private function incrementRenderReasonStats( PageRecord $page, $renderReason ) {
	$model = $this->getContentModelFromPage( $page );
	$reasonLabel = preg_replace( '/\W+/', '_', $renderReason );

	$counter = $this->stats->getCounter( 'ParserCache_render_total' );
	$counter
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $model )
		->setLabel( 'reason', $reasonLabel )
		->copyToStatsdAt( "{$this->name}.{$model}.reason.{$reasonLabel}" )
		->increment();
}
/**
 * Returns the ParserCache metadata about the given page
 * considering the given staleness constraint.
 *
 * The in-process cache is consulted first and the backend second. An entry
 * stored as a JSON string is deserialized into a CacheTime before the
 * expiry and revision checks run.
 *
 * @note Which parser options influence the cache key
 * is controlled via ParserOutput::recordOption() or
 * ParserOptions::addExtraKey().
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param int $staleConstraint one of the self::USE_ constants
 * @return ParserCacheMetadata|null Null when the entry is absent, cannot be
 *  restored, or is rejected as expired or outdated
 * @since 1.36
 */
public function getMetadata(
	PageRecord $page,
	int $staleConstraint = self::USE_ANYTHING
): ?ParserCacheMetadata {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );

	// Prefer the in-process copy; fall back to the backend on any falsy result.
	$entry = $this->metadataProcCache->get( $metadataKey );
	if ( !$entry ) {
		$entry = $this->cache->get( $metadataKey, BagOStuff::READ_VERIFIED );
	}

	if ( $entry === false ) {
		$this->incrementStats( $page, 'miss', 'absent_metadata' );
		$this->logger->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name ] );
		return null;
	}

	// NOTE: Entries written before JSON serialization was introduced (the
	// default before 1.36) may come back as an already-built CacheTime
	// object instead of a string. Both forms must stay readable so that
	// entries survive an upgrade from an earlier release.
	// NOTE: Support for reading string values from the cache must be
	// deployed a while before starting to write JSON to the cache,
	// in case we have to revert either change.
	if ( is_string( $entry ) ) {
		$entry = $this->restoreFromJson( $entry, $metadataKey, CacheTime::class );
	}

	if ( !( $entry instanceof CacheTime ) ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return null;
	}

	// Expiry is checked before the revision; the second check is skipped
	// when the first one already rejects the entry.
	$rejected = $this->checkExpired( $entry, $page, $staleConstraint, 'metadata' )
		|| $this->checkOutdated( $entry, $page, $staleConstraint, 'metadata' );
	if ( $rejected ) {
		return null;
	}

	$this->logger->debug( 'Parser cache options found', [ 'name' => $this->name ] );

	return $entry;
}
/**
 * Build the backend key of the metadata (first tier) entry of a page,
 * from the cache name, the literal 'idoptions' and the local page ID.
 *
 * @param PageRecord $page
 * @return string
 */
private function makeMetadataKey( PageRecord $page ): string {
	$pageId = $page->getId( PageRecord::LOCAL );

	return $this->cache->makeKey( $this->name, 'idoptions', $pageId );
}
/**
 * Get a key that will be used by the ParserCache to store the content
 * for a given page considering the given options and the array of
 * used options.
 *
 * @warning The exact format of the key is considered internal and is subject
 * to change, thus should not be used as storage or long-term caching key.
 * This is intended to be used for logging or keying something transient.
 *
 * @param PageRecord $page
 * @param ParserOptions $options
 * @param array|null $usedOptions Names of the options to hash into the key.
 *  Defaults to all cache varying options.
 * @return string
 * @internal
 * @since 1.36
 */
public function makeParserOutputKey(
	PageRecord $page,
	ParserOptions $options,
	?array $usedOptions = null
): string {
	// The nullable type is spelled out explicitly: relying on "= null" to
	// make an "array" parameter nullable is deprecated as of PHP 8.4.
	$usedOptions ??= ParserOptions::allCacheVaryingOptions();

	// 'idhash' seems to mean 'page id' + 'rendering hash' (r3710)
	$pageid = $page->getId( PageRecord::LOCAL );
	$title = $this->titleFactory->newFromPageIdentity( $page );
	$hash = $options->optionsHash( $usedOptions, $title );

	// Before T263581 ParserCache was split between normal page views
	// and action=parse. -0 is left in the key to avoid invalidating the entire
	// cache when removing the cache split.
	return $this->cache->makeKey( $this->name, 'idhash', "{$pageid}-0!{$hash}" );
}
/**
 * Retrieve the ParserOutput from ParserCache.
 *
 * The lookup is two-step: the page's metadata entry supplies the list of
 * used options, which is then hashed into the key of the ParserOutput entry.
 * Every rejection path is counted as a 'miss' with its own reason.
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param ParserOptions $popts
 * @param bool $useOutdated (default false) Accept expired entries and
 *  entries for a different revision
 *
 * @return ParserOutput|false False if not found, outdated or rejected
 */
public function get( PageRecord $page, $popts, $useOutdated = false ) {
	$page->assertWiki( PageRecord::LOCAL );

	if ( !$page->exists() ) {
		$this->incrementStats( $page, 'miss', 'nonexistent' );
		return false;
	}

	if ( $page->isRedirect() ) {
		// It's a redirect now
		$this->incrementStats( $page, 'miss', 'redirect' );
		return false;
	}

	$staleConstraint = self::USE_CURRENT_ONLY;
	if ( $useOutdated ) {
		$staleConstraint = self::USE_OUTDATED;
	}

	// Tier 1: which options did the parse depend on?
	$metadata = $this->getMetadata( $page, $staleConstraint );
	if ( !$metadata ) {
		return false;
	}

	if ( !$popts->isSafeToCache( $metadata->getUsedOptions() ) ) {
		$this->incrementStats( $page, 'miss', 'unsafe' );
		return false;
	}

	// Tier 2: the ParserOutput itself, keyed by the values of those options.
	$outputKey = $this->makeParserOutputKey( $page, $popts, $metadata->getUsedOptions() );
	$output = $this->cache->get( $outputKey, BagOStuff::READ_VERIFIED );
	if ( $output === false ) {
		$this->incrementStats( $page, 'miss', 'absent' );
		$this->logger->debug( 'ParserOutput cache miss', [ 'name' => $this->name ] );
		return false;
	}

	// NOTE: Entries written before JSON serialization was introduced (the
	// default before 1.36) may come back as an already-built ParserOutput
	// object instead of a string. Both forms must stay readable so that
	// entries survive an upgrade from an earlier release.
	// NOTE: Support for reading string values from the cache must be
	// deployed a while before starting to write JSON to the cache,
	// in case we have to revert either change.
	if ( is_string( $output ) ) {
		$output = $this->restoreFromJson( $output, $outputKey, ParserOutput::class );
	}

	if ( !( $output instanceof ParserOutput ) ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return false;
	}

	// Expiry first, then revision; the second check only runs if the first passes.
	$rejected = $this->checkExpired( $output, $page, $staleConstraint, 'output' )
		|| $this->checkOutdated( $output, $page, $staleConstraint, 'output' );
	if ( $rejected ) {
		return false;
	}

	// Give hook handlers a final veto over an otherwise valid entry.
	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	$verdict = $this->hookRunner->onRejectParserCacheValue( $output, $wikiPage, $popts );
	if ( $verdict === false ) {
		$this->incrementStats( $page, 'miss', 'rejected' );
		$this->logger->debug(
			'key valid, but rejected by RejectParserCacheValue hook handler',
			[ 'name' => $this->name ]
		);
		return false;
	}

	$this->logger->debug( 'ParserOutput cache found', [ 'name' => $this->name ] );
	$this->incrementStats( $page, 'hit' );

	return $output;
}
/**
* Store a ParserOutput and its metadata entry for the given page.
*
* Nothing is written, and the method returns early, when:
* - the used options are not safe to cache for $popts,
* - the output's cache expiry is zero or negative,
* - a filter is installed and declines the output,
* - the backend is an EmptyBagOStuff,
* - no revision ID can be determined, or
* - either the output or the metadata fails to serialize.
*
* The passed ParserOutput may be modified: its cache time, revision ID and
* render ID are filled in (or overridden by the arguments), and a cache
* message describing the save is appended.
*
* @param ParserOutput $parserOutput
* @param PageRecord $page Must belong to the local wiki
* @param ParserOptions $popts
* @param string|null $cacheTime TS_MW timestamp when the cache was generated
* @param int|null $revId Revision ID that was parsed
* @throws InvalidArgumentException If $parserOutput has no text set
*/
public function save(
ParserOutput $parserOutput,
PageRecord $page,
$popts,
$cacheTime = null,
$revId = null
) {
$page->assertWiki( PageRecord::LOCAL );
// T350538: Eventually we'll warn if the $cacheTime and $revId
// parameters are non-null here, since we *should* be getting
// them from the ParserOutput.
// For now, only a disagreement between the explicit $revId and the one
// recorded in the ParserOutput is logged; the save still proceeds.
if ( $revId !== null && $revId !== $parserOutput->getCacheRevisionId() ) {
$this->logger->warning(
'Inconsistent revision ID',
[
'name' => $this->name,
'reason' => $popts->getRenderReason(),
'revid1' => $revId,
'revid2' => $parserOutput->getCacheRevisionId(),
]
);
}
// An output without text is treated as a caller error, not as a skipped save.
if ( !$parserOutput->hasText() ) {
throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
}
// Read the expiry up front; it is used both as the "cacheable" test and as
// the TTL of every entry written below.
$expire = $parserOutput->getCacheExpiry();
if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
$this->logger->debug(
'Parser options are not safe to cache and has not been saved',
[ 'name' => $this->name ]
);
$this->incrementStats( $page, 'save', 'unsafe' );
return;
}
if ( $expire <= 0 ) {
$this->logger->debug(
'Parser output was marked as uncacheable and has not been saved',
[ 'name' => $this->name ]
);
$this->incrementStats( $page, 'save', 'uncacheable' );
return;
}
if ( $this->filter && !$this->filter->shouldCache( $parserOutput, $page, $popts ) ) {
$this->logger->debug(
'Parser output was filtered and has not been saved',
[ 'name' => $this->name ]
);
$this->incrementStats( $page, 'save', 'filtered' );
// TODO: In this case, we still want to cache in RevisionOutputCache (T350669).
return;
}
// With an EmptyBagOStuff backend the save is skipped silently (no log, no stats).
if ( $this->cache instanceof EmptyBagOStuff ) {
return;
}
// Ensure cache properties are set in the ParserOutput
// T350538: These should be turned into assertions that the
// properties are already present.
// An explicit (truthy) $cacheTime overrides whatever the ParserOutput holds;
// otherwise the ParserOutput's own cache time is used.
if ( $cacheTime ) {
$parserOutput->setCacheTime( $cacheTime );
} else {
if ( !$parserOutput->hasCacheTime() ) {
$this->logger->warning(
'No cache time set',
[
'name' => $this->name,
'reason' => $popts->getRenderReason(),
]
);
}
$cacheTime = $parserOutput->getCacheTime();
}
// Revision ID precedence: explicit argument, then the ParserOutput's own
// value, then the page's latest revision.
if ( $revId ) {
$parserOutput->setCacheRevisionId( $revId );
} elseif ( $parserOutput->getCacheRevisionId() ) {
$revId = $parserOutput->getCacheRevisionId();
} else {
$revId = $page->getLatest( PageRecord::LOCAL );
$parserOutput->setCacheRevisionId( $revId );
}
if ( !$revId ) {
$this->logger->warning(
'Parser output cannot be saved if the revision ID is not known',
[ 'name' => $this->name ]
);
$this->incrementStats( $page, 'save', 'norevid' );
return;
}
// A missing render ID is logged and then replaced with a freshly generated UUIDv1.
if ( !$parserOutput->getRenderId() ) {
$this->logger->warning(
'Parser output missing render ID',
[
'name' => $this->name,
'reason' => $popts->getRenderReason(),
]
);
$parserOutput->setRenderId( $this->globalIdGenerator->newUUIDv1() );
}
// Transfer cache properties to the cache metadata
$metadata = new CacheTime;
$metadata->recordOptions( $parserOutput->getUsedOptions() );
$metadata->updateCacheExpiry( $expire );
$metadata->setCacheTime( $cacheTime );
$metadata->setCacheRevisionId( $revId );
// The output key is derived from the options recorded in the metadata, the
// same list a later get() reads back from the metadata entry.
$parserOutputKey = $this->makeParserOutputKey(
$page,
$popts,
$metadata->getUsedOptions()
);
// Append a human-readable note about this save to the ParserOutput before it
// is serialized.
$msg = "Saved in parser cache with key $parserOutputKey" .
" and timestamp $cacheTime" .
" and revision id $revId.";
$reason = $popts->getRenderReason();
$msg .= " Rendering was triggered because: $reason";
$parserOutput->addCacheMessage( $msg );
$pageKey = $this->makeMetadataKey( $page );
// Serialize both entries before writing anything, so that a failure in
// either one leaves the cache untouched.
$parserOutputData = $this->convertForCache( $parserOutput, $parserOutputKey );
$metadataData = $this->convertForCache( $metadata, $pageKey );
if ( !$parserOutputData || !$metadataData ) {
$this->logger->warning(
'Parser output failed to serialize and was not saved',
[ 'name' => $this->name ]
);
$this->incrementStats( $page, 'save', 'nonserializable' );
return;
}
// Save the parser output
$this->cache->set(
$parserOutputKey,
$parserOutputData,
$expire,
BagOStuff::WRITE_ALLOW_SEGMENTS
);
// ...and its pointer to the local cache.
$this->metadataProcCache->set( $pageKey, $metadataData, $expire );
// ...and to the global cache.
$this->cache->set( $pageKey, $metadataData, $expire );
// Notify hook handlers once both entries have been written.
$title = $this->titleFactory->newFromPageIdentity( $page );
$this->hookRunner->onParserCacheSaveComplete( $this, $parserOutput, $title, $popts, $revId );
$this->logger->debug( 'Saved in parser cache', [
'name' => $this->name,
'key' => $parserOutputKey,
'cache_time' => $cacheTime,
'rev_id' => $revId
] );
$this->incrementStats( $page, 'save', 'success' );
$this->incrementRenderReasonStats( $page, $popts->getRenderReason() );
}
/**
* Get the backend BagOStuff instance that
* powers the parser cache.
*
* This is the instance passed to the constructor; the small in-process
* metadata cache is not exposed here.
*
* @since 1.30
* @internal
* @return BagOStuff
*/
public function getCacheStorage() {
return $this->cache;
}
/**
 * Decide whether $entry must be rejected as expired for $page.
 *
 * The check only applies when $staleConstraint is below USE_EXPIRED.
 * A rejection is counted as a 'miss' with reason 'expired' and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Tier name used as the prefix of the debug message
 * @return bool True if the entry must not be used
 */
private function checkExpired(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	// Expired data is acceptable to the caller: nothing to check.
	if ( $staleConstraint >= self::USE_EXPIRED ) {
		return false;
	}

	if ( !$entry->expired( $page->getTouched() ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss', 'expired' );

	$context = [
		'name' => $this->name,
		'touched' => $page->getTouched(),
		'epoch' => $this->cacheEpoch,
		'cache_time' => $entry->getCacheTime()
	];
	$this->logger->debug( "{$cacheTier} key expired", $context );

	return true;
}
/**
 * Decide whether $entry must be rejected because it was generated for a
 * revision other than the latest revision of $page.
 *
 * The check only applies when $staleConstraint is below USE_OUTDATED.
 * A rejection is counted as a 'miss' with reason 'revid' and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Tier name used as the prefix of the debug message
 * @return bool True if the entry must not be used
 */
private function checkOutdated(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	$latestRevId = $page->getLatest( PageRecord::LOCAL );

	// Data from other revisions is acceptable to the caller: nothing to check.
	if ( $staleConstraint >= self::USE_OUTDATED ) {
		return false;
	}

	if ( !$entry->isDifferentRevision( $latestRevId ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss', 'revid' );

	$context = [
		'name' => $this->name,
		'rev_id' => $latestRevId,
		'cached_rev_id' => $entry->getCacheRevisionId()
	];
	$this->logger->debug( "{$cacheTier} key is for an old revision", $context );

	return true;
}
/**
 * Rebuild a cache entry from its JSON representation.
 *
 * Any exception raised by the codec is logged as an error and converted
 * into a null result; a JsonException gets its own log message.
 *
 * @param string $jsonData
 * @param string $key Cache key the data was read from, for logging only
 * @param string $expectedClass Class name handed to the codec
 * @return CacheTime|ParserOutput|null Null if deserialization failed
 */
private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
	try {
		return $this->jsonCodec->deserialize( $jsonData, $expectedClass );
	} catch ( Exception $e ) {
		// JsonException is itself an Exception, so one handler covers both
		// cases and only the log message differs.
		$logMessage = $e instanceof JsonException
			? "Unable to deserialize JSON"
			: "Unexpected failure during cache load";

		$this->logger->error( $logMessage, [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $e->getMessage()
		] );

		return null;
	}
}
/**
 * Serialize a cache entry with the JSON codec.
 *
 * A JsonException is logged as an error, together with whatever detail
 * about the non-serializable data can be collected, and turned into a
 * null result.
 *
 * @param CacheTime $obj
 * @param string $key Cache key the data is destined for, for logging only
 * @return string|null Null if serialization failed
 */
protected function convertForCache( CacheTime $obj, string $key ) {
	try {
		$serialized = $this->jsonCodec->serialize( $obj );
	} catch ( JsonException $e ) {
		// Gathering diagnostics is best-effort: if it fails too, log the
		// message of that second failure instead of making things worse.
		try {
			$details = $this->jsonCodec->detectNonSerializableData( $obj, true );
		} catch ( \Throwable $t ) {
			$details = $t->getMessage();
		}

		$context = [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $e->getMessage(),
			'details' => $details,
			'trace' => $e->getTraceAsString(),
		];
		$this->logger->error( "Unable to serialize JSON", $context );

		return null;
	}

	return $serialized;
}
}