Files
FreshRSS/cli/check.translation.php
polybjorn 3a7431ce04 fix(i18n): validate language directory names against gen.lang.* keys (#8767)
Detect when an `app/i18n/<lang>/` directory has no matching `gen.lang.<lang>`
key in the reference language (or vice versa), and refuse to regenerate the
README from that invalid state.

This catches a class of silent corruption where the README translation
table renders literal i18n keys instead of localised language names. The
trigger is most often a case-folded directory on macOS APFS - git tracks
`zh-TW`, the local FS reads back `zh-tw`, the script's `_t('gen.lang.zh-tw')`
lookup misses, and the README ends up with `gen.lang.zh-tw (zh-tw)` instead
of `正體中文 (zh-TW)`. The same check also flags orphan directories (no
display-name key) and orphan keys (no directory).

The new validateLanguageNames() method on I18nData performs a bidirectional
set comparison and returns human-readable issues. cli/check.translation.php
prints them to STDERR and gates --generate-readme on the result, leaving
routine completeness validation behaviour unchanged. Adds four PHPUnit
tests covering: clean state, case mismatch, orphan directory, orphan key.

Co-authored-by: Bjørn A. Andersen <polybjorn@users.noreply.github.com>
2026-05-02 23:43:19 +02:00

264 lines
8.0 KiB
PHP
Executable File

#!/usr/bin/env php
<?php
declare(strict_types=1);
require_once __DIR__ . '/_cli.php';
require_once __DIR__ . '/i18n/I18nCompletionValidator.php';
require_once __DIR__ . '/i18n/I18nData.php';
require_once __DIR__ . '/i18n/I18nFile.php';
require_once __DIR__ . '/i18n/I18nUsageValidator.php';
require_once dirname(__DIR__) . '/constants.php';
$cliOptions = new class extends CliOptionsParser {
	/** @var array<int,string> $language Bound to `-l` / `--language`. */
	public array $language;
	/** Bound to `-d` / `--display-result`. */
	public bool $displayResult;
	/** Bound to `-h` / `--help`. */
	public bool $help;
	/** Bound to `-r` / `--display-report`. */
	public bool $displayReport;
	/** Bound to `-g` / `--generate-readme`. */
	public bool $generateReadme;

	/**
	 * Declares every option understood by this script, then hands over to the parent constructor.
	 */
	public function __construct() {
		// The only option that takes values: declared as an array of strings.
		$this->addOption('language', (new CliOption('language', 'l'))->typeOfArrayOfString());

		// Value-less switches, listed as: option name => [long name, short alias].
		$switches = [
			'displayResult' => ['display-result', 'd'],
			'help' => ['help', 'h'],
			'displayReport' => ['display-report', 'r'],
			'generateReadme' => ['generate-readme', 'g'],
		];
		foreach ($switches as $optionName => [$longName, $shortAlias]) {
			$this->addOption($optionName, (new CliOption($longName, $shortAlias))->withValueNone());
		}

		parent::__construct();
	}
};
// Malformed command line: report the first parsing error together with the usage text.
if (!empty($cliOptions->errors)) {
	$firstError = array_shift($cliOptions->errors);
	fail('FreshRSS error: ' . $firstError . "\n" . $cliOptions->usage);
}

if ($cliOptions->help) {
	checkHelp();
}

$i18nFile = new I18nFile();
$i18nData = new I18nData($i18nFile->load());

// Check only the languages given with --language, or every available language when none was given.
$languages = isset($cliOptions->language)
	? $cliOptions->language
	: $i18nData->getAvailableLanguages();

// Any language-name issue is reported on STDERR and marks the whole run as failed.
$languageNameIssues = $i18nData->validateLanguageNames();
$isValidated = $languageNameIssues === [];
foreach ($languageNameIssues as $issue) {
	fwrite(STDERR, 'Error: ' . $issue . "\n");
}

$result = [];
$report = [];
$percentage = [];

foreach ($languages as $language) {
	if ($language !== $i18nData::REFERENCE_LANGUAGE) {
		// Any other language is compared against the reference language.
		$i18nValidator = new I18nCompletionValidator(
			$i18nData->getReferenceLanguage(),
			$i18nData->getLanguage($language),
		);
	} else {
		// The reference language is checked against the keys actually used in the source code.
		$usedTranslations = findUsedTranslations();
		$referenceLanguage = $i18nData->getReferenceLanguage();
		$pluralFamilies = loadPluralReferenceFamilies($referenceLanguage);
		if ($pluralFamilies !== []) {
			$referenceLanguage['plurals.php'] = $pluralFamilies;
		}
		$i18nValidator = new I18nUsageValidator(
			$referenceLanguage,
			$usedTranslations['keys'],
			$usedTranslations['prefixes'],
		);
	}

	// Always run the validator; a single failure is enough to fail the run.
	if (!$i18nValidator->validate()) {
		$isValidated = false;
	}

	$report[$language] = sprintf('%-5s - %s', $language, $i18nValidator->displayReport());
	$percentage[$language] = $i18nValidator->displayReport(percentage_only: true);
	$result[$language] = $i18nValidator->displayResult();
}

if ($cliOptions->displayResult) {
	foreach ($result as $lang => $value) {
		echo 'Language: ' . $lang . PHP_EOL;
		print_r($value);
		echo PHP_EOL;
	}
}

if ($cliOptions->displayReport) {
	echo implode('', $report);
}
/**
 * Rewrite the `<translations>…</translations>` section of one README file.
 *
 * The README language is taken from the file name (`README.md` is `en`, `README.fr.md` is `fr`).
 * Every `__name__` placeholder found in the table is replaced by the `gen.readme.name`
 * translation, and the resulting table is written between the `<translations>` tags.
 * Any failure prints an error message and terminates the script with exit status 1.
 *
 * @param string $readmePath Path of the README file to update.
 * @param string $markdownTable Markdown table, possibly containing `__placeholder__` tokens.
 */
function writeToReadme(string $readmePath, string $markdownTable): void {
	// expecting `README.md` for `en` or `README.fr.md` for `fr`
	// Only the file name is inspected, so dots in parent directories cannot be mistaken for a language.
	$language = explode('.', basename($readmePath))[1] ?? 'md';
	if ($language === 'md') {
		$language = 'en';
	}
	Minz_Translate::init($language);

	$placeholders = [];
	if (preg_match_all('/__.*?__/', $markdownTable, $placeholders) === false) {
		echo 'Error: Fail while matching translation placeholders', PHP_EOL;
		exit(1);
	}
	foreach (array_unique($placeholders[0]) as $placeholder) {
		// `__name__` maps to the `gen.readme.name` translation key.
		$markdownTable = str_replace($placeholder, _t('gen.readme.' . substr($placeholder, 2, -2)), $markdownTable);
	}

	$readme = file_get_contents($readmePath);
	if ($readme === false) {
		echo 'Error: Unable to open ' . $readmePath, PHP_EOL;
		exit(1);
	}

	$section = implode("\n", [
		'<translations>',
		'<!-- This section is automatically generated by `./cli/check.translation.php -g` -->',
		$markdownTable,
		'</translations>',
	]);

	// A callback inserts the section verbatim: a plain replacement string would expand
	// `$1` / `\1`-like sequences occurring in translated text as back-references.
	$updatedReadme = preg_replace_callback(
		'/<translations>(.*?)<\/translations>/s',
		static fn (array $_): string => $section,
		$readme,
	);
	if ($updatedReadme === null) {
		// Never write a failed replacement back: that would wipe the README.
		echo 'Error: Fail while replacing the translation section of ' . $readmePath, PHP_EOL;
		exit(1);
	}

	if (file_put_contents($readmePath, $updatedReadme) === false) {
		echo 'Error: Fail while writing to ' . $readmePath, PHP_EOL;
		exit(1);
	}
	echo 'Successfully written translation status into ' . $readmePath, PHP_EOL;
}
if ($cliOptions->generateReadme) {
	if ($languageNameIssues !== []) {
		// Refuse to regenerate the README when language directory names and
		// `gen.lang.*` keys disagree, otherwise we would silently produce a
		// corrupt translation table (e.g. literal `gen.lang.*` keys instead of
		// localised language names). Routine incomplete translations are fine.
		exit(1);
	}

	// Table header; the `__name__` tokens are placeholders translated separately for each README.
	$markdownTable = "| __language__ | __translated__ | |\n"
		. "| - | - | - |\n";

	foreach ($percentage as $lang => $value) {
		$percentageInt = intval(rtrim($value, '%'));
		// Progress bar: one filled cell per completed 10%, the remainder (rounded up) as empty cells.
		$completed = intdiv($percentageInt, 10);
		$uncompleted = intval(ceil((100 - $percentageInt) / 10));
		$progressBar = str_repeat('■', $completed) . str_repeat('・', $uncompleted);
		// GitHub code search listing the lines of this language still flagged TODO or DIRTY.
		$ghSearchUrl = 'https://github.com/search?q=' . urlencode("repo:FreshRSS/FreshRSS path:app/i18n/$lang /(TODO|DIRTY)$/");
		$markdownTable .= '| ' . implode(' | ', [
			_t('gen.lang.' . $lang) . " ($lang)",
			$progressBar . ' ' . $percentageInt . '%',
			"[__contribute__]($ghSearchUrl)",
		]) . " |\n";
	}

	// The README files are looked up relative to the working directory. Switch to the
	// parent of this script's directory (where constants.php is loaded from) instead of
	// guessing from the current directory, so the script works wherever it is launched from.
	$projectRoot = dirname(__DIR__);
	if (!chdir($projectRoot)) {
		fwrite(STDERR, "Error: Unable to change directory to {$projectRoot}\n");
		exit(1);
	}

	$tableWithoutTrailingNewline = rtrim($markdownTable);
	foreach (array_merge(['README.md'], glob('README.*.md') ?: []) as $readmePath) {
		writeToReadme($readmePath, $tableWithoutTrailingNewline);
	}
	exit();
}

// Without --generate-readme, the exit status reflects the validation outcome.
if (!$isValidated) {
	exit(1);
}
/**
 * Collect the translation keys referenced by the project's source code.
 *
 * Walks every file below the parent directory of this script whose path ends in
 * `.php` or `.phtml` (case-insensitive) and records:
 * - as keys: the string literal opening each `_t(…)` call, and the string literal
 *   opening each `Minz_Translate::plural(…)` call when it is not followed by a `.`;
 * - as prefixes: the string literal opening a `Minz_Translate::plural(…)` call when
 *   it is followed by a `.` (i.e. the key is completed dynamically).
 *
 * Unreadable files are skipped. Both lists are de-duplicated and re-indexed.
 *
 * @return array{keys:list<string>,prefixes:list<string>}
 */
function findUsedTranslations(): array {
	$projectFiles = new RecursiveIteratorIterator(
		new RecursiveDirectoryIterator(__DIR__ . '/..', FilesystemIterator::SKIP_DOTS),
		RecursiveIteratorIterator::LEAVES_ONLY,
		RecursiveIteratorIterator::CATCH_GET_CHILD
	);
	$sourceFiles = new RegexIterator($projectFiles, '/^.+\.(php|phtml)$/i', RecursiveRegexIterator::GET_MATCH);

	$keys = [];
	$prefixes = [];
	foreach ($sourceFiles as $path => $_) {
		if (!is_string($path) || $path === '') {
			continue;
		}
		$source = file_get_contents($path);
		if ($source === false) {
			continue;
		}

		// Direct lookups: _t('some.key')
		preg_match_all('/_t\([\'"](?P<strings>[^\'"]+)[\'"]/', $source, $directCalls);
		array_push($keys, ...$directCalls['strings']);

		// Plural lookups: Minz_Translate::plural('some.key' …), possibly followed by a concatenation.
		preg_match_all('/Minz_Translate::plural\(\s*[\'"](?P<string>[^\'"]+)[\'"](?P<dynamic>\s*\.)?/', $source, $pluralCalls, PREG_SET_ORDER);
		foreach ($pluralCalls as $call) {
			// The `dynamic` group is absent from the match when no concatenation follows.
			if (($call['dynamic'] ?? '') === '') {
				$keys[] = $call['string'];
			} else {
				$prefixes[] = $call['string'];
			}
		}
	}

	return [
		'keys' => array_values(array_unique($keys)),
		'prefixes' => array_values(array_unique($prefixes)),
	];
}
/**
 * Merge the indexed plural messages of the reference language into one value per base key.
 *
 * Every key shaped like `<base>.<digits>` contributes its message to the family `<base>`.
 * Each family is then sorted by numeric index, its empty messages are dropped, and the
 * remaining ones are joined with ` | ` into a single I18nValue.
 *
 * @param array<string,array<string,I18nValue>> $referenceLanguage
 * @return array<string,I18nValue>
 */
function loadPluralReferenceFamilies(array $referenceLanguage): array {
	// base key => [numeric index => message]
	$messagesByBase = [];
	foreach ($referenceLanguage as $fileValues) {
		foreach ($fileValues as $key => $value) {
			if (preg_match('/^(?P<base>.+)\.(?P<index>\d+)$/', $key, $parts) === 1) {
				$messagesByBase[$parts['base']][(int)$parts['index']] = $value->__toString();
			}
		}
	}

	$families = [];
	foreach ($messagesByBase as $base => $indexedMessages) {
		ksort($indexedMessages);
		$nonEmptyMessages = array_filter(
			$indexedMessages,
			static fn (string $message): bool => $message !== ''
		);
		$families[$base] = new I18nValue(implode(' | ', $nonEmptyMessages));
	}
	return $families;
}
/**
* Print the command-line help text of this script, then terminate.
*
* The text lists the script name, its synopsis and the supported options.
* The function never returns: it always ends the script with exit().
*/
function checkHelp(): never {
// Remove this directory's prefix from the script path, keeping only what follows it.
$file = str_replace(__DIR__ . '/', '', __FILE__);
echo <<<HELP
NAME
$file
SYNOPSIS
php $file [OPTION]...
DESCRIPTION
Check if translation files have missing keys or missing translations.
-d, --display-result display results.
-h, --help display this help and exit.
-l, --language=LANG filter by LANG.
-r, --display-report display completion report.
-g, --generate-readme generate translation progress section in readme.
HELP;
exit();
}