fix(i18n): validate language directory names against gen.lang.* keys (#8767)

Detect when an `app/i18n/<lang>/` directory has no matching `gen.lang.<lang>`
key in the reference language (or vice versa), and refuse to regenerate the
README from that invalid state.

This catches a class of silent corruption where the README translation
table renders literal i18n keys instead of localised language names. The
trigger is most often a case-folded directory on macOS APFS - git tracks
`zh-TW`, the local FS reads back `zh-tw`, the script's `_t('gen.lang.zh-tw')`
lookup misses, and the README ends up with `gen.lang.zh-tw (zh-tw)` instead
of `正體中文 (zh-TW)`. The same check also flags orphan directories (no
display-name key) and orphan keys (no directory).

The new validateLanguageNames() method on I18nData performs a bidirectional
set comparison and returns human-readable issues. cli/check.translation.php
prints them to STDERR and gates --generate-readme on the result, leaving
routine completeness validation behaviour unchanged. Adds four PHPUnit
tests covering: clean state, case mismatch, orphan directory, orphan key.

Co-authored-by: Bjørn A. Andersen <polybjorn@users.noreply.github.com>
This commit is contained in:
polybjorn
2026-05-02 21:43:19 +00:00
committed by GitHub
parent b08c4ef243
commit 3a7431ce04
3 changed files with 98 additions and 0 deletions

View File

@@ -43,6 +43,12 @@ if (isset($cliOptions->language)) {
}
// Cross-check language directory names against the `gen.lang.*` keys before
// anything else: each mismatch is reported on STDERR, and any mismatch at all
// marks the run as not validated.
$languageNameIssues = $i18nData->validateLanguageNames();
$isValidated = $languageNameIssues === [];
foreach ($languageNameIssues as $issue) {
	fwrite(STDERR, "Error: {$issue}\n");
}

$result = [];
$report = [];
$percentage = [];
@@ -116,6 +122,13 @@ function writeToReadme(string $readmePath, string $markdownTable): void {
}
if ($cliOptions->generateReadme) {
if ($languageNameIssues !== []) {
// Refuse to regenerate the README when language directory names and
// `gen.lang.*` keys disagree, otherwise we would silently produce a
// corrupt translation table (e.g. literal `gen.lang.*` keys instead of
// localised language names). Routine incomplete translations are fine.
exit(1);
}
$markdownTable = <<<EOF
| __language__ | __translated__ | |
| - | - | - |

View File

@@ -246,6 +246,45 @@ class I18nData {
return $languages;
}
/**
 * Cross-check the available language codes against the `gen.lang.<code>`
 * keys declared in the reference language's `gen.php`. The comparison is
 * case-sensitive and runs in both directions.
 *
 * Two kinds of inconsistency are reported:
 * - an available language code that has no `gen.lang.<code>` key (for
 *   example a directory name case-folded by a case-insensitive filesystem
 *   such as macOS APFS, a typo, or a language added without its display
 *   name);
 * - a `gen.lang.<code>` key for which no language is available (typically
 *   a leftover after a language was removed).
 *
 * Directory problems are listed first, followed by key problems.
 *
 * @return list<string> One human-readable message per mismatch; empty when
 *                      both sets agree.
 */
public function validateLanguageNames(): array {
	$keyPrefix = 'gen.lang.';
	$prefixLength = strlen($keyPrefix);

	// Collect the language codes that own a display-name key in the reference language.
	$declaredCodes = [];
	$referenceKeys = array_keys($this->data[static::REFERENCE_LANGUAGE]['gen.php'] ?? []);
	foreach ($referenceKeys as $referenceKey) {
		$referenceKey = (string)$referenceKey;
		if (!str_starts_with($referenceKey, $keyPrefix)) {
			continue;
		}
		$declaredCodes[] = substr($referenceKey, $prefixLength);
	}
	sort($declaredCodes);

	$availableCodes = $this->getAvailableLanguages();

	// array_diff() compares string representations exactly, so `zh-tw` and
	// `zh-TW` are treated as two different codes.
	$codesWithoutKey = array_diff($availableCodes, $declaredCodes);
	$keysWithoutDirectory = array_diff($declaredCodes, $availableCodes);

	$messages = [];
	foreach ($codesWithoutKey as $code) {
		$messages[] = "Language directory `app/i18n/{$code}/` has no matching "
			. "`gen.lang.{$code}` key in the reference language. Possible causes: "
			. 'case mismatch (e.g. on macOS APFS), typo, or missing display-name key.';
	}
	foreach ($keysWithoutDirectory as $code) {
		$messages[] = "Reference key `gen.lang.{$code}` has no matching "
			. "`app/i18n/{$code}/` directory. Possible cause: orphan key after "
			. 'a language was removed.';
	}

	return $messages;
}
/**
* Return all available languages without the reference language
* @return list<string>

View File

@@ -869,4 +869,50 @@ final class I18nDataTest extends \PHPUnit\Framework\TestCase {
$data = new I18nData($rawData);
self::assertSame($this->referenceData['en'], $data->getReferenceLanguage());
}
/**
 * Build raw i18n data holding only an `en` language whose `gen.php` file
 * declares one `gen.lang.<code>` entry per given code, each mapped to the
 * shared fixture value.
 *
 * @return array<string,array<string,array<string,I18nValue>>>
 */
private function dataWithLangKeys(string ...$langCodes): array {
	$displayNameKeys = array_map(
		static fn (string $langCode): string => 'gen.lang.' . $langCode,
		$langCodes
	);

	return [
		'en' => ['gen.php' => array_fill_keys($displayNameKeys, $this->value)],
	];
}
/**
 * No issue is reported when every declared `gen.lang.*` key has an
 * identically named language entry, mixed-case `zh-TW` included.
 */
public function testValidateLanguageNamesPassesWhenDirsAndKeysMatch(): void {
	$fixture = $this->dataWithLangKeys('en', 'fr', 'zh-TW') + [
		'fr' => [],
		'zh-TW' => [],
	];

	$i18n = new I18nData($fixture);

	self::assertSame([], $i18n->validateLanguageNames());
}
/**
 * A language entry differing from its key only by case (`zh-tw` vs
 * `zh-TW`) yields two issues: first the unmatched directory, then the
 * unmatched key.
 */
public function testValidateLanguageNamesFlagsCaseMismatch(): void {
	$fixture = $this->dataWithLangKeys('en', 'zh-TW');
	$fixture['zh-tw'] = [];

	$issues = (new I18nData($fixture))->validateLanguageNames();

	self::assertCount(2, $issues);
	[$directoryIssue, $keyIssue] = $issues;
	self::assertStringContainsString('app/i18n/zh-tw/', $directoryIssue);
	self::assertStringContainsString('gen.lang.zh-TW', $keyIssue);
}
/**
 * A language entry (`fr`) without a `gen.lang.fr` key produces exactly one
 * issue, and that issue names the directory.
 */
public function testValidateLanguageNamesFlagsOrphanDirectory(): void {
	$fixture = $this->dataWithLangKeys('en');
	$fixture['fr'] = [];

	$issues = (new I18nData($fixture))->validateLanguageNames();

	self::assertCount(1, $issues);
	self::assertStringContainsString('app/i18n/fr/', $issues[0]);
}
/**
 * A `gen.lang.fr` key without a matching `fr` language entry produces
 * exactly one issue, and that issue names the key.
 */
public function testValidateLanguageNamesFlagsOrphanKey(): void {
	$fixture = $this->dataWithLangKeys('en', 'fr');

	$issues = (new I18nData($fixture))->validateLanguageNames();

	self::assertCount(1, $issues);
	self::assertStringContainsString('gen.lang.fr', $issues[0]);
}
}