Enhancement - optional transparent homoglyph encoding of a few characters in certain languages for more compact and efficient text messages (#4491)

This commit is contained in:
Pavel Vasiliev
2026-02-07 21:49:35 +03:00
committed by GitHub
parent 6ec2ed76ca
commit 4303bfaac4
10 changed files with 297 additions and 4 deletions

View File

@@ -30,6 +30,8 @@ import org.meshtastic.core.prefs.emoji.CustomEmojiPrefs
import org.meshtastic.core.prefs.emoji.CustomEmojiPrefsImpl
import org.meshtastic.core.prefs.filter.FilterPrefs
import org.meshtastic.core.prefs.filter.FilterPrefsImpl
import org.meshtastic.core.prefs.homoglyph.HomoglyphPrefs
import org.meshtastic.core.prefs.homoglyph.HomoglyphPrefsImpl
import org.meshtastic.core.prefs.map.MapConsentPrefs
import org.meshtastic.core.prefs.map.MapConsentPrefsImpl
import org.meshtastic.core.prefs.map.MapPrefs
@@ -54,6 +56,10 @@ import javax.inject.Singleton
@Retention(AnnotationRetention.BINARY)
internal annotation class AnalyticsSharedPreferences
/** Qualifier for the [SharedPreferences] instance that holds the homoglyph-encoding settings. */
@Qualifier
@Retention(AnnotationRetention.BINARY)
internal annotation class HomoglyphEncodingSharedPreferences
@Qualifier
@Retention(AnnotationRetention.BINARY)
internal annotation class AppSharedPreferences
@@ -101,6 +107,8 @@ interface PrefsModule {
@Binds fun bindAnalyticsPrefs(analyticsPrefsImpl: AnalyticsPrefsImpl): AnalyticsPrefs
// Exposes HomoglyphPrefsImpl wherever a HomoglyphPrefs dependency is requested.
@Binds fun bindHomoglyphEncodingPrefs(homoglyphEncodingPrefsImpl: HomoglyphPrefsImpl): HomoglyphPrefs
@Binds fun bindCustomEmojiPrefs(customEmojiPrefsImpl: CustomEmojiPrefsImpl): CustomEmojiPrefs
@Binds fun bindMapConsentPrefs(mapConsentPrefsImpl: MapConsentPrefsImpl): MapConsentPrefs
@@ -127,6 +135,12 @@ interface PrefsModule {
fun provideAnalyticsSharedPreferences(@ApplicationContext context: Context): SharedPreferences =
context.getSharedPreferences("analytics-prefs", Context.MODE_PRIVATE)
/**
 * Supplies the private [SharedPreferences] file named "homoglyph-encoding-prefs", qualified with
 * [HomoglyphEncodingSharedPreferences].
 *
 * @param context application context used to open the preferences file.
 * @return the preferences instance opened in [Context.MODE_PRIVATE].
 */
@Provides
@Singleton
@HomoglyphEncodingSharedPreferences
fun provideHomoglyphEncodingSharedPreferences(@ApplicationContext context: Context): SharedPreferences {
    val prefsFileName = "homoglyph-encoding-prefs"
    return context.getSharedPreferences(prefsFileName, Context.MODE_PRIVATE)
}
@Provides
@Singleton
@AppSharedPreferences

View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2026 Meshtastic LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.meshtastic.core.prefs.homoglyph
import android.content.SharedPreferences
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.callbackFlow
import org.meshtastic.core.prefs.PrefDelegate
import org.meshtastic.core.prefs.di.HomoglyphEncodingSharedPreferences
import javax.inject.Inject
import javax.inject.Singleton
/** Read/write access to the user's homoglyph-encoding setting, plus a way to observe it. */
interface HomoglyphPrefs {
/** Whether the user has switched homoglyph encoding on. Writing this property changes the stored setting. */
var homoglyphEncodingEnabled: Boolean
/**
* Observes [homoglyphEncodingEnabled].
*
* Implementations are expected to emit the current value first and then a new value on every change.
*
* @return a [Flow] of [Boolean] that is `true` while homoglyph encoding is enabled.
*/
fun getHomoglyphEncodingEnabledChangesFlow(): Flow<Boolean>
companion object {
/** Storage key under which [homoglyphEncodingEnabled] is persisted. */
const val KEY_HOMOGLYPH_ENCODING_ENABLED = "enabled"
}
}
/**
 * [HomoglyphPrefs] implementation backed by the [SharedPreferences] instance qualified with
 * [HomoglyphEncodingSharedPreferences].
 */
@Singleton
class HomoglyphPrefsImpl
@Inject
constructor(
    @HomoglyphEncodingSharedPreferences private val homoglyphEncodingSharedPreferences: SharedPreferences,
) : HomoglyphPrefs {
    // Stored under KEY_HOMOGLYPH_ENCODING_ENABLED; the trailing `false` is presumably the default value used when
    // nothing has been stored yet — confirm against PrefDelegate.
    override var homoglyphEncodingEnabled: Boolean by
        PrefDelegate(homoglyphEncodingSharedPreferences, HomoglyphPrefs.KEY_HOMOGLYPH_ENCODING_ENABLED, false)

    /**
     * Emits the current value of [homoglyphEncodingEnabled] and then re-emits it whenever the backing preference
     * changes. The change listener is unregistered when the collector goes away.
     *
     * The same value may occasionally be emitted twice in a row; collectors must treat emissions as "latest state",
     * not as discrete toggle events.
     */
    override fun getHomoglyphEncodingEnabledChangesFlow(): Flow<Boolean> = callbackFlow {
        val listener =
            SharedPreferences.OnSharedPreferenceChangeListener { _, key ->
                // A null key is reported when the preferences are cleared (apps targeting Android R+ on R+ devices);
                // the stored flag is gone in that case as well, so the value has to be re-read and re-emitted.
                if (key == null || key == HomoglyphPrefs.KEY_HOMOGLYPH_ENCODING_ENABLED) {
                    trySend(homoglyphEncodingEnabled)
                }
            }
        // Register first, then emit the initial value: in the opposite order a change that lands between the
        // initial emission and the registration would never reach the collector.
        homoglyphEncodingSharedPreferences.registerOnSharedPreferenceChangeListener(listener)
        trySend(homoglyphEncodingEnabled)
        // Referencing the listener here also keeps it strongly reachable for as long as the flow is collected.
        awaitClose { homoglyphEncodingSharedPreferences.unregisterOnSharedPreferenceChangeListener(listener) }
    }
}

View File

@@ -294,6 +294,7 @@
<string name="theme_system">System default</string>
<string name="choose_theme">Choose theme</string>
<string name="provide_location_to_mesh">Provide phone location to mesh</string>
<string name="use_homoglyph_characters_encoding">Compact encoding for Cyrillic</string>
<plurals name="delete_messages">
<item quantity="one">Delete message?</item>
<item quantity="other">Delete %1$s messages?</item>

View File

@@ -62,4 +62,6 @@ dependencies {
androidTestImplementation(libs.androidx.compose.ui.test.junit4)
androidTestImplementation(libs.androidx.test.ext.junit)
testImplementation(libs.junit)
}

View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2026 Meshtastic LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.meshtastic.feature.messaging
/**
 * Shrinks the UTF-8 size of transmitted text messages by replacing certain letters of national alphabets with
 * Latin characters of identical appearance (homoglyphs), for example Cyrillic "А", "С", "у" -> Latin "A", "C", "y".
 *
 * According to the original author's statistics, such letters make up about 20-25% of the letters in an average
 * text, so after the replacement a message can fit roughly 140-145 characters instead of roughly 115-120.
 *
 * Note that the replacement is purely visual: the resulting string no longer contains the original code points.
 */
internal object HomoglyphCharacterStringTransformer {
    /**
     * Characters from the Cyrillic block (U+0400-U+04FF, https://www.compart.com/en/unicode/block/U+0400), each of
     * which occupies 2 bytes in UTF-8, mapped to the similarly written ASCII character, which occupies 1 byte.
     *
     * Only "reliable", visually identical characters are listed here. Characters that merely resemble a Latin
     * letter but carry descenders, hooks, strokes, etc. are not simplified and stay 2-byte as usual.
     *
     * Details for every entry: https://www.compart.com/en/unicode/U+XXXX (substitute the code point).
     */
    private val homoglyphCharactersSubstitutionMapping: Map<Char, Char> =
        mapOf(
            '\u0405' to 'S', // U+0405 - Cyrillic Capital Letter Dze
            '\u0406' to 'I', // U+0406 - Cyrillic Capital Letter Byelorussian-Ukrainian I
            '\u0408' to 'J', // U+0408 - Cyrillic Capital Letter Je
            '\u0410' to 'A', // U+0410 - Cyrillic Capital Letter A
            '\u0412' to 'B', // U+0412 - Cyrillic Capital Letter Ve
            '\u0415' to 'E', // U+0415 - Cyrillic Capital Letter Ie
            '\u041A' to 'K', // U+041A - Cyrillic Capital Letter Ka
            '\u041C' to 'M', // U+041C - Cyrillic Capital Letter Em
            '\u041D' to 'H', // U+041D - Cyrillic Capital Letter En
            '\u041E' to 'O', // U+041E - Cyrillic Capital Letter O
            '\u0420' to 'P', // U+0420 - Cyrillic Capital Letter Er
            '\u0421' to 'C', // U+0421 - Cyrillic Capital Letter Es
            '\u0422' to 'T', // U+0422 - Cyrillic Capital Letter Te
            '\u0425' to 'X', // U+0425 - Cyrillic Capital Letter Ha
            '\u0430' to 'a', // U+0430 - Cyrillic Small Letter A
            '\u0435' to 'e', // U+0435 - Cyrillic Small Letter Ie
            '\u043E' to 'o', // U+043E - Cyrillic Small Letter O
            '\u0440' to 'p', // U+0440 - Cyrillic Small Letter Er
            '\u0441' to 'c', // U+0441 - Cyrillic Small Letter Es
            '\u0443' to 'y', // U+0443 - Cyrillic Small Letter U
            '\u0445' to 'x', // U+0445 - Cyrillic Small Letter Ha
            '\u0455' to 's', // U+0455 - Cyrillic Small Letter Dze
            '\u0456' to 'i', // U+0456 - Cyrillic Small Letter Byelorussian-Ukrainian I
            '\u0458' to 'j', // U+0458 - Cyrillic Small Letter Je
            '\u04AE' to 'Y', // U+04AE - Cyrillic Capital Letter Straight U
            // Capital "Ze" is a special case: it is replaced with the digit "three" rather than with a letter.
            // The idea is the same - a 2-byte letter becomes a 1-byte character of the same appearance - but it
            // is called out explicitly to avoid confusion.
            '\u0417' to '3', // U+0417 - Cyrillic Capital Letter Ze
        )

    /**
     * Returns [value] with every character that has a known homoglyph replaced by its 1-byte counterpart, so that
     * the text takes up fewer bytes when encoded as UTF-8. Characters without a mapping are copied unchanged, so
     * the result always has the same number of characters as the input.
     *
     * @param value original string value.
     * @return optimized string value.
     */
    fun optimizeUtf8StringWithHomoglyphs(value: String): String =
        // The output length equals the input length, so the builder can be sized exactly up front.
        buildString(value.length) {
            for (c in value) append(homoglyphCharactersSubstitutionMapping[c] ?: c)
        }
}

View File

@@ -178,6 +178,7 @@ fun MessageScreen(
val quickChatActions by viewModel.quickChatActions.collectAsStateWithLifecycle(initialValue = emptyList())
val pagedMessages = viewModel.getMessagesFromPaged(contactKey).collectAsLazyPagingItems()
val contactSettings by viewModel.contactSettings.collectAsStateWithLifecycle(initialValue = emptyMap())
val homoglyphEncodingEnabled by viewModel.homoglyphEncodingEnabled.collectAsStateWithLifecycle(initialValue = false)
// UI State managed within this Composable
var replyingToPacketId by rememberSaveable { mutableStateOf<Int?>(null) }
@@ -469,6 +470,7 @@ fun MessageScreen(
)
MessageInput(
isEnabled = connectionState.isConnected(),
isHomoglyphEncodingEnabled = homoglyphEncodingEnabled,
textFieldState = messageInputState,
onSendMessage = {
val messageText = messageInputState.text.toString().trim()
@@ -938,12 +940,21 @@ private const val MAX_LINES = 3
@Composable
private fun MessageInput(
isEnabled: Boolean,
isHomoglyphEncodingEnabled: Boolean,
textFieldState: TextFieldState,
modifier: Modifier = Modifier,
maxByteSize: Int = MESSAGE_CHARACTER_LIMIT_BYTES,
onSendMessage: () -> Unit,
) {
val currentText = textFieldState.text.toString()
val currentTextRaw = textFieldState.text.toString()
val currentText =
if (isHomoglyphEncodingEnabled) {
HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(currentTextRaw)
} else {
currentTextRaw
}
val currentByteLength =
remember(currentText) {
// Recalculate only when text changes
@@ -1000,12 +1011,23 @@ private fun MessageInputPreview() {
AppTheme {
Surface {
Column(modifier = Modifier.padding(8.dp)) {
MessageInput(isEnabled = true, textFieldState = rememberTextFieldState("Hello"), onSendMessage = {})
MessageInput(
isEnabled = true,
isHomoglyphEncodingEnabled = false,
textFieldState = rememberTextFieldState("Hello"),
onSendMessage = {},
)
Spacer(Modifier.size(16.dp))
MessageInput(isEnabled = false, textFieldState = rememberTextFieldState("Disabled"), onSendMessage = {})
MessageInput(
isEnabled = false,
isHomoglyphEncodingEnabled = false,
textFieldState = rememberTextFieldState("Disabled"),
onSendMessage = {},
)
Spacer(Modifier.size(16.dp))
MessageInput(
isEnabled = true,
isHomoglyphEncodingEnabled = false,
textFieldState =
rememberTextFieldState(
"A very long message that might exceed the byte limit " +
@@ -1018,6 +1040,7 @@ private fun MessageInputPreview() {
// Test Japanese characters (multi-byte)
MessageInput(
isEnabled = true,
isHomoglyphEncodingEnabled = false,
textFieldState = rememberTextFieldState("こんにちは世界"), // Hello World in Japanese
onSendMessage = {},
maxByteSize = 10,

View File

@@ -44,6 +44,7 @@ import org.meshtastic.core.database.model.Node
import org.meshtastic.core.model.Capabilities
import org.meshtastic.core.model.DataPacket
import org.meshtastic.core.prefs.emoji.CustomEmojiPrefs
import org.meshtastic.core.prefs.homoglyph.HomoglyphPrefs
import org.meshtastic.core.prefs.ui.UiPrefs
import org.meshtastic.core.service.MeshServiceNotifications
import org.meshtastic.core.service.ServiceAction
@@ -67,6 +68,7 @@ constructor(
private val packetRepository: PacketRepository,
private val uiPrefs: UiPrefs,
private val customEmojiPrefs: CustomEmojiPrefs,
private val homoglyphEncodingPrefs: HomoglyphPrefs,
private val meshServiceNotifications: MeshServiceNotifications,
) : ViewModel() {
private val _title = MutableStateFlow("")
@@ -122,6 +124,8 @@ constructor(
?.map { it.first }
?.take(6) ?: listOf("👍", "👎", "😂", "🔥", "❤️", "😮")
val homoglyphEncodingEnabled = homoglyphEncodingPrefs.getHomoglyphEncodingEnabledChangesFlow()
init {
val contactKey = savedStateHandle.get<String>("contactKey")
if (contactKey != null) {
@@ -204,8 +208,20 @@ constructor(
}
}
}
// Apply homoglyph encoding to the transmitted string if the user has activated the feature.
// In most cases the value in the "str" parameter will already contain the correctly
// transformed string from the text input. This call is added here to make sure that
// the feature is effective across all possible message paths (quick-chat, reply, etc.)
val dataPacketText: String =
if (homoglyphEncodingPrefs.homoglyphEncodingEnabled) {
HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(str)
} else {
str
}
val p =
DataPacket(dest, channel ?: 0, str, replyId).apply {
DataPacket(dest, channel ?: 0, dataPacketText, replyId).apply {
from = ourNodeInfo.value?.user?.id ?: DataPacket.ID_LOCAL
}
sendDataPacket(p)

View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2026 Meshtastic LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.meshtastic.feature.messaging
import org.junit.Assert.assertEquals
import org.junit.Assert.assertTrue
import org.junit.Test
/** Unit tests for [HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs]. */
class HomoglyphCharacterTransformTest {
    @Test
    fun `optimizeUtf8StringWithHomoglyphs shrinks binary size of cyrillic text containing some homoglyphs`() {
        val testString = "Мештастик - это проект с открытым исходным кодом"

        val transformedTestString = HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(testString)

        val originalSize = testString.toByteArray(charset = Charsets.UTF_8).size
        val transformedSize = transformedTestString.toByteArray(charset = Charsets.UTF_8).size
        assertTrue(
            "expected fewer than $originalSize bytes after transformation but got $transformedSize",
            transformedSize < originalSize,
        )
    }

    @Test
    fun `optimizeUtf8StringWithHomoglyphs shrinks binary size in half of cyrillic text containing only homoglyphs`() {
        val testString = "Косуха"

        val transformedTestString = HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(testString)

        val originalSize = testString.toByteArray(charset = Charsets.UTF_8).size
        val transformedSize = transformedTestString.toByteArray(charset = Charsets.UTF_8).size
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages truthful.
        assertEquals(originalSize / 2, transformedSize)
        // Every letter of the input has a Latin look-alike, so the exact output is known.
        assertEquals("Kocyxa", transformedTestString)
    }

    @Test
    fun `optimizeUtf8StringWithHomoglyphs does not transform cyrillic text without any homoglyphs`() {
        val testString = "Близкий"

        val transformedTestString = HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(testString)

        assertEquals(testString, transformedTestString)
    }

    @Test
    fun `optimizeUtf8StringWithHomoglyphs does not transform latin text message`() {
        val testString = "Meshtastic is an open source, off-grid, decentralized mesh network"

        val transformedTestString = HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(testString)

        assertEquals(testString, transformedTestString)
    }

    @Test
    fun `optimizeUtf8StringWithHomoglyphs does not transform characters impossible to present by latin letters`() {
        val testString = "ميشتاستيك هو مصدر مفتوح ، خارج الشبكة ، شبكة شبكة"

        val transformedTestString = HomoglyphCharacterStringTransformer.optimizeUtf8StringWithHomoglyphs(testString)

        assertEquals(testString, transformedTestString)
    }
}

View File

@@ -36,6 +36,7 @@ import androidx.compose.foundation.rememberScrollState
import androidx.compose.foundation.verticalScroll
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.automirrored.rounded.KeyboardArrowRight
import androidx.compose.material.icons.filled.Abc
import androidx.compose.material.icons.filled.BugReport
import androidx.compose.material.icons.rounded.AppSettingsAlt
import androidx.compose.material.icons.rounded.FormatPaint
@@ -102,6 +103,7 @@ import org.meshtastic.core.strings.theme
import org.meshtastic.core.strings.theme_dark
import org.meshtastic.core.strings.theme_light
import org.meshtastic.core.strings.theme_system
import org.meshtastic.core.strings.use_homoglyph_characters_encoding
import org.meshtastic.core.ui.component.DropDownPreference
import org.meshtastic.core.ui.component.ListItem
import org.meshtastic.core.ui.component.MainAppBar
@@ -315,6 +317,15 @@ fun SettingsScreen(
onClick = { settingsViewModel.setProvideLocation(!provideLocation) },
)
val homoglyphEncodingEnabled by
viewModel.homoglyphEncodingEnabledFlow.collectAsStateWithLifecycle(false)
SwitchListItem(
text = stringResource(Res.string.use_homoglyph_characters_encoding),
checked = homoglyphEncodingEnabled,
leadingIcon = Icons.Default.Abc,
onClick = { viewModel.toggleHomoglyphCharactersEncodingEnabled() },
)
val settingsLauncher =
rememberLauncherForActivityResult(contract = ActivityResultContracts.StartActivityForResult()) {}

View File

@@ -57,6 +57,7 @@ import org.meshtastic.core.model.Position
import org.meshtastic.core.model.util.toChannelSet
import org.meshtastic.core.navigation.SettingsRoutes
import org.meshtastic.core.prefs.analytics.AnalyticsPrefs
import org.meshtastic.core.prefs.homoglyph.HomoglyphPrefs
import org.meshtastic.core.prefs.map.MapConsentPrefs
import org.meshtastic.core.service.ConnectionState
import org.meshtastic.core.service.IMeshService
@@ -117,6 +118,7 @@ constructor(
private val locationRepository: LocationRepository,
private val mapConsentPrefs: MapConsentPrefs,
private val analyticsPrefs: AnalyticsPrefs,
private val homoglyphEncodingPrefs: HomoglyphPrefs,
) : ViewModel() {
private val meshService: IMeshService?
get() = serviceRepository.meshService
@@ -127,6 +129,12 @@ constructor(
analyticsPrefs.analyticsAllowed = !analyticsPrefs.analyticsAllowed
}
val homoglyphEncodingEnabledFlow = homoglyphEncodingPrefs.getHomoglyphEncodingEnabledChangesFlow()
/** Flips the persisted homoglyph-encoding setting: enabled becomes disabled and vice versa. */
fun toggleHomoglyphCharactersEncodingEnabled() {
    val currentlyEnabled = homoglyphEncodingPrefs.homoglyphEncodingEnabled
    homoglyphEncodingPrefs.homoglyphEncodingEnabled = !currentlyEnabled
}
private val destNum =
savedStateHandle.get<Int>("destNum")
?: runCatching { savedStateHandle.toRoute<SettingsRoutes.Settings>().destNum }.getOrNull()