mirror of
https://github.com/meshtastic/firmware.git
synced 2026-05-24 16:58:01 -04:00
Sane sanitization
This commit is contained in:
@@ -29,6 +29,7 @@ build_flags = -Wno-missing-field-initializers
|
||||
-DUSE_THREAD_NAMES
|
||||
-DTINYGPS_OPTION_NO_CUSTOM_FIELDS
|
||||
-DPB_ENABLE_MALLOC=1
|
||||
-DPB_VALIDATE_UTF8=1
|
||||
-DRADIOLIB_EXCLUDE_CC1101=1
|
||||
-DRADIOLIB_EXCLUDE_NRF24=1
|
||||
-DRADIOLIB_EXCLUDE_RF69=1
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "TypeConversions.h"
|
||||
#include "mesh/generated/meshtastic/deviceonly.pb.h"
|
||||
#include "mesh/generated/meshtastic/mesh.pb.h"
|
||||
#include "meshUtils.h"
|
||||
|
||||
meshtastic_NodeInfo TypeConversions::ConvertToNodeInfo(const meshtastic_NodeInfoLite *lite)
|
||||
{
|
||||
@@ -82,8 +83,10 @@ meshtastic_UserLite TypeConversions::ConvertToUserLite(meshtastic_User user)
|
||||
|
||||
strncpy(lite.long_name, user.long_name, sizeof(lite.long_name));
|
||||
lite.long_name[sizeof(lite.long_name) - 1] = '\0';
|
||||
sanitizeUtf8(lite.long_name, sizeof(lite.long_name));
|
||||
strncpy(lite.short_name, user.short_name, sizeof(lite.short_name));
|
||||
lite.short_name[sizeof(lite.short_name) - 1] = '\0';
|
||||
sanitizeUtf8(lite.short_name, sizeof(lite.short_name));
|
||||
lite.hw_model = user.hw_model;
|
||||
lite.role = user.role;
|
||||
lite.is_licensed = user.is_licensed;
|
||||
@@ -102,8 +105,10 @@ meshtastic_User TypeConversions::ConvertToUser(uint32_t nodeNum, meshtastic_User
|
||||
snprintf(user.id, sizeof(user.id), "!%08x", nodeNum);
|
||||
strncpy(user.long_name, lite.long_name, sizeof(user.long_name));
|
||||
user.long_name[sizeof(user.long_name) - 1] = '\0';
|
||||
sanitizeUtf8(user.long_name, sizeof(user.long_name));
|
||||
strncpy(user.short_name, lite.short_name, sizeof(user.short_name));
|
||||
user.short_name[sizeof(user.short_name) - 1] = '\0';
|
||||
sanitizeUtf8(user.short_name, sizeof(user.short_name));
|
||||
user.hw_model = lite.hw_model;
|
||||
user.role = lite.role;
|
||||
user.is_licensed = lite.is_licensed;
|
||||
|
||||
@@ -117,4 +117,93 @@ size_t pb_string_length(const char *str, size_t max_len)
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
// Sanitize a fixed-size char buffer in place so that it holds a NUL-terminated,
// well-formed UTF-8 string.
//
// buf      Buffer to sanitize; may be nullptr (treated as a no-op).
// bufSize  Total size of buf in bytes, including room for the terminator.
//
// Every byte that cannot be part of a well-formed UTF-8 sequence is overwritten
// with '?'. A sequence is rejected when its lead byte is invalid, when it is cut
// short by the end of the string, when a continuation byte is missing, or when it
// decodes to an overlong form, a surrogate half, or a value above U+10FFFF.
// Only the offending lead byte is replaced; scanning then resumes at the next
// byte, so valid ASCII or valid shorter sequences that follow a broken lead byte
// are preserved.
//
// Returns true if the buffer content was changed: at least one byte was replaced
// with '?', or the string had no terminator inside bufSize and had to be
// truncated. Returns false for a nullptr buffer, a zero size, or untouched text.
bool sanitizeUtf8(char *buf, size_t bufSize)
{
    if (!buf || bufSize == 0)
        return false;

    // Look for an existing terminator instead of inspecting buf[bufSize - 1] directly:
    // bytes after an earlier terminator may be uninitialized and must not count as content.
    const void *terminator = memchr(buf, '\0', bufSize);
    bool replaced = (terminator == nullptr); // forcing a terminator drops a content byte

    // Ensure null-terminated within buffer
    buf[bufSize - 1] = '\0';

    const size_t len = terminator ? static_cast<size_t>(static_cast<const char *>(terminator) - buf) : bufSize - 1;

    size_t i = 0;
    while (i < len) {
        const uint8_t lead = static_cast<uint8_t>(buf[i]);

        if (lead <= 0x7F) {
            // ASCII — valid single byte
            i++;
            continue;
        }

        // Classify the lead byte: sequence length, smallest codepoint that legitimately
        // needs that length (anything lower is an overlong encoding), and payload bits.
        size_t seqLen = 0; // stays 0 for an invalid lead byte (0x80-0xBF or 0xF8+)
        uint32_t minCodepoint = 0;
        uint32_t cp = 0;
        if ((lead & 0xE0) == 0xC0) {
            seqLen = 2;
            minCodepoint = 0x80;
            cp = lead & 0x1F;
        } else if ((lead & 0xF0) == 0xE0) {
            seqLen = 3;
            minCodepoint = 0x800;
            cp = lead & 0x0F;
        } else if ((lead & 0xF8) == 0xF0) {
            seqLen = 4;
            minCodepoint = 0x10000;
            cp = lead & 0x07;
        }

        // The sequence must fit in the remaining string (i < len, so len - i cannot wrap).
        bool valid = (seqLen != 0) && (seqLen <= len - i);

        // Continuation bytes must be 10xxxxxx; fold their payload into the codepoint.
        for (size_t j = 1; valid && j < seqLen; j++) {
            const uint8_t cont = static_cast<uint8_t>(buf[i + j]);
            if ((cont & 0xC0) != 0x80)
                valid = false;
            else
                cp = (cp << 6) | (cont & 0x3F);
        }

        // Overlong encoding, out of Unicode range, or surrogate half
        if (valid && (cp < minCodepoint || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)))
            valid = false;

        if (valid) {
            i += seqLen;
        } else {
            // Replace only the lead byte; whatever follows is re-examined on the next
            // iteration, so stray continuation bytes get replaced and valid text survives.
            buf[i] = '?';
            replaced = true;
            i++;
        }
    }

    return replaced;
}
|
||||
@@ -38,6 +38,10 @@ const std::string vformat(const char *const zcFormat, ...);
|
||||
// Get actual string length for nanopb char array fields.
|
||||
size_t pb_string_length(const char *str, size_t max_len);
|
||||
|
||||
// Sanitize a fixed-size char buffer in-place by replacing invalid UTF-8 sequences with '?'.
|
||||
// Ensures the result is null-terminated within bufSize. Returns true if any bytes were replaced.
|
||||
bool sanitizeUtf8(char *buf, size_t bufSize);
|
||||
|
||||
/// Calculate 2^n without calling pow() - used for spreading factor and other calculations
|
||||
inline uint32_t pow_of_2(uint32_t n)
|
||||
{
|
||||
|
||||
@@ -599,10 +599,14 @@ void AdminModule::handleSetOwner(const meshtastic_User &o)
|
||||
if (*o.long_name) {
|
||||
changed |= strcmp(owner.long_name, o.long_name);
|
||||
strncpy(owner.long_name, o.long_name, sizeof(owner.long_name));
|
||||
owner.long_name[sizeof(owner.long_name) - 1] = '\0';
|
||||
sanitizeUtf8(owner.long_name, sizeof(owner.long_name));
|
||||
}
|
||||
if (*o.short_name) {
|
||||
changed |= strcmp(owner.short_name, o.short_name);
|
||||
strncpy(owner.short_name, o.short_name, sizeof(owner.short_name));
|
||||
owner.short_name[sizeof(owner.short_name) - 1] = '\0';
|
||||
sanitizeUtf8(owner.short_name, sizeof(owner.short_name));
|
||||
}
|
||||
snprintf(owner.id, sizeof(owner.id), "!%08x", nodeDB->getNodeNum());
|
||||
|
||||
@@ -1400,7 +1404,11 @@ void AdminModule::handleSetHamMode(const meshtastic_HamParameters &p)
|
||||
|
||||
// Set call sign and override lora limitations for licensed use
|
||||
strncpy(owner.long_name, p.call_sign, sizeof(owner.long_name));
|
||||
owner.long_name[sizeof(owner.long_name) - 1] = '\0';
|
||||
sanitizeUtf8(owner.long_name, sizeof(owner.long_name));
|
||||
strncpy(owner.short_name, p.short_name, sizeof(owner.short_name));
|
||||
owner.short_name[sizeof(owner.short_name) - 1] = '\0';
|
||||
sanitizeUtf8(owner.short_name, sizeof(owner.short_name));
|
||||
owner.is_licensed = true;
|
||||
config.lora.override_duty_cycle = true;
|
||||
config.lora.tx_power = p.tx_power;
|
||||
|
||||
195
test/test_utf8/test_main.cpp
Normal file
195
test/test_utf8/test_main.cpp
Normal file
@@ -0,0 +1,195 @@
|
||||
#include "meshUtils.h"
|
||||
#include <cstring>
|
||||
#include <unity.h>
|
||||
|
||||
// Unity per-test setup hook; these tests need no shared fixture, so it is empty.
void setUp(void) {}
|
||||
// Unity per-test teardown hook; nothing to release, so it is empty.
void tearDown(void) {}
|
||||
|
||||
// --- Valid UTF-8 should pass through unchanged ---
|
||||
|
||||
void test_ascii_unchanged()
|
||||
{
|
||||
char buf[32] = "Hello World";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("Hello World", buf);
|
||||
}
|
||||
|
||||
void test_valid_2byte_unchanged()
|
||||
{
|
||||
// "café" — é is C3 A9
|
||||
char buf[16] = "caf\xC3\xA9";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("caf\xC3\xA9", buf);
|
||||
}
|
||||
|
||||
void test_valid_3byte_unchanged()
|
||||
{
|
||||
// "€" is E2 82 AC
|
||||
char buf[16] = "\xE2\x82\xAC";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("\xE2\x82\xAC", buf);
|
||||
}
|
||||
|
||||
void test_valid_4byte_emoji_unchanged()
|
||||
{
|
||||
// 🌙 is F0 9F 8C 99
|
||||
char buf[16] = "\xF0\x9F\x8C\x99";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("\xF0\x9F\x8C\x99", buf);
|
||||
}
|
||||
|
||||
void test_valid_mixed_unchanged()
|
||||
{
|
||||
// "Hi 🌙!" — mix of ASCII and 4-byte
|
||||
char buf[16] = "Hi \xF0\x9F\x8C\x99!";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("Hi \xF0\x9F\x8C\x99!", buf);
|
||||
}
|
||||
|
||||
void test_empty_string()
|
||||
{
|
||||
char buf[8] = "";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("", buf);
|
||||
}
|
||||
|
||||
// --- Invalid sequences observed in the wild ---
|
||||
|
||||
void test_truncated_4byte_at_end()
|
||||
{
|
||||
// Name with valid emoji 🌙 followed by a truncated 4-byte sequence + ASCII
|
||||
char buf[32] = "Lunar Tower \xF0\x9F\x8C\x99\xF0\x9F\x97"
|
||||
"4";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
// The 🌙 should be preserved; F0 9F 97 is an incomplete 4-byte sequence,
|
||||
// '4' (0x34) is not a valid continuation byte
|
||||
TEST_ASSERT_EQUAL_STRING("Lunar Tower \xF0\x9F\x8C\x99???4", buf);
|
||||
}
|
||||
|
||||
void test_lone_lead_bytes_without_continuations()
|
||||
{
|
||||
// Mixed ASCII with stray multibyte lead bytes (E1, F3) lacking proper continuations
|
||||
char buf[32] = "Mesht\xE1\xF3tic 37e2";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
// E1 expects 2 continuation bytes, but F3 is not a continuation → E1 replaced
|
||||
// F3 expects 3 continuation bytes, 't','i','c' are not continuations → F3 replaced
|
||||
TEST_ASSERT_EQUAL_STRING("Mesht??tic 37e2", buf);
|
||||
}
|
||||
|
||||
// --- Edge cases ---
|
||||
|
||||
void test_bare_continuation_byte()
|
||||
{
|
||||
// 0x80 alone is invalid (continuation byte with no lead)
|
||||
char buf[8] = "\x80";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("?", buf);
|
||||
}
|
||||
|
||||
void test_overlong_2byte()
|
||||
{
|
||||
// C0 AF is an overlong encoding of U+002F '/'
|
||||
char buf[8] = "\xC0\xAF";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
// C0 is a 2-byte lead, AF is valid continuation, but codepoint 0x2F < 0x80 → overlong
|
||||
// C0 replaced, AF (now bare continuation) also replaced
|
||||
TEST_ASSERT_EQUAL_STRING("??", buf);
|
||||
}
|
||||
|
||||
void test_surrogate_half()
|
||||
{
|
||||
// ED A0 80 encodes U+D800 (surrogate half — invalid in UTF-8)
|
||||
char buf[8] = "\xED\xA0\x80";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("???", buf);
|
||||
}
|
||||
|
||||
void test_5byte_sequence_rejected()
|
||||
{
|
||||
// F8 80 80 80 80 — 5-byte sequence, not valid UTF-8
|
||||
char buf[8] = "\xF8\x80\x80\x80\x80";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
// F8 is invalid lead (>= 0xF8), each 0x80 is bare continuation
|
||||
TEST_ASSERT_EQUAL_STRING("?????", buf);
|
||||
}
|
||||
|
||||
void test_truncated_3byte_at_buffer_end()
|
||||
{
|
||||
// Buffer is exactly 4 bytes: E2 82 then forced null at [3]
|
||||
char buf[4];
|
||||
buf[0] = '\xE2';
|
||||
buf[1] = '\x82';
|
||||
buf[2] = '\0'; // String ends before the 3-byte sequence completes
|
||||
buf[3] = '\0';
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("??", buf);
|
||||
}
|
||||
|
||||
void test_null_termination_enforced()
|
||||
{
|
||||
// Fill buffer completely with no null terminator
|
||||
char buf[5];
|
||||
memset(buf, 'A', sizeof(buf));
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
// Should be null-terminated and content preserved (all ASCII)
|
||||
TEST_ASSERT_EQUAL_STRING("AAAA", buf);
|
||||
}
|
||||
|
||||
void test_null_buffer()
|
||||
{
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(nullptr, 10));
|
||||
}
|
||||
|
||||
void test_zero_size()
|
||||
{
|
||||
char buf[4] = "Hi";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, 0));
|
||||
// Buffer should be untouched
|
||||
TEST_ASSERT_EQUAL_STRING("Hi", buf);
|
||||
}
|
||||
|
||||
void test_valid_max_codepoint()
|
||||
{
|
||||
// U+10FFFF = F4 8F BF BF (maximum valid Unicode codepoint)
|
||||
char buf[8] = "\xF4\x8F\xBF\xBF";
|
||||
TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
TEST_ASSERT_EQUAL_STRING("\xF4\x8F\xBF\xBF", buf);
|
||||
}
|
||||
|
||||
void test_above_max_codepoint()
|
||||
{
|
||||
// U+110000 = F4 90 80 80 (just above maximum valid Unicode)
|
||||
char buf[8] = "\xF4\x90\x80\x80";
|
||||
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
UNITY_BEGIN();
|
||||
|
||||
// Valid UTF-8 passthrough
|
||||
RUN_TEST(test_ascii_unchanged);
|
||||
RUN_TEST(test_valid_2byte_unchanged);
|
||||
RUN_TEST(test_valid_3byte_unchanged);
|
||||
RUN_TEST(test_valid_4byte_emoji_unchanged);
|
||||
RUN_TEST(test_valid_mixed_unchanged);
|
||||
RUN_TEST(test_empty_string);
|
||||
|
||||
// Invalid sequences observed in the wild
|
||||
RUN_TEST(test_truncated_4byte_at_end);
|
||||
RUN_TEST(test_lone_lead_bytes_without_continuations);
|
||||
|
||||
// Edge cases
|
||||
RUN_TEST(test_bare_continuation_byte);
|
||||
RUN_TEST(test_overlong_2byte);
|
||||
RUN_TEST(test_surrogate_half);
|
||||
RUN_TEST(test_5byte_sequence_rejected);
|
||||
RUN_TEST(test_truncated_3byte_at_buffer_end);
|
||||
RUN_TEST(test_null_termination_enforced);
|
||||
RUN_TEST(test_null_buffer);
|
||||
RUN_TEST(test_zero_size);
|
||||
RUN_TEST(test_valid_max_codepoint);
|
||||
RUN_TEST(test_above_max_codepoint);
|
||||
|
||||
return UNITY_END();
|
||||
}
|
||||
Reference in New Issue
Block a user