#!/bin/bash

# List the commits that are on the head branch (develop) but not yet on the
# base branch (master), together with the pull request GitHub associates with
# each commit: number, title, a shortened description and labels.
#
# Usage:
#   ./show-unmerged-prs.sh            # Show all unmerged commits
#   ./show-unmerged-prs.sh --bugfix   # Show only bugfix-labeled PRs
#   ./show-unmerged-prs.sh --feature  # Show only feature-labeled PRs
#
# Requires: git, the GitHub CLI (gh) and jq.

set -e

REPO="firmware"
OWNER="meshtastic"
BASE_BRANCH="master"
HEAD_BRANCH="develop"
LIMIT=100
FILTER_LABEL=""

# Parse arguments. A for-loop over "$@" already visits every argument, so no
# shift is needed.
for arg in "$@"; do
    case $arg in
    --bugfix)
        FILTER_LABEL="bugfix"
        ;;
    --feature)
        FILTER_LABEL="feature"
        ;;
    --help)
        echo "Usage: $0 [OPTIONS]"
        echo "Options:"
        echo "  --bugfix   Show only PRs labeled with 'bugfix'"
        echo "  --feature  Show only PRs labeled with 'feature'"
        echo "  --help     Show this help message"
        exit 0
        ;;
    *)
        # Unknown options are still ignored, but no longer silently.
        echo "Warning: ignoring unknown option '$arg'" >&2
        ;;
    esac
done

if [ -n "$FILTER_LABEL" ]; then
    echo "Fetching commits in $HEAD_BRANCH that are not in $BASE_BRANCH (filtered by label: $FILTER_LABEL)..."
else
    echo "Fetching commits in $HEAD_BRANCH that are not in $BASE_BRANCH..."
fi
echo ""

# Check if gh CLI is available
if ! command -v gh &>/dev/null; then
    echo "ERROR: GitHub CLI (gh) not found. Please install it first."
    echo "Visit: https://cli.github.com/"
    exit 1
fi

# Every PR lookup below is parsed with jq, so fail early if it is missing.
if ! command -v jq &>/dev/null; then
    echo "ERROR: jq not found. Please install it first."
    exit 1
fi

# Print one field of the first pull request in a JSON array.
#   $1 - JSON text returned by the "commits/<sha>/pulls" endpoint
#   $2 - jq filter applied to the first array element
# Prints nothing (and still succeeds) when the JSON is empty, is not an array,
# or the filter fails, so a bad API response never aborts the script.
pr_field() {
    jq -r ".[0] | $2" <<<"$1" 2>/dev/null || true
}

# Refresh both branches. Best effort: when offline, the existing
# remote-tracking refs are used as they are.
git fetch origin "$HEAD_BRANCH" "$BASE_BRANCH" 2>/dev/null || true

# Commits reachable from the head branch but not from the base branch.
# -n caps the list inside git, so no pipe to head is needed.
commits=$(git log -n "$LIMIT" --pretty=format:"%H|%s" "origin/$BASE_BRANCH..origin/$HEAD_BRANCH")

count=0
displayed=0
echo "Commits in $HEAD_BRANCH not in $BASE_BRANCH:"
echo "=============================================="
echo ""

while IFS='|' read -r hash subject; do
    # An empty range still feeds one blank line through the here-string.
    [ -n "$hash" ] || continue

    # Plain assignment: ((count++)) returns status 1 when the old value is 0,
    # which would terminate the script under 'set -e'.
    count=$((count + 1))

    # Look up the PR for this commit. The JSON is kept intact and each field is
    # extracted separately, so '|' characters or line breaks inside the title
    # or body cannot shift the labels into the wrong variable.
    pr_json=$(gh api -X GET "/repos/$OWNER/$REPO/commits/$hash/pulls" \
        -H "Accept: application/vnd.github.v3+json" 2>/dev/null || true)
    pr_num=$(pr_field "$pr_json" '.number // empty')

    if [ -z "$pr_num" ]; then
        # If no PR found, skip if filter is active, otherwise show the commit
        if [ -z "$FILTER_LABEL" ]; then
            displayed=$((displayed + 1))
            echo "[$displayed] Commit: $hash"
            echo "    Subject: $subject"
            echo "    PR: Not found in GitHub"
            echo ""
        fi
        continue
    fi

    pr_title=$(pr_field "$pr_json" '.title // ""')
    # Collapse all whitespace runs so the description prints on one line.
    pr_desc=$(pr_field "$pr_json" '.body // "" | gsub("\\s+"; " ")')
    pr_labels=$(pr_field "$pr_json" '[.labels[]?.name] | join(",")')

    # Exact label match: wrapping both sides in commas keeps a filter of
    # "bugfix" from matching a label that merely contains that text.
    if [ -n "$FILTER_LABEL" ]; then
        case ",$pr_labels," in
        *",$FILTER_LABEL,"*) ;;
        *) continue ;;
        esac
    fi

    displayed=$((displayed + 1))
    echo "[$displayed] PR #$pr_num - $pr_title"
    echo "    Commit: $hash"
    if [ -n "$pr_desc" ]; then
        # Truncate description to 200 chars
        desc_short="${pr_desc:0:200}"
        if [ ${#pr_desc} -gt 200 ]; then
            desc_short+="..."
        fi
        echo "    Description: $desc_short"
    fi
    if [ -n "$pr_labels" ]; then
        echo "    Labels: $pr_labels"
    fi
    echo ""
done <<<"$commits"

echo ""
if [ -n "$FILTER_LABEL" ]; then
    echo "Done. Showing $displayed PRs with label '$FILTER_LABEL' from $count commits checked."
else
    echo "Done. Showing $displayed commits from $HEAD_BRANCH not in $BASE_BRANCH."
fi
RadioLibInterface *radio = RadioLibInterface::instance; if (!radio) { - // Intentionally silent: this path runs during portduinoSetup() before the - // console/SerialConsole is initialized, so LOG_* here would dereference a null pointer. + // This path can run during portduinoSetup() before the console is initialized. +#ifndef PIO_UNIT_TESTING + LOG_ERROR("No radio instance available to provide entropy"); +#endif return false; } diff --git a/src/mesh/TypeConversions.cpp b/src/mesh/TypeConversions.cpp index 201a703e2..3798daf28 100644 --- a/src/mesh/TypeConversions.cpp +++ b/src/mesh/TypeConversions.cpp @@ -1,6 +1,7 @@ #include "TypeConversions.h" #include "mesh/generated/meshtastic/deviceonly.pb.h" #include "mesh/generated/meshtastic/mesh.pb.h" +#include "meshUtils.h" meshtastic_NodeInfo TypeConversions::ConvertToNodeInfo(const meshtastic_NodeInfoLite *lite) { @@ -82,8 +83,10 @@ meshtastic_UserLite TypeConversions::ConvertToUserLite(meshtastic_User user) strncpy(lite.long_name, user.long_name, sizeof(lite.long_name)); lite.long_name[sizeof(lite.long_name) - 1] = '\0'; + sanitizeUtf8(lite.long_name, sizeof(lite.long_name)); strncpy(lite.short_name, user.short_name, sizeof(lite.short_name)); lite.short_name[sizeof(lite.short_name) - 1] = '\0'; + sanitizeUtf8(lite.short_name, sizeof(lite.short_name)); lite.hw_model = user.hw_model; lite.role = user.role; lite.is_licensed = user.is_licensed; @@ -102,8 +105,10 @@ meshtastic_User TypeConversions::ConvertToUser(uint32_t nodeNum, meshtastic_User snprintf(user.id, sizeof(user.id), "!%08x", nodeNum); strncpy(user.long_name, lite.long_name, sizeof(user.long_name)); user.long_name[sizeof(user.long_name) - 1] = '\0'; + sanitizeUtf8(user.long_name, sizeof(user.long_name)); strncpy(user.short_name, lite.short_name, sizeof(user.short_name)); user.short_name[sizeof(user.short_name) - 1] = '\0'; + sanitizeUtf8(user.short_name, sizeof(user.short_name)); user.hw_model = lite.hw_model; user.role = lite.role; user.is_licensed = 
lite.is_licensed; diff --git a/src/mesh/generated/meshtastic/config.pb.h b/src/mesh/generated/meshtastic/config.pb.h index 7e71f3f7a..0e14334d5 100644 --- a/src/mesh/generated/meshtastic/config.pb.h +++ b/src/mesh/generated/meshtastic/config.pb.h @@ -289,7 +289,14 @@ typedef enum _meshtastic_Config_LoRaConfig_RegionCode { /* ITU Region 1 Amateur Radio 2m band (144-146 MHz) */ meshtastic_Config_LoRaConfig_RegionCode_ITU1_2M = 27, /* ITU Region 2 / 3 Amateur Radio 2m band (144-148 MHz) */ - meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M = 28 + meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M = 28, + /* EU 866MHz band (Band no. 47b of 2006/771/EC and subsequent amendments) for Non-specific short-range devices (SRD) */ + meshtastic_Config_LoRaConfig_RegionCode_EU_866 = 29, + /* EU 874MHz and 917MHz bands (Band no. 1 and 4 of 2022/172/EC and subsequent amendments) for Non-specific short-range devices (SRD) */ + meshtastic_Config_LoRaConfig_RegionCode_EU_874 = 30, + meshtastic_Config_LoRaConfig_RegionCode_EU_917 = 31, + /* EU 868MHz band, with narrow presets */ + meshtastic_Config_LoRaConfig_RegionCode_EU_N_868 = 32 } meshtastic_Config_LoRaConfig_RegionCode; /* Standard predefined channel settings @@ -319,7 +326,24 @@ typedef enum _meshtastic_Config_LoRaConfig_ModemPreset { meshtastic_Config_LoRaConfig_ModemPreset_SHORT_TURBO = 8, /* Long Range - Turbo This preset performs similarly to LongFast, but with 500Khz bandwidth. */ - meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO = 9 + meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO = 9, + /* Lite Fast + Medium range preset optimized for EU 866MHz SRD band with 125kHz bandwidth. + Comparable link budget to MEDIUM_FAST but compliant with Band no. 47b of 2006/771/EC. */ + meshtastic_Config_LoRaConfig_ModemPreset_LITE_FAST = 10, + /* Lite Slow + Medium-to-moderate range preset optimized for EU 866MHz SRD band with 125kHz bandwidth. + Comparable link budget to LONG_FAST but compliant with Band no. 47b of 2006/771/EC. 
*/ + meshtastic_Config_LoRaConfig_ModemPreset_LITE_SLOW = 11, + /* Narrow Fast + Medium-to-moderate range preset optimized for EU 868MHz band with 62.5kHz bandwidth. + Comparable link budget to SHORT_SLOW, but with half the data rate. + Intended to avoid interference with other devices. */ + meshtastic_Config_LoRaConfig_ModemPreset_NARROW_FAST = 12, + /* Narrow Slow + Moderate range preset optimized for EU 868MHz band with 62.5kHz bandwidth. + Comparable link budget and data rate to LONG_FAST. */ + meshtastic_Config_LoRaConfig_ModemPreset_NARROW_SLOW = 13 } meshtastic_Config_LoRaConfig_ModemPreset; typedef enum _meshtastic_Config_LoRaConfig_FEM_LNA_Mode { @@ -706,12 +730,12 @@ extern "C" { #define _meshtastic_Config_DisplayConfig_CompassOrientation_ARRAYSIZE ((meshtastic_Config_DisplayConfig_CompassOrientation)(meshtastic_Config_DisplayConfig_CompassOrientation_DEGREES_270_INVERTED+1)) #define _meshtastic_Config_LoRaConfig_RegionCode_MIN meshtastic_Config_LoRaConfig_RegionCode_UNSET -#define _meshtastic_Config_LoRaConfig_RegionCode_MAX meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M -#define _meshtastic_Config_LoRaConfig_RegionCode_ARRAYSIZE ((meshtastic_Config_LoRaConfig_RegionCode)(meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M+1)) +#define _meshtastic_Config_LoRaConfig_RegionCode_MAX meshtastic_Config_LoRaConfig_RegionCode_EU_N_868 +#define _meshtastic_Config_LoRaConfig_RegionCode_ARRAYSIZE ((meshtastic_Config_LoRaConfig_RegionCode)(meshtastic_Config_LoRaConfig_RegionCode_EU_N_868+1)) #define _meshtastic_Config_LoRaConfig_ModemPreset_MIN meshtastic_Config_LoRaConfig_ModemPreset_LONG_FAST -#define _meshtastic_Config_LoRaConfig_ModemPreset_MAX meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO -#define _meshtastic_Config_LoRaConfig_ModemPreset_ARRAYSIZE ((meshtastic_Config_LoRaConfig_ModemPreset)(meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO+1)) +#define _meshtastic_Config_LoRaConfig_ModemPreset_MAX meshtastic_Config_LoRaConfig_ModemPreset_NARROW_SLOW 
+#define _meshtastic_Config_LoRaConfig_ModemPreset_ARRAYSIZE ((meshtastic_Config_LoRaConfig_ModemPreset)(meshtastic_Config_LoRaConfig_ModemPreset_NARROW_SLOW+1)) #define _meshtastic_Config_LoRaConfig_FEM_LNA_Mode_MIN meshtastic_Config_LoRaConfig_FEM_LNA_Mode_DISABLED #define _meshtastic_Config_LoRaConfig_FEM_LNA_Mode_MAX meshtastic_Config_LoRaConfig_FEM_LNA_Mode_NOT_PRESENT diff --git a/src/mesh/generated/meshtastic/mesh.pb.h b/src/mesh/generated/meshtastic/mesh.pb.h index d7ff32cb4..f22825030 100644 --- a/src/mesh/generated/meshtastic/mesh.pb.h +++ b/src/mesh/generated/meshtastic/mesh.pb.h @@ -315,6 +315,8 @@ typedef enum _meshtastic_HardwareModel { meshtastic_HardwareModel_THINKNODE_M7 = 129, meshtastic_HardwareModel_THINKNODE_M8 = 130, meshtastic_HardwareModel_THINKNODE_M9 = 131, + /* The Heltec-V4-R8 uses an ESP32S3R8 chip, plus an SX1262. */ + meshtastic_HardwareModel_HELTEC_V4_R8 = 132, /* ------------------------------------------------------------------------------------------------------------------------------------------ Reserved ID For developing private Ports. These will show up in live traffic sparsely, so we can use a high number. Keep it within 8 bits. 
/**
 * Sanitize a fixed-size char buffer in place so that it holds a NUL-terminated,
 * well-formed UTF-8 string.
 *
 * Every byte that cannot start a well-formed sequence at its position is
 * replaced with '?': stray continuation bytes, invalid lead bytes, lead bytes
 * whose sequence is cut short by the end of the string, overlong encodings,
 * UTF-16 surrogate halves and code points above U+10FFFF. Only the offending
 * lead byte is replaced; the bytes after it are then re-examined on their own,
 * so valid text that follows a broken sequence is preserved.
 *
 * @param buf     Buffer to sanitize. May be nullptr, in which case nothing happens.
 * @param bufSize Total size of buf in bytes, including room for the terminator.
 * @return true if the buffer contents were altered, i.e. at least one byte was
 *         replaced with '?' or the string had no terminator inside bufSize and
 *         had to be cut short; false otherwise (including nullptr / zero size).
 */
bool sanitizeUtf8(char *buf, size_t bufSize)
{
    if (!buf || bufSize == 0)
        return false;

    // Bounded scan for the terminator so we never read past the buffer.
    size_t len = 0;
    while (len < bufSize && buf[len] != '\0')
        len++;

    bool replaced = false;
    if (len == bufSize) {
        // No terminator anywhere: the last byte is sacrificed for it, which
        // shortens the string and therefore counts as a modification.
        len = bufSize - 1;
        replaced = true;
    }
    // The final byte is always cleared, whether or not an earlier terminator exists.
    buf[bufSize - 1] = '\0';

    size_t i = 0;
    while (i < len) {
        const uint8_t lead = static_cast<uint8_t>(buf[i]);

        if (lead <= 0x7F) {
            // ASCII is always valid on its own.
            i++;
            continue;
        }

        // Classify the lead byte: total sequence length, the payload bits it
        // carries, and the smallest code point that legitimately needs this
        // length (anything below it is an overlong encoding).
        size_t seqLen = 0; // stays 0 for an invalid lead (0x80-0xBF or 0xF8+)
        uint32_t cp = 0;
        uint32_t minCodepoint = 0;
        if ((lead & 0xE0) == 0xC0) {
            seqLen = 2;
            cp = lead & 0x1F;
            minCodepoint = 0x80;
        } else if ((lead & 0xF0) == 0xE0) {
            seqLen = 3;
            cp = lead & 0x0F;
            minCodepoint = 0x800;
        } else if ((lead & 0xF8) == 0xF0) {
            seqLen = 4;
            cp = lead & 0x07;
            minCodepoint = 0x10000;
        }

        // A sequence that would run past the end of the string is handled like
        // any other malformed sequence: only its lead byte is replaced, so
        // valid bytes that happen to follow it survive.
        bool valid = seqLen != 0 && seqLen <= len - i;

        // Continuation bytes must look like 10xxxxxx; fold their payload into cp.
        for (size_t j = 1; valid && j < seqLen; j++) {
            const uint8_t cont = static_cast<uint8_t>(buf[i + j]);
            if ((cont & 0xC0) != 0x80)
                valid = false;
            else
                cp = (cp << 6) | (cont & 0x3F);
        }

        // Reject overlong encodings, values beyond Unicode, and surrogate halves.
        if (valid && (cp < minCodepoint || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)))
            valid = false;

        if (valid) {
            i += seqLen;
        } else {
            // Replace only the lead byte; whatever follows is judged on the next iteration.
            buf[i] = '?';
            replaced = true;
            i++;
        }
    }

    return replaced;
}
strncpy(owner.long_name, p.call_sign, sizeof(owner.long_name)); + owner.long_name[sizeof(owner.long_name) - 1] = '\0'; + sanitizeUtf8(owner.long_name, sizeof(owner.long_name)); strncpy(owner.short_name, p.short_name, sizeof(owner.short_name)); + owner.short_name[sizeof(owner.short_name) - 1] = '\0'; + sanitizeUtf8(owner.short_name, sizeof(owner.short_name)); owner.is_licensed = true; config.lora.override_duty_cycle = true; config.lora.tx_power = p.tx_power; diff --git a/src/platform/nrf52/NRF52Bluetooth.cpp b/src/platform/nrf52/NRF52Bluetooth.cpp index 307e35b0c..52e45cccc 100644 --- a/src/platform/nrf52/NRF52Bluetooth.cpp +++ b/src/platform/nrf52/NRF52Bluetooth.cpp @@ -1,6 +1,7 @@ #include "NRF52Bluetooth.h" #include "BLEDfuSecure.h" #include "BluetoothCommon.h" +#include "HardwareRNG.h" #include "PowerFSM.h" #include "configuration.h" #include "main.h" @@ -272,9 +273,13 @@ void NRF52Bluetooth::setup() Bluefruit.setTxPower(NRF52_BLE_TX_POWER); #endif if (config.bluetooth.mode != meshtastic_Config_BluetoothConfig_PairingMode_NO_PIN) { - configuredPasskey = config.bluetooth.mode == meshtastic_Config_BluetoothConfig_PairingMode_FIXED_PIN - ? 
config.bluetooth.fixed_pin - : random(100000, 999999); + if (config.bluetooth.mode == meshtastic_Config_BluetoothConfig_PairingMode_FIXED_PIN) { + configuredPasskey = config.bluetooth.fixed_pin; + } else { + uint32_t hwrand = 0; + HardwareRNG::fill(reinterpret_cast(&hwrand), sizeof(hwrand)); + configuredPasskey = hwrand % 900000u + 100000u; + } auto pinString = std::to_string(configuredPasskey); LOG_INFO("Bluetooth pin set to '%i'", configuredPasskey); Bluefruit.Security.setPIN(pinString.c_str()); diff --git a/src/power.h b/src/power.h index 4b5ef609d..90ada889d 100644 --- a/src/power.h +++ b/src/power.h @@ -102,6 +102,14 @@ class Power : public concurrency::OSThread const uint16_t OCV[11] = {OCV_ARRAY}; bool isLowBattery() { return low_voltage_counter >= 10; }; +#ifdef ARCH_ESP32 + int beforeLightSleep(void *unused); + int afterLightSleep(esp_sleep_wakeup_cause_t cause); +#endif + + void attachPowerInterrupts(); + void detachPowerInterrupts(); + #ifdef ARCH_ESP32 int beforeLightSleep(void *unused); int afterLightSleep(esp_sleep_wakeup_cause_t cause); diff --git a/test/test_utf8/test_main.cpp b/test/test_utf8/test_main.cpp new file mode 100644 index 000000000..7ac64653d --- /dev/null +++ b/test/test_utf8/test_main.cpp @@ -0,0 +1,195 @@ +#include "meshUtils.h" +#include +#include + +void setUp(void) {} +void tearDown(void) {} + +// --- Valid UTF-8 should pass through unchanged --- + +void test_ascii_unchanged() +{ + char buf[32] = "Hello World"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("Hello World", buf); +} + +void test_valid_2byte_unchanged() +{ + // "café" — é is C3 A9 + char buf[16] = "caf\xC3\xA9"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("caf\xC3\xA9", buf); +} + +void test_valid_3byte_unchanged() +{ + // "€" is E2 82 AC + char buf[16] = "\xE2\x82\xAC"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("\xE2\x82\xAC", buf); +} + +void 
test_valid_4byte_emoji_unchanged() +{ + // 🌙 is F0 9F 8C 99 + char buf[16] = "\xF0\x9F\x8C\x99"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("\xF0\x9F\x8C\x99", buf); +} + +void test_valid_mixed_unchanged() +{ + // "Hi 🌙!" — mix of ASCII and 4-byte + char buf[16] = "Hi \xF0\x9F\x8C\x99!"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("Hi \xF0\x9F\x8C\x99!", buf); +} + +void test_empty_string() +{ + char buf[8] = ""; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("", buf); +} + +// --- Invalid sequences observed in the wild --- + +void test_truncated_4byte_at_end() +{ + // Name with valid emoji 🌙 followed by a truncated 4-byte sequence + ASCII + char buf[32] = "Lunar Tower \xF0\x9F\x8C\x99\xF0\x9F\x97" + "4"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + // The 🌙 should be preserved; F0 9F 97 is an incomplete 4-byte sequence, + // '4' (0x34) is not a valid continuation byte + TEST_ASSERT_EQUAL_STRING("Lunar Tower \xF0\x9F\x8C\x99???4", buf); +} + +void test_lone_lead_bytes_without_continuations() +{ + // Mixed ASCII with stray multibyte lead bytes (E1, F3) lacking proper continuations + char buf[32] = "Mesht\xE1\xF3tic 37e2"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + // E1 expects 2 continuation bytes, but F3 is not a continuation → E1 replaced + // F3 expects 3 continuation bytes, 't','i','c' are not continuations → F3 replaced + TEST_ASSERT_EQUAL_STRING("Mesht??tic 37e2", buf); +} + +// --- Edge cases --- + +void test_bare_continuation_byte() +{ + // 0x80 alone is invalid (continuation byte with no lead) + char buf[8] = "\x80"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("?", buf); +} + +void test_overlong_2byte() +{ + // C0 AF is an overlong encoding of U+002F '/' + char buf[8] = "\xC0\xAF"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + // C0 is a 2-byte lead, AF is valid continuation, but codepoint 
0x2F < 0x80 → overlong + // C0 replaced, AF (now bare continuation) also replaced + TEST_ASSERT_EQUAL_STRING("??", buf); +} + +void test_surrogate_half() +{ + // ED A0 80 encodes U+D800 (surrogate half — invalid in UTF-8) + char buf[8] = "\xED\xA0\x80"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("???", buf); +} + +void test_5byte_sequence_rejected() +{ + // F8 80 80 80 80 — 5-byte sequence, not valid UTF-8 + char buf[8] = "\xF8\x80\x80\x80\x80"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + // F8 is invalid lead (>= 0xF8), each 0x80 is bare continuation + TEST_ASSERT_EQUAL_STRING("?????", buf); +} + +void test_truncated_3byte_at_buffer_end() +{ + // Buffer is exactly 4 bytes: E2 82 then forced null at [3] + char buf[4]; + buf[0] = '\xE2'; + buf[1] = '\x82'; + buf[2] = '\0'; // String ends before the 3-byte sequence completes + buf[3] = '\0'; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("??", buf); +} + +void test_null_termination_enforced() +{ + // Fill buffer completely with no null terminator + char buf[5]; + memset(buf, 'A', sizeof(buf)); + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); + // Should be null-terminated and content preserved (all ASCII) + TEST_ASSERT_EQUAL_STRING("AAAA", buf); +} + +void test_null_buffer() +{ + TEST_ASSERT_FALSE(sanitizeUtf8(nullptr, 10)); +} + +void test_zero_size() +{ + char buf[4] = "Hi"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, 0)); + // Buffer should be untouched + TEST_ASSERT_EQUAL_STRING("Hi", buf); +} + +void test_valid_max_codepoint() +{ + // U+10FFFF = F4 8F BF BF (maximum valid Unicode codepoint) + char buf[8] = "\xF4\x8F\xBF\xBF"; + TEST_ASSERT_FALSE(sanitizeUtf8(buf, sizeof(buf))); + TEST_ASSERT_EQUAL_STRING("\xF4\x8F\xBF\xBF", buf); +} + +void test_above_max_codepoint() +{ + // U+110000 = F4 90 80 80 (just above maximum valid Unicode) + char buf[8] = "\xF4\x90\x80\x80"; + TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf))); +} + +int main(int 
argc, char **argv) +{ + UNITY_BEGIN(); + + // Valid UTF-8 passthrough + RUN_TEST(test_ascii_unchanged); + RUN_TEST(test_valid_2byte_unchanged); + RUN_TEST(test_valid_3byte_unchanged); + RUN_TEST(test_valid_4byte_emoji_unchanged); + RUN_TEST(test_valid_mixed_unchanged); + RUN_TEST(test_empty_string); + + // Invalid sequences observed in the wild + RUN_TEST(test_truncated_4byte_at_end); + RUN_TEST(test_lone_lead_bytes_without_continuations); + + // Edge cases + RUN_TEST(test_bare_continuation_byte); + RUN_TEST(test_overlong_2byte); + RUN_TEST(test_surrogate_half); + RUN_TEST(test_5byte_sequence_rejected); + RUN_TEST(test_truncated_3byte_at_buffer_end); + RUN_TEST(test_null_termination_enforced); + RUN_TEST(test_null_buffer); + RUN_TEST(test_zero_size); + RUN_TEST(test_valid_max_codepoint); + RUN_TEST(test_above_max_codepoint); + + return UNITY_END(); +}