Merge remote-tracking branch 'origin/master' into develop

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Ben Meadors
2026-04-25 06:41:38 -05:00
14 changed files with 475 additions and 14 deletions

View File

@@ -9,7 +9,7 @@ plugins:
lint:
enabled:
- checkov@3.2.524
- renovate@43.139.6
- renovate@43.141.0
- prettier@3.8.3
- trufflehog@3.95.2
- yamllint@1.38.0

118
bin/show-unmerged-prs.sh Executable file
View File

@@ -0,0 +1,118 @@
#!/bin/bash
# Show commits that are in develop but not in master, together with the
# pull request (number, title, description, labels) associated with each one.
#
# Requires: git, GitHub CLI (gh) and jq.
#
# Usage:
#   ./show-unmerged-prs.sh            # Show all unmerged commits
#   ./show-unmerged-prs.sh --bugfix   # Show only bugfix-labeled PRs
#   ./show-unmerged-prs.sh --feature  # Show only feature-labeled PRs
set -e

REPO="firmware"
OWNER="meshtastic"
BASE_BRANCH="master"
HEAD_BRANCH="develop"
LIMIT=100
FILTER_LABEL=""

# jq filter: flatten the first PR associated with a commit into one line of the
# form "number|title|body|labels". Line breaks and '|' inside the title/body are
# replaced by a space because the result is parsed below as a single
# '|'-separated line. When the commit has no PR, `.[0]` is null, the filter
# errors out and the caller falls back to "||||".
PR_FILTER='.[0] | "\(.number)|\(.title | gsub("[|\r\n]+"; " "))|\((.body // "No description") | gsub("[|\r\n]+"; " "))|\(.labels | map(.name) | join(","))"'

# Parse arguments. The for loop iterates over a snapshot of "$@", so no
# `shift` is needed. Unknown arguments are ignored.
for arg in "$@"; do
  case "$arg" in
    --bugfix)
      FILTER_LABEL="bugfix"
      ;;
    --feature)
      FILTER_LABEL="feature"
      ;;
    --help)
      echo "Usage: $0 [OPTIONS]"
      echo "Options:"
      echo " --bugfix Show only PRs labeled with 'bugfix'"
      echo " --feature Show only PRs labeled with 'feature'"
      echo " --help Show this help message"
      exit 0
      ;;
  esac
done

if [ -n "$FILTER_LABEL" ]; then
  echo "Fetching commits in $HEAD_BRANCH that are not in $BASE_BRANCH (filtered by label: $FILTER_LABEL)..."
else
  echo "Fetching commits in $HEAD_BRANCH that are not in $BASE_BRANCH..."
fi
echo ""

# Check if gh CLI is available
if ! command -v gh > /dev/null 2>&1; then
  echo "ERROR: GitHub CLI (gh) not found. Please install it first." >&2
  echo "Visit: https://cli.github.com/" >&2
  exit 1
fi

# jq is needed to decode the API responses; without it every commit would be
# silently reported as having no PR.
if ! command -v jq > /dev/null 2>&1; then
  echo "ERROR: jq not found. Please install it first." >&2
  exit 1
fi

# Refresh the remote branches. Best effort on purpose: the script still works
# offline against whatever origin/* refs are already present.
git fetch origin "$HEAD_BRANCH" "$BASE_BRANCH" 2> /dev/null || true

# Commits reachable from the head branch but not from the base branch.
commits=$(git log -n "$LIMIT" --pretty=format:"%H|%s" "origin/$BASE_BRANCH..origin/$HEAD_BRANCH")

count=0
displayed=0
echo "Commits in $HEAD_BRANCH not in $BASE_BRANCH:"
echo "=============================================="
echo ""

while IFS='|' read -r hash subject; do
  # An empty commit list still feeds one empty line into the loop.
  [ -n "$hash" ] || continue

  # Plain assignment instead of ((count++)): the latter returns status 1 when
  # the old value is 0, which aborts the script under `set -e`.
  count=$((count + 1))

  # Try to find the PR for this commit and extract number, title, description
  # and labels from it.
  pr_response=$(gh api -X GET "/repos/$OWNER/$REPO/commits/$hash/pulls" \
    -H "Accept: application/vnd.github.v3+json" 2> /dev/null \
    | jq -r "$PR_FILTER" 2> /dev/null || echo "||||")

  if [ -z "$pr_response" ] || [ "$pr_response" = "||||" ]; then
    # No PR found: skip if a filter is active, otherwise show the bare commit.
    if [ -z "$FILTER_LABEL" ]; then
      displayed=$((displayed + 1))
      echo "[$displayed] Commit: $hash"
      echo " Subject: $subject"
      echo " PR: Not found in GitHub"
      echo ""
    fi
    continue
  fi

  IFS='|' read -r pr_num pr_title pr_desc pr_labels <<< "$pr_response"

  # With a filter active, only show PRs whose label list contains it.
  if [ -n "$FILTER_LABEL" ] && [[ "$pr_labels" != *"$FILTER_LABEL"* ]]; then
    continue
  fi

  displayed=$((displayed + 1))
  echo "[$displayed] PR #$pr_num - $pr_title"
  echo " Commit: $hash"

  if [ -n "$pr_desc" ] && [ "$pr_desc" != "No description" ]; then
    # Truncate description to 200 chars
    desc_short="${pr_desc:0:200}"
    if [ "${#pr_desc}" -gt 200 ]; then
      desc_short+="..."
    fi
    echo " Description: $desc_short"
  fi

  if [ -n "$pr_labels" ]; then
    echo " Labels: $pr_labels"
  fi
  echo ""
done <<< "$commits"

echo ""
if [ -n "$FILTER_LABEL" ]; then
  echo "Done. Showing $displayed PRs with label '$FILTER_LABEL' from $count commits checked."
else
  echo "Done. Showing $displayed commits from $HEAD_BRANCH not in $BASE_BRANCH."
fi

View File

@@ -29,6 +29,7 @@ build_flags = -Wno-missing-field-initializers
-DUSE_THREAD_NAMES
-DTINYGPS_OPTION_NO_CUSTOM_FIELDS
-DPB_ENABLE_MALLOC=1
-DPB_VALIDATE_UTF8=1
-DRADIOLIB_EXCLUDE_CC1101=1
-DRADIOLIB_EXCLUDE_NRF24=1
-DRADIOLIB_EXCLUDE_RF69=1
@@ -66,7 +67,7 @@ monitor_speed = 115200
monitor_filters = direct
lib_deps =
# renovate: datasource=git-refs depName=meshtastic-esp8266-oled-ssd1306 packageName=https://github.com/meshtastic/esp8266-oled-ssd1306 gitBranch=master
https://github.com/meshtastic/esp8266-oled-ssd1306/archive/21e484f409cde18d44012caef84c244eb5ca28f3.zip
https://github.com/meshtastic/esp8266-oled-ssd1306/archive/6bfd1f135e1ebe37afd6050bb4b9964cea3fcfda.zip
# renovate: datasource=git-refs depName=meshtastic-OneButton packageName=https://github.com/meshtastic/OneButton gitBranch=master
https://github.com/meshtastic/OneButton/archive/fa352d668c53f290cfa480a5f79ad422cd828c70.zip
# renovate: datasource=git-refs depName=meshtastic-arduino-fsm packageName=https://github.com/meshtastic/arduino-fsm gitBranch=master

View File

@@ -48,8 +48,10 @@ bool mixWithLoRaEntropy(uint8_t *buffer, size_t length)
// and return false so callers know no extra mixing occurred.
RadioLibInterface *radio = RadioLibInterface::instance;
if (!radio) {
// Intentionally silent: this path runs during portduinoSetup() before the
// console/SerialConsole is initialized, so LOG_* here would dereference a null pointer.
// This path can run during portduinoSetup() before the console is initialized.
#ifndef PIO_UNIT_TESTING
LOG_ERROR("No radio instance available to provide entropy");
#endif
return false;
}

View File

@@ -1,6 +1,7 @@
#include "TypeConversions.h"
#include "mesh/generated/meshtastic/deviceonly.pb.h"
#include "mesh/generated/meshtastic/mesh.pb.h"
#include "meshUtils.h"
meshtastic_NodeInfo TypeConversions::ConvertToNodeInfo(const meshtastic_NodeInfoLite *lite)
{
@@ -82,8 +83,10 @@ meshtastic_UserLite TypeConversions::ConvertToUserLite(meshtastic_User user)
strncpy(lite.long_name, user.long_name, sizeof(lite.long_name));
lite.long_name[sizeof(lite.long_name) - 1] = '\0';
sanitizeUtf8(lite.long_name, sizeof(lite.long_name));
strncpy(lite.short_name, user.short_name, sizeof(lite.short_name));
lite.short_name[sizeof(lite.short_name) - 1] = '\0';
sanitizeUtf8(lite.short_name, sizeof(lite.short_name));
lite.hw_model = user.hw_model;
lite.role = user.role;
lite.is_licensed = user.is_licensed;
@@ -102,8 +105,10 @@ meshtastic_User TypeConversions::ConvertToUser(uint32_t nodeNum, meshtastic_User
snprintf(user.id, sizeof(user.id), "!%08x", nodeNum);
strncpy(user.long_name, lite.long_name, sizeof(user.long_name));
user.long_name[sizeof(user.long_name) - 1] = '\0';
sanitizeUtf8(user.long_name, sizeof(user.long_name));
strncpy(user.short_name, lite.short_name, sizeof(user.short_name));
user.short_name[sizeof(user.short_name) - 1] = '\0';
sanitizeUtf8(user.short_name, sizeof(user.short_name));
user.hw_model = lite.hw_model;
user.role = lite.role;
user.is_licensed = lite.is_licensed;

View File

@@ -289,7 +289,14 @@ typedef enum _meshtastic_Config_LoRaConfig_RegionCode {
/* ITU Region 1 Amateur Radio 2m band (144-146 MHz) */
meshtastic_Config_LoRaConfig_RegionCode_ITU1_2M = 27,
/* ITU Region 2 / 3 Amateur Radio 2m band (144-148 MHz) */
meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M = 28
meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M = 28,
/* EU 866MHz band (Band no. 47b of 2006/771/EC and subsequent amendments) for Non-specific short-range devices (SRD) */
meshtastic_Config_LoRaConfig_RegionCode_EU_866 = 29,
/* EU 874MHz and 917MHz bands (Band no. 1 and 4 of 2022/172/EC and subsequent amendments) for Non-specific short-range devices (SRD) */
meshtastic_Config_LoRaConfig_RegionCode_EU_874 = 30,
meshtastic_Config_LoRaConfig_RegionCode_EU_917 = 31,
/* EU 868MHz band, with narrow presets */
meshtastic_Config_LoRaConfig_RegionCode_EU_N_868 = 32
} meshtastic_Config_LoRaConfig_RegionCode;
/* Standard predefined channel settings
@@ -319,7 +326,24 @@ typedef enum _meshtastic_Config_LoRaConfig_ModemPreset {
meshtastic_Config_LoRaConfig_ModemPreset_SHORT_TURBO = 8,
/* Long Range - Turbo
This preset performs similarly to LongFast, but with 500Khz bandwidth. */
meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO = 9
meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO = 9,
/* Lite Fast
Medium range preset optimized for EU 866MHz SRD band with 125kHz bandwidth.
Comparable link budget to MEDIUM_FAST but compliant with Band no. 47b of 2006/771/EC. */
meshtastic_Config_LoRaConfig_ModemPreset_LITE_FAST = 10,
/* Lite Slow
Medium-to-moderate range preset optimized for EU 866MHz SRD band with 125kHz bandwidth.
Comparable link budget to LONG_FAST but compliant with Band no. 47b of 2006/771/EC. */
meshtastic_Config_LoRaConfig_ModemPreset_LITE_SLOW = 11,
/* Narrow Fast
Medium-to-moderate range preset optimized for EU 868MHz band with 62.5kHz bandwidth.
Comparable link budget to SHORT_SLOW, but with half the data rate.
Intended to avoid interference with other devices. */
meshtastic_Config_LoRaConfig_ModemPreset_NARROW_FAST = 12,
/* Narrow Slow
Moderate range preset optimized for EU 868MHz band with 62.5kHz bandwidth.
Comparable link budget and data rate to LONG_FAST. */
meshtastic_Config_LoRaConfig_ModemPreset_NARROW_SLOW = 13
} meshtastic_Config_LoRaConfig_ModemPreset;
typedef enum _meshtastic_Config_LoRaConfig_FEM_LNA_Mode {
@@ -706,12 +730,12 @@ extern "C" {
#define _meshtastic_Config_DisplayConfig_CompassOrientation_ARRAYSIZE ((meshtastic_Config_DisplayConfig_CompassOrientation)(meshtastic_Config_DisplayConfig_CompassOrientation_DEGREES_270_INVERTED+1))
#define _meshtastic_Config_LoRaConfig_RegionCode_MIN meshtastic_Config_LoRaConfig_RegionCode_UNSET
#define _meshtastic_Config_LoRaConfig_RegionCode_MAX meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M
#define _meshtastic_Config_LoRaConfig_RegionCode_ARRAYSIZE ((meshtastic_Config_LoRaConfig_RegionCode)(meshtastic_Config_LoRaConfig_RegionCode_ITU23_2M+1))
#define _meshtastic_Config_LoRaConfig_RegionCode_MAX meshtastic_Config_LoRaConfig_RegionCode_EU_N_868
#define _meshtastic_Config_LoRaConfig_RegionCode_ARRAYSIZE ((meshtastic_Config_LoRaConfig_RegionCode)(meshtastic_Config_LoRaConfig_RegionCode_EU_N_868+1))
#define _meshtastic_Config_LoRaConfig_ModemPreset_MIN meshtastic_Config_LoRaConfig_ModemPreset_LONG_FAST
#define _meshtastic_Config_LoRaConfig_ModemPreset_MAX meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO
#define _meshtastic_Config_LoRaConfig_ModemPreset_ARRAYSIZE ((meshtastic_Config_LoRaConfig_ModemPreset)(meshtastic_Config_LoRaConfig_ModemPreset_LONG_TURBO+1))
#define _meshtastic_Config_LoRaConfig_ModemPreset_MAX meshtastic_Config_LoRaConfig_ModemPreset_NARROW_SLOW
#define _meshtastic_Config_LoRaConfig_ModemPreset_ARRAYSIZE ((meshtastic_Config_LoRaConfig_ModemPreset)(meshtastic_Config_LoRaConfig_ModemPreset_NARROW_SLOW+1))
#define _meshtastic_Config_LoRaConfig_FEM_LNA_Mode_MIN meshtastic_Config_LoRaConfig_FEM_LNA_Mode_DISABLED
#define _meshtastic_Config_LoRaConfig_FEM_LNA_Mode_MAX meshtastic_Config_LoRaConfig_FEM_LNA_Mode_NOT_PRESENT

View File

@@ -315,6 +315,8 @@ typedef enum _meshtastic_HardwareModel {
meshtastic_HardwareModel_THINKNODE_M7 = 129,
meshtastic_HardwareModel_THINKNODE_M8 = 130,
meshtastic_HardwareModel_THINKNODE_M9 = 131,
/* The Heltec-V4-R8 uses an ESP32S3R8 chip, plus an SX1262. */
meshtastic_HardwareModel_HELTEC_V4_R8 = 132,
/* ------------------------------------------------------------------------------------------------------------------------------------------
Reserved ID For developing private Ports. These will show up in live traffic sparsely, so we can use a high number. Keep it within 8 bits.
------------------------------------------------------------------------------------------------------------------------------------------ */

View File

@@ -117,4 +117,93 @@ size_t pb_string_length(const char *str, size_t max_len)
}
}
return len;
}
/**
 * Sanitize a fixed-size char buffer in place so that it holds a NUL-terminated,
 * well-formed UTF-8 string.
 *
 * The last byte of the buffer is forced to '\0'. Every byte that cannot start a
 * well-formed UTF-8 sequence (stray continuation byte, invalid lead byte, lead byte
 * without enough valid continuation bytes, overlong encoding, surrogate half, or a
 * codepoint above U+10FFFF) is replaced by '?'. Only the offending byte is replaced;
 * the bytes after it are re-examined on their own, so valid characters that follow a
 * broken sequence are preserved. The string length never changes.
 *
 * @param buf     Buffer to sanitize; may be nullptr.
 * @param bufSize Total size of the buffer in bytes, including room for the terminator.
 * @return true if the buffer was modified (a byte was replaced by '?' or the forced
 *         terminator overwrote a non-NUL byte); false otherwise, including when buf is
 *         nullptr or bufSize is 0.
 */
bool sanitizeUtf8(char *buf, size_t bufSize)
{
    if (!buf || bufSize == 0)
        return false;

    // Ensure null-terminated within buffer. If the final byte held data, the string is
    // being truncated, which counts as a modification.
    bool replaced = buf[bufSize - 1] != '\0';
    buf[bufSize - 1] = '\0';

    const size_t len = strlen(buf);
    size_t i = 0;

    while (i < len) {
        const uint8_t b = (uint8_t)buf[i];

        if (b <= 0x7F) {
            // ASCII — valid single byte
            i++;
            continue;
        }

        // Determine expected sequence length, the smallest codepoint that legitimately
        // needs that many bytes (anything lower is an overlong encoding), and the payload
        // bits of the lead byte. seqLen stays 0 for bytes that can never be a lead byte
        // (0x80-0xBF and 0xF8+).
        size_t seqLen = 0;
        uint32_t minCodepoint = 0;
        uint32_t cp = 0;
        if ((b & 0xE0) == 0xC0) {
            seqLen = 2;
            minCodepoint = 0x80;
            cp = b & 0x1F;
        } else if ((b & 0xF0) == 0xE0) {
            seqLen = 3;
            minCodepoint = 0x800;
            cp = b & 0x0F;
        } else if ((b & 0xF8) == 0xF0) {
            seqLen = 4;
            minCodepoint = 0x10000;
            cp = b & 0x07;
        }

        // The sequence must fit inside the string; a lead byte cut off by the end of the
        // string is treated like any other lead byte with missing continuations.
        bool valid = seqLen != 0 && i + seqLen <= len;

        // Validate continuation bytes (must be 10xxxxxx) while decoding the codepoint.
        for (size_t j = 1; valid && j < seqLen; j++) {
            const uint8_t c = (uint8_t)buf[i + j];
            if ((c & 0xC0) != 0x80)
                valid = false;
            else
                cp = (cp << 6) | (c & 0x3F);
        }

        // Reject overlong encodings, values beyond the Unicode range, and surrogate halves.
        if (valid && (cp < minCodepoint || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)))
            valid = false;

        if (valid) {
            i += seqLen;
        } else {
            // Replace only this byte; whatever follows is examined on the next iteration.
            buf[i] = '?';
            replaced = true;
            i++;
        }
    }

    return replaced;
}

View File

@@ -56,6 +56,10 @@ const std::string vformat(const char *const zcFormat, ...);
// Get actual string length for nanopb char array fields.
size_t pb_string_length(const char *str, size_t max_len);
// Sanitize a fixed-size char buffer in-place by replacing invalid UTF-8 sequences with '?'.
// Ensures the result is null-terminated within bufSize. Returns true if any bytes were replaced.
bool sanitizeUtf8(char *buf, size_t bufSize);
/// Calculate 2^n without calling pow() - used for spreading factor and other calculations
inline uint32_t pow_of_2(uint32_t n)
{

View File

@@ -626,10 +626,14 @@ void AdminModule::handleSetOwner(const meshtastic_User &o)
if (*o.long_name) {
changed |= strcmp(owner.long_name, o.long_name);
strncpy(owner.long_name, o.long_name, sizeof(owner.long_name));
owner.long_name[sizeof(owner.long_name) - 1] = '\0';
sanitizeUtf8(owner.long_name, sizeof(owner.long_name));
}
if (*o.short_name) {
changed |= strcmp(owner.short_name, o.short_name);
strncpy(owner.short_name, o.short_name, sizeof(owner.short_name));
owner.short_name[sizeof(owner.short_name) - 1] = '\0';
sanitizeUtf8(owner.short_name, sizeof(owner.short_name));
}
snprintf(owner.id, sizeof(owner.id), "!%08x", nodeDB->getNodeNum());
@@ -1430,7 +1434,11 @@ void AdminModule::handleSetHamMode(const meshtastic_HamParameters &p)
// Set call sign and override lora limitations for licensed use
strncpy(owner.long_name, p.call_sign, sizeof(owner.long_name));
owner.long_name[sizeof(owner.long_name) - 1] = '\0';
sanitizeUtf8(owner.long_name, sizeof(owner.long_name));
strncpy(owner.short_name, p.short_name, sizeof(owner.short_name));
owner.short_name[sizeof(owner.short_name) - 1] = '\0';
sanitizeUtf8(owner.short_name, sizeof(owner.short_name));
owner.is_licensed = true;
config.lora.override_duty_cycle = true;
config.lora.tx_power = p.tx_power;

View File

@@ -1,6 +1,7 @@
#include "NRF52Bluetooth.h"
#include "BLEDfuSecure.h"
#include "BluetoothCommon.h"
#include "HardwareRNG.h"
#include "PowerFSM.h"
#include "configuration.h"
#include "main.h"
@@ -272,9 +273,13 @@ void NRF52Bluetooth::setup()
Bluefruit.setTxPower(NRF52_BLE_TX_POWER);
#endif
if (config.bluetooth.mode != meshtastic_Config_BluetoothConfig_PairingMode_NO_PIN) {
configuredPasskey = config.bluetooth.mode == meshtastic_Config_BluetoothConfig_PairingMode_FIXED_PIN
? config.bluetooth.fixed_pin
: random(100000, 999999);
if (config.bluetooth.mode == meshtastic_Config_BluetoothConfig_PairingMode_FIXED_PIN) {
configuredPasskey = config.bluetooth.fixed_pin;
} else {
uint32_t hwrand = 0;
HardwareRNG::fill(reinterpret_cast<uint8_t *>(&hwrand), sizeof(hwrand));
configuredPasskey = hwrand % 900000u + 100000u;
}
auto pinString = std::to_string(configuredPasskey);
LOG_INFO("Bluetooth pin set to '%i'", configuredPasskey);
Bluefruit.Security.setPIN(pinString.c_str());

View File

@@ -102,6 +102,14 @@ class Power : public concurrency::OSThread
const uint16_t OCV[11] = {OCV_ARRAY};
bool isLowBattery() { return low_voltage_counter >= 10; };
#ifdef ARCH_ESP32
int beforeLightSleep(void *unused);
int afterLightSleep(esp_sleep_wakeup_cause_t cause);
#endif
void attachPowerInterrupts();
void detachPowerInterrupts();
#ifdef ARCH_ESP32
int beforeLightSleep(void *unused);
int afterLightSleep(esp_sleep_wakeup_cause_t cause);

View File

@@ -0,0 +1,195 @@
#include "meshUtils.h"
#include <cstring>
#include <unity.h>
// Unity per-test fixture hook; these tests need no preparation.
void setUp(void)
{
}

// Unity per-test fixture hook; these tests need no cleanup.
void tearDown(void)
{
}
// --- Valid UTF-8 should pass through unchanged ---
// Plain ASCII must be reported as unmodified and left byte-for-byte identical.
void test_ascii_unchanged()
{
    char name[32] = "Hello World";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("Hello World", name);
}
// A well-formed 2-byte sequence ("café", where é is C3 A9) passes through untouched.
void test_valid_2byte_unchanged()
{
    char name[16] = "caf\xC3\xA9";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("caf\xC3\xA9", name);
}
// A well-formed 3-byte sequence ("€" is E2 82 AC) passes through untouched.
void test_valid_3byte_unchanged()
{
    char name[16] = "\xE2\x82\xAC";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("\xE2\x82\xAC", name);
}
// A well-formed 4-byte sequence (🌙 is F0 9F 8C 99) passes through untouched.
void test_valid_4byte_emoji_unchanged()
{
    char name[16] = "\xF0\x9F\x8C\x99";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("\xF0\x9F\x8C\x99", name);
}
// ASCII mixed with a 4-byte sequence ("Hi 🌙!") passes through untouched.
void test_valid_mixed_unchanged()
{
    char name[16] = "Hi \xF0\x9F\x8C\x99!";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("Hi \xF0\x9F\x8C\x99!", name);
}
// An empty string is reported as unmodified and stays empty.
void test_empty_string()
{
    char name[8] = "";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("", name);
}
// --- Invalid sequences observed in the wild ---
// A valid 🌙 followed by an incomplete 4-byte sequence (F0 9F 97) and an ASCII '4'.
void test_truncated_4byte_at_end()
{
    // The literal is split so the trailing '4' is not absorbed into the \x97 escape.
    char name[32] = "Lunar Tower \xF0\x9F\x8C\x99\xF0\x9F\x97"
                    "4";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    // The 🌙 is kept. '4' (0x34) is not a continuation byte, so F0, 9F and 97 are
    // each replaced while the '4' itself survives.
    TEST_ASSERT_EQUAL_STRING("Lunar Tower \xF0\x9F\x8C\x99???4", name);
}
// Stray multi-byte lead bytes (E1, F3) embedded in ASCII, none followed by continuations.
void test_lone_lead_bytes_without_continuations()
{
    char name[32] = "Mesht\xE1\xF3tic 37e2";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    // E1 wants two continuation bytes but is followed by F3 → E1 replaced.
    // F3 wants three continuation bytes but is followed by 't','i','c' → F3 replaced.
    TEST_ASSERT_EQUAL_STRING("Mesht??tic 37e2", name);
}
// --- Edge cases ---
// A continuation byte (0x80) with no lead byte in front of it is invalid.
void test_bare_continuation_byte()
{
    char name[8] = "\x80";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    TEST_ASSERT_EQUAL_STRING("?", name);
}
// C0 AF is an overlong 2-byte encoding of U+002F '/'.
void test_overlong_2byte()
{
    char name[8] = "\xC0\xAF";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    // C0 is a 2-byte lead and AF a valid continuation, but the decoded value 0x2F is
    // below 0x80 → overlong. C0 is replaced, then AF (now a bare continuation) as well.
    TEST_ASSERT_EQUAL_STRING("??", name);
}
// ED A0 80 encodes U+D800, a surrogate half, which is not allowed in UTF-8.
void test_surrogate_half()
{
    char name[8] = "\xED\xA0\x80";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    TEST_ASSERT_EQUAL_STRING("???", name);
}
// F8 80 80 80 80 is a 5-byte form, which is not valid UTF-8.
void test_5byte_sequence_rejected()
{
    char name[8] = "\xF8\x80\x80\x80\x80";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    // F8 is an invalid lead byte (>= 0xF8) and every 0x80 is a bare continuation byte.
    TEST_ASSERT_EQUAL_STRING("?????", name);
}
// A 3-byte sequence (E2 82 ..) cut short by the end of the string in a 4-byte buffer.
void test_truncated_3byte_at_buffer_end()
{
    // E2 82, then the string ends at [2] before the sequence completes; [3] is NUL too.
    char name[4] = {'\xE2', '\x82', '\0', '\0'};
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_TRUE(modified);
    TEST_ASSERT_EQUAL_STRING("??", name);
}
// A buffer completely filled with data (no terminator) must come back NUL-terminated.
void test_null_termination_enforced()
{
    char name[5];
    memset(name, 'A', sizeof(name));
    const bool modified = sanitizeUtf8(name, sizeof(name));
    // NOTE(review): this expects the forced terminator to be reported as a modification
    // even though every remaining byte is valid ASCII — confirm sanitizeUtf8 really
    // returns true in this situation.
    TEST_ASSERT_TRUE(modified);
    // The last 'A' is overwritten by the terminator; the first four are preserved.
    TEST_ASSERT_EQUAL_STRING("AAAA", name);
}
// A null buffer pointer is tolerated and reported as "nothing modified".
void test_null_buffer()
{
    const bool modified = sanitizeUtf8(nullptr, 10);
    TEST_ASSERT_FALSE(modified);
}
// A buffer size of zero is reported as "nothing modified".
void test_zero_size()
{
    char name[4] = "Hi";
    const bool modified = sanitizeUtf8(name, 0);
    TEST_ASSERT_FALSE(modified);
    // Buffer should be untouched
    TEST_ASSERT_EQUAL_STRING("Hi", name);
}
// U+10FFFF (F4 8F BF BF) is the highest valid Unicode codepoint and must be accepted.
void test_valid_max_codepoint()
{
    char name[8] = "\xF4\x8F\xBF\xBF";
    const bool modified = sanitizeUtf8(name, sizeof(name));
    TEST_ASSERT_FALSE(modified);
    TEST_ASSERT_EQUAL_STRING("\xF4\x8F\xBF\xBF", name);
}
void test_above_max_codepoint()
{
// U+110000 = F4 90 80 80 (just above maximum valid Unicode)
char buf[8] = "\xF4\x90\x80\x80";
TEST_ASSERT_TRUE(sanitizeUtf8(buf, sizeof(buf)));
}
// Test runner entry point: executes every sanitizeUtf8 test case under Unity and
// returns the value of UNITY_END(). argc/argv are not used.
int main(int argc, char **argv)
{
UNITY_BEGIN();
// Valid UTF-8 passthrough
RUN_TEST(test_ascii_unchanged);
RUN_TEST(test_valid_2byte_unchanged);
RUN_TEST(test_valid_3byte_unchanged);
RUN_TEST(test_valid_4byte_emoji_unchanged);
RUN_TEST(test_valid_mixed_unchanged);
RUN_TEST(test_empty_string);
// Invalid sequences observed in the wild
RUN_TEST(test_truncated_4byte_at_end);
RUN_TEST(test_lone_lead_bytes_without_continuations);
// Edge cases
RUN_TEST(test_bare_continuation_byte);
RUN_TEST(test_overlong_2byte);
RUN_TEST(test_surrogate_half);
RUN_TEST(test_5byte_sequence_rejected);
RUN_TEST(test_truncated_3byte_at_buffer_end);
RUN_TEST(test_null_termination_enforced);
RUN_TEST(test_null_buffer);
RUN_TEST(test_zero_size);
RUN_TEST(test_valid_max_codepoint);
RUN_TEST(test_above_max_codepoint);
return UNITY_END();
}