Files
Tasmota/lib/libesp32/berry/tests/property/test_string_interning.py
2026-03-29 22:00:05 +02:00

269 lines
9.0 KiB
Python

"""
Feature: berry-python-port, Property 5: Short string interning
For any string of length <= 64 bytes, creating it twice via be_newstrn should
return the same interned object (identity equality), and strings of length > 64
should be classified as long strings.
Validates: Requirements 8.2, 8.4
"""
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
from hypothesis import given, settings, assume
from hypothesis.strategies import binary, integers, text
from berry_port.be_object import (
bgc, bstringtable, SHORT_STR_MAX_LEN,
)
from berry_port.be_string import (
be_string_init, be_newstrn, be_newstr, be_eqstr, be_str2cstr, str_hash,
)
# ---------------------------------------------------------------------------
# Minimal mock VM with string table support
# ---------------------------------------------------------------------------
class MockVM:
def __init__(self):
self.gc = bgc()
self.gc.usage = 0
self.strtab = bstringtable()
self.compopt = 0
def _fresh_vm():
"""Create a fresh VM with an initialized string table."""
vm = MockVM()
be_string_init(vm)
return vm
# ---------------------------------------------------------------------------
# Strategies
# ---------------------------------------------------------------------------
# Latin-1 byte strings that can round-trip through Berry's char-based strings.
short_strings = binary(min_size=0, max_size=SHORT_STR_MAX_LEN)
long_strings = binary(min_size=SHORT_STR_MAX_LEN + 1, max_size=200)
def _to_latin1(data: bytes) -> str:
"""Convert bytes to a latin-1 str for Berry string creation."""
return data.decode('latin-1')
# ---------------------------------------------------------------------------
# Property 5a: Short strings are interned (identity equality)
# ---------------------------------------------------------------------------
@settings(max_examples=500)
@given(data=short_strings)
def test_short_string_interned_identity(data):
"""Creating the same short string twice via be_newstrn must return
the exact same object (identity / pointer equality)."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
s1 = be_newstrn(vm, s, length)
s2 = be_newstrn(vm, s, length)
assert s1 is s2, (
f"Short string of length {length} not interned: "
f"s1 is not s2 (id {id(s1)} != {id(s2)})"
)
# ---------------------------------------------------------------------------
# Property 5b: Short string slen field matches actual length
# ---------------------------------------------------------------------------
@settings(max_examples=300)
@given(data=short_strings)
def test_short_string_slen_matches_length(data):
"""For short strings, the slen field must equal the actual byte length."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
result = be_newstrn(vm, s, length)
assert result.slen == length, (
f"slen mismatch: got {result.slen}, expected {length}"
)
# ---------------------------------------------------------------------------
# Property 5c: Long strings are NOT interned (distinct objects)
# ---------------------------------------------------------------------------
@settings(max_examples=300)
@given(data=long_strings)
def test_long_string_not_interned(data):
"""Creating the same long string twice must return distinct objects
(long strings are heap-allocated, not interned)."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
s1 = be_newstrn(vm, s, length)
s2 = be_newstrn(vm, s, length)
assert s1 is not s2, (
f"Long string of length {length} should NOT be interned"
)
# ---------------------------------------------------------------------------
# Property 5d: Long strings have slen == 255 and llen == actual length
# ---------------------------------------------------------------------------
@settings(max_examples=300)
@given(data=long_strings)
def test_long_string_slen_is_255(data):
"""Long strings (> 64 bytes) must have slen == 255 and llen set to
the actual length."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
result = be_newstrn(vm, s, length)
assert result.slen == 255, (
f"Long string slen should be 255, got {result.slen}"
)
assert result.llen == length, (
f"Long string llen mismatch: got {result.llen}, expected {length}"
)
# ---------------------------------------------------------------------------
# Property 5e: Content is preserved for both short and long strings
# ---------------------------------------------------------------------------
@settings(max_examples=300)
@given(data=binary(min_size=0, max_size=200))
def test_string_content_preserved(data):
"""be_str2cstr must return the original content for any string."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
result = be_newstrn(vm, s, length)
retrieved = be_str2cstr(result)
assert retrieved == s, (
f"Content mismatch for {length}-byte string"
)
# ---------------------------------------------------------------------------
# Property 5f: be_eqstr returns 1 for interned short strings
# ---------------------------------------------------------------------------
@settings(max_examples=300)
@given(data=short_strings)
def test_eqstr_interned_short_strings(data):
"""be_eqstr must return 1 for two references to the same interned
short string."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
s1 = be_newstrn(vm, s, length)
s2 = be_newstrn(vm, s, length)
assert be_eqstr(s1, s2) == 1, "be_eqstr should return 1 for interned strings"
# ---------------------------------------------------------------------------
# Property 5g: be_eqstr returns 1 for long strings with same content
# ---------------------------------------------------------------------------
@settings(max_examples=200)
@given(data=long_strings)
def test_eqstr_long_strings_same_content(data):
"""be_eqstr must return 1 for two long strings with identical content."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
s1 = be_newstrn(vm, s, length)
s2 = be_newstrn(vm, s, length)
assert be_eqstr(s1, s2) == 1, (
"be_eqstr should return 1 for long strings with same content"
)
# ---------------------------------------------------------------------------
# Property 5h: Different short strings are not identity-equal
# ---------------------------------------------------------------------------
@settings(max_examples=300)
@given(d1=short_strings, d2=short_strings)
def test_different_short_strings_not_same_object(d1, d2):
"""Two short strings with different content must not be the same object."""
assume(d1 != d2)
vm = _fresh_vm()
s1 = be_newstrn(vm, _to_latin1(d1), len(d1))
s2 = be_newstrn(vm, _to_latin1(d2), len(d2))
assert s1 is not s2, (
f"Different short strings should not be the same object: "
f"{d1!r} vs {d2!r}"
)
# ---------------------------------------------------------------------------
# Property 5i: String table count tracks interned strings
# ---------------------------------------------------------------------------
@settings(max_examples=200)
@given(data=short_strings)
def test_strtab_count_increments(data):
"""Each unique short string should increment the string table count
by exactly 1. Re-creating the same string should not change the count."""
vm = _fresh_vm()
s = _to_latin1(data)
length = len(data)
count_before = vm.strtab.count
be_newstrn(vm, s, length)
count_after_first = vm.strtab.count
be_newstrn(vm, s, length)
count_after_second = vm.strtab.count
assert count_after_first == count_before + 1, (
f"First creation should increment count by 1: "
f"{count_before} -> {count_after_first}"
)
assert count_after_second == count_after_first, (
f"Second creation of same string should not change count: "
f"{count_after_first} -> {count_after_second}"
)
# ---------------------------------------------------------------------------
# Property 5j: Boundary — string of exactly SHORT_STR_MAX_LEN is short
# ---------------------------------------------------------------------------
def test_boundary_max_short_is_interned():
"""A string of exactly SHORT_STR_MAX_LEN (64) bytes must be interned."""
vm = _fresh_vm()
data = b'A' * SHORT_STR_MAX_LEN
s = _to_latin1(data)
s1 = be_newstrn(vm, s, SHORT_STR_MAX_LEN)
s2 = be_newstrn(vm, s, SHORT_STR_MAX_LEN)
assert s1 is s2, "String of exactly 64 bytes should be interned"
assert s1.slen == SHORT_STR_MAX_LEN
def test_boundary_one_over_max_is_long():
"""A string of SHORT_STR_MAX_LEN + 1 (65) bytes must be a long string."""
vm = _fresh_vm()
length = SHORT_STR_MAX_LEN + 1
data = b'B' * length
s = _to_latin1(data)
s1 = be_newstrn(vm, s, length)
s2 = be_newstrn(vm, s, length)
assert s1 is not s2, "String of 65 bytes should NOT be interned"
assert s1.slen == 255
assert s1.llen == length