Use stable FNV-1a hash for Overture IDs and harden fetch_range

- Replace DefaultHasher with FNV-1a in gers_id_to_u64. Rust does not
  guarantee DefaultHasher's algorithm across compiler versions, so
  Overture building IDs (and deterministic RNG seeded from them) could
  silently change on toolchain upgrades. FNV-1a is a well-known, stable
  hash with good distribution and zero dependencies.
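- Guard fetch_range against length==0: end = start + length - 1 would
  evaluate to start - 1, which underflows when start == 0 (a panic in
  debug builds, a wrap to u64::MAX in release builds) and otherwise
  gives end < start, so no valid HTTP Range header can be built. Now
  returns an explicit error.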
This commit is contained in:
louis-e
2026-04-06 00:35:56 +02:00
parent 3804c194f8
commit 113debee86

View File

@@ -20,7 +20,6 @@ use parquet::file::serialized_reader::SerializedFileReader;
use parquet::record::Row;
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::time::Duration;
// ─── Constants ────────────────────────────────────────────────────────────
@@ -1231,13 +1230,23 @@ fn overture_class_to_osm_building<'a>(subtype: Option<&'a str>, class: Option<&'
/// Hash a GERS UUID string to a u64 with the high bit set.
///
/// This guarantees no collision with OSM IDs (which are sequential positive u64
/// currently up to ~12 billion, well under 2^34). Setting bit 63 puts our
/// IDs in a completely separate range.
/// Uses FNV-1a (not `DefaultHasher`) so that IDs are deterministic across
/// Rust compiler versions — `DefaultHasher`'s algorithm is explicitly not
/// a stable API contract.
///
/// Setting bit 63 guarantees no collision with OSM IDs (which are sequential
/// positive u64 currently up to ~12 billion, well under 2^34).
fn gers_id_to_u64(gers_id: &str) -> u64 {
let mut hasher = std::collections::hash_map::DefaultHasher::new();
gers_id.hash(&mut hasher);
hasher.finish() | OVERTURE_ID_HIGH_BIT
// FNV-1a parameters for u64
const FNV_OFFSET: u64 = 0xcbf29ce484222325;
const FNV_PRIME: u64 = 0x100000001b3;
let mut hash = FNV_OFFSET;
for byte in gers_id.as_bytes() {
hash ^= *byte as u64;
hash = hash.wrapping_mul(FNV_PRIME);
}
hash | OVERTURE_ID_HIGH_BIT
}
// ─── Sparse byte reader for row-group-only downloads ─────────────────────
@@ -1340,6 +1349,9 @@ fn fetch_range(
start: u64,
length: u64,
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
if length == 0 {
return Err("fetch_range called with length 0".into());
}
let end = start + length - 1;
let response = client
.get(url)