Merge pull request #1544 from adamoutler/built-in-tests

Improve built-in test used during system startup - thanks @adamoutler 🙏
This commit is contained in:
Jokob @NetAlertX
2026-03-05 06:48:46 +11:00
committed by GitHub
14 changed files with 265 additions and 165 deletions

View File

@@ -19,6 +19,9 @@ services:
- CHOWN # Required for root-entrypoint to chown /data + /tmp before dropping privileges
- SETUID # Required for root-entrypoint to switch to non-root user
- SETGID # Required for root-entrypoint to switch to non-root group
sysctls: # ARP flux mitigation for host networking accuracy
net.ipv4.conf.all.arp_ignore: 1
net.ipv4.conf.all.arp_announce: 2
volumes:
- type: volume # Persistent Docker-managed Named Volume for storage

View File

@@ -30,6 +30,9 @@ services:
- CHOWN # Required for root-entrypoint to chown /data + /tmp before dropping privileges
- SETUID # Required for root-entrypoint to switch to non-root user
- SETGID # Required for root-entrypoint to switch to non-root group
sysctls: # ARP flux mitigation (reduces duplicate/ambiguous ARP behavior on host networking)
net.ipv4.conf.all.arp_ignore: 1
net.ipv4.conf.all.arp_announce: 2
volumes:
- type: volume # Persistent Docker-managed named volume for config + database

View File

@@ -0,0 +1,51 @@
# ARP Flux Sysctls Not Set
## Issue Description
NetAlertX detected that ARP flux protection sysctls are not set as expected:
- `net.ipv4.conf.all.arp_ignore=1`
- `net.ipv4.conf.all.arp_announce=2`
## Security Ramifications
This is not a direct container breakout risk, but detection quality can degrade:
- Incorrect IP/MAC associations
- Device state flapping
- Unreliable topology or presence data
## Why You're Seeing This Issue
The running environment does not provide the expected kernel sysctl values. This is common in Docker setups where sysctls were not explicitly configured.
## How to Correct the Issue
Set these sysctls at container runtime.
- In `docker-compose.yml` (preferred):
```yaml
services:
netalertx:
sysctls:
net.ipv4.conf.all.arp_ignore: 1
net.ipv4.conf.all.arp_announce: 2
```
- For `docker run`:
```bash
docker run \
--sysctl net.ipv4.conf.all.arp_ignore=1 \
--sysctl net.ipv4.conf.all.arp_announce=2 \
ghcr.io/netalertx/netalertx:latest
```
> **Note:** Setting `net.ipv4.conf.all.arp_ignore` and `net.ipv4.conf.all.arp_announce` may fail with "operation not permitted" unless the container is run with elevated privileges. To resolve this, you can:
> - Use `--privileged` with `docker run`.
> - Use the more restrictive `--cap-add=NET_ADMIN` (or `cap_add: [NET_ADMIN]` in `docker-compose` service definitions) to allow the sysctls to be applied at runtime.
## Additional Resources
For broader Docker Compose guidance, see:
- [DOCKER_COMPOSE.md](https://docs.netalertx.com/DOCKER_COMPOSE)

View File

@@ -13,6 +13,9 @@ services:
- CHOWN
- SETUID
- SETGID
sysctls:
net.ipv4.conf.all.arp_ignore: 1
net.ipv4.conf.all.arp_announce: 2
volumes:
- type: volume
source: netalertx_data

View File

@@ -13,6 +13,9 @@ services:
- CHOWN
- SETUID
- SETGID
sysctls:
net.ipv4.conf.all.arp_ignore: 1
net.ipv4.conf.all.arp_announce: 2
volumes:
- type: volume
source: netalertx_data

View File

@@ -9,28 +9,17 @@ if [ ! -f "${NETALERTX_CONFIG}/app.conf" ]; then
exit 0
fi
# Helper: set or append config key safely
set_config_value() {
_key="$1"
_value="$2"
# Remove newlines just in case
_value=$(printf '%s' "$_value" | tr -d '\n\r')
# Escape sed-sensitive chars
_escaped=$(printf '%s\n' "$_value" | sed 's/[\/&]/\\&/g')
if grep -q "^${_key}=" "${NETALERTX_CONFIG}/app.conf"; then
sed -i "s|^${_key}=.*|${_key}=${_escaped}|" "${NETALERTX_CONFIG}/app.conf"
else
echo "${_key}=${_value}" >> "${NETALERTX_CONFIG}/app.conf"
fi
}
# ------------------------------------------------------------
# LOADED_PLUGINS override
# ------------------------------------------------------------
if [ -n "${LOADED_PLUGINS:-}" ]; then
echo "[ENV] Applying LOADED_PLUGINS override"
set_config_value "LOADED_PLUGINS" "$LOADED_PLUGINS"
value=$(printf '%s' "$LOADED_PLUGINS" | tr -d '\n\r')
# declare delimiter for sed and escape it along with / and &
delim='|'
escaped=$(printf '%s\n' "$value" | sed "s/[\/${delim}&]/\\&/g")
if grep -q '^LOADED_PLUGINS=' "${NETALERTX_CONFIG}/app.conf"; then
# use same delimiter when substituting
sed -i "s${delim}^LOADED_PLUGINS=.*${delim}LOADED_PLUGINS=${escaped}${delim}" "${NETALERTX_CONFIG}/app.conf"
else
echo "LOADED_PLUGINS=${value}" >> "${NETALERTX_CONFIG}/app.conf"
fi
fi

View File

@@ -1,92 +1,30 @@
#!/bin/sh
# 37-host-optimization.sh: Apply and validate network optimizations (ARP flux fix)
# 37-host-optimization.sh: Detect ARP flux sysctl configuration.
#
# This script improves detection accuracy by ensuring proper ARP behavior.
# It attempts to apply sysctl settings and warns if not possible.
# This script does not change host/kernel settings.
# --- Color Codes ---
RED=$(printf '\033[1;31m')
YELLOW=$(printf '\033[1;33m')
RESET=$(printf '\033[0m')
# --- Skip flag ---
if [ -n "${SKIP_OPTIMIZATIONS:-}" ]; then
exit 0
fi
# --- Helpers ---
get_sysctl() {
sysctl -n "$1" 2>/dev/null || echo "unknown"
}
set_sysctl_if_needed() {
key="$1"
expected="$2"
current="$(get_sysctl "$key")"
# Already correct
if [ "$current" = "$expected" ]; then
return 0
fi
# Try to apply
if sysctl -w "$key=$expected" >/dev/null 2>&1; then
return 0
fi
# Failed
return 1
}
# --- Apply Settings (best effort) ---
failed=0
set_sysctl_if_needed net.ipv4.conf.all.arp_ignore 1 || failed=1
set_sysctl_if_needed net.ipv4.conf.all.arp_announce 2 || failed=1
set_sysctl_if_needed net.ipv4.conf.default.arp_ignore 1 || failed=1
set_sysctl_if_needed net.ipv4.conf.default.arp_announce 2 || failed=1
[ "$(sysctl -n net.ipv4.conf.all.arp_ignore 2>/dev/null || echo unknown)" = "1" ] || failed=1
[ "$(sysctl -n net.ipv4.conf.all.arp_announce 2>/dev/null || echo unknown)" = "2" ] || failed=1
# --- Validate final state ---
all_ignore="$(get_sysctl net.ipv4.conf.all.arp_ignore)"
all_announce="$(get_sysctl net.ipv4.conf.all.arp_announce)"
# --- Warning Output ---
if [ "$all_ignore" != "1" ] || [ "$all_announce" != "2" ]; then
if [ "$failed" -eq 1 ]; then
>&2 printf "%s" "${YELLOW}"
>&2 cat <<EOF
>&2 cat <<'EOF'
══════════════════════════════════════════════════════════════════════════════
⚠️ ATTENTION: ARP flux protection not enabled.
NetAlertX relies on ARP for device detection. Your system currently allows
ARP replies from incorrect interfaces (ARP flux), which may result in:
• False devices being detected
• IP/MAC mismatches
• Flapping device states
• Incorrect network topology
This is common when running in Docker or multi-interface environments.
──────────────────────────────────────────────────────────────────────────
Recommended fix (Docker Compose):
sysctls:
net.ipv4.conf.all.arp_ignore: 1
net.ipv4.conf.all.arp_announce: 2
──────────────────────────────────────────────────────────────────────────
Alternatively, apply on the host:
⚠️ WARNING: ARP flux sysctls are not set.
Expected values:
net.ipv4.conf.all.arp_ignore=1
net.ipv4.conf.all.arp_announce=2
Detection accuracy may be reduced until this is configured.
Detection accuracy may be reduced until configured.
See: https://docs.netalertx.com/docker-troubleshooting/arp-flux-sysctls/
══════════════════════════════════════════════════════════════════════════════
EOF
>&2 printf "%s" "${RESET}"

View File

@@ -86,10 +86,11 @@ for script in "${ENTRYPOINT_CHECKS}"/*; do
fi
script_name=$(basename "$script" | sed 's/^[0-9]*-//;s/\.(sh|py)$//;s/-/ /g')
echo "--> ${script_name} "
if [ -n "${SKIP_STARTUP_CHECKS:-}" ] && echo "${SKIP_STARTUP_CHECKS}" | grep -q "\b${script_name}\b"; then
printf "%sskip%s\n" "${GREY}" "${RESET}"
continue
fi
if [ -n "${SKIP_STARTUP_CHECKS:-}" ] &&
printf '%s' "${SKIP_STARTUP_CHECKS}" | grep -wFq -- "${script_name}"; then
printf "%sskip%s\n" "${GREY}" "${RESET}"
continue
fi
"$script"
NETALERTX_DOCKER_ERROR_CHECK=$?

View File

@@ -48,11 +48,13 @@ else
log_error "python /app/server is not running"
fi
# 5. Check port 20211 is open and contains "netalertx"
if curl -sf --max-time 10 "http://localhost:${PORT:-20211}" | grep -i "netalertx" > /dev/null; then
log_success "Port ${PORT:-20211} is responding and contains 'netalertx'"
# 5. Check port 20211 is open
CHECK_ADDR="${LISTEN_ADDR:-127.0.0.1}"
[ "${CHECK_ADDR}" == "0.0.0.0" ] && CHECK_ADDR="127.0.0.1"
if timeout 10 bash -c "</dev/tcp/${CHECK_ADDR}/${PORT:-20211}" 2>/dev/null; then
log_success "Port ${PORT:-20211} is responding"
else
log_error "Port ${PORT:-20211} is not responding or doesn't contain 'netalertx'"
log_error "Port ${PORT:-20211} is not responding"
fi
# NOTE: GRAPHQL_PORT might not be set and is initialized as a setting with a default value in the container. It can also be initialized via APP_CONF_OVERRIDE
@@ -71,4 +73,4 @@ else
echo "[HEALTHCHECK] ❌ One or more health checks failed"
fi
exit $EXIT_CODE
exit $EXIT_CODE

View File

@@ -20,6 +20,7 @@ nav:
- Docker Updates: UPDATES.md
- Docker Maintenance: DOCKER_MAINTENANCE.md
- Docker Startup Troubleshooting:
- ARP flux sysctls: docker-troubleshooting/arp-flux-sysctls.md
- Aufs capabilities: docker-troubleshooting/aufs-capabilities.md
- Excessive capabilities: docker-troubleshooting/excessive-capabilities.md
- File permissions: docker-troubleshooting/file-permissions.md

View File

@@ -43,6 +43,10 @@ def create_dummy(client, api_token, test_mac):
client.post(f"/device/{test_mac}", json=payload, headers=auth_headers(api_token))
def delete_dummy(client, api_token, test_mac):
    """Ask the API to delete the device whose MAC is *test_mac*.

    Sends a DELETE to ``/devices`` with the MAC wrapped in a ``macs`` list.
    The response is intentionally not inspected or returned.
    """
    bearer = auth_headers(api_token)
    client.delete("/devices", json={"macs": [test_mac]}, headers=bearer)
def test_get_all_devices(client, api_token, test_mac):
# Ensure there is at least one device
create_dummy(client, api_token, test_mac)
@@ -149,53 +153,62 @@ def test_export_import_cycle_base64(client, api_token, test_mac):
def test_devices_totals(client, api_token, test_mac):
# 1. Create a dummy device
create_dummy(client, api_token, test_mac)
try:
# 1. Call the totals endpoint
resp = client.get("/devices/totals", headers=auth_headers(api_token))
assert resp.status_code == 200
# 2. Call the totals endpoint
resp = client.get("/devices/totals", headers=auth_headers(api_token))
assert resp.status_code == 200
# 2. Ensure the response is a JSON list
data = resp.json
assert isinstance(data, list)
# 3. Ensure the response is a JSON list
data = resp.json
assert isinstance(data, list)
# 3. Dynamically get expected length
conditions = get_device_conditions()
expected_length = len(conditions)
assert len(data) == expected_length
# 4. Dynamically get expected length
conditions = get_device_conditions()
expected_length = len(conditions)
assert len(data) == expected_length
# 5. Check that at least 1 device exists
assert data[0] >= 1 # 'devices' count includes the dummy device
# 4. Check that at least 1 device exists when there are any conditions
if expected_length > 0:
assert data[0] >= 1 # 'devices' count includes the dummy device
else:
# no conditions defined; data should be an empty list
assert data == []
finally:
delete_dummy(client, api_token, test_mac)
def test_devices_by_status(client, api_token, test_mac):
# 1. Create a dummy device
create_dummy(client, api_token, test_mac)
try:
# 1. Request devices by a valid status
resp = client.get("/devices/by-status?status=my", headers=auth_headers(api_token))
assert resp.status_code == 200
data = resp.json
assert isinstance(data, list)
assert any(d["id"] == test_mac for d in data)
# 2. Request devices by a valid status
resp = client.get("/devices/by-status?status=my", headers=auth_headers(api_token))
assert resp.status_code == 200
data = resp.json
assert isinstance(data, list)
assert any(d["id"] == test_mac for d in data)
# 2. Request devices with an invalid/unknown status
resp_invalid = client.get("/devices/by-status?status=invalid_status", headers=auth_headers(api_token))
# Strict validation now returns 422 for invalid status enum values
assert resp_invalid.status_code == 422
# 3. Request devices with an invalid/unknown status
resp_invalid = client.get("/devices/by-status?status=invalid_status", headers=auth_headers(api_token))
# Strict validation now returns 422 for invalid status enum values
assert resp_invalid.status_code == 422
# 3. Check favorite formatting if devFavorite = 1
# Update dummy device to favorite
update_resp = client.post(
f"/device/{test_mac}",
json={"devFavorite": 1},
headers=auth_headers(api_token)
)
assert update_resp.status_code == 200
assert update_resp.json.get("success") is True
# 4. Check favorite formatting if devFavorite = 1
# Update dummy device to favorite
client.post(
f"/device/{test_mac}",
json={"devFavorite": 1},
headers=auth_headers(api_token)
)
resp_fav = client.get("/devices/by-status?status=my", headers=auth_headers(api_token))
fav_data = next((d for d in resp_fav.json if d["id"] == test_mac), None)
assert fav_data is not None
assert "&#9733" in fav_data["title"]
resp_fav = client.get("/devices/by-status?status=my", headers=auth_headers(api_token))
fav_data = next((d for d in resp_fav.json if d["id"] == test_mac), None)
assert fav_data is not None
assert "&#9733" in fav_data["title"]
finally:
delete_dummy(client, api_token, test_mac)
def test_delete_test_devices(client, api_token):

View File

@@ -1,6 +1,7 @@
import pytest
from unittest.mock import patch, MagicMock
from datetime import datetime
import random
from api_server.api_server_start import app
from helper import get_setting_value
@@ -21,6 +22,31 @@ def auth_headers(token):
return {"Authorization": f"Bearer {token}"}
def create_dummy(client, api_token, test_mac):
    """Create the throwaway test device identified by *test_mac*.

    Posts a ``createNew`` payload to ``/device/<test_mac>`` with the headers
    produced by ``auth_headers`` and fails right away, quoting the response
    body, unless the API answers with 200 or 201.

    Returns the response object handed back by the test client.
    """
    device_fields = {
        "createNew": True,
        "devName": "Test Device MCP",
        "devOwner": "Unit Test",
        "devType": "Router",
        "devVendor": "TestVendor",
    }
    result = client.post(
        f"/device/{test_mac}",
        json=device_fields,
        headers=auth_headers(api_token),
    )
    status = result.status_code
    # The body is only rendered when the assertion actually fails.
    assert status in (200, 201), (
        f"Expected status 200/201 for device creation, got {status}. "
        f"Response body: {result.get_data(as_text=True)}"
    )
    return result
def delete_dummy(client, api_token, test_mac):
    """Delete the test device identified by *test_mac* and verify success.

    Sends a DELETE to ``/devices`` with the MAC wrapped in a ``macs`` list and
    fails, quoting the response body, unless the API answers with 200.

    Returns the response object handed back by the test client.
    """
    result = client.delete(
        "/devices",
        json={"macs": [test_mac]},
        headers=auth_headers(api_token),
    )
    status = result.status_code
    # The body is only rendered when the assertion actually fails.
    assert status == 200, (
        f"Expected status 200 for device deletion, got {status}. "
        f"Response body: {result.get_data(as_text=True)}"
    )
    return result
# --- Device Search Tests ---
@@ -350,25 +376,22 @@ def test_mcp_devices_import_json(mock_db_conn, client, api_token):
# --- MCP Device Totals Tests ---
@patch("database.get_temp_db_connection")
def test_mcp_devices_totals(mock_db_conn, client, api_token):
def test_mcp_devices_totals(client, api_token):
"""Test MCP devices totals endpoint."""
mock_conn = MagicMock()
mock_sql = MagicMock()
mock_execute_result = MagicMock()
# Mock the getTotals method to return sample data
mock_execute_result.fetchone.return_value = [10, 8, 2, 0, 1, 3] # devices, connected, favorites, new, down, archived
mock_sql.execute.return_value = mock_execute_result
mock_conn.cursor.return_value = mock_sql
mock_db_conn.return_value = mock_conn
test_mac = "aa:bb:cc:" + ":".join(f"{random.randint(0, 255):02X}" for _ in range(3)).lower()
create_dummy(client, api_token, test_mac)
response = client.get("/devices/totals", headers=auth_headers(api_token))
try:
response = client.get("/devices/totals", headers=auth_headers(api_token))
assert response.status_code == 200
data = response.get_json()
# Should return device counts as array
assert isinstance(data, list)
assert len(data) >= 4 # At least online, offline, etc.
assert response.status_code == 200
data = response.get_json()
# Should return device counts as array
assert isinstance(data, list)
assert len(data) >= 4 # At least online, offline, etc.
assert data[0] >= 1
finally:
delete_dummy(client, api_token, test_mac)
# --- MCP Traceroute Tests ---

View File

@@ -317,14 +317,18 @@ def _select_custom_ports(exclude: set[int] | None = None) -> int:
raise RuntimeError("Unable to locate a free high port for compose testing")
def _make_port_check_hook(ports: tuple[int, ...]) -> Callable[[], None]:
def _make_port_check_hook(
ports: tuple[int, ...],
settle_wait_seconds: int = COMPOSE_SETTLE_WAIT_SECONDS,
port_wait_timeout: int = COMPOSE_PORT_WAIT_TIMEOUT,
) -> Callable[[], None]:
"""Return a callback that waits for the provided ports to accept TCP connections."""
def _hook() -> None:
for port in ports:
LAST_PORT_SUCCESSES.pop(port, None)
time.sleep(COMPOSE_SETTLE_WAIT_SECONDS)
_wait_for_ports(ports, timeout=COMPOSE_PORT_WAIT_TIMEOUT)
time.sleep(settle_wait_seconds)
_wait_for_ports(ports, timeout=port_wait_timeout)
return _hook
@@ -344,6 +348,7 @@ def _write_normal_startup_compose(
service_env = service.setdefault("environment", {})
service_env.setdefault("NETALERTX_CHECK_ONLY", "1")
service_env.setdefault("SKIP_STARTUP_CHECKS", "host optimization")
if env_overrides:
service_env.update(env_overrides)
@@ -852,12 +857,18 @@ def test_normal_startup_no_warnings_compose(tmp_path: pathlib.Path) -> None:
default_project = "netalertx-normal-default"
default_compose_file = _write_normal_startup_compose(default_dir, default_project, default_env_overrides)
port_check_timeout = 20
settle_wait_seconds = 2
default_result = _run_docker_compose(
default_compose_file,
default_project,
timeout=8,
detached=True,
post_up=_make_port_check_hook(default_ports),
post_up=_make_port_check_hook(
default_ports,
settle_wait_seconds=settle_wait_seconds,
port_wait_timeout=port_check_timeout,
),
)
# MANDATORY LOGGING - DO NOT REMOVE (see file header for reasoning)
print("\n[compose output default]", default_result.output)
@@ -885,9 +896,14 @@ def test_normal_startup_no_warnings_compose(tmp_path: pathlib.Path) -> None:
f"Unexpected mount row values for /data: {data_parts[2:4]}"
)
allowed_warning = "⚠️ WARNING: ARP flux sysctls are not set."
assert "Write permission denied" not in default_output
assert "CRITICAL" not in default_output
assert "⚠️" not in default_output
assert all(
"⚠️" not in line or allowed_warning in line
for line in default_output.splitlines()
), "Unexpected warning found in default output"
custom_http = _select_custom_ports({default_http_port})
custom_graphql = _select_custom_ports({default_http_port, custom_http})
@@ -913,7 +929,11 @@ def test_normal_startup_no_warnings_compose(tmp_path: pathlib.Path) -> None:
custom_project,
timeout=8,
detached=True,
post_up=_make_port_check_hook(custom_ports),
post_up=_make_port_check_hook(
custom_ports,
settle_wait_seconds=settle_wait_seconds,
port_wait_timeout=port_check_timeout,
),
)
print("\n[compose output custom]", custom_result.output)
custom_output = _assert_ports_ready(custom_result, custom_project, custom_ports)
@@ -922,8 +942,16 @@ def test_normal_startup_no_warnings_compose(tmp_path: pathlib.Path) -> None:
assert "" not in custom_output
assert "Write permission denied" not in custom_output
assert "CRITICAL" not in custom_output
assert "⚠️" not in custom_output
lowered_custom = custom_output.lower()
assert all(
"⚠️" not in line or allowed_warning in line
for line in custom_output.splitlines()
), "Unexpected warning found in custom output"
custom_output_without_allowed_warning = "\n".join(
line
for line in custom_output.splitlines()
if allowed_warning.lower() not in line.lower()
)
lowered_custom = custom_output_without_allowed_warning.lower()
assert "arning" not in lowered_custom
assert "rror" not in lowered_custom

View File

@@ -8,6 +8,7 @@ such as environment variable settings and check skipping.
import subprocess
import uuid
import pytest
import shutil
IMAGE = "netalertx-test"
@@ -85,8 +86,49 @@ def test_no_app_conf_override_when_no_graphql_port():
def test_skip_startup_checks_env_var():
# If SKIP_STARTUP_CHECKS contains the human-readable name of a check (e.g. "mandatory folders"),
# the entrypoint should skip that specific check. We check that the "Creating NetAlertX log directory."
# the entrypoint should skip that specific check. We check that the "Creating NetAlertX log directory."
# message (from the mandatory folders check) is not printed when skipped.
result = _run_entrypoint(env={"SKIP_STARTUP_CHECKS": "mandatory folders"}, check_only=True)
assert "Creating NetAlertX log directory" not in result.stdout
assert result.returncode == 0
@pytest.mark.docker
@pytest.mark.feature_complete
def test_host_optimization_warning_matches_sysctl():
    """Check that the ARP flux warning appears exactly when the sysctls are off.

    Reads ``net.ipv4.conf.all.arp_ignore`` and ``arp_announce`` with the local
    ``sysctl`` binary, derives whether a warning is expected (anything other
    than ``1`` / ``2``), runs the entrypoint in check-only mode and compares.
    The test is skipped when ``sysctl`` is missing or either query fails.
    """
    # NOTE(review): this assumes the entrypoint run observes the same
    # net.ipv4.conf.all values as the machine running pytest — confirm
    # against how _run_entrypoint starts the container.
    sysctl_path = shutil.which("sysctl")
    if not sysctl_path:
        pytest.skip("sysctl binary not found on host; skipping host-optimization warning check")

    def _query(key):
        # A non-zero exit is tolerated here; the caller inspects returncode.
        return subprocess.run(
            [sysctl_path, "-n", key],
            capture_output=True,
            text=True,
            check=False,
            timeout=10,
        )

    ignore_run = _query("net.ipv4.conf.all.arp_ignore")
    announce_run = _query("net.ipv4.conf.all.arp_announce")

    if any(run.returncode != 0 for run in (ignore_run, announce_run)):
        pytest.skip("sysctl values unavailable on host; skipping host-optimization warning check")

    arp_ignore = ignore_run.stdout.strip()
    arp_announce = announce_run.stdout.strip()
    expected_warning = (arp_ignore, arp_announce) != ("1", "2")

    result = _run_entrypoint(check_only=True)
    combined_output = result.stdout + result.stderr
    warning_present = "WARNING: ARP flux sysctls are not set." in combined_output

    assert warning_present == expected_warning, (
        "host-optimization warning mismatch: "
        f"arp_ignore={arp_ignore}, arp_announce={arp_announce}, "
        f"expected_warning={expected_warning}, warning_present={warning_present}"
    )