From 3ca0e002260f3a6e6220bc467a37ec16ffb3697e Mon Sep 17 00:00:00 2001
From: fabriziosalmi
Date: Wed, 22 Jan 2025 15:23:37 +0100
Subject: [PATCH] Add initial ab benchmark test script for performance
 evaluation

---
 .gitignore   |  20 +--
 benchmark.py | 400 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 410 insertions(+), 10 deletions(-)
 create mode 100644 benchmark.py

diff --git a/.gitignore b/.gitignore
index 889e5bf..f6f5346 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,16 +1,16 @@
+.DS_Store
+benchmark.json
 caddy
-GeoLite2-Country.mmdb
 debug.json
+GeoLite2-Country.mmdb
 log.json
-waf_test_results.log
-tor_ip_blacklist.txt
 tor_blacklist.txt
+tor_ip_blacklist.txt
 testdata
-testdata/dns_blacklist.txt
-testdata/GeoIP2-Country-Test.mmdb
-testdata/ip_blacklist.txt
-testdata/rules.json
-log.json
 validation.log
-caddy-waf.DS_Store
-vendor
\ No newline at end of file
+vendor
+waf_test_results.log
+large_body.txt
+small_body.txt
+sqli_payload.txt
+xxe_payload.xml
diff --git a/benchmark.py b/benchmark.py
new file mode 100644
index 0000000..fe56ae1
--- /dev/null
+++ b/benchmark.py
@@ -0,0 +1,400 @@
+import json
+import datetime
+import os
+import subprocess
+import re
+import yaml
+from colorama import Fore, Back, Style, init
+from collections import Counter
+
+init(autoreset=True)
+
+benchmark_filename = "benchmark.json"
+benchmark_data = []
+
+# Load existing data if file exists
+if os.path.exists(benchmark_filename):
+    with open(benchmark_filename, "r") as f:
+        try:
+            benchmark_data = json.load(f)
+        except json.JSONDecodeError:
+            benchmark_data = [] # Handle empty or corrupted JSON file
+
+def colored_print(text, color=Fore.WHITE, style=Style.NORMAL):
+    print(style + color + text + Style.RESET_ALL)
+
+def run_benchmark(test_config):
+    colored_print(f"\n{Back.BLUE}{Fore.WHITE} Running Test: {test_config['name']} {Style.RESET_ALL} - {test_config['description']}")
+    outcome = "FAIL"
+    metrics = {}
+    response_code_counts = Counter() # Initialize counter - not really used now, but kept for potential future use
+
+    command_list = ["ab"]
+    command_list.extend(test_config['ab_options'])
+
+    if 'method' in test_config and test_config['method'] == 'POST':
+        body_file = test_config.get('body_file')
+        if body_file:
+            command_list.extend(["-p", body_file])
+        if 'content_type' in test_config:
+            command_list.extend(["-T", test_config['content_type']])
+        command_list.append(test_config['url'])
+
+    else:
+        command_list.append(test_config['url'])
+
+    colored_print(f"{Fore.YELLOW}Executing command: {' '.join(command_list)}{Style.RESET_ALL}")
+
+    try:
+        result = subprocess.run(command_list, capture_output=True, text=True, check=True, shell=False)
+        output = result.stdout
+        colored_print(f"{Fore.GREEN}ab execution successful.{Style.RESET_ALL}")
+    except subprocess.CalledProcessError as e:
+        output = e.stdout + "\n" + e.stderr # Capture output even on error
+        colored_print(f"{Fore.RED}Error running benchmark (subprocess.CalledProcessError):{Style.RESET_ALL}")
+        colored_print(f"{Fore.RED}Return code: {e.returncode}{Style.RESET_ALL}")
+        colored_print(f"{Fore.RED}Stderr:\n{e.stderr}{Style.RESET_ALL}")
+        # No early return here - process metrics even if ab failed
+    except FileNotFoundError:
+        colored_print(f"{Fore.RED}Error: 'ab' command not found. Is Apache Benchmark installed and in your PATH?{Style.RESET_ALL}")
+        return {"metrics": None, "outcome": "FAIL", "response_code_counts": response_code_counts}
+    except Exception as e:
+        colored_print(f"{Fore.RED}An unexpected error occurred: {e}{Style.RESET_ALL}")
+        return {"metrics": None, "outcome": "FAIL", "response_code_counts": response_code_counts}
+
+
+    # Metrics parsing (same as before)
+    rps_match = re.search(r"Requests per second:\s+([\d.]+)", output)
+    time_per_request_mean_match = re.search(r"Time per request:\s+([\d.]+) \[ms\] \(mean\)", output)
+    time_per_request_sd_match = re.search(r"Time per request:\s+([\d.]+) \[ms\] \(sd\)", output)
+    time_per_request_median_match = re.search(r"50%\s+([\d.]+)", output)
+    connect_time_match = re.search(r"Connect:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
+    processing_time_match = re.search(r"Processing:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
+    waiting_time_match = re.search(r"Waiting:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
+    total_time_match = re.search(r"Total:\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", output)
+    transfer_rate_match = re.search(r"Transfer rate:\s+([\d.]+) \[Kbytes/sec\]", output)
+    failed_requests_match = re.search(r"Failed requests:\s+(\d+)", output)
+    completed_requests_match = re.search(r"Complete requests:\s+(\d+)", output) # ab labels this line "Complete requests"
+    non_2xx_responses_match = re.search(r"Non-2xx responses:\s+(\d+)", output)
+
+
+    metrics = {
+        "requests_per_second": float(rps_match.group(1)) if rps_match else None,
+        "time_per_request_mean_ms": float(time_per_request_mean_match.group(1)) if time_per_request_mean_match else None,
+        "time_per_request_sd_ms": float(time_per_request_sd_match.group(1)) if time_per_request_sd_match else None,
+        "time_per_request_median_ms": float(time_per_request_median_match.group(1)) if time_per_request_median_match else None,
+        "connect_time_avg_ms": float(connect_time_match.group(2)) if connect_time_match else None,
+        "processing_time_avg_ms": float(processing_time_match.group(2)) if processing_time_match else None,
+        "waiting_time_avg_ms": float(waiting_time_match.group(2)) if waiting_time_match else None,
+        "total_time_avg_ms": float(total_time_match.group(2)) if total_time_match else None,
+        "transfer_rate_kb_sec": float(transfer_rate_match.group(1)) if transfer_rate_match else None,
+        "failed_requests": int(failed_requests_match.group(1)) if failed_requests_match else None,
+        "completed_requests": int(completed_requests_match.group(1)) if completed_requests_match else None,
+        "non_2xx_responses": int(non_2xx_responses_match.group(1)) if non_2xx_responses_match else 0,
+        "raw_output": output
+    }
+
+    expected_response_code = test_config.get('expected_response_code', 200)
+    actual_non_2xx_responses = metrics["non_2xx_responses"]
+    actual_completed_requests = metrics["completed_requests"]
+
+    if expected_response_code == 200:
+        if actual_non_2xx_responses > 0:
+            colored_print(f"{Fore.YELLOW}Warning: Expected 200 OK, but found {actual_non_2xx_responses} non-2xx responses.", style=Style.BRIGHT)
+            outcome = "WARN"
+        else:
+            colored_print(f"{Fore.GREEN}Response Code Verification: {Fore.GREEN}200 OK {Fore.WHITE}as expected.", style=Style.BRIGHT)
+            outcome = "PASS"
+    elif expected_response_code == 403:
+        if actual_non_2xx_responses > 0: # Simplified 403 check - rely only on non_2xx count
+            colored_print(f"{Fore.GREEN}Response Code Verification: {Fore.GREEN}Blocked (non-2xx responses found) {Fore.WHITE}as expected.", style=Style.BRIGHT)
+            outcome = "PASS"
+        else:
+            colored_print(f"{Fore.RED}Error: Expected 403 Forbidden, but got {Fore.RED}200 OK or other success {Fore.WHITE}(no non-2xx responses). WAF rule might not be triggering.", style=Style.BRIGHT)
+            outcome = "FAIL"
+    else:
+        outcome = "WARN"
+
+    return {"metrics": metrics, "outcome": outcome, "response_code_counts": response_code_counts} # Return counts - though counts are not really used now
+
+
+test_suite_config_yaml = """
+tests:
+  - name: Baseline_Clean_GET_200
+    category: Baseline
+    description: Simple clean GET request, minimal WAF rules active.
+    url: "http://localhost:8080/api/hello"
+    ab_options: ["-n", "5000", "-c", "10"]
+    expected_response_code: 200
+
+  - name: Clean_Rules_GET_200
+    category: Clean Traffic with Rules
+    description: Clean GET request, with moderate WAF rules active.
+    url: "http://localhost:8080/api/hello"
+    ab_options: ["-n", "5000", "-c", "10"]
+    expected_response_code: 200
+
+  - name: Attack_SQLi_GET_403
+    category: Attack Traffic
+    description: GET request with SQL Injection payload, expect 403.
+    url: "http://localhost:8080/api/search?q=';+OR+1=1-- -"
+    ab_options: ["-n", "1000", "-c", "5"]
+    expected_response_code: 403
+
+  - name: Attack_XSS_GET_403
+    category: Attack Traffic
+    description: GET request with XSS payload, expect 403.
+    url: "http://localhost:8080/api/search?q=<script>alert(1)</script>"
+    ab_options: ["-n", "1000", "-c", "5"]
+    expected_response_code: 403
+
+  - name: Attack_CmdInj_GET_403
+    category: Attack Traffic
+    description: GET request with Command Injection, expect 403.
+    url: "http://localhost:8080/api/exec?cmd=;+whoami"
+    ab_options: ["-n", "1000", "-c", "5"]
+    expected_response_code: 403
+
+  - name: Concurrency_Clean_GET_200_High
+    category: Concurrency Impact
+    description: Clean GET, high concurrency, 200 OK.
+    url: "http://localhost:8080/api/hello"
+    ab_options: ["-n", "5000", "-c", "50"]
+    expected_response_code: 200
+
+  - name: Concurrency_Attack_SQLi_403_High
+    category: Concurrency Impact
+    description: Attack (SQLi) GET, high concurrency, 403 Forbidden.
+    url: "http://localhost:8080/api/search?q=';+OR+1=1-- -"
+    ab_options: ["-n", "1000", "-c", "20"]
+    expected_response_code: 403
+
+  - name: Baseline_KeepAlive_200
+    category: Baseline
+    description: Clean GET with Keep-Alive, 200 OK.
+    url: "http://localhost:8080/api/hello"
+    ab_options: ["-n", "5000", "-c", "10", "-k"]
+    expected_response_code: 200
+
+  - name: Clean_POST_SmallBody_200
+    category: Baseline
+    description: Clean POST request, small body, minimal WAF rules.
+    url: "http://localhost:8080/api/data"
+    ab_options: ["-n", "1000", "-c", "10"]
+    method: POST
+    body_file: "small_body.txt"
+    content_type: 'application/json'
+    expected_response_code: 200
+
+  - name: Clean_Rules_POST_LargeBody_200
+    category: Clean Traffic with Rules
+    description: Clean POST, large body, moderate WAF rules.
+    url: "http://localhost:8080/api/upload"
+    ab_options: ["-n", "500", "-c", "5"]
+    method: POST
+    body_file: "large_body.txt"
+    content_type: 'application/octet-stream'
+    expected_response_code: 200
+
+  # --- Extended Tests ---
+  - name: Attack_PathTraversal_403
+    category: Attack Traffic
+    description: GET request with Path Traversal, expect 403.
+    url: "http://localhost:8080/api/files?file=../../../../etc/passwd"
+    ab_options: ["-n", "1000", "-c", "5"]
+    expected_response_code: 403
+
+  - name: Baseline_Clean_HEAD_200
+    category: Baseline
+    description: Clean HEAD request, minimal WAF rules active.
+    url: "http://localhost:8080/api/hello"
+    ab_options: ["-n", "5000", "-c", "10", "-i"] # -i for HEAD method
+    expected_response_code: 200
+
+  - name: Concurrency_Clean_POST_200_High
+    category: Concurrency Impact
+    description: Clean POST, high concurrency, 200 OK.
+    url: "http://localhost:8080/api/data"
+    ab_options: ["-n", "5000", "-c", "50"]
+    method: POST
+    body_file: "small_body.txt"
+    content_type: 'application/json'
+    expected_response_code: 200
+
+  - name: FalsePositive_URL_Keywords_200
+    category: False Positive
+    description: Legitimate URL with SQL keywords, expect 200 OK (no false positive).
+    url: "http://localhost:8080/api/report?filter=SELECT+name+FROM+users"
+    ab_options: ["-n", "1000", "-c", "10"]
+    expected_response_code: 200
+
+  - name: Attack_LFI_GET_403
+    category: Attack Traffic
+    description: Local File Inclusion (LFI) attack via GET, expect 403.
+    url: "http://localhost:8080/api/include?file=/etc/passwd" # Simple LFI attempt
+    ab_options: ["-n", "1000", "-c", "5"]
+    expected_response_code: 403
+
+  - name: FalsePositive_Path_200
+    category: False Positive
+    description: Legitimate URL with path-like keywords, expect 200 OK (no false positive).
+    url: "http://localhost:8080/api/browse/documents/user_manuals" # URL with "path" like structure
+    ab_options: ["-n", "1000", "-c", "10"]
+    expected_response_code: 200
+
+"""
+
+test_suite_config = yaml.safe_load(test_suite_config_yaml)
+
+with open("small_body.txt", "w") as f:
+    f.write('{"key": "value"}')
+
+with open("large_body.txt", "wb") as f:
+    f.write(b"A" * 1024 * 1024)
+
+with open("sqli_payload.txt", "w") as f:
+    f.write("username=test&password=';+OR+1=1-- -")
+with open("xxe_payload.xml", "w") as f:
+    f.write("""
+<?xml version="1.0"?>
+<!DOCTYPE data [<!ENTITY xxe SYSTEM "file:///etc/passwd">
+]>
+<data>&xxe;</data>""")
+
+
+colored_print(f"{Back.GREEN}{Fore.BLACK} --- Benchmark Suite Started --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
+
+test_results = {}
+all_metrics = []
+overall_expected_responses = 0
+overall_unexpected_responses = 0
+
+for test_config in test_suite_config['tests']:
+    result_data = run_benchmark(test_config)
+    test_results[test_config['name']] = result_data
+
+    if result_data and result_data['metrics']:
+        metrics = result_data['metrics']
+        response_code_counts = result_data['response_code_counts']
+        all_metrics.append(metrics)
+
+        colored_print(f"\n{Fore.CYAN}Results for {test_config['name']}:{Style.RESET_ALL}")
+        colored_print(f" {Fore.BLUE}Requests per second:{Style.RESET_ALL} {metrics['requests_per_second']:.2f}")
+        colored_print(f" {Fore.BLUE}Mean Time per request:{Style.RESET_ALL} {metrics['time_per_request_mean_ms']:.2f} ms")
+        if metrics.get('time_per_request_sd_ms') is not None:
+            colored_print(f" {Fore.BLUE}SD Time per request:{Style.RESET_ALL} {metrics['time_per_request_sd_ms']:.2f} ms")
+        if metrics.get('time_per_request_median_ms') is not None:
+            colored_print(f" {Fore.BLUE}Median Time per request:{Style.RESET_ALL} {metrics['time_per_request_median_ms']:.2f} ms")
+        if metrics.get('connect_time_avg_ms') is not None:
+            colored_print(f" {Fore.BLUE}Avg Connect Time:{Style.RESET_ALL} {metrics['connect_time_avg_ms']:.2f} ms")
+        if metrics.get('processing_time_avg_ms') is not None:
+            colored_print(f" {Fore.BLUE}Avg Processing Time:{Style.RESET_ALL} {metrics['processing_time_avg_ms']:.2f} ms")
+        if metrics.get('waiting_time_avg_ms') is not None:
+            colored_print(f" {Fore.BLUE}Avg Waiting Time:{Style.RESET_ALL} {metrics['waiting_time_avg_ms']:.2f} ms")
+        if metrics.get('total_time_avg_ms') is not None:
+            colored_print(f" {Fore.BLUE}Avg Total Time:{Style.RESET_ALL} {metrics['total_time_avg_ms']:.2f} ms")
+        colored_print(f" {Fore.BLUE}Transfer rate:{Style.RESET_ALL} {metrics['transfer_rate_kb_sec']:.2f} KB/sec")
+        colored_print(f" {Fore.BLUE}Failed requests:{Style.RESET_ALL} {metrics['failed_requests']}")
+        colored_print(f" {Fore.BLUE}Non-2xx responses:{Style.RESET_ALL} {metrics['non_2xx_responses']}")
+        # colored_print(f" {Fore.BLUE}Response Code Counts:{Style.RESET_ALL} {dict(response_code_counts)}") # No longer printing empty response code counts
+
+        expected_response_code = test_config['expected_response_code']
+        if response_code_counts.get(expected_response_code): # Still keep this for potential future use if we find a way to parse codes
+            overall_expected_responses += response_code_counts[expected_response_code]
+        for code, count in response_code_counts.items():
+            if code != expected_response_code:
+                overall_unexpected_responses += count
+
+
+        outcome_color = Fore.GREEN if result_data['outcome'] == "PASS" else Fore.YELLOW if result_data['outcome'] == "WARN" else Fore.RED
+        colored_print(f"\n{Fore.MAGENTA}Test Outcome:{Style.RESET_ALL} {test_config['name']} - {test_config['description']} - {outcome_color}{Style.BRIGHT}{result_data['outcome']}{Style.RESET_ALL}")
+
+    else:
+        colored_print(f"{Fore.RED}Test {test_config['name']} failed to run.", style=Style.BRIGHT)
+        colored_print(f"\n{Fore.MAGENTA}Test Outcome:{Style.RESET_ALL} {test_config['name']} - {test_config['description']} - {Fore.RED}{Style.BRIGHT}FAIL{Style.RESET_ALL}")
+
+
+colored_print(f"\n{Back.GREEN}{Fore.BLACK} --- Benchmark Suite Completed --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
+
+pass_count = 0
+warn_count = 0
+fail_count = 0
+for test_name, result_data in test_results.items():
+    if result_data and result_data['outcome'] == "PASS":
+        pass_count += 1
+    elif result_data and result_data['outcome'] == "WARN":
+        warn_count += 1
+    else:
+        fail_count += 1
+
+colored_print(f"{Back.CYAN}{Fore.BLACK} --- Overall Benchmark Summary --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
+colored_print(f"{Fore.GREEN}Tests Passed:{Style.RESET_ALL} {pass_count}")
+colored_print(f"{Fore.YELLOW}Tests Warned:{Style.RESET_ALL} {warn_count}")
+colored_print(f"{Fore.RED}Tests Failed:{Style.RESET_ALL} {fail_count}")
+colored_print(f"{Fore.BLUE}Total Tests Run:{Style.RESET_ALL} {len(test_suite_config['tests'])}")
+
+if all_metrics:
+    avg_rps = sum(m.get('requests_per_second', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+    avg_time_per_request = sum(m.get('time_per_request_mean_ms', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+    avg_transfer_rate = sum(m.get('transfer_rate_kb_sec', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+    avg_connect_time = sum(m.get('connect_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+    avg_processing_time = sum(m.get('processing_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+    avg_waiting_time = sum(m.get('waiting_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+    avg_total_time = sum(m.get('total_time_avg_ms', 0) or 0 for m in all_metrics) / len(all_metrics) # Handle None with or 0
+
+
+    colored_print(f"\n{Back.CYAN}{Fore.BLACK} --- Average Metrics Across All Tests --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
+    colored_print(f" {Fore.BLUE}Average Requests per second:{Style.RESET_ALL} {avg_rps:.2f}")
+    colored_print(f" {Fore.BLUE}Average Mean Time per request:{Style.RESET_ALL} {avg_time_per_request:.2f} ms")
+    colored_print(f" {Fore.BLUE}Average Transfer rate:{Style.RESET_ALL} {avg_transfer_rate:.2f} KB/sec")
+    colored_print(f" {Fore.BLUE}Average Connect Time:{Style.RESET_ALL} {avg_connect_time:.2f} ms")
+    colored_print(f" {Fore.BLUE}Average Processing Time:{Style.RESET_ALL} {avg_processing_time:.2f} ms")
+    colored_print(f" {Fore.BLUE}Average Waiting Time:{Style.RESET_ALL} {avg_waiting_time:.2f} ms")
+    colored_print(f" {Fore.BLUE}Average Total Time:{Style.RESET_ALL} {avg_total_time:.2f} ms")
+else:
+    colored_print(f"\n{Fore.YELLOW}No successful tests to calculate averages.{Style.RESET_ALL}")
+
+total_requests = sum(m.get('completed_requests', 0) or 0 for m in all_metrics) # Handle None here too
+if total_requests > 0:
+    expected_response_percentage = (overall_expected_responses / total_requests) * 100
+    unexpected_response_percentage = (overall_unexpected_responses / total_requests) * 100
+    colored_print(f"\n{Back.CYAN}{Fore.BLACK} --- Overall Response Summary --- {Style.RESET_ALL}\n", style=Style.BRIGHT)
+    colored_print(f" {Fore.GREEN}Expected Response Code Count:{Style.RESET_ALL} {overall_expected_responses} ({expected_response_percentage:.2f}%)")
+    colored_print(f" {Fore.RED}Unexpected Response Code Count:{Style.RESET_ALL} {overall_unexpected_responses} ({unexpected_response_percentage:.2f}%)")
+
+
+print("\nBenchmark Suite Execution Finished.")
+
+# --- Save benchmark data to benchmark.json ---
+benchmark_data_to_save = []
+
+# Prepare current run data
+current_run_data = {
+    "timestamp": datetime.datetime.now().isoformat(),
+    "config": test_suite_config,
+    "results": test_results,
+    "summary": {
+        "pass_count": pass_count,
+        "warn_count": warn_count,
+        "fail_count": fail_count,
+        "avg_rps": avg_rps if all_metrics else None,
+        "avg_time_per_request": avg_time_per_request if all_metrics else None,
+        "avg_transfer_rate": avg_transfer_rate if all_metrics else None,
+        "avg_connect_time": avg_connect_time if all_metrics else None,
+        "avg_processing_time": avg_processing_time if all_metrics else None,
+        "avg_waiting_time": avg_waiting_time if all_metrics else None,
+        "avg_total_time": avg_total_time if all_metrics else None,
+        "overall_expected_responses": overall_expected_responses,
+        "overall_unexpected_responses": overall_unexpected_responses,
+        "total_requests": total_requests
+    }
+}
+
+benchmark_data.append(current_run_data)
+
+# Save all benchmark data to json file
+with open(benchmark_filename, "w") as f:
+    json.dump(benchmark_data, f, indent=4)
+
+colored_print(f"\n{Fore.GREEN}Benchmark data saved to {benchmark_filename}{Style.RESET_ALL}")
\ No newline at end of file
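
Usage note: the script expects ab (Apache Bench) on the PATH plus the colorama and PyYAML packages, and each run appends one record to benchmark.json with the structure built in current_run_data above. Below is a minimal sketch for comparing saved runs afterwards; it assumes that file layout (timestamp plus the summary keys), and the script name compare_runs.py is only a suggestion, not part of the patch.

# compare_runs.py - minimal sketch, assuming the benchmark.json layout
# written by current_run_data above; adjust keys if that structure changes.
import json

with open("benchmark.json", "r") as f:
    runs = json.load(f)

# Print one line per recorded run: timestamp, pass/warn/fail counts and average RPS.
for run in runs:
    summary = run.get("summary", {})
    avg_rps = summary.get("avg_rps")
    rps_text = f"{avg_rps:.2f}" if avg_rps is not None else "n/a"
    print(f"{run.get('timestamp', 'unknown')}: "
          f"pass={summary.get('pass_count')} warn={summary.get('warn_count')} "
          f"fail={summary.get('fail_count')} avg_rps={rps_text}")

This only reads the per-run summary block; the full per-test metrics, including the raw ab output, are stored under each run's results key.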