Create get_spiderlabs_rules.py

This commit is contained in:
fab
2025-01-13 12:46:33 +01:00
committed by GitHub
parent d3343d9b06
commit 52ec2640e0

119
get_spiderlabs_rules.py Normal file
View File

@@ -0,0 +1,119 @@
import requests
import re
import json
import time
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def extract_rules(rule_text):
"""
Extracts individual ModSecurity rules from rule file content using regex.
Args:
rule_text (str): Content of ModSecurity rule file
Returns:
list: List of dictionaries containing parsed rule information
"""
rules = []
# Regex pattern to match ModSecurity SecRule directives
rule_pattern = r'SecRule\s+([^"]*)"([^"]+)"\s*(\\\s*\n\s*.*?|.*?)(?=\s*SecRule|\s*$)'
for match in re.finditer(rule_pattern, rule_text, re.MULTILINE | re.DOTALL):
try:
variables, pattern, actions = match.groups()
# Extract key rule properties using regex
rule_id = re.search(r'id:(\d+)', actions)
severity = re.search(r'severity:\'?([^,\'\s]+)', actions)
action = re.search(r'action:\'?([^,\'\s]+)', actions)
phase = re.search(r'phase:(\d+)', actions)
description = re.search(r'msg:\'?([^\']+)\'', actions)
if not rule_id:
continue
# Handle special characters in pattern
pattern = pattern.replace('[CDATA[', '\\[CDATA\\[')
# Validate regex pattern
try:
re.compile(pattern)
except re.error:
logging.warning(f"Invalid regex pattern in rule {rule_id.group(1)}: {pattern}")
continue
# Extract targeted variables from rule
targets = []
if variables:
# List of possible ModSecurity variables to check for
for target in ["ARGS", "BODY", "URL", "HEADERS", "REQUEST_HEADERS",
"RESPONSE_HEADERS", "REQUEST_COOKIES", "USER_AGENT",
"CONTENT_TYPE", "X-FORWARDED-FOR", "X-REAL-IP"]:
if target in variables.upper():
targets.append(target)
# Set default values if properties are missing
severity_val = severity.group(1) if severity else "LOW"
action_val = action.group(1) if action else "log"
description_val = description.group(1) if description else "No description provided."
# Calculate rule score based on severity and action
score = 0 if action_val == "pass" else \
5 if action_val == "block" else \
4 if severity_val.upper() == "HIGH" else \
3 if severity_val.upper() == "MEDIUM" else 1
# Create rule dictionary with extracted information
rule = {
"id": rule_id.group(1),
"phase": int(phase.group(1)) if phase else 2,
"pattern": pattern,
"targets": targets,
"severity": severity_val,
"action": action_val,
"score": score,
"description": description_val
}
rules.append(rule)
except (AttributeError, ValueError) as e:
continue
return rules
def download_spiderlabs_rules(output_path):
"""Downloads and processes SpiderLabs ModSecurity rules."""
repo_url = "SpiderLabs/owasp-modsecurity-crs"
rules_dir = "rules"
all_rules = []
headers = {}
try:
api_url = f"https://api.github.com/repos/{repo_url}/contents/{rules_dir}"
response = requests.get(api_url, headers=headers)
response.raise_for_status()
for file in response.json():
if not file['name'].endswith('.conf'):
continue
time.sleep(1)
response = requests.get(file["download_url"], headers=headers)
logging.info(f"Processing rule file: {file['name']}")
rules = extract_rules(response.text)
all_rules.extend(rules)
except requests.exceptions.RequestException as e:
logging.error(f"Error: {e}")
return
with open(output_path, 'w') as f:
json.dump(all_rules, f, indent=2)
logging.info(f"Saved {len(all_rules)} rules to {output_path}")
if __name__ == "__main__":
output_path = "spiderlabs_rules.json"
download_spiderlabs_rules(output_path)