Create get_spiderlabs_rules.py

2025-12-23 22:27:46 -05:00 · 2025-01-13 12:46:33 +01:00
parent d3343d9b06
commit 52ec2640e0
1 changed files with 119 additions and 0 deletions
--- a/get_spiderlabs_rules.py
+++ b/get_spiderlabs_rules.py
@@ -0,0 +1,119 @@
+import requests
+import re
+import json
+import time
+import logging
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def extract_rules(rule_text):
+    """
+    Extracts individual ModSecurity rules from rule file content using regex.
+    
+    Args:
+        rule_text (str): Content of ModSecurity rule file
+        
+    Returns:
+        list: List of dictionaries containing parsed rule information
+    """
+    rules = []
+    # Regex pattern to match ModSecurity SecRule directives
+    rule_pattern = r'SecRule\s+([^"]*)"([^"]+)"\s*(\\\s*\n\s*.*?|.*?)(?=\s*SecRule|\s*$)'
+    
+    for match in re.finditer(rule_pattern, rule_text, re.MULTILINE | re.DOTALL):
+        try:
+            variables, pattern, actions = match.groups()
+            
+            # Extract key rule properties using regex
+            rule_id = re.search(r'id:(\d+)', actions)
+            severity = re.search(r'severity:\'?([^,\'\s]+)', actions)
+            action = re.search(r'action:\'?([^,\'\s]+)', actions)
+            phase = re.search(r'phase:(\d+)', actions)
+            description = re.search(r'msg:\'?([^\']+)\'', actions)
+            
+            if not rule_id:
+                continue
+                
+            # Handle special characters in pattern
+            pattern = pattern.replace('[CDATA[', '\\[CDATA\\[')
+            
+            # Validate regex pattern
+            try:
+                re.compile(pattern)
+            except re.error:
+                logging.warning(f"Invalid regex pattern in rule {rule_id.group(1)}: {pattern}")
+                continue
+                
+            # Extract targeted variables from rule
+            targets = []
+            if variables:
+                # List of possible ModSecurity variables to check for
+                for target in ["ARGS", "BODY", "URL", "HEADERS", "REQUEST_HEADERS", 
+                             "RESPONSE_HEADERS", "REQUEST_COOKIES", "USER_AGENT", 
+                             "CONTENT_TYPE", "X-FORWARDED-FOR", "X-REAL-IP"]:
+                    if target in variables.upper():
+                        targets.append(target)
+            
+            # Set default values if properties are missing
+            severity_val = severity.group(1) if severity else "LOW"
+            action_val = action.group(1) if action else "log"
+            description_val = description.group(1) if description else "No description provided."
+            
+            # Calculate rule score based on severity and action
+            score = 0 if action_val == "pass" else \
+                    5 if action_val == "block" else \
+                    4 if severity_val.upper() == "HIGH" else \
+                    3 if severity_val.upper() == "MEDIUM" else 1
+            
+            # Create rule dictionary with extracted information
+            rule = {
+                "id": rule_id.group(1),
+                "phase": int(phase.group(1)) if phase else 2,
+                "pattern": pattern,
+                "targets": targets,
+                "severity": severity_val,
+                "action": action_val,
+                "score": score,
+                "description": description_val
+            }
+            rules.append(rule)
+            
+        except (AttributeError, ValueError) as e:
+            continue
+            
+    return rules
+
+def download_spiderlabs_rules(output_path):
+    """Downloads and processes SpiderLabs ModSecurity rules."""
+    repo_url = "SpiderLabs/owasp-modsecurity-crs"
+    rules_dir = "rules"
+    all_rules = []
+    headers = {}
+    
+    try:
+        api_url = f"https://api.github.com/repos/{repo_url}/contents/{rules_dir}"
+        response = requests.get(api_url, headers=headers)
+        response.raise_for_status()
+        
+        for file in response.json():
+            if not file['name'].endswith('.conf'):
+                continue
+            
+            time.sleep(1)
+            response = requests.get(file["download_url"], headers=headers)
+            logging.info(f"Processing rule file: {file['name']}")
+            
+            rules = extract_rules(response.text)
+            all_rules.extend(rules)
+
+    except requests.exceptions.RequestException as e:
+        logging.error(f"Error: {e}")
+        return
+        
+    with open(output_path, 'w') as f:
+        json.dump(all_rules, f, indent=2)
+    logging.info(f"Saved {len(all_rules)} rules to {output_path}")
+
+if __name__ == "__main__":
+    output_path = "spiderlabs_rules.json"
+    download_spiderlabs_rules(output_path)