mirror of
https://github.com/f-droid/fdroidclient.git
synced 2026-02-08 06:03:06 -05:00
254 lines
8.5 KiB
Python
254 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Convert a Checkstyle XML report to a Code Climate JSON report.
|
|
|
|
Usage examples:
|
|
# Basic (defaults)
|
|
python tools/checkstyle-to-codeclimate.py
|
|
|
|
# Explicit input/output
|
|
python tools/checkstyle-to-codeclimate.py -i build/reports/checkstyle-result.xml -o gl-code-quality-report.json
|
|
|
|
# Using positional file path(s) (new):
|
|
python tools/checkstyle-to-codeclimate.py build/reports/checkstyle-result.xml another-report.xml
|
|
|
|
# Read from stdin (single '-') and write pretty JSON
|
|
cat checkstyle-result.xml | python tools/checkstyle-to-codeclimate.py - -o report.json --pretty
|
|
|
|
# Strip a common base directory from paths (useful for CI where absolute paths leak)
|
|
python tools/checkstyle-to-codeclimate.py --base-dir "$PWD" -i build/reports/checkstyle-result.xml
|
|
|
|
If no input is provided via positional arguments or -i/--input, defaults to 'checkstyle-result.xml'.
|
|
If --output is omitted, defaults to 'gl-code-quality-report.json'.
|
|
|
|
The resulting JSON is an array of issue objects compatible with GitLab's
|
|
Code Quality widget (Code Climate format subset).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import sys
|
|
import pathlib
|
|
import xml.etree.ElementTree as ET
|
|
from typing import Dict, List, Any, Iterable, Iterator, Optional, Union
|
|
|
|
# NOTE: Keep this script compatible with Python 3.8+ (avoid PEP 604 union syntax).
|
|
|
|
# Checkstyle severity name -> Code Climate (GitLab) severity name.
CONVERSION_MAP: Dict[str, str] = dict(
    ignore="info",
    info="minor",
    warning="major",
    error="critical",
)

# One Code Climate issue, represented as a plain dictionary.
Issue = Dict[str, Any]
|
|
|
|
|
|
def parse_args(argv: List[str]) -> argparse.Namespace:
    """Parse the converter's command-line arguments.

    Args:
        argv: Argument strings without the program name
            (for example ``sys.argv[1:]``).

    Returns:
        Namespace carrying the ``input``, ``output``, ``pretty``,
        ``base_dir`` and ``files`` attributes.
    """
    # (names, add_argument options) pairs; registration order is preserved
    # because it determines the order shown in ``--help``.
    argument_specs = (
        (
            ("-i", "--input"),
            {
                "metavar": "FILE",
                "help": "Path to checkstyle XML input file (deprecated in favor of positional FILEs).",
            },
        ),
        (
            ("-o", "--output"),
            {
                "metavar": "FILE",
                "default": "gl-code-quality-report.json",
                "help": "Path to output JSON file (default: gl-code-quality-report.json)",
            },
        ),
        (
            ("--pretty",),
            {"action": "store_true", "help": "Pretty-print JSON output"},
        ),
        (
            ("--base-dir",),
            {
                "metavar": "DIR",
                "help": "If set, emitted issue paths are made relative to this directory when possible.",
            },
        ),
        (
            ("files",),
            {
                "nargs": "*",
                "metavar": "FILE",
                "help": "One or more Checkstyle XML files. Use '-' to read from stdin (only once).",
            },
        ),
    )

    cli = argparse.ArgumentParser(
        description="Convert Checkstyle XML to Code Climate JSON"
    )
    for names, options in argument_specs:
        cli.add_argument(*names, **options)
    return cli.parse_args(argv)
|
|
|
|
|
|
def compute_fingerprint(
    path: str, line: str, message: str, severity_raw: str, source: Optional[str]
) -> str:
    """Return a stable SHA-256 hex fingerprint identifying one issue.

    The digest covers the path, line, raw severity, message and source
    (an absent source counts as the empty string), joined with ``:``, so
    that similar messages in different places get different fingerprints.

    Args:
        path: Path of the file the issue belongs to.
        line: Line number as a string.
        message: Issue description.
        severity_raw: Severity string as used for the lookup.
        source: Name of the reporting check, if any.

    Returns:
        The 64-character hexadecimal SHA-256 digest.
    """
    origin = source if source else ""
    digest = hashlib.sha256()
    digest.update(
        f"{path}:{line}:{severity_raw}:{message}:{origin}".encode("utf-8")
    )
    return digest.hexdigest()
|
|
|
|
|
|
def _relativize(path: str, base_dir: Optional[str]) -> str:
|
|
if not base_dir:
|
|
return path
|
|
try:
|
|
p = pathlib.Path(path)
|
|
base = pathlib.Path(base_dir)
|
|
return str(p.relative_to(base))
|
|
except Exception:
|
|
return path # Fallback to original if not relative
|
|
|
|
|
|
def _stream_issues_from_file(
|
|
path: pathlib.Path, base_dir: Optional[str]
|
|
) -> Iterator[Issue]:
|
|
"""Memory-efficient streaming parser using iterparse.
|
|
|
|
We use start/end events to track the current <file> context and emit issues
|
|
when encountering <error> end events. Elements are cleared after their
|
|
subtrees are processed to free memory for very large reports.
|
|
"""
|
|
current_file: Optional[str] = None
|
|
rel_file: Optional[str] = None
|
|
for event, elem in ET.iterparse(str(path), events=("start", "end")):
|
|
if event == "start" and elem.tag == "file":
|
|
current_file = elem.get("name") or None
|
|
if current_file:
|
|
rel_file = _relativize(current_file, base_dir)
|
|
elif event == "end" and elem.tag == "error" and current_file and rel_file:
|
|
line = elem.get("line") or "1"
|
|
message = elem.get("message") or "No description provided by Checkstyle."
|
|
severity_raw = (elem.get("severity") or "").lower()
|
|
severity = CONVERSION_MAP.get(severity_raw, "info")
|
|
source = elem.get("source")
|
|
fingerprint = compute_fingerprint(
|
|
rel_file, line, message, severity_raw, source
|
|
)
|
|
yield {
|
|
"description": message,
|
|
"fingerprint": fingerprint,
|
|
"severity": severity,
|
|
"location": {
|
|
"path": rel_file,
|
|
"lines": {"begin": int(line) if line.isdigit() else 1},
|
|
},
|
|
}
|
|
elem.clear()
|
|
elif event == "end" and elem.tag == "file":
|
|
elem.clear()
|
|
current_file = None
|
|
rel_file = None
|
|
|
|
|
|
def _issues_from_stdin(base_dir: Optional[str]) -> List[Issue]:
    """Read a complete Checkstyle XML report from stdin and convert it.

    Args:
        base_dir: If set, issue paths are made relative to it when possible.

    Returns:
        One issue dictionary per ``<error>`` found under a named ``<file>``
        element that is a direct child of the document root.

    Raises:
        SystemExit: If the data read from stdin is not well-formed XML.
    """
    # Prefer the raw byte stream so the XML parser applies the encoding
    # declared in the document instead of the locale's text decoding. Fall
    # back to text when stdin has been replaced by an object without
    # ``buffer`` (e.g. io.StringIO).
    raw_stdin = getattr(sys.stdin, "buffer", None)
    data = raw_stdin.read() if raw_stdin is not None else sys.stdin.read()
    try:
        root = ET.fromstring(data)
    except ET.ParseError as e:
        raise SystemExit(f"Failed to parse XML from stdin: {e}") from e

    issues: List[Issue] = []
    for file_elem in root.findall("file"):
        file_path = file_elem.get("name")
        if not file_path:
            continue
        rel_path = _relativize(file_path, base_dir)
        for err in file_elem.findall("error"):
            line = err.get("line") or "1"
            message = err.get("message") or "No description provided by Checkstyle."
            severity_raw = (err.get("severity") or "").lower()
            severity = CONVERSION_MAP.get(severity_raw, "info")
            source = err.get("source")
            fingerprint = compute_fingerprint(
                rel_path, line, message, severity_raw, source
            )
            issues.append(
                {
                    "description": message,
                    "fingerprint": fingerprint,
                    "severity": severity,
                    "location": {
                        "path": rel_path,
                        # isdecimal() (unlike isdigit()) only accepts
                        # characters that int() can actually convert.
                        "lines": {"begin": int(line) if line.isdecimal() else 1},
                    },
                }
            )
    return issues
|
|
|
|
|
|
def convert_paths(
    inputs: Iterable[Union[pathlib.Path, str]], base_dir: Optional[str]
) -> List[Issue]:
    """Convert every given Checkstyle report into one combined issue list.

    Args:
        inputs: Report locations; the string ``"-"`` stands for stdin.
        base_dir: If set, issue paths are made relative to it when possible.

    Returns:
        All issues from all inputs, in input order.

    Raises:
        FileNotFoundError: If an input is not an existing regular file.
        SystemExit: If a report cannot be parsed as XML.
    """
    collected: List[Issue] = []
    for source in inputs:
        if source == "-":
            collected += _issues_from_stdin(base_dir)
            continue

        report = pathlib.Path(str(source))
        if not report.is_file():
            raise FileNotFoundError(f"Input file not found: {report}")

        try:
            collected.extend(_stream_issues_from_file(report, base_dir))
        except ET.ParseError as err:
            raise SystemExit(f"Failed to parse XML '{report}': {err}") from err
    return collected
|
|
|
|
|
|
def write_output(issues: List[Issue], output_path: pathlib.Path, pretty: bool) -> None:
    """Serialise *issues* as a JSON array into *output_path* (UTF-8).

    Missing parent directories of *output_path* are created first.

    Args:
        issues: Issue dictionaries to write.
        output_path: Destination file; overwritten if it exists.
        pretty: When true, indent by two spaces and end the file with a
            newline; otherwise write compact JSON without a trailing newline.
    """
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    with output_path.open("w", encoding="utf-8") as handle:
        if pretty:
            json.dump(issues, handle, indent=2)
            handle.write("\n")
        else:
            json.dump(issues, handle, indent=None)
|
|
|
|
|
|
def main(argv: List[str]) -> int:
    """Run the converter and return a process exit code.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        ``0`` on success, ``1`` on an unexpected error or when the output
        file cannot be written, ``2`` when an input file is missing, ``3``
        when an input cannot be parsed as XML, and ``4`` when stdin (``-``)
        is requested more than once.
    """
    args = parse_args(argv)

    # Resolve input list precedence: positional > -i > default
    input_files: List[str] = list(args.files) or [
        args.input or "checkstyle-result.xml"
    ]

    # Basic validation: stdin can only be consumed once.
    if input_files.count("-") > 1:
        print("Only one stdin ('-') source allowed.", file=sys.stderr)
        return 4

    base_dir = os.path.abspath(args.base_dir) if args.base_dir else args.base_dir

    try:
        issues = convert_paths(input_files, base_dir)
    except FileNotFoundError as e:
        print(str(e), file=sys.stderr)
        return 2
    except SystemExit as e:  # parse error forwarded
        print(str(e), file=sys.stderr)
        return 3
    except Exception as e:  # unexpected
        print(f"Unexpected error: {e}", file=sys.stderr)
        return 1

    output_path = pathlib.Path(args.output)
    try:
        write_output(issues, output_path, args.pretty)
    except OSError as e:
        # Report write failures (permissions, path is a directory, ...) like
        # every other failure instead of letting a traceback escape; the
        # exit code matches what the uncaught exception produced.
        print(f"Failed to write output '{output_path}': {e}", file=sys.stderr)
        return 1

    print(
        f"Converted {len(issues)} issue(s) from {len(input_files)} input file(s) -> '{output_path}'."
    )
    return 0
|
|
|
|
|
|
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":  # pragma: no cover
    sys.exit(main(sys.argv[1:]))
|