#!/usr/bin/env python3
"""
GitHub adapter for Spacedrive.

Indexes issues, pull requests, and comments from GitHub repositories using the
REST API. Supports incremental sync via `updated_at` cursor.

Requires a personal access token with `repo` scope (or fine-grained token with
Issues and Pull Requests read permissions).

Protocol (as implemented below): a JSON object with `config` and an optional
`cursor` is read from stdin; one JSON object per line is written to stdout for
every log message, upsert operation, and the final cursor.
"""

import json
import sys
import urllib.request
import urllib.error
import urllib.parse
from datetime import datetime, timezone
from typing import Optional

API_BASE = "https://api.github.com"

# Pagination settings used by api_get().
PER_PAGE = 100
MAX_PAGES = 50

# Seconds to wait on a single HTTP request before giving up, so that a stalled
# connection cannot hang the adapter indefinitely.
REQUEST_TIMEOUT = 30


def log(level: str, message: str):
    """Write a log record as a single JSON line on stdout."""
    print(json.dumps({"log": level, "message": message}), flush=True)


def emit(operation: dict):
    """Write an operation (upsert or cursor) as a single JSON line on stdout."""
    print(json.dumps(operation), flush=True)


def api_get(path: str, token: str, params: Optional[dict] = None) -> list:
    """Make a GET request to the GitHub API and return all paginated results.

    Args:
        path: API path starting with "/", e.g. "/repos/owner/name/issues".
        token: Token sent as a Bearer credential.
        params: Optional query parameters; values are URL-encoded.

    Returns:
        A list of decoded JSON items. A single-object response is wrapped in a
        one-element list. On HTTP 404 an empty list is returned. On HTTP
        403/429 pagination stops and whatever was collected so far is
        returned.

    Raises:
        urllib.error.HTTPError: for any other HTTP error status.
        urllib.error.URLError: on connection failures or timeouts.
    """
    results = []
    # Quote the path so unexpected characters in a repository name cannot
    # break the URL or smuggle in extra query parameters.
    base_url = API_BASE + urllib.parse.quote(path, safe="/")

    page = 1
    while True:
        query = dict(params or {})
        query["page"] = page
        query["per_page"] = PER_PAGE
        page_url = base_url + "?" + urllib.parse.urlencode(query)

        req = urllib.request.Request(page_url)
        req.add_header("Authorization", f"Bearer {token}")
        req.add_header("Accept", "application/vnd.github+json")
        req.add_header("X-GitHub-Api-Version", "2022-11-28")
        req.add_header("User-Agent", "spacedrive-adapter/0.1")

        try:
            with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
                data = json.loads(resp.read().decode())
        except urllib.error.HTTPError as e:
            if e.code in (403, 429):
                # Rate limited — stop pagination and return what we have.
                # NOTE(review): callers cannot distinguish this partial result
                # from a complete one.
                log("warn", f"GitHub API rate limited on {path}")
                break
            if e.code == 404:
                log("warn", f"Not found: {path}")
                return []
            raise

        if not isinstance(data, list):
            # Single object response (e.g., /repos/owner/repo)
            return [data]

        results.extend(data)

        # A short page means there is nothing left to fetch.
        if len(data) < PER_PAGE:
            break

        page += 1

        # Safety limit
        if page > MAX_PAGES:
            log("warn", f"Pagination limit reached for {path}")
            break

    return results


def parse_iso(dt_str: str) -> str:
    """Normalize GitHub's ISO 8601 timestamps to UTC.

    Returns "" for empty/None input and the input unchanged when it cannot be
    parsed. Timestamps without an offset are treated as UTC rather than as the
    machine's local time, so the result does not depend on the host timezone.
    """
    if not dt_str:
        return ""
    try:
        # GitHub returns "2025-01-15T10:30:00Z" format; older Python versions
        # of fromisoformat() do not accept the trailing "Z".
        dt = datetime.fromisoformat(dt_str.replace("Z", "+00:00"))
    except (ValueError, TypeError):
        return dt_str
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc).isoformat()


def _login(user) -> str:
    """Return the login of a GitHub user object, or "" when absent."""
    return (user or {}).get("login") or ""


def _parse_bool(value) -> bool:
    """Interpret a config flag that may arrive as a bool or a string."""
    if isinstance(value, str):
        return value.lower() in ("true", "1", "yes")
    return value


def _parse_repos(value) -> list:
    """Turn the `repos` config (comma-separated string or list) into a list."""
    if isinstance(value, str):
        value = value.split(",")
    return [str(r).strip() for r in (value or []) if str(r).strip()]


def _label_names(raw_labels) -> str:
    """Join label names (dict or plain-string labels) into one string."""
    names = []
    for label in raw_labels or []:
        if isinstance(label, dict):
            names.append(label.get("name") or "")
        elif isinstance(label, str):
            names.append(label)
    return ", ".join(names)


def _sync_comments(repo_full_name: str, issue_number, issue_id: str, token: str) -> int:
    """Emit an upsert for every comment on one issue; return how many were emitted.

    Failures are logged as warnings and do not abort the sync (best effort).
    """
    emitted = 0
    try:
        comments = api_get(
            f"/repos/{repo_full_name}/issues/{issue_number}/comments", token
        )
        for comment in comments:
            comment_id = str(comment.get("id", ""))
            if not comment_id:
                continue
            emit({
                "upsert": "comment",
                "external_id": comment_id,
                "fields": {
                    "body": (comment.get("body") or "")[:50000],
                    "author": _login(comment.get("user")),
                    "created_at": parse_iso(comment.get("created_at", "")),
                    "url": comment.get("html_url", ""),
                    "issue_id": issue_id,
                },
            })
            emitted += 1
    except Exception as e:
        log("warn", f"Failed to fetch comments for {issue_id}: {e}")
    return emitted


def _sync_repo(repo_full_name: str, token: str, cursor, include_prs: bool,
               max_items: int, max_updated: str) -> tuple:
    """Emit the repository, its issues/PRs and their comments.

    Returns:
        (issues_emitted, comments_emitted, max_updated) where max_updated is
        the greatest raw `updated_at` seen so far, including the value passed
        in.
    """
    # ── Upsert repository ────────────────────────────────────────────────
    try:
        repo_data = api_get(f"/repos/{repo_full_name}", token)
    except Exception as e:
        log("warn", f"Failed to fetch repo {repo_full_name}: {e}")
        return 0, 0, max_updated
    if not repo_data:
        log("warn", f"Could not fetch repo: {repo_full_name}")
        return 0, 0, max_updated
    repo = repo_data[0]

    repo_id = str(repo.get("id", repo_full_name))
    emit({
        "upsert": "repository",
        "external_id": repo_id,
        "fields": {
            "name": repo.get("name", ""),
            "full_name": repo.get("full_name", repo_full_name),
            "description": (repo.get("description") or "")[:5000],
            "language": repo.get("language") or "",
            "stars": repo.get("stargazers_count", 0),
            "url": repo.get("html_url", ""),
        },
    })

    # ── Fetch issues (and PRs if enabled) ────────────────────────────────
    params = {
        "state": "all",
        "sort": "updated",
        "direction": "desc",
    }
    if cursor:
        params["since"] = cursor

    try:
        issues = api_get(f"/repos/{repo_full_name}/issues", token, params)
    except Exception as e:
        log("warn", f"Failed to fetch issues for {repo_full_name}: {e}")
        return 0, 0, max_updated

    issue_count = 0
    comment_count = 0
    for item in issues:
        if issue_count >= max_items:
            # NOTE(review): items are newest-first, so the ones skipped here
            # are older than the cursor emitted at the end of the run and will
            # not be picked up by the next incremental sync.
            log("warn", f"{repo_full_name}: max_items ({max_items}) reached; remaining items skipped")
            break

        # The issues endpoint also returns PRs, marked by a "pull_request" key.
        is_pr = "pull_request" in item
        if is_pr and not include_prs:
            continue

        issue_number = item.get("number", 0)
        issue_id = f"{repo_full_name}#{issue_number}"
        # `or` guards: JSON null values would otherwise break slicing/compare.
        comments_count = item.get("comments") or 0

        emit({
            "upsert": "issue",
            "external_id": issue_id,
            "fields": {
                "title": (item.get("title") or "")[:500],
                "body": (item.get("body") or "")[:50000],
                "author": _login(item.get("user")),
                "state": item.get("state", ""),
                "number": issue_number,
                "url": item.get("html_url", ""),
                "is_pr": is_pr,
                "labels": _label_names(item.get("labels"))[:1000],
                "comments_count": comments_count,
                "created_at": parse_iso(item.get("created_at", "")),
                "updated_at": parse_iso(item.get("updated_at", "")),
                "closed_at": parse_iso(item.get("closed_at", "")),
                "repository_id": repo_id,
            },
        })
        issue_count += 1

        # Track latest update for cursor. The raw (un-normalized) value is
        # compared as a string, which matches chronological order as long as
        # all values share the same "...Z" format.
        raw_updated = item.get("updated_at") or ""
        if raw_updated > max_updated:
            max_updated = raw_updated

        # ── Fetch comments for this issue ────────────────────────────────
        if comments_count > 0:
            comment_count += _sync_comments(
                repo_full_name, issue_number, issue_id, token
            )

    log("info", f"{repo_full_name}: {issue_count} issues/PRs")
    return issue_count, comment_count, max_updated


def main():
    """Read the sync request from stdin and emit upserts plus a new cursor.

    Exits with status 2 on invalid input or configuration and with status 1
    when the list of accessible repositories cannot be fetched.
    """
    try:
        input_data = json.loads(sys.stdin.read())
    except json.JSONDecodeError as e:
        log("error", f"Invalid input JSON: {e}")
        sys.exit(2)
    if not isinstance(input_data, dict):
        log("error", "Invalid input JSON: expected an object")
        sys.exit(2)

    config = input_data.get("config") or {}
    cursor = input_data.get("cursor")

    token = config.get("token", "")
    if not token:
        log("error", "Missing required config: token")
        sys.exit(2)

    include_prs = _parse_bool(config.get("include_prs", True))
    try:
        max_items = int(config.get("max_items", 500))
    except (TypeError, ValueError):
        log("error", "Invalid config: max_items must be an integer")
        sys.exit(2)

    # ── Determine repos to index ─────────────────────────────────────────
    repo_list = _parse_repos(config.get("repos", ""))
    if not repo_list:
        # Fetch all repos the user has access to
        log("info", "No repos specified, fetching all accessible repos")
        try:
            all_repos = api_get("/user/repos", token, {"sort": "updated", "type": "all"})
            repo_list = [r["full_name"] for r in all_repos if r.get("full_name")]
        except Exception as e:
            log("error", f"Failed to fetch repos: {e}")
            sys.exit(1)

    log("info", f"Indexing {len(repo_list)} repositories")

    # NOTE(review): one cursor is shared by all repositories, so a repository
    # whose sync was cut short can be skipped past by a later, fresher one.
    max_updated = cursor or ""
    total_issues = 0
    total_comments = 0

    for repo_full_name in repo_list:
        issues, comments, max_updated = _sync_repo(
            repo_full_name, token, cursor, include_prs, max_items, max_updated
        )
        total_issues += issues
        total_comments += comments

    # Emit cursor
    if max_updated:
        emit({"cursor": max_updated})

    log("info", f"Synced {total_issues} issues/PRs and {total_comments} comments from {len(repo_list)} repos")


if __name__ == "__main__":
    main()