mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-04-18 05:27:38 -04:00
276 lines
9.4 KiB
Python
276 lines
9.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
GitHub adapter for Spacedrive.
|
|
|
|
Indexes issues, pull requests, and comments from GitHub repositories
|
|
using the REST API. Supports incremental sync via `updated_at` cursor.
|
|
|
|
Requires a personal access token with `repo` scope (or fine-grained
|
|
token with Issues and Pull Requests read permissions).
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import urllib.request
|
|
import urllib.error
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
API_BASE = "https://api.github.com"
|
|
|
|
|
|
def log(level: str, message: str):
|
|
print(json.dumps({"log": level, "message": message}), flush=True)
|
|
|
|
|
|
def emit(operation: dict):
|
|
print(json.dumps(operation), flush=True)
|
|
|
|
|
|
def api_get(path: str, token: str, params: dict = None) -> list:
|
|
"""Make a GET request to GitHub API. Handles pagination, returns all results."""
|
|
results = []
|
|
url = f"{API_BASE}{path}"
|
|
|
|
if params:
|
|
query_parts = []
|
|
for k, v in params.items():
|
|
query_parts.append(f"{k}={v}")
|
|
url += "?" + "&".join(query_parts)
|
|
|
|
page = 1
|
|
while True:
|
|
separator = "&" if "?" in url else "?"
|
|
page_url = f"{url}{separator}page={page}&per_page=100"
|
|
|
|
req = urllib.request.Request(page_url)
|
|
req.add_header("Authorization", f"Bearer {token}")
|
|
req.add_header("Accept", "application/vnd.github+json")
|
|
req.add_header("X-GitHub-Api-Version", "2022-11-28")
|
|
req.add_header("User-Agent", "spacedrive-adapter/0.1")
|
|
|
|
try:
|
|
with urllib.request.urlopen(req) as resp:
|
|
data = json.loads(resp.read().decode())
|
|
except urllib.error.HTTPError as e:
|
|
if e.code == 403:
|
|
# Rate limited — stop pagination
|
|
log("warn", f"GitHub API rate limited on {path}")
|
|
break
|
|
elif e.code == 404:
|
|
log("warn", f"Not found: {path}")
|
|
return []
|
|
else:
|
|
raise
|
|
|
|
if not isinstance(data, list):
|
|
# Single object response (e.g., /repos/owner/repo)
|
|
return [data]
|
|
|
|
results.extend(data)
|
|
|
|
if len(data) < 100:
|
|
break
|
|
page += 1
|
|
|
|
# Safety limit
|
|
if page > 50:
|
|
log("warn", f"Pagination limit reached for {path}")
|
|
break
|
|
|
|
return results
|
|
|
|
|
|
def parse_iso(dt_str: str) -> str:
|
|
"""Normalize GitHub's ISO 8601 timestamps to UTC."""
|
|
if not dt_str:
|
|
return ""
|
|
try:
|
|
# GitHub returns "2025-01-15T10:30:00Z" format
|
|
s = dt_str.replace("Z", "+00:00")
|
|
dt = datetime.fromisoformat(s)
|
|
return dt.astimezone(timezone.utc).isoformat()
|
|
except (ValueError, TypeError):
|
|
return dt_str
|
|
|
|
|
|
def main():
|
|
try:
|
|
input_data = json.loads(sys.stdin.read())
|
|
except json.JSONDecodeError as e:
|
|
log("error", f"Invalid input JSON: {e}")
|
|
sys.exit(2)
|
|
|
|
config = input_data.get("config", {})
|
|
cursor = input_data.get("cursor")
|
|
|
|
token = config.get("token", "")
|
|
if not token:
|
|
log("error", "Missing required config: token")
|
|
sys.exit(2)
|
|
|
|
repos_str = config.get("repos", "")
|
|
include_prs = config.get("include_prs", True)
|
|
if isinstance(include_prs, str):
|
|
include_prs = include_prs.lower() in ("true", "1", "yes")
|
|
max_items = int(config.get("max_items", 500))
|
|
|
|
# ── Determine repos to index ─────────────────────────────────────────
|
|
if repos_str:
|
|
repo_list = [r.strip() for r in repos_str.split(",") if r.strip()]
|
|
else:
|
|
# Fetch all repos the user has access to
|
|
log("info", "No repos specified, fetching all accessible repos")
|
|
try:
|
|
all_repos = api_get("/user/repos", token, {"sort": "updated", "type": "all"})
|
|
repo_list = [r["full_name"] for r in all_repos if r.get("full_name")]
|
|
except Exception as e:
|
|
log("error", f"Failed to fetch repos: {e}")
|
|
sys.exit(1)
|
|
|
|
log("info", f"Indexing {len(repo_list)} repositories")
|
|
|
|
max_updated = cursor or ""
|
|
total_issues = 0
|
|
total_comments = 0
|
|
|
|
for repo_full_name in repo_list:
|
|
# ── Upsert repository ────────────────────────────────────────────
|
|
try:
|
|
repo_data = api_get(f"/repos/{repo_full_name}", token)
|
|
if not repo_data:
|
|
log("warn", f"Could not fetch repo: {repo_full_name}")
|
|
continue
|
|
repo = repo_data[0]
|
|
except Exception as e:
|
|
log("warn", f"Failed to fetch repo {repo_full_name}: {e}")
|
|
continue
|
|
|
|
repo_id = str(repo.get("id", repo_full_name))
|
|
|
|
emit({
|
|
"upsert": "repository",
|
|
"external_id": repo_id,
|
|
"fields": {
|
|
"name": repo.get("name", ""),
|
|
"full_name": repo.get("full_name", repo_full_name),
|
|
"description": (repo.get("description") or "")[:5000],
|
|
"language": repo.get("language") or "",
|
|
"stars": repo.get("stargazers_count", 0),
|
|
"url": repo.get("html_url", ""),
|
|
}
|
|
})
|
|
|
|
# ── Fetch issues (and PRs if enabled) ────────────────────────────
|
|
params = {
|
|
"state": "all",
|
|
"sort": "updated",
|
|
"direction": "desc",
|
|
}
|
|
if cursor:
|
|
params["since"] = cursor
|
|
|
|
try:
|
|
issues = api_get(f"/repos/{repo_full_name}/issues", token, params)
|
|
except Exception as e:
|
|
log("warn", f"Failed to fetch issues for {repo_full_name}: {e}")
|
|
continue
|
|
|
|
repo_issue_count = 0
|
|
|
|
for item in issues:
|
|
if repo_issue_count >= max_items:
|
|
break
|
|
|
|
is_pr = "pull_request" in item
|
|
if is_pr and not include_prs:
|
|
continue
|
|
|
|
issue_number = item.get("number", 0)
|
|
issue_id = f"{repo_full_name}#{issue_number}"
|
|
|
|
title = item.get("title", "")
|
|
body = item.get("body") or ""
|
|
author = item.get("user", {}).get("login", "") if item.get("user") else ""
|
|
state = item.get("state", "")
|
|
url = item.get("html_url", "")
|
|
created_at = parse_iso(item.get("created_at", ""))
|
|
updated_at = parse_iso(item.get("updated_at", ""))
|
|
closed_at = parse_iso(item.get("closed_at", ""))
|
|
comments_count = item.get("comments", 0)
|
|
|
|
# Labels
|
|
labels = []
|
|
for label in item.get("labels", []):
|
|
if isinstance(label, dict):
|
|
labels.append(label.get("name", ""))
|
|
elif isinstance(label, str):
|
|
labels.append(label)
|
|
labels_str = ", ".join(labels)
|
|
|
|
emit({
|
|
"upsert": "issue",
|
|
"external_id": issue_id,
|
|
"fields": {
|
|
"title": title[:500],
|
|
"body": body[:50000],
|
|
"author": author,
|
|
"state": state,
|
|
"number": issue_number,
|
|
"url": url,
|
|
"is_pr": is_pr,
|
|
"labels": labels_str[:1000],
|
|
"comments_count": comments_count,
|
|
"created_at": created_at,
|
|
"updated_at": updated_at,
|
|
"closed_at": closed_at,
|
|
"repository_id": repo_id,
|
|
}
|
|
})
|
|
total_issues += 1
|
|
repo_issue_count += 1
|
|
|
|
# Track latest update for cursor
|
|
raw_updated = item.get("updated_at", "")
|
|
if raw_updated > max_updated:
|
|
max_updated = raw_updated
|
|
|
|
# ── Fetch comments for this issue ────────────────────────────
|
|
if comments_count > 0:
|
|
try:
|
|
comments = api_get(
|
|
f"/repos/{repo_full_name}/issues/{issue_number}/comments",
|
|
token
|
|
)
|
|
for comment in comments:
|
|
comment_id = str(comment.get("id", ""))
|
|
if not comment_id:
|
|
continue
|
|
|
|
emit({
|
|
"upsert": "comment",
|
|
"external_id": comment_id,
|
|
"fields": {
|
|
"body": (comment.get("body") or "")[:50000],
|
|
"author": comment.get("user", {}).get("login", "") if comment.get("user") else "",
|
|
"created_at": parse_iso(comment.get("created_at", "")),
|
|
"url": comment.get("html_url", ""),
|
|
"issue_id": issue_id,
|
|
}
|
|
})
|
|
total_comments += 1
|
|
except Exception as e:
|
|
log("warn", f"Failed to fetch comments for {issue_id}: {e}")
|
|
|
|
log("info", f"{repo_full_name}: {repo_issue_count} issues/PRs")
|
|
|
|
# Emit cursor
|
|
if max_updated:
|
|
emit({"cursor": max_updated})
|
|
|
|
log("info", f"Synced {total_issues} issues/PRs and {total_comments} comments from {len(repo_list)} repos")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|