mirror of
https://github.com/nicotsx/zerobyte.git
synced 2026-04-26 18:08:14 -04:00
feat(backups): ensure volume readiness before backup (#815)
* feat(backups): ensure volume readiness before backup. Backup preflight checks previously relied on a cached database status; now a real volume health check is performed before backing up. Closes #811, #695 * chore: fix ci
This commit is contained in:
@@ -17,10 +17,7 @@ export class VolumeHealthCheckJob extends Job {
|
||||
for (const volume of volumes) {
|
||||
try {
|
||||
await withContext({ organizationId: volume.organizationId }, async () => {
|
||||
const { status } = await volumeService.checkHealth(volume.shortId);
|
||||
if (status === "error" && volume.autoRemount) {
|
||||
await volumeService.mountVolume(volume.shortId);
|
||||
}
|
||||
await volumeService.ensureHealthyVolume(volume.shortId);
|
||||
});
|
||||
} catch (error) {
|
||||
logger.error(`Health check failed for volume ${volume.name}:`, error);
|
||||
|
||||
@@ -21,6 +21,8 @@ import { notificationsService } from "~/server/modules/notifications/notificatio
|
||||
import { agentManager } from "~/server/modules/agents/agents-manager";
|
||||
import { createAgentBackupMocks } from "~/test/helpers/agent-mock";
|
||||
import { getScheduleByIdOrShortId } from "../helpers/backup-schedule-lookups";
|
||||
import { volumeService } from "~/server/modules/volumes/volume.service";
|
||||
import { db } from "~/server/db/db";
|
||||
|
||||
const setup = () => {
|
||||
const resticBackupMock = vi.fn((_: SafeSpawnParams) =>
|
||||
@@ -47,6 +49,31 @@ const setup = () => {
|
||||
vi.spyOn(agentManager, "runBackup").mockImplementation(runBackupMock);
|
||||
vi.spyOn(agentManager, "cancelBackup").mockImplementation(cancelBackupMock);
|
||||
vi.spyOn(context, "getOrganizationId").mockReturnValue(TEST_ORG_ID);
|
||||
// Default mock for volumeService.ensureHealthyVolume used by setup(): looks the
// volume up in the test database (scoped to TEST_ORG_ID) and reports it ready
// only when its stored status is "mounted" — mirroring the preflight contract
// without touching a real backend. Individual tests may override this spy.
const ensureHealthyVolumeMock = vi.spyOn(volumeService, "ensureHealthyVolume").mockImplementation(async (shortId) => {
  const volume = await db.query.volumesTable.findFirst({
    where: {
      AND: [{ shortId: { eq: shortId } }, { organizationId: TEST_ORG_ID }],
    },
  });

  // Matches the real service, which throws when the volume cannot be found.
  if (!volume) {
    throw new NotFoundError("Volume not found");
  }

  if (volume.status !== "mounted") {
    return {
      ready: false as const,
      volume,
      reason: "Volume is not mounted",
    };
  }

  return {
    ready: true as const,
    volume,
    remounted: false,
  };
});
|
||||
|
||||
return {
|
||||
resticBackupMock,
|
||||
@@ -55,6 +82,7 @@ const setup = () => {
|
||||
runBackupMock,
|
||||
cancelBackupMock,
|
||||
refreshStatsMock,
|
||||
ensureHealthyVolumeMock,
|
||||
};
|
||||
};
|
||||
|
||||
@@ -85,6 +113,50 @@ describe("backup execution - validation failures", () => {
|
||||
expect(resticBackupMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Happy path: validateBackupExecution must invoke the volume preflight
// (ensureHealthyVolume) with the schedule's volume shortId and succeed when
// the preflight reports the volume ready.
test("runs a preflight volume health check before starting a backup", async () => {
  setup();
  const volume = await createTestVolume();
  const repository = await createTestRepository();
  const schedule = await createTestBackupSchedule({
    volumeId: volume.id,
    repositoryId: repository.id,
  });
  // Override the default setup() mock so the readiness result is explicit.
  const ensureHealthyVolumeSpy = vi.spyOn(volumeService, "ensureHealthyVolume").mockResolvedValue({
    ready: true,
    volume,
    remounted: false,
  });

  const result = await backupsService.validateBackupExecution(schedule.id);

  expect(result.type).toBe("success");
  expect(ensureHealthyVolumeSpy).toHaveBeenCalledWith(volume.shortId);
});
|
||||
|
||||
// Failure path: when the preflight cannot recover the volume, validation must
// fail with a BadRequestError carrying the preflight's reason, and no restic
// backup process may be started.
test("fails validation when the preflight health check cannot recover the volume", async () => {
  const { resticBackupMock } = setup();
  const volume = await createTestVolume();
  const repository = await createTestRepository();
  const schedule = await createTestBackupSchedule({
    volumeId: volume.id,
    repositoryId: repository.id,
  });
  // Simulate an unrecoverable volume: not ready, status flipped to "error".
  vi.spyOn(volumeService, "ensureHealthyVolume").mockResolvedValue({
    ready: false,
    volume: { ...volume, status: "error", lastError: "stale mount" },
    reason: "stale mount",
  });

  const result = await backupsService.validateBackupExecution(schedule.id);

  expect(result.type).toBe("failure");
  // Narrowing guard: result.error only exists on the failure variant.
  if (result.type === "failure") {
    expect(result.error).toBeInstanceOf(BadRequestError);
    expect(result.error.message).toBe("stale mount");
  }
  expect(resticBackupMock).not.toHaveBeenCalled();
});
|
||||
|
||||
test("should fail backup when volume does not exist", async () => {
|
||||
// arrange
|
||||
setup();
|
||||
|
||||
@@ -17,6 +17,8 @@ import { repositoriesService } from "~/server/modules/repositories/repositories.
|
||||
import { agentManager } from "~/server/modules/agents/agents-manager";
|
||||
import { createAgentBackupMocks } from "~/test/helpers/agent-mock";
|
||||
import { getScheduleByIdOrShortId } from "../helpers/backup-schedule-lookups";
|
||||
import { volumeService } from "~/server/modules/volumes/volume.service";
|
||||
import { NotFoundError } from "http-errors-enhanced";
|
||||
|
||||
const setup = () => {
|
||||
const resticBackupMock = vi.fn((_: SafeSpawnParams) => Promise.resolve({ exitCode: 0, summary: "", error: "" }));
|
||||
@@ -36,6 +38,31 @@ const setup = () => {
|
||||
vi.spyOn(agentManager, "runBackup").mockImplementation(runBackupMock);
|
||||
vi.spyOn(agentManager, "cancelBackup").mockImplementation(cancelBackupMock);
|
||||
vi.spyOn(context, "getOrganizationId").mockReturnValue(TEST_ORG_ID);
|
||||
// Stub volumeService.ensureHealthyVolume for this suite's setup(): resolve the
// volume from the test database (scoped to TEST_ORG_ID) and report readiness
// purely from its stored status, so no real backend health check runs.
vi.spyOn(volumeService, "ensureHealthyVolume").mockImplementation(async (shortId) => {
  const volume = await db.query.volumesTable.findFirst({
    where: {
      AND: [{ shortId: { eq: shortId } }, { organizationId: TEST_ORG_ID }],
    },
  });

  // Matches the real service, which throws when the volume cannot be found.
  if (!volume) {
    throw new NotFoundError("Volume not found");
  }

  if (volume.status !== "mounted") {
    return {
      ready: false as const,
      volume,
      reason: "Volume is not mounted",
    };
  }

  return {
    ready: true as const,
    volume,
    remounted: false,
  };
});
|
||||
|
||||
return {
|
||||
resticBackupMock,
|
||||
|
||||
@@ -12,6 +12,7 @@ import { calculateNextRun } from "../backup.helpers";
|
||||
import { scheduleQueries } from "../backups.queries";
|
||||
import type { BackupExecutionProgress } from "../../agents/agents-manager";
|
||||
import { repositoriesService } from "../../repositories/repositories.service";
|
||||
import { volumeService } from "../../volumes/volume.service";
|
||||
import { copyToMirrors, runForget } from "./backup-maintenance";
|
||||
|
||||
export interface BackupContext {
|
||||
@@ -71,17 +72,19 @@ export async function validateBackupExecution(scheduleId: number, manual = false
|
||||
return { type: "failure", error: new NotFoundError("Repository not found"), partialContext: { schedule, volume } };
|
||||
}
|
||||
|
||||
if (volume.status !== "mounted") {
|
||||
const volumeReadiness = await volumeService.ensureHealthyVolume(volume.shortId);
|
||||
|
||||
if (!volumeReadiness.ready) {
|
||||
return {
|
||||
type: "failure",
|
||||
error: new BadRequestError("Volume is not mounted"),
|
||||
partialContext: { schedule, volume, repository },
|
||||
error: new BadRequestError(volumeReadiness.reason),
|
||||
partialContext: { schedule, volume: volumeReadiness.volume, repository },
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
type: "success",
|
||||
context: { schedule, volume, repository, organizationId },
|
||||
context: { schedule, volume: volumeReadiness.volume, repository, organizationId },
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { afterEach, describe, expect, test, vi } from "vitest";
|
||||
import { volumeService } from "../volume.service";
|
||||
import { db } from "~/server/db/db";
|
||||
import { volumesTable } from "~/server/db/schema";
|
||||
@@ -9,6 +9,12 @@ import path from "node:path";
|
||||
import { createTestSession } from "~/test/helpers/auth";
|
||||
import { withContext } from "~/server/core/request-context";
|
||||
import { asShortId } from "~/server/utils/branded";
|
||||
import { createTestVolume } from "~/test/helpers/volume";
|
||||
import * as backendModule from "../../backends/backend";
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("volumeService.getVolume", () => {
|
||||
test("should find volume by shortId", async () => {
|
||||
@@ -124,3 +130,84 @@ describe("volumeService.listFiles security", () => {
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Covers the three preflight outcomes of ensureHealthyVolume: healthy volume,
// unhealthy volume recovered by auto-remount, and unhealthy volume left
// not-ready when auto-remount is disabled. The volume backend is mocked so
// health-check and mount results are fully controlled.
describe("volumeService.ensureHealthyVolume", () => {
  test("returns ready when the mounted volume passes its health check", async () => {
    const { organizationId, user } = await createTestSession();
    const volume = await createTestVolume({ organizationId, status: "mounted" });
    const mount = vi.fn().mockResolvedValue({ status: "mounted" });
    const checkHealth = vi.fn().mockResolvedValue({ status: "mounted" });

    vi.spyOn(backendModule, "createVolumeBackend").mockImplementation(() => ({
      mount,
      unmount: vi.fn().mockResolvedValue({ status: "unmounted" }),
      checkHealth,
    }));

    await withContext({ organizationId, userId: user.id }, async () => {
      const result = await volumeService.ensureHealthyVolume(volume.shortId);

      expect(result).toEqual({
        ready: true,
        volume: expect.objectContaining({ id: volume.id, status: "mounted", lastError: null }),
        remounted: false,
      });
      // Healthy on first probe: exactly one health check, no remount attempted.
      expect(checkHealth).toHaveBeenCalledOnce();
      expect(mount).not.toHaveBeenCalled();
    });
  });

  test("auto-remounts when the mounted volume fails its health check", async () => {
    const { organizationId, user } = await createTestSession();
    const volume = await createTestVolume({ organizationId, status: "mounted", autoRemount: true });
    const mount = vi.fn().mockResolvedValue({ status: "mounted" });
    // Health check reports an error; autoRemount should trigger recovery.
    const checkHealth = vi.fn().mockResolvedValue({ status: "error", error: "stale mount" });

    vi.spyOn(backendModule, "createVolumeBackend").mockImplementation(() => ({
      mount,
      unmount: vi.fn().mockResolvedValue({ status: "unmounted" }),
      checkHealth,
    }));

    await withContext({ organizationId, userId: user.id }, async () => {
      const result = await volumeService.ensureHealthyVolume(volume.shortId);

      // Recovery succeeded: ready, and remounted flags that a remount happened.
      expect(result).toEqual({
        ready: true,
        volume: expect.objectContaining({ id: volume.id, status: "mounted", lastError: null }),
        remounted: true,
      });
      expect(checkHealth).toHaveBeenCalledOnce();
      expect(mount).toHaveBeenCalledOnce();

      // The remount path also persists the recovered state to the database.
      const updatedVolume = await db.query.volumesTable.findFirst({ where: { id: volume.id } });
      expect(updatedVolume?.status).toBe("mounted");
      expect(updatedVolume?.lastError).toBeNull();
    });
  });

  test("returns not ready when the health check fails and auto-remount is disabled", async () => {
    const { organizationId, user } = await createTestSession();
    const volume = await createTestVolume({ organizationId, status: "mounted", autoRemount: false });
    const mount = vi.fn().mockResolvedValue({ status: "mounted" });
    const checkHealth = vi.fn().mockResolvedValue({ status: "error", error: "stale mount" });

    vi.spyOn(backendModule, "createVolumeBackend").mockImplementation(() => ({
      mount,
      unmount: vi.fn().mockResolvedValue({ status: "unmounted" }),
      checkHealth,
    }));

    await withContext({ organizationId, userId: user.id }, async () => {
      const result = await volumeService.ensureHealthyVolume(volume.shortId);

      // No auto-remount: the failure reason is surfaced and no mount is tried.
      expect(result).toEqual({
        ready: false,
        volume: expect.objectContaining({ id: volume.id, status: "error", lastError: "stale mount" }),
        reason: "stale mount",
      });
      expect(checkHealth).toHaveBeenCalledOnce();
      expect(mount).not.toHaveBeenCalled();
    });
  });
});
|
||||
|
||||
@@ -14,12 +14,25 @@ import type { UpdateVolumeBody } from "./volume.dto";
|
||||
import { getVolumePath } from "./helpers";
|
||||
import { logger } from "@zerobyte/core/node";
|
||||
import { serverEvents } from "../../core/events";
|
||||
import type { Volume } from "../../db/schema";
|
||||
import { volumeConfigSchema, type BackendConfig } from "~/schemas/volumes";
|
||||
import { getOrganizationId } from "~/server/core/request-context";
|
||||
import { isNodeJSErrnoException } from "~/server/utils/fs";
|
||||
import { asShortId, type ShortId } from "~/server/utils/branded";
|
||||
import { encryptVolumeConfig } from "./volume-config-secrets";
|
||||
|
||||
/**
 * Outcome of the volume readiness preflight (ensureHealthyVolume).
 * Discriminated on `ready`: a ready result reports whether a remount was
 * required to get there; a not-ready result carries a human-readable reason
 * suitable for surfacing to callers. Both variants return the volume with its
 * (possibly updated) status and lastError.
 */
type EnsureHealthyVolumeResult =
  | {
      ready: true;
      volume: Volume;
      remounted: boolean;
    }
  | {
      ready: false;
      volume: Volume;
      reason: string;
    };
|
||||
|
||||
const listVolumes = async () => {
|
||||
const organizationId = getOrganizationId();
|
||||
const volumes = await db.query.volumesTable.findMany({
|
||||
@@ -274,6 +287,59 @@ const checkHealth = async (shortId: ShortId) => {
|
||||
return { status, error };
|
||||
};
|
||||
|
||||
const ensureHealthyVolume = async (shortId: ShortId): Promise<EnsureHealthyVolumeResult> => {
|
||||
const volume = await findVolume(shortId);
|
||||
|
||||
if (!volume) {
|
||||
throw new NotFoundError("Volume not found");
|
||||
}
|
||||
|
||||
if (volume.status === "unmounted") {
|
||||
return { ready: false, volume, reason: volume.lastError ?? "Volume is not mounted" };
|
||||
}
|
||||
|
||||
let failureReason = volume.lastError ?? "Volume health check failed";
|
||||
let failedVolume = volume;
|
||||
|
||||
if (volume.status !== "error") {
|
||||
const health = await checkHealth(shortId);
|
||||
|
||||
if (health.status === "mounted") {
|
||||
return {
|
||||
ready: true,
|
||||
volume: { ...volume, status: "mounted", lastError: null },
|
||||
remounted: false,
|
||||
};
|
||||
}
|
||||
|
||||
failureReason = health.error ?? failureReason;
|
||||
failedVolume = { ...volume, status: "error", lastError: health.error ?? null };
|
||||
}
|
||||
|
||||
if (!volume.autoRemount) {
|
||||
return { ready: false, volume: failedVolume, reason: failureReason };
|
||||
}
|
||||
|
||||
logger.warn(
|
||||
`${volume.name} is not healthy. Auto-remount is enabled, attempting to remount. Reason: ${failureReason}`,
|
||||
);
|
||||
const remount = await mountVolume(shortId);
|
||||
|
||||
if (remount.status !== "mounted") {
|
||||
return {
|
||||
ready: false,
|
||||
volume: { ...volume, status: remount.status, lastError: remount.error ?? null },
|
||||
reason: remount.error ?? failureReason,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
ready: true,
|
||||
volume: { ...volume, status: "mounted", lastError: null },
|
||||
remounted: true,
|
||||
};
|
||||
};
|
||||
|
||||
// Pagination bounds — presumably for volume file listings (listFiles /
// browseFilesystem); TODO confirm against callers. Default equals the cap,
// so callers always get at most 500 entries per page.
const DEFAULT_PAGE_SIZE = 500;
const MAX_PAGE_SIZE = 500;
|
||||
|
||||
@@ -407,6 +473,7 @@ export const volumeService = {
|
||||
testConnection,
|
||||
unmountVolume,
|
||||
checkHealth,
|
||||
ensureHealthyVolume,
|
||||
listFiles,
|
||||
browseFilesystem,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user