snapshot: reworked error handling and added fail-fast option (#840)

Fixes #690 This is a breaking change for folks who are expecting snapshots to fail quickly without writing a snapshot manifest in case of an error. Before this change, any source read failure would cause the entire snapshot to fail (and not write a snapshot manifest as a result), unless `ignoreFileErrors` or `ignoreDirectoryErrors` was set. The new behavior is to continue snapshotting remaining files and directories (this can be disabled by passing `--fail-fast` flag or setting `KOPIA_SNAPSHOT_FAIL_FAST=1` environment variable) and defer returning an error until the very end. After snapshotting we will always attempt to write the snapshot manifest (except when the root of the snapshot itself cannot be opened). In case of a fail-fast error, the manifest will be marked as 'partial' and the directory tree will contain only partial set of files. In case of any errors, the manifest (and each directory object) will list the number if failures and no more than 10 examples of failed files/directories along with their respective errors. Once the snapshot is complete we will return non-zero exit code to the operating system if there were any fatal errors during snapshotting. With this change we are repurposing `ignoreFileErrors` and `ignoreDirectoryErrors` to designate some errors as non-fatal. Non-fatal errors are reported as warnings in the logs and will not cause a non-zero exit code to be returned.
2026-05-18 11:44:36 -04:00 · 2021-02-17 10:29:01 -08:00
parent fe9ebfb671
commit 23273af1cd
15 changed files with 442 additions and 196 deletions
--- a/cli/app.go
+++ b/cli/app.go
@@ -148,9 +148,9 @@ type repositoryAccessMode struct {

 func maybeRepositoryAction(act func(ctx context.Context, rep repo.Repository) error, mode repositoryAccessMode) func(ctx *kingpin.ParseContext) error {
 	return func(kpc *kingpin.ParseContext) error {
-		return withProfiling(func() error {
-			ctx := rootContext()
+		ctx := rootContext()

+		if err := withProfiling(func() error {
 			startMemoryTracking(ctx)
 			defer finishMemoryTracking(ctx)

@@ -184,7 +184,13 @@ func maybeRepositoryAction(act func(ctx context.Context, rep repo.Repository) er
 			}

 			return err
-		})
+		}); err != nil {
+			// print error in red
+			log(ctx).Errorf("ERROR: %v", err.Error())
+			os.Exit(1)
+		}
+
+		return nil
 	}
 }

--- a/cli/cli_progress.go
+++ b/cli/cli_progress.go
@@ -38,7 +38,8 @@ type cliProgress struct {
 	inProgressHashing int32
 	hashedFiles       int32
 	uploadedFiles     int32
-	errorCount        int32
+	ignoredErrorCount int32
+	fatalErrorCount   int32

 	uploading      int32
 	uploadFinished int32
@@ -78,9 +79,14 @@ func (p *cliProgress) HashedBytes(numBytes int64) {
 	p.maybeOutput()
 }

-func (p *cliProgress) IgnoredError(path string, err error) {
-	atomic.AddInt32(&p.errorCount, 1)
-	p.output(warningColor, fmt.Sprintf("Ignored error when processing \"%v\": %v\n", path, err))
+func (p *cliProgress) Error(path string, err error, isIgnored bool) {
+	if isIgnored {
+		atomic.AddInt32(&p.ignoredErrorCount, 1)
+		p.output(warningColor, fmt.Sprintf("Ignored error when processing \"%v\": %v\n", path, err))
+	} else {
+		atomic.AddInt32(&p.fatalErrorCount, 1)
+		p.output(warningColor, fmt.Sprintf("Error when processing \"%v\": %v\n", path, err))
+	}
 }

 func (p *cliProgress) CachedFile(fname string, numBytes int64) {
@@ -118,11 +124,11 @@ func (p *cliProgress) output(col *color.Color, msg string) {
 	cachedFiles := atomic.LoadInt32(&p.cachedFiles)
 	inProgressHashing := atomic.LoadInt32(&p.inProgressHashing)
 	hashedFiles := atomic.LoadInt32(&p.hashedFiles)
-	uploadedFiles := atomic.LoadInt32(&p.uploadedFiles)
-	errorCount := atomic.LoadInt32(&p.errorCount)
+	ignoredErrorCount := atomic.LoadInt32(&p.ignoredErrorCount)
+	fatalErrorCount := atomic.LoadInt32(&p.fatalErrorCount)

 	line := fmt.Sprintf(
-		" %v %v hashing, %v hashed (%v), %v cached (%v), %v uploaded (%v), %v errors",
+		" %v %v hashing, %v hashed (%v), %v cached (%v), uploaded %v",
 		p.spinnerCharacter(),

 		inProgressHashing,
@@ -133,12 +139,17 @@ func (p *cliProgress) output(col *color.Color, msg string) {
 		cachedFiles,
 		units.BytesStringBase10(cachedBytes),

-		uploadedFiles,
 		units.BytesStringBase10(uploadedBytes),
-
-		errorCount,
 	)

+	if fatalErrorCount > 0 {
+		line += fmt.Sprintf(" (%v fatal errors)", fatalErrorCount)
+	}
+
+	if ignoredErrorCount > 0 {
+		line += fmt.Sprintf(" (%v errors ignored)", ignoredErrorCount)
+	}
+
 	if msg != "" {
 		prefix := "\n ! "
 		if !*enableProgress {
--- a/cli/command_ls.go
+++ b/cli/command_ls.go
@@ -58,8 +58,8 @@ func listDirectory(ctx context.Context, d fs.Directory, prefix, indent string) e
 	}

 	if dws, ok := d.(fs.DirectoryWithSummary); ok && *lsCommandErrorSummary {
-		if ds, _ := dws.Summary(ctx); ds != nil && ds.NumFailed > 0 {
-			errorColor.Fprintf(os.Stderr, "\nNOTE: Encountered %v errors while snapshotting this directory:\n\n", ds.NumFailed) //nolint:errcheck
+		if ds, _ := dws.Summary(ctx); ds != nil && ds.FatalErrorCount > 0 {
+			errorColor.Fprintf(os.Stderr, "\nNOTE: Encountered %v errors while snapshotting this directory:\n\n", ds.FatalErrorCount) //nolint:errcheck

 			for _, e := range ds.FailedEntries {
 				errorColor.Fprintf(os.Stderr, "- Error in \"%v\": %v\n", e.EntryPath, e.Error) //nolint:errcheck
@@ -81,8 +81,8 @@ func printDirectoryEntry(ctx context.Context, e fs.Entry, prefix, indent string)
 	)

 	if dws, ok := e.(fs.DirectoryWithSummary); ok && *lsCommandErrorSummary {
-		if ds, _ := dws.Summary(ctx); ds != nil && ds.NumFailed > 0 {
-			errorSummary = fmt.Sprintf(" (%v errors)", ds.NumFailed)
+		if ds, _ := dws.Summary(ctx); ds != nil && ds.FatalErrorCount > 0 {
+			errorSummary = fmt.Sprintf(" (%v errors)", ds.FatalErrorCount)
 			col = errorColor
 		}
 	}
--- a/cli/command_snapshot_create.go
+++ b/cli/command_snapshot_create.go
@@ -8,7 +8,6 @@

 	"github.com/pkg/errors"

-	"github.com/kopia/kopia/internal/clock"
 	"github.com/kopia/kopia/repo"
 	"github.com/kopia/kopia/snapshot"
 	"github.com/kopia/kopia/snapshot/policy"
@@ -28,6 +27,7 @@
 	snapshotCreateCheckpointUploadLimitMB = snapshotCreateCommand.Flag("upload-limit-mb", "Stop the backup process after the specified amount of data (in MB) has been uploaded.").PlaceHolder("MB").Default("0").Int64()
 	snapshotCreateCheckpointInterval      = snapshotCreateCommand.Flag("checkpoint-interval", "Frequency for creating periodic checkpoint.").Duration()
 	snapshotCreateDescription             = snapshotCreateCommand.Flag("description", "Free-form snapshot description.").String()
+	snapshotCreateFailFast                = snapshotCreateCommand.Flag("fail-fast", "Fail fast when creating snapshot.").Envar("KOPIA_SNAPSHOT_FAIL_FAST").Bool()
 	snapshotCreateForceHash               = snapshotCreateCommand.Flag("force-hash", "Force hashing of source files for a given percentage of files [0..100]").Default("0").Int()
 	snapshotCreateParallelUploads         = snapshotCreateCommand.Flag("parallel", "Upload N files in parallel").PlaceHolder("N").Default("0").Int()
 	snapshotCreateStartTime               = snapshotCreateCommand.Flag("start-time", "Override snapshot start timestamp.").String()
@@ -94,6 +94,10 @@ func runSnapshotCommand(ctx context.Context, rep repo.RepositoryWriter) error {
 		return nil
 	}

+	if len(finalErrors) == 1 {
+		return errors.New(finalErrors[0])
+	}
+
 	return errors.Errorf("encountered %v errors:\n%v", len(finalErrors), strings.Join(finalErrors, "\n"))
 }

@@ -131,10 +135,12 @@ func setupUploader(rep repo.RepositoryWriter) *snapshotfs.Uploader {
 		u.CheckpointInterval = interval
 	}

-	u.ForceHashPercentage = *snapshotCreateForceHash
-	u.ParallelUploads = *snapshotCreateParallelUploads
 	onCtrlC(u.Cancel)

+	u.ForceHashPercentage = *snapshotCreateForceHash
+	u.ParallelUploads = *snapshotCreateParallelUploads
+
+	u.FailFast = *snapshotCreateFailFast
 	u.Progress = progress

 	return u
@@ -157,8 +163,6 @@ func startTimeAfterEndTime(startTime, endTime time.Time) bool {
 func snapshotSingleSource(ctx context.Context, rep repo.RepositoryWriter, u *snapshotfs.Uploader, sourceInfo snapshot.SourceInfo) error {
 	log(ctx).Infof("Snapshotting %v ...", sourceInfo)

-	t0 := clock.Now()
-
 	localEntry, err := getLocalFSEntry(ctx, sourceInfo.Path)
 	if err != nil {
 		return errors.Wrap(err, "unable to get local filesystem entry")
@@ -178,6 +182,8 @@ func snapshotSingleSource(ctx context.Context, rep repo.RepositoryWriter, u *sna

 	manifest, err := u.Upload(ctx, localEntry, policyTree, sourceInfo, previous...)
 	if err != nil {
+		// fail-fast uploads will fail here without recording a manifest, other uploads will
+		// possibly fail later.
 		return errors.Wrap(err, "upload error")
 	}

@@ -204,8 +210,7 @@ func snapshotSingleSource(ctx context.Context, rep repo.RepositoryWriter, u *sna
 		manifest.EndTime = endTimeOverride
 	}

-	snapID, err := snapshot.SaveSnapshot(ctx, rep, manifest)
-	if err != nil {
+	if _, err = snapshot.SaveSnapshot(ctx, rep, manifest); err != nil {
 		return errors.Wrap(err, "cannot save manifest")
 	}

@@ -219,20 +224,32 @@ func snapshotSingleSource(ctx context.Context, rep repo.RepositoryWriter, u *sna

 	progress.Finish()

+	return reportSnapshotStatus(ctx, manifest)
+}
+
+func reportSnapshotStatus(ctx context.Context, manifest *snapshot.Manifest) error {
 	var maybePartial string
 	if manifest.IncompleteReason != "" {
 		maybePartial = " partial"
 	}

+	sourceInfo := manifest.Source
+
+	snapID := manifest.ID
+
+	log(ctx).Infof("Created%v snapshot with root %v and ID %v in %v", maybePartial, manifest.RootObjectID(), snapID, manifest.EndTime.Sub(manifest.StartTime).Truncate(time.Second))
+
 	if ds := manifest.RootEntry.DirSummary; ds != nil {
-		if ds.NumFailed > 0 {
-			log(ctx).Warningf("Ignored %v errors while snapshotting %v.", ds.NumFailed, sourceInfo)
+		if ds.IgnoredErrorCount > 0 {
+			log(ctx).Warningf("Ignored %v error(s) while snapshotting %v.", ds.IgnoredErrorCount, sourceInfo)
+		}
+
+		if ds.FatalErrorCount > 0 {
+			return errors.Errorf("Found %v fatal error(s) while snapshotting %v.", ds.FatalErrorCount, sourceInfo)
 		}
 	}

-	log(ctx).Infof("Created%v snapshot with root %v and ID %v in %v", maybePartial, manifest.RootObjectID(), snapID, clock.Since(t0).Truncate(time.Second))
-
-	return errors.Wrap(err, "error snapshotting")
+	return nil
 }

 // findPreviousSnapshotManifest returns the list of previous snapshots for a given source, including
--- a/cli/command_snapshot_list.go
+++ b/cli/command_snapshot_list.go
@@ -257,8 +257,8 @@ func entryBits(ctx context.Context, m *snapshot.Manifest, ent fs.Entry, lastTota
 			bits = append(bits,
 				fmt.Sprintf("files:%v", s.TotalFileCount),
 				fmt.Sprintf("dirs:%v", s.TotalDirCount))
-			if s.NumFailed > 0 {
-				bits = append(bits, fmt.Sprintf("errors:%v", s.NumFailed))
+			if s.FatalErrorCount > 0 {
+				bits = append(bits, fmt.Sprintf("errors:%v", s.FatalErrorCount))
 				col = errorColor
 			}
 		}
--- a/cli/command_snapshot_migrate.go
+++ b/cli/command_snapshot_migrate.go
@@ -22,7 +22,6 @@
 	migratePolicies          = migrateCommand.Flag("policies", "Migrate policies too").Default("true").Bool()
 	migrateOverwritePolicies = migrateCommand.Flag("overwrite-policies", "Overwrite policies").Bool()
 	migrateLatestOnly        = migrateCommand.Flag("latest-only", "Only migrate the latest snapshot").Bool()
-	migrateIgnoreErrors      = migrateCommand.Flag("ignore-errors", "Ignore errors when reading source snapshot").Bool()
 	migrateParallel          = migrateCommand.Flag("parallel", "Number of sources to migrate in parallel").Default("1").Int()
 )

@@ -83,7 +82,6 @@ func runMigrateCommand(ctx context.Context, destRepo repo.RepositoryWriter) erro

 		uploader := snapshotfs.NewUploader(destRepo)
 		uploader.Progress = progress
-		uploader.IgnoreReadErrors = *migrateIgnoreErrors
 		activeUploaders[s] = uploader
 		mu.Unlock()

--- a/fs/entry.go
+++ b/fs/entry.go
@@ -98,7 +98,8 @@ type DirectorySummary struct {
 	IncompleteReason  string    `json:"incomplete,omitempty"`

 	// number of failed files
-	NumFailed int `json:"numFailed"`
+	FatalErrorCount   int `json:"numFailed"`
+	IgnoredErrorCount int `json:"numIgnoredErrors,omitempty"`

 	// first 10 failed entries
 	FailedEntries []*EntryWithError `json:"errors,omitempty"`
--- a/internal/server/source_manager.go
+++ b/internal/server/source_manager.go
@@ -435,9 +435,9 @@ func (t *uitaskProgress) HashedBytes(numBytes int64) {
 	t.maybeReport()
 }

-// IgnoredError is emitted when an error is encountered and ignored.
-func (t *uitaskProgress) IgnoredError(path string, err error) {
-	t.p.IgnoredError(path, err)
+// Error is emitted when an error is encountered.
+func (t *uitaskProgress) Error(path string, err error, isIgnored bool) {
+	t.p.Error(path, err, isIgnored)
 	t.maybeReport()
 }

--- a/snapshot/snapshotfs/upload.go
+++ b/snapshot/snapshotfs/upload.go
@@ -53,9 +53,6 @@ type Uploader struct {
 	// automatically cancel the Upload after certain number of bytes
 	MaxUploadBytes int64

-	// ignore read errors
-	IgnoreReadErrors bool
-
 	// probability with cached entries will be ignored, must be [0..100]
 	// 0=always use cached object entries if possible
 	// 100=never use cached entries
@@ -67,6 +64,9 @@ type Uploader struct {
 	// Enable snapshot actions
 	EnableActions bool

+	// Fail the entire snapshot on source file/directory error.
+	FailFast bool
+
 	// How frequently to create checkpoint snapshot entries.
 	CheckpointInterval time.Duration

@@ -520,7 +520,8 @@ func (b *dirManifestBuilder) addEntry(de *snapshot.DirEntry) {
 			b.summary.TotalFileCount += childSummary.TotalFileCount
 			b.summary.TotalFileSize += childSummary.TotalFileSize
 			b.summary.TotalDirCount += childSummary.TotalDirCount
-			b.summary.NumFailed += childSummary.NumFailed
+			b.summary.FatalErrorCount += childSummary.FatalErrorCount
+			b.summary.IgnoredErrorCount += childSummary.IgnoredErrorCount
 			b.summary.FailedEntries = append(b.summary.FailedEntries, childSummary.FailedEntries...)

 			if childSummary.MaxModTime.After(b.summary.MaxModTime) {
@@ -530,11 +531,16 @@ func (b *dirManifestBuilder) addEntry(de *snapshot.DirEntry) {
 	}
 }

-func (b *dirManifestBuilder) addFailedEntry(relPath string, err error) {
+func (b *dirManifestBuilder) addFailedEntry(relPath string, isIgnoredError bool, err error) {
 	b.mu.Lock()
 	defer b.mu.Unlock()

-	b.summary.NumFailed++
+	if isIgnoredError {
+		b.summary.IgnoredErrorCount++
+	} else {
+		b.summary.FatalErrorCount++
+	}
+
 	b.summary.FailedEntries = append(b.summary.FailedEntries, &fs.EntryWithError{
 		EntryPath: relPath,
 		Error:     err.Error(),
@@ -554,16 +560,7 @@ func (b *dirManifestBuilder) Build(dirModTime time.Time, incompleteReason string

 	s.IncompleteReason = incompleteReason

-	// take top N sorted failed entries
-	if len(b.summary.FailedEntries) > 0 {
-		sort.Slice(b.summary.FailedEntries, func(i, j int) bool {
-			return b.summary.FailedEntries[i].EntryPath < b.summary.FailedEntries[j].EntryPath
-		})
-
-		if len(b.summary.FailedEntries) > fs.MaxFailedEntriesPerDirectorySummary {
-			b.summary.FailedEntries = b.summary.FailedEntries[0:fs.MaxFailedEntriesPerDirectorySummary]
-		}
-	}
+	b.summary.FailedEntries = sortedTopFailures(b.summary.FailedEntries)

 	// sort the result, directories first, then non-directories, ordered by name
 	sort.Slice(b.entries, func(i, j int) bool {
@@ -582,6 +579,18 @@ func (b *dirManifestBuilder) Build(dirModTime time.Time, incompleteReason string
 	}
 }

+func sortedTopFailures(entries []*fs.EntryWithError) []*fs.EntryWithError {
+	sort.Slice(entries, func(i, j int) bool {
+		return entries[i].EntryPath < entries[j].EntryPath
+	})
+
+	if len(entries) > fs.MaxFailedEntriesPerDirectorySummary {
+		entries = entries[0:fs.MaxFailedEntriesPerDirectorySummary]
+	}
+
+	return entries
+}
+
 func isDir(e *snapshot.DirEntry) bool {
 	return e.Type == snapshot.EntryTypeDirectory
 }
@@ -642,7 +651,9 @@ func (u *Uploader) processSubdirectories(
 			childLocalDirPathOrEmpty = filepath.Join(localDirPathOrEmpty, entry.Name())
 		}

-		de, err := uploadDirInternal(ctx, u, dir, policyTree.Child(entry.Name()), previousDirs, childLocalDirPathOrEmpty, entryRelativePath, childDirBuilder, parentDirCheckpointRegistry)
+		childTree := policyTree.Child(entry.Name())
+
+		de, err := uploadDirInternal(ctx, u, dir, childTree, previousDirs, childLocalDirPathOrEmpty, entryRelativePath, childDirBuilder, parentDirCheckpointRegistry)
 		if errors.Is(err, errCanceled) {
 			return err
 		}
@@ -651,22 +662,20 @@ func (u *Uploader) processSubdirectories(
 			// Note: This only catches errors in subdirectories of the snapshot root, not on the snapshot
 			// root itself. The intention is to always fail if the top level directory can't be read,
 			// otherwise a meaningless, empty snapshot is created that can't be restored.
-			ignoreDirErr := u.shouldIgnoreDirectoryReadErrors(policyTree)

 			var dre dirReadError
-			if errors.As(err, &dre) && ignoreDirErr {
-				rc := rootCauseError(dre.error)
-
-				u.Progress.IgnoredError(entryRelativePath, rc)
-				parentDirBuilder.addFailedEntry(entryRelativePath, rc)
-				return nil
+			if errors.As(err, &dre) {
+				u.reportErrorAndMaybeCancel(dre.error,
+					childTree.EffectivePolicy().ErrorHandlingPolicy.IgnoreDirectoryErrorsOrDefault(false),
+					parentDirBuilder,
+					entryRelativePath)
+			} else {
+				return errors.Wrapf(err, "unable to process directory %q", entry.Name())
 			}
-
-			return errors.Errorf("unable to process directory %q: %s", entry.Name(), err)
+		} else {
+			parentDirBuilder.addEntry(de)
 		}

-		parentDirBuilder.addEntry(de)
-
 		return nil
 	})
 }
@@ -772,20 +781,27 @@ func (u *Uploader) processNonDirectories(ctx context.Context, parentCheckpointRe
 		case fs.Symlink:
 			de, err := u.uploadSymlinkInternal(ctx, entryRelativePath, entry)
 			if err != nil {
-				return u.maybeIgnoreFileReadError(err, parentDirBuilder, entryRelativePath, policyTree)
+				u.reportErrorAndMaybeCancel(
+					err,
+					policyTree.EffectivePolicy().ErrorHandlingPolicy.IgnoreFileErrorsOrDefault(false),
+					parentDirBuilder, entryRelativePath)
+			} else {
+				parentDirBuilder.addEntry(de)
 			}

-			parentDirBuilder.addEntry(de)
 			return nil

 		case fs.File:
 			atomic.AddInt32(&u.stats.NonCachedFiles, 1)
 			de, err := u.uploadFileInternal(ctx, parentCheckpointRegistry, entryRelativePath, entry, policyTree.Child(entry.Name()).EffectivePolicy(), asyncWritesPerFile)
 			if err != nil {
-				return u.maybeIgnoreFileReadError(err, parentDirBuilder, entryRelativePath, policyTree)
+				u.reportErrorAndMaybeCancel(
+					err, policyTree.EffectivePolicy().ErrorHandlingPolicy.IgnoreFileErrorsOrDefault(false),
+					parentDirBuilder, entryRelativePath)
+			} else {
+				parentDirBuilder.addEntry(de)
 			}

-			parentDirBuilder.addEntry(de)
 			return nil

 		default:
@@ -942,28 +958,20 @@ func (u *Uploader) writeDirManifest(ctx context.Context, dirRelativePath string,
 	return oid, nil
 }

-func (u *Uploader) maybeIgnoreFileReadError(err error, dmb *dirManifestBuilder, entryRelativePath string, policyTree *policy.Tree) error {
-	errHandlingPolicy := policyTree.EffectivePolicy().ErrorHandlingPolicy
-
-	if u.IgnoreReadErrors || errHandlingPolicy.IgnoreFileErrorsOrDefault(false) {
-		err = rootCauseError(err)
-		u.Progress.IgnoredError(entryRelativePath, err)
-		dmb.addFailedEntry(entryRelativePath, err)
-
-		return nil
+func (u *Uploader) reportErrorAndMaybeCancel(err error, isIgnored bool, dmb *dirManifestBuilder, entryRelativePath string) {
+	if isIgnored {
+		atomic.AddInt32(&u.stats.IgnoredErrorCount, 1)
+	} else {
+		atomic.AddInt32(&u.stats.ErrorCount, 1)
 	}

-	return err
-}
+	rc := rootCauseError(err)
+	u.Progress.Error(entryRelativePath, rc, isIgnored)
+	dmb.addFailedEntry(entryRelativePath, isIgnored, rc)

-func (u *Uploader) shouldIgnoreDirectoryReadErrors(policyTree *policy.Tree) bool {
-	errHandlingPolicy := policyTree.EffectivePolicy().ErrorHandlingPolicy
-
-	if u.IgnoreReadErrors {
-		return true
+	if u.FailFast && !isIgnored {
+		u.Cancel()
 	}
-
-	return errHandlingPolicy.IgnoreDirectoryErrorsOrDefault(false)
 }

 // NewUploader creates new Uploader object for a given repository.
@@ -971,7 +979,6 @@ func NewUploader(r repo.RepositoryWriter) *Uploader {
 	return &Uploader{
 		repo:               r,
 		Progress:           &NullUploadProgress{},
-		IgnoreReadErrors:   false,
 		ParallelUploads:    1,
 		EnableActions:      r.ClientOptions().EnableActions,
 		CheckpointInterval: DefaultCheckpointInterval,
--- a/snapshot/snapshotfs/upload_progress.go
+++ b/snapshot/snapshotfs/upload_progress.go
@@ -33,8 +33,8 @@ type UploadProgress interface {
 	// HashedBytes is emitted while hashing any blocks of bytes.
 	HashedBytes(numBytes int64)

-	// IgnoredError is emitted when an error is encountered and ignored
-	IgnoredError(path string, err error)
+	// Error is emitted when an error is encountered.
+	Error(path string, err error, isIgnored bool)

 	// UploadedBytes is emitted whenever bytes are written to the blob storage.
 	UploadedBytes(numBytes int64)
@@ -88,8 +88,8 @@ func (p *NullUploadProgress) StartedDirectory(dirname string) {}
 // FinishedDirectory implements UploadProgress.
 func (p *NullUploadProgress) FinishedDirectory(dirname string) {}

-// IgnoredError implements UploadProgress.
-func (p *NullUploadProgress) IgnoredError(path string, err error) {}
+// Error implements UploadProgress.
+func (p *NullUploadProgress) Error(path string, err error, isIgnored bool) {}

 var _ UploadProgress = (*NullUploadProgress)(nil)

@@ -107,8 +107,9 @@ type UploadCounters struct {
 	TotalExcludedFiles int32 `json:"excludedFiles"`
 	TotalExcludedDirs  int32 `json:"excludedDirs"`

-	TotalIgnoredErrors int32 `json:"ignoredErrors"`
-	EstimatedFiles     int32 `json:"estimatedFiles"`
+	FatalErrorCount   int32 `json:"errors"`
+	IgnoredErrorCount int32 `json:"ignoredErrors"`
+	EstimatedFiles    int32 `json:"estimatedFiles"`

 	CurrentDirectory string `json:"directory"`

@@ -168,12 +169,17 @@ func (p *CountingUploadProgress) ExcludedFile(fname string, numBytes int64) {
 	atomic.AddInt32(&p.counters.TotalExcludedFiles, 1)
 }

-// IgnoredError implements UploadProgress.
-func (p *CountingUploadProgress) IgnoredError(path string, err error) {
+// Error implements UploadProgress.
+func (p *CountingUploadProgress) Error(path string, err error, isIgnored bool) {
 	p.mu.Lock()
 	defer p.mu.Unlock()

-	p.counters.TotalIgnoredErrors++
+	if isIgnored {
+		p.counters.IgnoredErrorCount++
+	} else {
+		p.counters.FatalErrorCount++
+	}
+
 	p.counters.LastErrorPath = path
 	p.counters.LastError = err.Error()
 }
@@ -227,7 +233,7 @@ func (p *CountingUploadProgress) UITaskCounters(final bool) map[string]uitask.Co
 		"Excluded Files":       uitask.SimpleCounter(int64(atomic.LoadInt32(&p.counters.TotalExcludedFiles))),
 		"Excluded Directories": uitask.SimpleCounter(int64(atomic.LoadInt32(&p.counters.TotalExcludedDirs))),

-		"Errors": uitask.ErrorCounter(int64(atomic.LoadInt32(&p.counters.TotalIgnoredErrors))),
+		"Errors": uitask.ErrorCounter(int64(atomic.LoadInt32(&p.counters.IgnoredErrorCount))),
 	}

 	if !final {
--- a/snapshot/snapshotfs/upload_test.go
+++ b/snapshot/snapshotfs/upload_test.go
@@ -224,29 +224,43 @@ func TestUpload_TopLevelDirectoryReadFailure(t *testing.T) {
 	}
 }

-func TestUpload_SubDirectoryReadFailure(t *testing.T) {
+func TestUpload_SubDirectoryReadFailureFailFast(t *testing.T) {
 	ctx := testlogging.Context(t)
 	th := newUploadTestHarness(ctx, t)

 	defer th.cleanup()

 	th.sourceDir.Subdir("d1").FailReaddir(errTest)
+	th.sourceDir.Subdir("d2").Subdir("d1").FailReaddir(errTest)

 	u := NewUploader(th.repo)
+	u.ParallelUploads = 1
+	u.FailFast = true

 	policyTree := policy.BuildTree(nil, policy.DefaultPolicy)

-	_, err := u.Upload(ctx, th.sourceDir, policyTree, snapshot.SourceInfo{})
-	if err == nil {
-		t.Errorf("expected error")
+	man, err := u.Upload(ctx, th.sourceDir, policyTree, snapshot.SourceInfo{})
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
 	}
+
+	if man.IncompleteReason == "" {
+		t.Fatalf("snapshot not marked as incomplete")
+	}
+
+	// will have one error because we're canceling early.
+	verifyErrors(t, man, 1, 0,
+		[]*fs.EntryWithError{
+			{EntryPath: "d1", Error: errTest.Error()},
+		},
+	)
 }

 func objectIDsEqual(o1, o2 object.ID) bool {
 	return reflect.DeepEqual(o1, o2)
 }

-func TestUpload_SubDirectoryReadFailureIgnoreFailures(t *testing.T) {
+func TestUpload_SubDirectoryReadFailureIgnoredNoFailFast(t *testing.T) {
 	ctx := testlogging.Context(t)
 	th := newUploadTestHarness(ctx, t)

@@ -271,21 +285,107 @@ func TestUpload_SubDirectoryReadFailureIgnoreFailures(t *testing.T) {
 		t.Errorf("unexpected error: %v", err)
 	}

+	// 0 failed, 2 ignored
+	verifyErrors(t, man, 0, 2,
+		[]*fs.EntryWithError{
+			{EntryPath: "d1", Error: errTest.Error()},
+			{EntryPath: "d2/d1", Error: errTest.Error()},
+		},
+	)
+}
+
+func TestUpload_SubDirectoryReadFailureNoFailFast(t *testing.T) {
+	ctx := testlogging.Context(t)
+	th := newUploadTestHarness(ctx, t)
+
+	defer th.cleanup()
+
+	th.sourceDir.Subdir("d1").FailReaddir(errTest)
+	th.sourceDir.Subdir("d2").Subdir("d1").FailReaddir(errTest)
+
+	u := NewUploader(th.repo)
+
+	policyTree := policy.BuildTree(nil, policy.DefaultPolicy)
+
+	man, err := u.Upload(ctx, th.sourceDir, policyTree, snapshot.SourceInfo{})
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+
 	// make sure we have 2 errors
-	if got, want := man.RootEntry.DirSummary.NumFailed, 2; got != want {
+	if got, want := man.RootEntry.DirSummary.FatalErrorCount, 2; got != want {
 		t.Errorf("invalid number of failed entries: %v, want %v", got, want)
 	}

-	wantErrors := []*fs.EntryWithError{
-		{EntryPath: "d1", Error: errTest.Error()},
-		{EntryPath: "d2/d1", Error: errTest.Error()},
+	verifyErrors(t, man,
+		2, 0,
+		[]*fs.EntryWithError{
+			{EntryPath: "d1", Error: errTest.Error()},
+			{EntryPath: "d2/d1", Error: errTest.Error()},
+		},
+	)
+}
+
+func verifyErrors(t *testing.T, man *snapshot.Manifest, wantFatalErrors, wantIgnoredErrors int, wantErrors []*fs.EntryWithError) {
+	t.Helper()
+
+	if got, want := man.RootEntry.DirSummary.FatalErrorCount, wantFatalErrors; got != want {
+		t.Fatalf("invalid number of fatal errors: %v, want %v", got, want)
+	}
+
+	if got, want := man.RootEntry.DirSummary.IgnoredErrorCount, wantIgnoredErrors; got != want {
+		t.Fatalf("invalid number of ignored errors: %v, want %v", got, want)
 	}

 	if diff := pretty.Compare(man.RootEntry.DirSummary.FailedEntries, wantErrors); diff != "" {
-		t.Errorf("unexpected directory tree, diff(-got,+want): %v\n", diff)
+		t.Errorf("unexpected errors, diff(-got,+want): %v\n", diff)
 	}
 }

+func TestUpload_SubDirectoryReadFailureSomeIgnoredNoFailFast(t *testing.T) {
+	ctx := testlogging.Context(t)
+	th := newUploadTestHarness(ctx, t)
+
+	defer th.cleanup()
+
+	th.sourceDir.Subdir("d1").FailReaddir(errTest)
+	th.sourceDir.Subdir("d2").Subdir("d1").FailReaddir(errTest)
+	th.sourceDir.AddDir("d3", defaultPermissions)
+	th.sourceDir.Subdir("d3").FailReaddir(errTest)
+
+	u := NewUploader(th.repo)
+
+	trueValue := true
+
+	// set up a policy tree where errors from d3 are ignored.
+	policyTree := policy.BuildTree(map[string]*policy.Policy{
+		"./d3": {
+			ErrorHandlingPolicy: policy.ErrorHandlingPolicy{
+				IgnoreFileErrors:      &trueValue,
+				IgnoreDirectoryErrors: &trueValue,
+			},
+		},
+	}, policy.DefaultPolicy)
+
+	if got, want := policyTree.Child("d3").EffectivePolicy().ErrorHandlingPolicy.IgnoreDirectoryErrorsOrDefault(false), true; got != want {
+		t.Fatalf("policy not effective")
+	}
+
+	man, err := u.Upload(ctx, th.sourceDir, policyTree, snapshot.SourceInfo{})
+	if err != nil {
+		t.Errorf("unexpected error: %v", err)
+	}
+
+	verifyErrors(t, man,
+		2, 1,
+		[]*fs.EntryWithError{
+			{EntryPath: "d1", Error: errTest.Error()},
+			{EntryPath: "d2/d1", Error: errTest.Error()},
+			{EntryPath: "d3", Error: errTest.Error()},
+		},
+	)
+}
+
 func TestUploadWithCheckpointing(t *testing.T) {
 	ctx := testlogging.Context(t)
 	th := newUploadTestHarness(ctx, t)
--- a/snapshot/stats.go
+++ b/snapshot/stats.go
@@ -21,6 +21,9 @@ type Stats struct {

 	ExcludedFileCount int32 `json:"excludedFileCount"`
 	ExcludedDirCount  int32 `json:"excludedDirCount"`
+
+	IgnoredErrorCount int32 `json:"ignoredErrorCount"`
+	ErrorCount        int32 `json:"errorCount"`
 }

 // AddExcluded adds the information about excluded file to the statistics.
--- a/tests/end_to_end_test/restore_fail_test.go
+++ b/tests/end_to_end_test/restore_fail_test.go
@@ -49,7 +49,7 @@ func TestRestoreFail(t *testing.T) {
 	beforeBlobList := e.RunAndExpectSuccess(t, "blob", "list")

 	_, errOut := e.RunAndExpectSuccessWithErrOut(t, "snapshot", "create", sourceDir)
-	snapID := parseSnapID(t, errOut)
+	parsed := parseSnapshotResult(t, errOut)

 	afterBlobList := e.RunAndExpectSuccess(t, "blob", "list")

@@ -64,10 +64,10 @@ func TestRestoreFail(t *testing.T) {
 	e.RunAndExpectSuccess(t, "blob", "delete", blobIDToDelete)

 	// Expect a subsequent restore to fail
-	e.RunAndExpectFailure(t, "snapshot", "restore", snapID, targetDir)
+	e.RunAndExpectFailure(t, "snapshot", "restore", parsed.manifestID, targetDir)

 	// --ignore-errors allows the snapshot to succeed despite missing blob.
-	e.RunAndExpectSuccess(t, "snapshot", "restore", "--ignore-errors", snapID, targetDir)
+	e.RunAndExpectSuccess(t, "snapshot", "restore", "--ignore-errors", parsed.manifestID, targetDir)
 }

 func findPackBlob(blobIDs []string) string {
--- a/tests/end_to_end_test/snapshot_fail_test.go
+++ b/tests/end_to_end_test/snapshot_fail_test.go
@@ -7,6 +7,8 @@
 	"path/filepath"
 	"regexp"
 	"runtime"
+	"strconv"
+	"strings"
 	"testing"

 	"github.com/kopia/kopia/internal/testutil"
@@ -28,9 +30,43 @@ func TestSnapshotNonexistent(t *testing.T) {
 	e.RunAndExpectFailure(t, "snapshot", "create", filepath.Join(scratchDir, "notExist"))
 }

-func TestSnapshotFail(t *testing.T) {
+func TestSnapshotFail_Default(t *testing.T) {
 	t.Parallel()
+	testSnapshotFail(t, false, nil, nil)
+}

+func TestSnapshotFail_EnvOverride(t *testing.T) {
+	t.Parallel()
+	testSnapshotFail(t, true, nil, []string{"KOPIA_SNAPSHOT_FAIL_FAST=true"})
+}
+
+func TestSnapshotFail_NoFailFast(t *testing.T) {
+	t.Parallel()
+	testSnapshotFail(t, false, []string{"--no-fail-fast"}, nil)
+}
+
+func TestSnapshotFail_FailFast(t *testing.T) {
+	t.Parallel()
+	testSnapshotFail(t, true, []string{"--fail-fast"}, nil)
+}
+
+type expectedSnapshotResult struct {
+	success           bool
+	wantErrors        int
+	wantIgnoredErrors int
+	wantPartial       bool
+}
+
+func cond(c bool, a, b int) int {
+	if c {
+		return a
+	}
+
+	return b
+}
+
+// nolint:thelper
+func testSnapshotFail(t *testing.T, isFailFast bool, snapshotCreateFlags, snapshotCreateEnv []string) {
 	if runtime.GOOS == windowsOSName {
 		t.Skip("this test does not work on Windows")
 	}
@@ -51,131 +87,138 @@ func TestSnapshotFail(t *testing.T) {
 				ignoreDirErr = "inherit"
 			}

+			var (
+				expectedSuccess           = expectedSnapshotResult{success: true}
+				expectEarlyFailure        = expectedSnapshotResult{success: false}
+				expectedWhenIgnoringFiles = expectedSnapshotResult{success: ignoringFiles, wantErrors: cond(ignoringFiles, 0, 1), wantIgnoredErrors: cond(ignoringFiles, 1, 0), wantPartial: !ignoringFiles && isFailFast}
+				expectedWhenIgnoringDirs  = expectedSnapshotResult{success: ignoringDirs, wantErrors: cond(ignoringDirs, 0, 1), wantIgnoredErrors: cond(ignoringDirs, 1, 0), wantPartial: !ignoringDirs && isFailFast}
+			)
+
 			// Test the root dir permissions
 			for ti, tt := range []struct {
 				desc          string
 				modifyEntry   string
 				snapSource    string
-				expectSuccess map[os.FileMode]bool
+				expectSuccess map[os.FileMode]expectedSnapshotResult
 			}{
 				{
 					desc:        "Modify permissions of the parent dir of the snapshot source (source is a FILE)",
 					modifyEntry: dir0Path,
 					snapSource:  filepath.Join(dir0Path, "file1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: false, // --- permission: cannot read directory
-						0o100: true,  // --X permission: can enter directory and take snapshot of the file (with full permissions)
-						0o400: false, // R-- permission: can read the file name, but will be unable to snapshot it without entering directory
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectEarlyFailure, // --- permission: cannot read directory
+						0o100: expectedSuccess,    // --X permission: can enter directory and take snapshot of the file (with full permissions)
+						0o400: expectEarlyFailure, // R-- permission: can read the file name, but will be unable to snapshot it without entering directory
 					},
 				},
 				{
 					desc:        "Modify permissions of the parent dir of the snapshot source (source is a DIRECTORY)",
 					modifyEntry: dir0Path,
 					snapSource:  filepath.Join(dir0Path, "dir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: false,
-						0o100: true,
-						0o400: false,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectEarlyFailure,
+						0o100: expectedSuccess,
+						0o400: expectEarlyFailure,
 					},
 				},
 				{
 					desc:        "Modify permissions of the parent dir of the snapshot source (source is an EMPTY directory)",
 					modifyEntry: dir0Path,
 					snapSource:  filepath.Join(dir0Path, "emptyDir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: false,
-						0o100: true,
-						0o400: false,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectEarlyFailure,
+						0o100: expectedSuccess,
+						0o400: expectEarlyFailure,
 					},
 				},
 				{
 					desc:        "Modify permissions of the snapshot source itself (source is a FILE)",
 					modifyEntry: filepath.Join(dir0Path, "file1"),
 					snapSource:  filepath.Join(dir0Path, "file1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: false,
-						0o100: false,
-						0o400: true,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectEarlyFailure,
+						0o100: expectEarlyFailure,
+						0o400: expectedSuccess,
 					},
 				},
 				{
 					desc:        "Modify permissions of the snapshot source itself (source is a DIRECTORY)",
 					modifyEntry: filepath.Join(dir0Path, "dir1"),
 					snapSource:  filepath.Join(dir0Path, "dir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: false,
-						0o100: false,
-						0o400: false,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectEarlyFailure,
+						0o100: expectEarlyFailure,
+						0o400: expectEarlyFailure,
 					},
 				},
 				{
 					desc:        "Modify permissions of the snapshot source itself (source is an EMPTY directory)",
 					modifyEntry: filepath.Join(dir0Path, "emptyDir1"),
 					snapSource:  filepath.Join(dir0Path, "emptyDir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: false,
-						0o100: false,
-						0o400: true,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectEarlyFailure,
+						0o100: expectEarlyFailure,
+						0o400: expectedSuccess,
 					},
 				},
 				{
 					desc:        "Modify permissions of a FILE in the snapshot directory",
 					modifyEntry: filepath.Join(dir0Path, "dir1", "file2"),
 					snapSource:  filepath.Join(dir0Path, "dir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: ignoringFiles,
-						0o100: ignoringFiles,
-						0o400: true,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectedWhenIgnoringFiles,
+						0o100: expectedWhenIgnoringFiles,
+						0o400: expectedSuccess,
 					},
 				},
 				{
 					desc:        "Modify permissions of a DIRECTORY in the snapshot directory",
 					modifyEntry: filepath.Join(dir0Path, "dir1", "dir2"),
 					snapSource:  filepath.Join(dir0Path, "dir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: ignoringDirs,
-						0o100: ignoringDirs,
-						0o400: ignoringDirs,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectedWhenIgnoringDirs,
+						0o100: expectedWhenIgnoringDirs,
+						0o400: expectedWhenIgnoringDirs,
 					},
 				},
 				{
 					desc:        "Modify permissions of an EMPTY directory in the snapshot directory",
 					modifyEntry: filepath.Join(dir0Path, "dir1", "emptyDir2"),
 					snapSource:  filepath.Join(dir0Path, "dir1"),
-					expectSuccess: map[os.FileMode]bool{
-						0o000: ignoringDirs,
-						0o100: ignoringDirs,
-						0o400: true,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectedWhenIgnoringDirs,
+						0o100: expectedWhenIgnoringDirs,
+						0o400: expectedSuccess,
 					},
 				},
 				{
 					desc:        "Modify permissions of a FILE in a subdirectory of the snapshot root directory",
 					modifyEntry: filepath.Join(dir0Path, "dir1", "file2"),
 					snapSource:  dir0Path,
-					expectSuccess: map[os.FileMode]bool{
-						0o000: ignoringFiles,
-						0o100: ignoringFiles,
-						0o400: true,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectedWhenIgnoringFiles,
+						0o100: expectedWhenIgnoringFiles,
+						0o400: expectedSuccess,
 					},
 				},
 				{
 					desc:        "Modify permissions of a DIRECTORY in a subdirectory of the snapshot root directory",
 					modifyEntry: filepath.Join(dir0Path, "dir1", "dir2"),
 					snapSource:  dir0Path,
-					expectSuccess: map[os.FileMode]bool{
-						0o000: ignoringDirs,
-						0o100: ignoringDirs,
-						0o400: ignoringDirs,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectedWhenIgnoringDirs,
+						0o100: expectedWhenIgnoringDirs,
+						0o400: expectedWhenIgnoringDirs,
 					},
 				},
 				{
 					desc:        "Modify permissions of an EMPTY directory in a subdirectory of the snapshot root directory",
 					modifyEntry: filepath.Join(dir0Path, "dir1", "emptyDir2"),
 					snapSource:  dir0Path,
-					expectSuccess: map[os.FileMode]bool{
-						0o000: ignoringDirs,
-						0o100: ignoringDirs,
-						0o400: true,
+					expectSuccess: map[os.FileMode]expectedSnapshotResult{
+						0o000: expectedWhenIgnoringDirs,
+						0o100: expectedWhenIgnoringDirs,
+						0o400: expectedSuccess,
 					},
 				},
 			} {
@@ -184,7 +227,7 @@ func TestSnapshotFail(t *testing.T) {
 				tcIgnoreFileErr := ignoreFileErr
 				tcIdx := ti
 				tc := tt
-				tname := fmt.Sprintf("%s_ignoreFileErr_%s_ignoreDirErr_%s", tc.desc, ignoreDirErr, ignoreFileErr)
+				tname := fmt.Sprintf("%s_ignoreFileErr_%s_ignoreDirErr_%s_failFast_%v", tc.desc, ignoreDirErr, ignoreFileErr, isFailFast)

 				t.Run(tname, func(t *testing.T) {
 					t.Parallel()
@@ -208,7 +251,7 @@ func TestSnapshotFail(t *testing.T) {

 					e.RunAndExpectSuccess(t, "policy", "set", snapSource, "--ignore-dir-errors", tcIgnoreDirErr, "--ignore-file-errors", tcIgnoreFileErr)
 					restoreDir := fmt.Sprintf("%s%d_%v_%v", restoreDirPrefix, tcIdx, tcIgnoreDirErr, tcIgnoreFileErr)
-					testPermissions(t, e, snapSource, modifyEntry, restoreDir, tc.expectSuccess)
+					testPermissions(t, e, snapSource, modifyEntry, restoreDir, tc.expectSuccess, snapshotCreateFlags, snapshotCreateEnv)

 					e.RunAndExpectSuccess(t, "policy", "remove", snapSource)
 				})
@@ -246,58 +289,112 @@ func createSimplestFileTree(t *testing.T, dirDepth, currDepth int, currPath stri
 // files and directories (if present). It issues the kopia snapshot command
 // against "source" and will test permissions against all entries in "parentDir".
 // It returns the number of successful snapshot operations.
-func testPermissions(t *testing.T, e *testenv.CLITest, source, modifyEntry, restoreDir string, expectSuccess map[os.FileMode]bool) int {
-	t.Helper()
-
+// nolint:thelper
+func testPermissions(t *testing.T, e *testenv.CLITest, source, modifyEntry, restoreDir string, expect map[os.FileMode]expectedSnapshotResult, snapshotCreateFlags, snapshotCreateEnv []string) int {
 	var numSuccessfulSnapshots int

 	changeFile, err := os.Stat(modifyEntry)
 	testenv.AssertNoError(t, err)

 	// Iterate over all permission bit configurations
-	for chmod, shouldSucceed := range expectSuccess {
-		mode := changeFile.Mode()
-		t.Logf("Chmod: path: %s, isDir: %v, prevMode: %v, newMode: %v", modifyEntry, changeFile.IsDir(), mode, chmod)
+	for chmod, expected := range expect {
+		// run in nested function go be able to do defer
+		func() {
+			mode := changeFile.Mode()

-		err := os.Chmod(modifyEntry, chmod)
-		testenv.AssertNoError(t, err)
+			// restore permissions even if we fail to avoid leaving non-deletable files behind.
+			defer func() {
+				t.Logf("restoring file mode on %s to %v", modifyEntry, mode)
+				testenv.AssertNoError(t, os.Chmod(modifyEntry, mode.Perm()))
+			}()

-		if shouldSucceed {
-			// Currently by default, the uploader has IgnoreFileErrors set to true.
-			// Expect warning and successful snapshot creation
-			_, errOut := e.RunAndExpectSuccessWithErrOut(t, "snapshot", "create", source)
-			snapID := parseSnapID(t, errOut)
-			numSuccessfulSnapshots++
+			t.Logf("Chmod: path: %s, isDir: %v, prevMode: %v, newMode: %v", modifyEntry, changeFile.IsDir(), mode, chmod)

-			// Expect that since the snapshot succeeded, the data can be restored
-			e.RunAndExpectSuccess(t, "snapshot", "restore", snapID, restoreDir)
-		} else {
-			e.RunAndExpectFailure(t, "snapshot", "create", source)
-		}
+			err := os.Chmod(modifyEntry, chmod)
+			testenv.AssertNoError(t, err)

-		// Change permissions back and expect success
-		err = os.Chmod(modifyEntry, mode.Perm())
-		testenv.AssertNoError(t, err)
-		e.RunAndExpectSuccess(t, "snapshot", "create", source)
-		numSuccessfulSnapshots++
+			// set up environment for the child process.
+			oldEnv := e.Environment
+			e.Environment = append(append([]string{}, e.Environment...), snapshotCreateEnv...)
+
+			defer func() { e.Environment = oldEnv }()
+
+			snapshotCreateWithArgs := append([]string{"snapshot", "create", source}, snapshotCreateFlags...)
+
+			_, errOut, runErr := e.Run(t, expected.success, snapshotCreateWithArgs...)
+
+			if got, want := (runErr == nil), expected.success; got != want {
+				t.Fatalf("unexpected success %v, want %v", got, want)
+			}
+
+			parsed := parseSnapshotResult(t, errOut)
+
+			if expected.success {
+				numSuccessfulSnapshots++
+
+				e.RunAndExpectSuccess(t, "snapshot", "restore", parsed.manifestID, restoreDir)
+			}
+
+			if got, want := parsed.errorCount, expected.wantErrors; got != want {
+				t.Fatalf("unexpected number of errors: %v, want %v", got, want)
+			}
+
+			if got, want := parsed.ignoredErrorCount, expected.wantIgnoredErrors; got != want {
+				t.Fatalf("unexpected number of ignored errors: %v, want %v", got, want)
+			}
+
+			if got, want := parsed.partial, expected.wantPartial; got != want {
+				t.Fatalf("unexpected partial %v, want %v (%s)", got, want, errOut)
+			}
+		}()
 	}

 	return numSuccessfulSnapshots
 }

-func parseSnapID(t *testing.T, lines []string) string {
+var (
+	createdSnapshotPattern = regexp.MustCompile(`Created (.*)snapshot with root (\S+) and ID (\S+) in .*`)
+	fatalErrorsPattern     = regexp.MustCompile(`Found (\d+) fatal error\(s\) while snapshotting`)
+	ignoredErrorsPattern   = regexp.MustCompile(`Ignored (\d+) error\(s\) while snapshotting`)
+)
+
+type parsedSnapshotResult struct {
+	partial           bool
+	rootID            string
+	manifestID        string
+	errorCount        int
+	ignoredErrorCount int
+}
+
+func parseSnapshotResult(t *testing.T, lines []string) parsedSnapshotResult {
 	t.Helper()

-	pattern := regexp.MustCompile(`Created snapshot with root \S+ and ID (\S+) in .*`)
+	var (
+		err error
+		res parsedSnapshotResult
+	)

 	for _, l := range lines {
-		match := pattern.FindAllStringSubmatch(l, 1)
-		if len(match) > 0 && len(match[0]) > 1 {
-			return match[0][1]
+		if match := createdSnapshotPattern.FindStringSubmatch(l); match != nil {
+			res.partial = strings.TrimSpace(match[1]) == "partial"
+			res.rootID = match[2]
+			res.manifestID = match[3]
+		}
+
+		if match := fatalErrorsPattern.FindStringSubmatch(l); match != nil {
+			res.errorCount, err = strconv.Atoi(match[1])
+			if err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		if match := ignoredErrorsPattern.FindStringSubmatch(l); match != nil {
+			res.ignoredErrorCount, err = strconv.Atoi(match[1])
+			if err != nil {
+				t.Fatal(err)
+			}
 		}
 	}

-	t.Fatal("Snap ID could not be parsed")
-
-	return ""
+	return res
 }
--- a/tests/testenv/cli_test_env.go
+++ b/tests/testenv/cli_test_env.go
@@ -85,7 +85,7 @@ func NewCLITest(t *testing.T) *CLITest {
 	logsDir := filepath.Join(os.TempDir(), "kopia-logs", cleanName+"."+clock.Now().Local().Format("20060102150405"))

 	t.Cleanup(func() {
-		if t.Failed() {
+		if t.Failed() && os.Getenv("KOPIA_DISABLE_LOG_DUMP_ON_FAILURE") == "" {
 			dumpLogs(t, logsDir)
 		}