feat(cli): save observability metric before exiting the process (#3201)

Objective: to facilitate capturing metrics for relatively short-lived kopia
executions.

Motivation: Currently, kopia allows (a) exposing metrics via a Prometheus
exporter, and (b) pushing metrics to a Prometheus gateway. In certain
scenarios, such as short lived executions, it is not possible to reliably
scrape the metrics from the exporter endpoint. And, while the pusher
approach would work, it requires starting a separate push gateway.

This change allows saving the metrics to a local file before kopia exits
by specifying the metrics output directory. For example,

`kopia --metrics-directory /tmp/kopia-metrics/ snapshot create ....`

In this case, after kopia exits, the `/tmp/kopia-metrics/` directory will
contain a file with a name of the form:
`<date>-<time>-<command_subcommand>.prom`
This commit is contained in:
Julio Lopez
2023-08-09 17:40:45 -07:00
committed by GitHub
parent 36b84edf42
commit 8b8d4a574b
2 changed files with 61 additions and 0 deletions

View File

@@ -4,6 +4,8 @@
"context"
"net/http"
"net/http/pprof"
"os"
"path/filepath"
"sort"
"strings"
"sync"
@@ -21,9 +23,13 @@
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
"github.com/kopia/kopia/internal/clock"
"github.com/kopia/kopia/repo"
)
// DirMode is the permission mode used when creating output directories.
// The value 0o700 grants read, write, and search access to the owner only.
const DirMode = 0o700
//nolint:gochecknoglobals
var metricsPushFormats = map[string]expfmt.Format{
"text": expfmt.FmtText,
@@ -44,6 +50,8 @@ type observabilityFlags struct {
metricsPushUsername string
metricsPushPassword string
metricsPushFormat string
metricsOutputDir string
metricsFileName string
enableJaeger bool
@@ -76,6 +84,36 @@ func (c *observabilityFlags) setup(svc appServices, app *kingpin.Application) {
sort.Strings(formats)
app.Flag("metrics-push-format", "Format to use for push gateway").Envar(svc.EnvName("KOPIA_METRICS_FORMAT")).Hidden().EnumVar(&c.metricsPushFormat, formats...)
app.Flag("metrics-directory", "Directory where the metrics should be saved when kopia exits. A file per process execution will be created in this directory").Hidden().StringVar(&c.metricsOutputDir)
app.PreAction(c.initialize)
}
// initialize is registered as a kingpin pre-action and prepares the location
// of the metrics file that is written when the process stops its metrics.
//
// When no metrics output directory was requested it does nothing. Otherwise it
// normalizes the directory path, creates the directory (and any missing
// parents) with DirMode, and computes a file name of the form
// "<date>-<time>-<command-subcommand>.prom". "unknown" stands in for the
// command part when the parse context has no selected command.
//
// It returns an error only if the output directory cannot be created.
func (c *observabilityFlags) initialize(ctx *kingpin.ParseContext) error {
	if c.metricsOutputDir == "" {
		return nil
	}

	outDir := filepath.Clean(c.metricsOutputDir)
	c.metricsOutputDir = outDir

	// Fail early, before the command runs, if the directory is not usable.
	if err := os.MkdirAll(outDir, DirMode); err != nil {
		return errors.Wrapf(err, "could not create metrics output directory: %s", outDir)
	}

	// One file per command and per process execution: the timestamp prefix
	// keeps a new run from overwriting metrics files left by earlier runs.
	timestamp := clock.Now().Format("20060102-150405")

	commandLabel := "unknown"
	if selected := ctx.SelectedCommand; selected != nil {
		commandLabel = strings.ReplaceAll(selected.FullCommand(), " ", "-")
	}

	c.metricsFileName = timestamp + "-" + commandLabel + ".prom"

	return nil
}
func (c *observabilityFlags) startMetrics(ctx context.Context) error {
@@ -196,6 +234,12 @@ func (c *observabilityFlags) stopMetrics(ctx context.Context) {
log(ctx).Warnf("unable to shutdown trace provicer: %v", err)
}
}
if c.metricsOutputDir != "" {
if err := prometheus.WriteToTextfile(filepath.Join(c.metricsOutputDir, c.metricsFileName), prometheus.DefaultGatherer); err != nil {
log(ctx).Warnf("unable to write metrics file '%s': %v", c.metricsFileName, err)
}
}
}
func (c *observabilityFlags) pushPeriodically(ctx context.Context, p *push.Pusher) {

View File

@@ -4,6 +4,7 @@
"io"
"net/http"
"net/http/httptest"
"os"
"sync"
"testing"
@@ -77,3 +78,19 @@ func TestMetricsPushFlags(t *testing.T) {
"--metrics-push-grouping=a=s",
)
}
// TestMetricsSaveToOutputDirFlags checks that running a command with the
// --metrics-directory flag leaves exactly one entry in that directory.
func TestMetricsSaveToOutputDirFlags(t *testing.T) {
	runner := testenv.NewInProcRunner(t)
	e := testenv.NewCLITest(t, testenv.RepoFormatNotImportant, runner)

	// A repository must exist before "repo status" can succeed.
	repoDir := testutil.TempDirectory(t)
	e.RunAndExpectSuccess(t, "repo", "create", "filesystem", "--path", repoDir)

	// Use a separate, initially empty directory for the metrics output.
	metricsDir := testutil.TempDirectory(t)
	e.RunAndExpectSuccess(t, "repo", "status", "--metrics-directory", metricsDir)

	files, readErr := os.ReadDir(metricsDir)
	require.NoError(t, readErr)
	require.Len(t, files, 1, "a metrics output file should have been created")
}