Files
2026-04-01 06:01:18 +00:00

233 lines
6.3 KiB
Go

package main
import (
"context"
"flag"
"fmt"
"log/slog"
"net/http"
"os"
"os/signal"
"regexp"
"strings"
"sync"
"syscall"
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/client"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// cachePeriod is how long the collector keeps serving the results of the
// previous docker inspect pass before it queries the daemon again.
const cachePeriod = time.Second
// ContainerClient is the subset of Docker client operations the collector
// depends on: listing containers and inspecting one container by ID.
// Keeping it as an interface lets the collector be given any implementation
// of these two calls.
type ContainerClient interface {
// ContainerList lists containers according to options.
ContainerList(ctx context.Context, options container.ListOptions) ([]types.Container, error)
// ContainerInspect returns the inspect data for the container identified by containerID.
ContainerInspect(ctx context.Context, containerID string) (types.ContainerJSON, error)
}
// dockerHealthCollector is a Prometheus collector that exposes per-container
// state metrics built from cached docker inspect results.
type dockerHealthCollector struct {
// mu is held for the whole of Collect, serializing scrapes and guarding
// containerInfoCache and lastseen.
mu sync.Mutex
// containerClient is used to list and inspect containers when the cache is refreshed.
containerClient ContainerClient
// containerInfoCache holds the inspect results gathered by the most recent refresh.
containerInfoCache []types.ContainerJSON
// lastseen is the time recorded for the most recent cache refresh; its zero
// value makes the first Collect refresh immediately.
lastseen time.Time
}
// descSource holds the metric name and help text from which a
// prometheus.Desc is built on demand.
type descSource struct {
// name is the full metric name passed to prometheus.NewDesc.
name string
// help is the help text passed to prometheus.NewDesc.
help string
}
// Desc builds a fresh prometheus.Desc for this metric. The descriptor has no
// variable labels; labels (which may be nil) are attached as constant labels.
func (d *descSource) Desc(labels prometheus.Labels) *prometheus.Desc {
	var variableLabels []string // none: every label is supplied as a constant label
	return prometheus.NewDesc(d.name, d.help, variableLabels, labels)
}
// Metric definitions exported by the collector.
//
// Every metric name except restartcountDesc is built from the namespace
// prefix; restartcountDesc uses the literal name "container_restartcount".
var (
	namespace = "container_state_"

	healthStatusDesc = descSource{
		name: namespace + "health_status",
		help: "Container health status.",
	}
	statusDesc = descSource{
		name: namespace + "status",
		help: "Container status.",
	}
	oomkilledDesc = descSource{
		name: namespace + "oomkilled",
		help: "Container was killed by OOMKiller.",
	}
	startedatDesc = descSource{
		name: namespace + "startedat",
		help: "Time when the Container started.",
	}
	finishedatDesc = descSource{
		name: namespace + "finishedat",
		help: "Time when the Container finished.",
	}
	restartcountDesc = descSource{
		name: "container_restartcount",
		help: "Number of times the container has been restarted",
	}
)
// Describe implements prometheus.Collector. It sends one label-less
// descriptor per exported metric to ch, in a fixed order.
func (c *dockerHealthCollector) Describe(ch chan<- *prometheus.Desc) {
	sources := []*descSource{
		&healthStatusDesc,
		&statusDesc,
		&oomkilledDesc,
		&startedatDesc,
		&finishedatDesc,
		&restartcountDesc,
	}
	for _, src := range sources {
		ch <- src.Desc(nil)
	}
}
// Collect implements prometheus.Collector. Under the collector's mutex it
// refreshes the container cache when at least cachePeriod has elapsed since
// the last refresh, then emits metrics for every cached container to ch.
func (c *dockerHealthCollector) Collect(ch chan<- prometheus.Metric) {
	c.mu.Lock()
	defer c.mu.Unlock()

	scrapeStart := time.Now()
	cacheAge := scrapeStart.Sub(c.lastseen)
	if stale := cacheAge >= cachePeriod; stale {
		c.collectContainer()
		// Record the time the scrape started, not the time the refresh finished.
		c.lastseen = scrapeStart
	}

	c.collectMetrics(ch)
}
// labelNameSanitizer matches every character that may not appear in the label
// names generated by collectMetrics. It is compiled once at package
// initialization instead of once per container per scrape.
var labelNameSanitizer = regexp.MustCompile("[^a-zA-Z0-9_]")

// collectMetrics emits the metrics for every container in the cache to ch.
//
// Each metric carries, as constant labels, the container's Docker labels
// (lowercased, prefixed with "container_label_", with every character outside
// [a-zA-Z0-9_] replaced by "_") plus "id", "image" and "name". The health and
// status metrics are emitted once per known state with an additional "status"
// label, valued 1 for the container's current state and 0 otherwise.
//
// Every cached entry must have a non-nil Config and State.Health. A start or
// finish timestamp that does not parse as RFC 3339 is passed to errCheck,
// which terminates the process.
func (c *dockerHealthCollector) collectMetrics(ch chan<- prometheus.Metric) {
	// b2f converts a boolean into the 0/1 gauge value.
	b2f := func(b bool) float64 {
		if b {
			return 1
		}
		return 0
	}
	// withStatus returns a copy of base extended with a "status" label, so the
	// per-state series never share (or mutate) the container's base label map.
	withStatus := func(base map[string]string, status string) prometheus.Labels {
		dst := make(prometheus.Labels, len(base)+1)
		for k, v := range base {
			dst[k] = v
		}
		dst["status"] = status
		return dst
	}

	for _, info := range c.containerInfoCache {
		labels := make(map[string]string, len(info.Config.Labels)+3)
		for k, v := range info.Config.Labels {
			name := strings.ToLower("container_label_" + k)
			labels[labelNameSanitizer.ReplaceAllLiteralString(name, "_")] = v
		}
		labels["id"] = "/docker/" + info.ID
		labels["image"] = info.Config.Image
		labels["name"] = strings.TrimPrefix(info.Name, "/")

		for _, lv := range []string{"none", "starting", "healthy", "unhealthy"} {
			ch <- prometheus.MustNewConstMetric(healthStatusDesc.Desc(withStatus(labels, lv)), prometheus.GaugeValue, b2f(info.State.Health.Status == lv))
		}
		for _, lv := range []string{"paused", "restarting", "running", "removing", "dead", "created", "exited"} {
			ch <- prometheus.MustNewConstMetric(statusDesc.Desc(withStatus(labels, lv)), prometheus.GaugeValue, b2f(info.State.Status == lv))
		}
		ch <- prometheus.MustNewConstMetric(oomkilledDesc.Desc(labels), prometheus.GaugeValue, b2f(info.State.OOMKilled))

		startedat, err := time.Parse(time.RFC3339Nano, info.State.StartedAt)
		errCheck(err)
		finishedat, err := time.Parse(time.RFC3339Nano, info.State.FinishedAt)
		errCheck(err)
		ch <- prometheus.MustNewConstMetric(startedatDesc.Desc(labels), prometheus.GaugeValue, float64(startedat.Unix()))
		ch <- prometheus.MustNewConstMetric(finishedatDesc.Desc(labels), prometheus.GaugeValue, float64(finishedat.Unix()))
		ch <- prometheus.MustNewConstMetric(restartcountDesc.Desc(labels), prometheus.GaugeValue, float64(info.RestartCount))
	}
}
// collectContainer rebuilds containerInfoCache by listing all containers
// (including stopped ones) and inspecting each of them.
//
// Entries are normalized before being cached so that collectMetrics can
// dereference them unconditionally: a missing Config is replaced by one with
// an empty label map, and a missing health section by one with status "none".
// Any error from the Docker client is passed to errCheck, which terminates
// the process.
func (c *dockerHealthCollector) collectContainer() {
	ctx := context.Background()

	containers, err := c.containerClient.ContainerList(ctx, container.ListOptions{All: true})
	errCheck(err)

	cache := make([]types.ContainerJSON, 0, len(containers))
	for _, ct := range containers {
		info, err := c.containerClient.ContainerInspect(ctx, ct.ID)
		errCheck(err)

		// Apply the defaults BEFORE appending: append stores a copy of the
		// struct, so a Config assigned afterwards would only change the local
		// variable and the cached entry would keep its nil Config.
		if info.Config == nil {
			info.Config = &container.Config{Labels: map[string]string{}}
		}
		if info.State.Health == nil {
			info.State.Health = &types.Health{Status: "none"}
		}
		cache = append(cache, info)
	}
	c.containerInfoCache = cache
}
// logger is the process-wide structured logger. It is nil until assigned and
// must be set before errCheck is called with a non-nil error.
var logger *slog.Logger

// errCheck is a no-op when err is nil. Otherwise it logs err at error level
// and terminates the process with exit status 1; deferred functions of the
// caller do not run.
func errCheck(err error) {
	if err == nil {
		return
	}
	logger.Error("error occurred", "err", err)
	os.Exit(1)
}
// Command-line flags.

// address is the value of -listen-address: the address the HTTP server
// listens on. It defaults to ":8080".
var address = flag.String("listen-address", ":8080", "The address to listen on for HTTP requests.")
// init sets up the package logger (JSON records on stdout, info level and
// above) and registers the build-info collector with the default Prometheus
// registry.
func init() {
	logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo}))

	buildInfo := prometheus.NewBuildInfoCollector()
	prometheus.MustRegister(buildInfo)
}
// main connects to the Docker daemon configured through the environment,
// registers the container state collector, and serves the landing page,
// /-/healthy and /metrics on the -listen-address until SIGTERM or an
// interrupt arrives, at which point the HTTP server is shut down gracefully
// (bounded to five seconds).
func main() {
	flag.Parse()

	// NewClientWithOpts(FromEnv) is the documented replacement for the
	// deprecated NewEnvClient. The local is named cli so that it does not
	// shadow the client package.
	cli, err := client.NewClientWithOpts(client.FromEnv)
	errCheck(err)
	defer cli.Close()

	// Fail fast when the daemon is unreachable.
	_, err = cli.Ping(context.Background())
	errCheck(err)

	prometheus.MustRegister(&dockerHealthCollector{
		containerClient: cli,
	})

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, "<h1>docker state exporter</h1>")
	})
	http.HandleFunc("/-/healthy", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, "up")
	})
	http.Handle("/metrics", promhttp.HandlerFor(
		prometheus.DefaultGatherer,
		promhttp.HandlerOpts{EnableOpenMetrics: true}))

	logger.Info("Server listening", "address", *address)
	server := &http.Server{
		Addr:    *address,
		Handler: nil, // nil selects http.DefaultServeMux, where the handlers above are registered
		// Bound how long a client may take to send its request headers so
		// that stalled connections cannot be held open indefinitely.
		ReadHeaderTimeout: 10 * time.Second,
	}
	go func() {
		// Use a goroutine-local error; writing to main's err from here would
		// share that variable between two goroutines.
		if err := server.ListenAndServe(); err != http.ErrServerClosed {
			errCheck(err)
		}
	}()

	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGTERM, os.Interrupt)
	<-quit

	logger.Info("Server shutting down")
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	if err := server.Shutdown(ctx); err != nil {
		logger.Error("Failed to gracefully shutdown", "err", err)
	}
	logger.Info("Server shutdown")
}