Files
glances/glances/plugins/glances_docker.py

438 lines
17 KiB
Python

# -*- coding: utf-8 -*-
#
# This file is part of Glances.
#
# Copyright (C) 2015 Nicolargo <nicolas@nicolargo.com>
#
# Glances is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Glances is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Docker plugin."""
import numbers
import os
import re
# Import Glances libs
from glances.core.glances_timer import getTimeSinceLastUpdate
from glances.core.glances_logging import logger
from glances.plugins.glances_plugin import GlancesPlugin
# Docker-py library (optional and Linux-only)
# https://github.com/docker/docker-py
try:
import docker
import requests
except ImportError as e:
logger.debug("Docker library not found (%s). Glances cannot grab Docker info." % e)
docker_tag = False
else:
docker_tag = True
class Plugin(GlancesPlugin):
"""Glances' Docker plugin.
stats is a list
"""
def __init__(self, args=None):
"""Init the plugin."""
GlancesPlugin.__init__(self, args=args)
# The plgin can be disable using: args.disable_docker
self.args = args
# We want to display the stat in the curse interface
self.display_curse = True
# Init the Docker API
self.docker_client = False
def connect(self, version=None):
"""Connect to the Docker server"""
# Init connection to the Docker API
try:
if version is None:
ret = docker.Client(base_url='unix://var/run/docker.sock')
else:
ret = docker.Client(base_url='unix://var/run/docker.sock',
version=version)
except NameError:
# docker lib not found
return None
try:
ret.version()
except requests.exceptions.ConnectionError as e:
# Connexion error (Docker not detected)
# Let this message in debug mode
logger.debug("Can't connect to the Docker server (%s)" % e)
return None
except docker.errors.APIError as e:
if version is None:
# API error (Version mismatch ?)
logger.debug("Docker API error (%s)" % e)
# Try the connection with the server version
version = re.search('server\:\ (.*)\)\".*\)', str(e))
if version:
logger.debug("Try connection with Docker API version %s" % version.group(1))
ret = self.connect(version=version.group(1))
else:
logger.debug("Can not retreive Docker server version")
ret = None
else:
# API error
logger.error("Docker API error (%s)" % e)
ret = None
except Exception as e:
# Others exceptions...
# Connexion error (Docker not detected)
logger.error("Can't connect to the Docker server (%s)" % e)
ret = None
# Log an info if Docker plugin is disabled
if ret is None:
logger.debug("Docker plugin is disable because an error has been detected")
return ret
def reset(self):
"""Reset/init the stats."""
self.stats = {}
@GlancesPlugin._log_result_decorator
def update(self):
"""Update Docker stats using the input method.
"""
# Reset stats
self.reset()
# Get the current Docker API client
if not self.docker_client:
# First time, try to connect to the server
self.docker_client = self.connect()
if self.docker_client is None:
global docker_tag
docker_tag = False
# The Docker-py lib is mandatory
if not docker_tag or (self.args is not None and self.args.disable_docker):
return self.stats
if self.input_method == 'local':
# Update stats
# Exemple: {
# "KernelVersion": "3.16.4-tinycore64",
# "Arch": "amd64",
# "ApiVersion": "1.15",
# "Version": "1.3.0",
# "GitCommit": "c78088f",
# "Os": "linux",
# "GoVersion": "go1.3.3"
# }
self.stats['version'] = self.docker_client.version()
# Example: [{u'Status': u'Up 36 seconds',
# u'Created': 1420378904,
# u'Image': u'nginx:1',
# u'Ports': [{u'Type': u'tcp', u'PrivatePort': 443},
# {u'IP': u'0.0.0.0', u'Type': u'tcp', u'PublicPort': 8080, u'PrivatePort': 80}],
# u'Command': u"nginx -g 'daemon off;'",
# u'Names': [u'/webstack_nginx_1'],
# u'Id': u'b0da859e84eb4019cf1d965b15e9323006e510352c402d2f442ea632d61faaa5'}]
self.stats['containers'] = self.docker_client.containers()
# Get stats for all containers
for c in self.stats['containers']:
if not hasattr(self, 'docker_stats'):
# Create a dict with all the containers' stats instance
self.docker_stats = {}
if c['Id'] not in self.docker_stats:
# Create the stats instance for the current container
try:
self.docker_stats[c['Id']] = self.docker_client.stats(c['Id'], decode=True)
logger.debug("Create Docker stats object for container {}".format(c['Id']))
except (AttributeError, docker.errors.InvalidVersion) as e:
logger.error("Can not call Docker stats method {}".format(e))
# Get the docker stats
try:
all_stats = self.docker_stats[c['Id']].next()
except:
all_stats = {}
c['cpu'] = self.get_docker_cpu(c['Id'], all_stats)
c['memory'] = self.get_docker_memory(c['Id'], all_stats)
# c['network'] = self.get_docker_network(c['Id'], all_stats)
elif self.input_method == 'snmp':
# Update stats using SNMP
# Not available
pass
return self.stats
def get_docker_cpu_old(self, container_id):
"""Return the container CPU usage by reading /sys/fs/cgroup/...
Input: id is the full container id
Output: a dict {'total': 1.49, 'user': 0.65, 'system': 0.84}"""
ret = {}
# Read the stats
try:
with open('/sys/fs/cgroup/cpuacct/docker/' + container_id + '/cpuacct.stat', 'r') as f:
for line in f:
m = re.search(r"(system|user)\s+(\d+)", line)
if m:
ret[m.group(1)] = int(m.group(2))
except IOError as e:
logger.error("Can not grab container CPU stat ({0})".format(e))
return ret
if isinstance(ret["system"], numbers.Number) and isinstance(ret["user"], numbers.Number):
ret["total"] = ret["system"] + ret["user"]
# Return the stats
return ret
def get_docker_cpu(self, container_id, all_stats):
"""Return the container CPU usage
Input: id is the full container id
all_stats is the output of the stats method of the Docker API
Output: a dict {'total': 1.49}"""
cpu_new = {}
ret = {'total': 0.0}
# Read the stats
# For each container, you will find a pseudo-file cpuacct.stat,
# containing the CPU usage accumulated by the processes of the container.
# Those times are expressed in ticks of 1/USER_HZ of a second.
# On x86 systems, USER_HZ is 100.
try:
cpu_new['total'] = all_stats['cpu_stats']['cpu_usage']['total_usage']
cpu_new['system'] = all_stats['cpu_stats']['system_cpu_usage']
cpu_new['nb_core'] = len(all_stats['cpu_stats']['cpu_usage']['percpu_usage'])
except KeyError as e:
# all_stats do not have CPU information
logger.debug("Can not grab CPU usage for container {0} ({1}). Trying fallback method.".format(container_id, e))
# Trying fallback to old grab method
ret = self.get_docker_cpu_old(container_id)
# Get the user ticks
ticks = self.get_user_ticks()
for k in ret.keys():
ret[k] = float(ret[k]) / ticks
else:
# Previous CPU stats stored in the cpu_old variable
if not hasattr(self, 'cpu_old'):
# First call, we init the cpu_old variable
self.cpu_old = {}
try:
self.cpu_old[container_id] = cpu_new
except (IOError, UnboundLocalError):
pass
if container_id not in self.cpu_old:
try:
self.cpu_old[container_id] = cpu_new
except (IOError, UnboundLocalError):
pass
else:
#
cpu_delta = float(cpu_new['total'] - self.cpu_old[container_id]['total'])
system_delta = float(cpu_new['system'] - self.cpu_old[container_id]['system'])
if cpu_delta > 0.0 and system_delta > 0.0:
ret['total'] = (cpu_delta / system_delta) * float(cpu_new['nb_core']) * 100
# Save stats to compute next stats
self.cpu_old[container_id] = cpu_new
# Return the stats
return ret
def get_docker_memory_old(self, container_id):
"""Return the container MEMORY usage by reading /sys/fs/cgroup/...
Input: id is the full container id
Output: a dict {'rss': 1015808, 'cache': 356352}"""
ret = {}
# Read the stats
try:
with open('/sys/fs/cgroup/memory/docker/' + container_id + '/memory.stat', 'r') as f:
for line in f:
m = re.search(r"(rss|cache)\s+(\d+)", line)
if m:
ret[m.group(1)] = int(m.group(2))
except IOError as e:
logger.error("Can not grab container MEM stat ({0})".format(e))
return ret
# Return the stats
return ret
def get_docker_memory(self, container_id, all_stats):
"""Return the container MEMORY
Input: id is the full container id
all_stats is the output of the stats method of the Docker API
Output: a dict {'rss': 1015808, 'cache': 356352, 'usage': ..., 'max_usage': ...}"""
ret = {}
# Read the stats
try:
ret['rss'] = all_stats['memory_stats']['stats']['rss']
ret['cache'] = all_stats['memory_stats']['stats']['cache']
ret['usage'] = all_stats['memory_stats']['usage']
ret['max_usage'] = all_stats['memory_stats']['max_usage']
except KeyError as e:
# all_stats do not have MEM information
logger.debug("Can not grab MEM usage for container {0} ({1}). Trying fallback method.".format(container_id, e))
# Trying fallback to old grab method
ret = self.get_docker_memory_old(container_id)
# Return the stats
return ret
def get_docker_network(self, container_id, all_stats):
"""Return the container network usage using the Docker API (v1.0 or higher)
Input: id is the full container id
Output: a dict {'time_since_update': 3000, 'rx': 10, 'tx': 65}"""
# Init the returned dict
network_new = {}
# Read the rx/tx stats (in bytes)
try:
netiocounters = all_stats["network"]
except KeyError as e:
# all_stats do not have NETWORK information
logger.debug("Can not grab NET usage for container {0} ({1})".format(container_id, e))
# No fallback available...
return network_new
# Previous network interface stats are stored in the network_old variable
if not hasattr(self, 'netiocounters_old'):
# First call, we init the network_old var
self.netiocounters_old = {}
try:
self.netiocounters_old[container_id] = netiocounters
except (IOError, UnboundLocalError):
pass
if container_id not in self.netiocounters_old:
try:
self.netiocounters_old[container_id] = netiocounters
except (IOError, UnboundLocalError):
pass
else:
# By storing time data we enable Rx/s and Tx/s calculations in the
# XML/RPC API, which would otherwise be overly difficult work
# for users of the API
network_new['time_since_update'] = getTimeSinceLastUpdate('docker_net_{}'.format(container_id))
network_new['rx'] = netiocounters["rx_bytes"] - self.netiocounters_old[container_id]["rx_bytes"]
network_new['tx'] = netiocounters["tx_bytes"] - self.netiocounters_old[container_id]["tx_bytes"]
network_new['cumulative_rx'] = netiocounters["rx_bytes"]
network_new['cumulative_tx'] = netiocounters["tx_bytes"]
# Save stats to compute next bitrate
self.netiocounters_old[container_id] = netiocounters
# Return the stats
return network_new
def get_user_ticks(self):
"""return the user ticks by reading the environment variable"""
return os.sysconf(os.sysconf_names['SC_CLK_TCK'])
def msg_curse(self, args=None):
"""Return the dict to display in the curse interface."""
# Init the return message
ret = []
# Only process if stats exist (and non null) and display plugin enable...
if not self.stats or args.disable_docker or len(self.stats['containers']) == 0:
return ret
# Build the string message
# Title
msg = '{0}'.format(_("CONTAINERS"))
ret.append(self.curse_add_line(msg, "TITLE"))
msg = ' {0}'.format(len(self.stats['containers']))
ret.append(self.curse_add_line(msg))
msg = ' ({0} {1})'.format(_("served by Docker"),
self.stats['version']["Version"])
ret.append(self.curse_add_line(msg))
ret.append(self.curse_new_line())
# Header
ret.append(self.curse_new_line())
msg = '{0:>14}'.format(_("Id"))
ret.append(self.curse_add_line(msg))
msg = ' {0:20}'.format(_("Name"))
ret.append(self.curse_add_line(msg))
msg = '{0:>26}'.format(_("Status"))
ret.append(self.curse_add_line(msg))
msg = '{0:>6}'.format(_("CPU%"))
ret.append(self.curse_add_line(msg))
msg = '{0:>7}'.format(_("MEM"))
ret.append(self.curse_add_line(msg))
# msg = '{0:>6}'.format(_("Rx/s"))
# ret.append(self.curse_add_line(msg))
# msg = '{0:>6}'.format(_("Tx/s"))
# ret.append(self.curse_add_line(msg))
msg = ' {0:8}'.format(_("Command"))
ret.append(self.curse_add_line(msg))
# Data
for container in self.stats['containers']:
ret.append(self.curse_new_line())
# Id
msg = '{0:>14}'.format(container['Id'][0:12])
ret.append(self.curse_add_line(msg))
# Name
name = container['Names'][0]
if len(name) > 20:
name = '_' + name[:-19]
else:
name = name[:20]
msg = ' {0:20}'.format(name)
ret.append(self.curse_add_line(msg))
# Status
status = self.container_alert(container['Status'])
msg = container['Status'].replace("minute", "min")
msg = '{0:>26}'.format(msg[0:25])
ret.append(self.curse_add_line(msg, status))
# CPU
try:
msg = '{0:>6.1f}'.format(container['cpu']['total'])
except KeyError:
msg = '{0:>6}'.format('?')
ret.append(self.curse_add_line(msg))
# MEM
try:
msg = '{0:>7}'.format(self.auto_unit(container['memory']['usage']))
except KeyError:
msg = '{0:>7}'.format('?')
ret.append(self.curse_add_line(msg))
# NET RX/TX
# for r in ['rx', 'tx']:
# try:
# value = self.auto_unit(int(container['network'][r] // container['network']['time_since_update'] * 8)) + "b"
# #value = self.auto_unit(int(container['network']['cumulative_' + r])) + 'b'
# msg = '{0:>6}'.format(value)
# except KeyError:
# msg = '{0:>6}'.format('?')
# ret.append(self.curse_add_line(msg))
# Command
msg = ' {0}'.format(container['Command'])
ret.append(self.curse_add_line(msg))
return ret
def container_alert(self, status):
"""Analyse the container status"""
if "Paused" in status:
return 'CAREFUL'
else:
return 'OK'