Skip to content

Commit

Permalink
Merge pull request ceph#41444 from Daniel-Pivonka/cephadm-monitoring-…
Browse files Browse the repository at this point in the history
…ip-bind

mgr/cephadm: allow monitoring stack ip binding and custom ports

Reviewed-by: Sebastian Wagner <[email protected]>
Reviewed-by: Patrick Seidensal <[email protected]>
Reviewed-by: Adam King <[email protected]>
  • Loading branch information
tchaikov authored Jun 8, 2021
2 parents fb25fa7 + adecb2d commit 04fa416
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 26 deletions.
18 changes: 18 additions & 0 deletions doc/cephadm/monitoring.rst
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,24 @@ update its configuration:

The ``reconfig`` command also sets the proper URL for Ceph Dashboard.

Networks and Ports
~~~~~~~~~~~~~~~~~~

All monitoring services can have the network and port they bind to configured with a YAML service specification.

Example spec file:

.. code-block:: yaml

    service_type: grafana
    service_name: grafana
    placement:
      count: 1
    networks:
    - 192.169.142.0/24
    spec:
      port: 4200

Using custom images
~~~~~~~~~~~~~~~~~~~

Expand Down
24 changes: 11 additions & 13 deletions src/cephadm/cephadm
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ class Monitoring(object):
'args': [
'--config.file=/etc/prometheus/prometheus.yml',
'--storage.tsdb.path=/prometheus',
'--web.listen-address=:{}'.format(port_map['prometheus'][0]),
],
'config-json-files': [
'prometheus.yml',
Expand Down Expand Up @@ -311,7 +310,6 @@ class Monitoring(object):
'cpus': '2',
'memory': '2GB',
'args': [
'--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
'--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
],
'config-json-files': [
Expand Down Expand Up @@ -2144,6 +2142,17 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
elif daemon_type in Monitoring.components:
metadata = Monitoring.components[daemon_type]
r += metadata.get('args', list())
# set ip and port to bind to for nodeexporter,alertmanager,prometheus
if daemon_type != 'grafana':
ip = ''
port = Monitoring.port_map[daemon_type][0]
if 'meta_json' in ctx and ctx.meta_json:
meta = json.loads(ctx.meta_json) or {}
if 'ip' in meta and meta['ip']:
ip = meta['ip']
if 'ports' in meta and meta['ports']:
port = meta['ports'][0]
r += [f'--web.listen-address={ip}:{port}']
if daemon_type == 'alertmanager':
config = get_parm(ctx.config_json)
peers = config.get('peers', list()) # type: ignore
Expand Down Expand Up @@ -2943,15 +2952,7 @@ class Firewalld(object):
# Configure firewalld for one daemon type: builds a Firewalld helper from
# `ctx`, calls enable_service_for(daemon_type), opens the collected ports and
# finally calls apply_rules().
#
# NOTE(review): this span comes from a rendered diff without +/- markers. The
# hunk header for this function reads "-2943,15 +2952,7", which suggests the
# fw_ports / open_ports lines below are the *removed* side of the change --
# confirm against the real file before relying on them.
def update_firewalld(ctx, daemon_type):
# type: (CephadmContext, str) -> None
firewall = Firewalld(ctx)

firewall.enable_service_for(daemon_type)

# Ports to open; stays empty unless daemon_type has an entry in
# Monitoring.port_map.
fw_ports = []

if daemon_type in Monitoring.port_map.keys():
fw_ports.extend(Monitoring.port_map[daemon_type]) # prometheus etc

firewall.open_ports(fw_ports)
firewall.apply_rules()


Expand Down Expand Up @@ -4344,9 +4345,6 @@ def command_deploy(ctx):
elif daemon_type in Monitoring.components:
# monitoring daemon - prometheus, grafana, alertmanager, node-exporter
# Default Checks
if not ctx.reconfig and not redeploy:
daemon_ports.extend(Monitoring.port_map[daemon_type])

# make sure provided config-json is sufficient
config = get_parm(ctx.config_json) # type: ignore
required_files = Monitoring.components[daemon_type].get('config-json-files', list())
Expand Down
34 changes: 22 additions & 12 deletions src/pybind/mgr/cephadm/services/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
prom_services = [] # type: List[str]
for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
assert dd.hostname is not None
prom_services.append(dd.hostname)
addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
port = dd.ports[0] if dd.ports else 9095
prom_services.append(addr + ':' + str(port))
deps.append(dd.name())
grafana_data_sources = self.mgr.template.render(
'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services})
Expand All @@ -53,7 +55,10 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
})

grafana_ini = self.mgr.template.render(
'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})
'services/grafana/grafana.ini.j2', {
'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT,
'http_addr': daemon_spec.ip if daemon_spec.ip else ''
})

config_file = {
'files': {
Expand All @@ -76,8 +81,9 @@ def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
# TODO: signed cert
dd = self.get_active_daemon(daemon_descrs)
assert dd.hostname is not None
service_url = 'https://{}:{}'.format(
self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
service_url = 'https://{}:{}'.format(addr, port)
self._set_service_url_on_dashboard(
'Grafana',
'dashboard get-grafana-api-url',
Expand Down Expand Up @@ -170,8 +176,9 @@ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDes
def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
dd = self.get_active_daemon(daemon_descrs)
assert dd.hostname is not None
service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname),
self.DEFAULT_SERVICE_PORT)
addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
service_url = 'http://{}:{}'.format(addr, port)
self._set_service_url_on_dashboard(
'AlertManager',
'dashboard get-alertmanager-api-host',
Expand Down Expand Up @@ -232,19 +239,21 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
assert dd.hostname is not None
deps.append(dd.name())
addr = self.mgr.inventory.get_addr(dd.hostname)
addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
port = str(dd.ports[0]) if dd.ports else '9100'
nodes.append({
'hostname': dd.hostname,
'url': addr.split(':')[0] + ':9100'
'url': addr.split(':')[0] + ':' + port
})

# scrape alert managers
alertmgr_targets = []
for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
assert dd.hostname is not None
deps.append(dd.name())
addr = self.mgr.inventory.get_addr(dd.hostname)
alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))
addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
port = str(dd.ports[0]) if dd.ports else '9093'
alertmgr_targets.append("'{}:{}'".format(addr.split(':')[0], port))

# scrape haproxies
haproxy_targets = []
Expand Down Expand Up @@ -293,8 +302,9 @@ def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDes
def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
dd = self.get_active_daemon(daemon_descrs)
assert dd.hostname is not None
service_url = 'http://{}:{}'.format(
self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
service_url = 'http://{}:{}'.format(addr, port)
self._set_service_url_on_dashboard(
'Prometheus',
'dashboard get-prometheus-api-host',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ datasources:
type: 'prometheus'
access: 'proxy'
orgId: 1
url: 'http://{{ host }}:9095'
url: 'http://{{ host }}'
basicAuth: false
isDefault: {{ 'true' if loop.first else 'false' }}
editable: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
cert_file = /etc/grafana/certs/cert_file
cert_key = /etc/grafana/certs/cert_key
http_port = {{ http_port }}
http_addr = {{ http_addr }}
[security]
admin_user = admin
admin_password = admin
Expand Down
34 changes: 34 additions & 0 deletions src/pybind/mgr/cephadm/tests/test_cephadm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest

import yaml

from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
from cephadm.serve import CephadmServe
from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
Expand Down Expand Up @@ -298,6 +300,38 @@ def test_daemon_check_extra_config(self, _run_cephadm, cephadm_module: CephadmOr
+ '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}',
image='')

# Checks that a custom `port` in an alertmanager service spec shows up in the
# arguments of the resulting cephadm 'deploy' call.
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
# Stub the cephadm call: stdout '{}', empty stderr, return code 0.
_run_cephadm.return_value = ('{}', '', 0)

with with_host(cephadm_module, 'test'):

# Alertmanager spec that sets a custom port (4200).
# NOTE(review): the nesting of this YAML literal is not visible in this
# rendering -- `count` and `port` are presumably indented under
# `placement` and `spec`; confirm against the real file.
yaml_str = """service_type: alertmanager
service_name: alertmanager
placement:
count: 1
spec:
port: 4200
"""
yaml_file = yaml.safe_load(yaml_str)
spec = ServiceSpec.from_json(yaml_file)

# generate_config is patched to return an empty config and no deps, so only
# the command-line arguments are under test.
with mock.patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
with with_service(cephadm_module, spec):

CephadmServe(cephadm_module)._check_daemons()

# The most recent cephadm call must carry the custom port followed by 9094,
# both in the --meta-json "ports" list and in --tcp-ports.
_run_cephadm.assert_called_with(
'test', 'alertmanager.test', 'deploy', [
'--name', 'alertmanager.test',
'--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}',
'--config-json', '-',
'--tcp-ports', '4200 9094',
'--reconfig'
],
stdin='{}',
image='')

@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
with with_host(cephadm_module, 'test'):
Expand Down
55 changes: 55 additions & 0 deletions src/python-common/ceph/deployment/service_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,9 @@ def _cls(cls: Type[ServiceSpecT], service_type: str) -> Type[ServiceSpecT]:
'alertmanager': AlertManagerSpec,
'ingress': IngressSpec,
'container': CustomContainerSpec,
'grafana': MonitoringSpec,
'node-exporter': MonitoringSpec,
'prometheus': MonitoringSpec,
}.get(service_type, cls)
if ret == ServiceSpec and not service_type:
raise SpecValidationError('Spec needs a "service_type" key.')
Expand Down Expand Up @@ -852,6 +855,7 @@ def __init__(self,
user_data: Optional[Dict[str, Any]] = None,
config: Optional[Dict[str, str]] = None,
networks: Optional[List[str]] = None,
port: Optional[int] = None,
):
assert service_type == 'alertmanager'
super(AlertManagerSpec, self).__init__(
Expand All @@ -874,6 +878,23 @@ def __init__(self,
# added to the default receivers'
# <webhook_configs> configuration.
self.user_data = user_data or {}
self.port = port

def get_port_start(self) -> List[int]:
    """Return the list of ports for this service.

    The first entry is whatever ``get_port()`` returns; the second is
    always 9094.
    """
    web_port = self.get_port()
    return [web_port, 9094]

def get_port(self) -> int:
    """Return ``self.port`` when it is set (truthy), otherwise 9093."""
    return self.port or 9093

def validate(self) -> None:
    """Run the parent class validation, then reject port 9094.

    Raises:
        SpecValidationError: if ``self.port`` equals 9094.
    """
    super(AlertManagerSpec, self).validate()

    port_is_reserved = self.port == 9094
    if port_is_reserved:
        raise SpecValidationError(
            'Port 9094 is reserved for AlertManager cluster listen address')


yaml.add_representer(AlertManagerSpec, ServiceSpec.yaml_representer)
Expand Down Expand Up @@ -1017,3 +1038,37 @@ def config_json(self) -> Dict[str, Any]:


yaml.add_representer(CustomContainerSpec, ServiceSpec.yaml_representer)


class MonitoringSpec(ServiceSpec):
    """Service spec for the grafana, node-exporter and prometheus services.

    Extends ``ServiceSpec`` with an optional ``port`` attribute. When no
    port is given, ``get_port()`` falls back to a per-service default.
    """

    def __init__(self,
                 service_type: str,
                 service_id: Optional[str] = None,
                 config: Optional[Dict[str, str]] = None,
                 networks: Optional[List[str]] = None,
                 placement: Optional[PlacementSpec] = None,
                 unmanaged: bool = False,
                 preview_only: bool = False,
                 port: Optional[int] = None,
                 ):
        # Only these three service types are handled by this class.
        assert service_type in ['grafana', 'node-exporter', 'prometheus']

        super(MonitoringSpec, self).__init__(
            service_type, service_id,
            placement=placement, unmanaged=unmanaged,
            preview_only=preview_only, config=config,
            networks=networks)

        self.service_type = service_type
        # Custom port requested by the user; None means "use the default".
        self.port = port

    def get_port_start(self) -> List[int]:
        """Return a one-element list holding the result of ``get_port()``."""
        return [self.get_port()]

    def get_port(self) -> int:
        """Return ``self.port`` when set (truthy), else the service default.

        Defaults: prometheus 9095, node-exporter 9100, grafana 3000.
        """
        if self.port:
            return self.port
        else:
            return {'prometheus': 9095,
                    'node-exporter': 9100,
                    'grafana': 3000}[self.service_type]


# Register the shared representer for this class, as is done for the other
# spec classes in this module (e.g. AlertManagerSpec, CustomContainerSpec).
yaml.add_representer(MonitoringSpec, ServiceSpec.yaml_representer)

0 comments on commit 04fa416

Please sign in to comment.