From 8bb22d6396f453155578001a8c889aa8b8f5c281 Mon Sep 17 00:00:00 2001 From: Elliott Neil Clark Date: Fri, 20 Sep 2013 16:25:10 -0700 Subject: [PATCH] Move hbase and hadoop to http /jmx collection * remove the old java jars * Create an http base class for hadoop metrics2 * Create hadoop namenode and datanode subclasses * create hbase master and regionserver subclasses * refactor some functions into utils. Signed-off-by: Benoit Sigoure --- collectors/0/dfstat.py | 11 +- collectors/0/elasticsearch.py | 13 +- collectors/0/hadoop_datanode.py | 70 ++++ collectors/0/hadoop_datanode_jmx.py | 214 ---------- collectors/0/hadoop_namenode.py | 68 ++++ collectors/0/haproxy.py | 11 +- collectors/0/hbase_master.py | 63 +++ collectors/0/hbase_regionserver.py | 84 ++++ collectors/0/hbase_regionserver_jmx.py | 288 -------------- collectors/0/mysql.py | 11 +- collectors/0/postgresql.py | 11 +- collectors/lib/hadoop_http.py | 77 ++++ collectors/lib/jmx-1.0.jar | Bin 10400 -> 0 bytes collectors/lib/utils.py | 27 +- stumbleupon/monitoring/.gitignore | 1 - stumbleupon/monitoring/Makefile | 87 ---- stumbleupon/monitoring/jmx.java | 531 ------------------------- tcollector.py | 2 +- 18 files changed, 400 insertions(+), 1169 deletions(-) create mode 100644 collectors/0/hadoop_datanode.py delete mode 100755 collectors/0/hadoop_datanode_jmx.py create mode 100644 collectors/0/hadoop_namenode.py create mode 100755 collectors/0/hbase_master.py create mode 100755 collectors/0/hbase_regionserver.py delete mode 100755 collectors/0/hbase_regionserver_jmx.py create mode 100644 collectors/lib/hadoop_http.py delete mode 100644 collectors/lib/jmx-1.0.jar delete mode 100644 stumbleupon/monitoring/.gitignore delete mode 100644 stumbleupon/monitoring/Makefile delete mode 100644 stumbleupon/monitoring/jmx.java diff --git a/collectors/0/dfstat.py b/collectors/0/dfstat.py index d0e2d0f0..ca790e55 100755 --- a/collectors/0/dfstat.py +++ b/collectors/0/dfstat.py @@ -46,17 +46,12 @@ "rootfs", ]) - -def 
err(msg): - print >> sys.stderr, msg - - def main(): """dfstats main loop""" try: f_mounts = open("/proc/mounts", "r") except IOError, e: - err("error: can't open /proc/mounts: %s" % e) + utils.err("error: can't open /proc/mounts: %s" % e) return 13 # Ask tcollector to not respawn us utils.drop_privileges() @@ -77,7 +72,7 @@ def main(): try: fs_spec, fs_file, fs_vfstype, fs_mntops, fs_freq, fs_passno = line.split(None) except ValueError, e: - err("error: can't parse line at /proc/mounts: %s" % e) + utils.err("error: can't parse line at /proc/mounts: %s" % e) continue if fs_spec == "none": @@ -107,7 +102,7 @@ def main(): try: r = os.statvfs(fs_file) except OSError, e: - err("error: can't get info for mount point: %s" % fs_file) + utils.err("error: can't get info for mount point: %s" % fs_file) continue used = r.f_blocks - r.f_bfree diff --git a/collectors/0/elasticsearch.py b/collectors/0/elasticsearch.py index cad44399..6e36c4eb 100755 --- a/collectors/0/elasticsearch.py +++ b/collectors/0/elasticsearch.py @@ -44,11 +44,6 @@ "red": 2, } - -def is_numeric(value): - return isinstance(value, (int, long, float)) - - def err(msg): print >>sys.stderr, msg @@ -145,7 +140,7 @@ def printmetric(metric, value, **tags): for stat, value in cstats.iteritems(): if stat == "status": value = STATUS_MAP.get(value, -1) - elif not is_numeric(value): + elif not utils.is_numeric(value): continue printmetric("cluster." + stat, value) @@ -248,14 +243,14 @@ def printmetric(metric, value, **tags): del d if "network" in nstats: for stat, value in nstats["network"]["tcp"].iteritems(): - if is_numeric(value): + if utils.is_numeric(value): printmetric("network.tcp." + stat, value) for stat, value in nstats["transport"].iteritems(): - if is_numeric(value): + if utils.is_numeric(value): printmetric("transport." + stat, value) # New in ES 0.17: for stat, value in nstats.get("http", {}).iteritems(): - if is_numeric(value): + if utils.is_numeric(value): printmetric("http." 
+ stat, value) del nstats time.sleep(COLLECTION_INTERVAL) diff --git a/collectors/0/hadoop_datanode.py b/collectors/0/hadoop_datanode.py new file mode 100644 index 00000000..eaac8c1c --- /dev/null +++ b/collectors/0/hadoop_datanode.py @@ -0,0 +1,70 @@ +#!/usr/bin/python +# This file is part of tcollector. +# Copyright (C) 2010 The tcollector Authors. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. This program is distributed in the hope that it +# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +# General Public License for more details. You should have received a copy +# of the GNU Lesser General Public License along with this program. If not, +# see . + +import sys +import time + +try: + import json +except ImportError: + json = None + +from collectors.lib import utils +from collectors.lib.hadoop_http import HadoopHttp + + +REPLACEMENTS = { + "datanodeactivity-": ["activity"], + "fsdatasetstate-ds-": ["fs_data_set_state"], + "rpcdetailedactivityforport": ["rpc_activity"], + "rpcactivityforport": ["rpc_activity"] +} + + +class HadoopDataNode(HadoopHttp): + """ + Class that will retrieve metrics from an Apache Hadoop DataNode's jmx page. + + This requires Apache Hadoop 1.0+ or Hadoop 2.0+. 
+ Anything that has the jmx page will work but the best results will come from Hadoop 2.1.0+ + """ + + def __init__(self): + super(HadoopDataNode, self).__init__('hadoop', 'datanode', 'localhost', 50075) + + def emit(self): + current_time = int(time.time()) + metrics = self.poll() + for context, metric_name, value in metrics: + for k, v in REPLACEMENTS.iteritems(): + if any(c.startswith(k) for c in context): + context = v + self.emit_metric(context, current_time, metric_name, value) + + +def main(args): + utils.drop_privileges() + if json is None: + utils.err("This collector requires the `json' Python module.") + return 13 # Ask tcollector not to respawn us + datanode_service = HadoopDataNode() + while True: + datanode_service.emit() + time.sleep(15) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) + diff --git a/collectors/0/hadoop_datanode_jmx.py b/collectors/0/hadoop_datanode_jmx.py deleted file mode 100755 index 657fbbea..00000000 --- a/collectors/0/hadoop_datanode_jmx.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/python -# This file is part of tcollector. -# Copyright (C) 2012 The tcollector Authors. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. This program is distributed in the hope that it -# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser -# General Public License for more details. You should have received a copy -# of the GNU Lesser General Public License along with this program. If not, -# see . - -import os -import re -import signal -import subprocess -import sys -import time - -from collectors.lib import utils - -# How oftent to poll -INTERVAL="60" - -# If this user doesn't exist, we'll exit immediately.
-# If we're running as root, we'll drop privileges using this user. -USER = "hadoop" - -# We add those files to the classpath if they exist. -CLASSPATH = [ - "/usr/lib/jvm/default-java/lib/tools.jar", -] - -# Map certain JVM stats so they are unique and shorter -JMX_SERVICE_RENAMING = { - "GarbageCollector": "datanode.gc", - "OperatingSystem": "datanode.os", - "Threading": "datanode.threads", -} - -IGNORED_METRICS = frozenset(["revision", "hdfsUser", "hdfsDate", "hdfsUrl", "date", - "hdfsRevision", "user", "hdfsVersion", "url", "version", - "NamenodeAddress", "Version", "RpcPort", "HttpPort", - # These are useless as-is because they represent the - # thread that's dedicated to serving JMX RPCs. - "CurrentThreadCpuTime", "CurrentThreadUserTime", - # List of directories used by the DataNode. - "StorageInfo", - "VolumeInfo", - ]) - -# How many times, maximum, will we attempt to restart the JMX collector. -# If we reach this limit, we'll exit with an error. -MAX_RESTARTS = 10 - -TOP = False # Set to True when we want to terminate. -RETVAL = 0 # Return value set by signal handler. - - -def kill(proc): - """Kills the subprocess given in argument.""" - # Clean up after ourselves. - proc.stdout.close() - rv = proc.poll() - if rv is None: - os.kill(proc.pid, 15) - rv = proc.poll() - if rv is None: - os.kill(proc.pid, 9) # Bang bang! - rv = proc.wait() # This shouldn't block too long. - print >>sys.stderr, "warning: proc exited %d" % rv - return rv - - -def do_on_signal(signum, func, *args, **kwargs): - """Calls func(*args, **kwargs) before exiting when receiving signum.""" - def signal_shutdown(signum, frame): - print >>sys.stderr, "got signal %d, exiting" % signum - func(*args, **kwargs) - sys.exit(128 + signum) - signal.signal(signum, signal_shutdown) - - -def main(argv): - utils.drop_privileges(user=USER) - # Build the classpath. 
- dir = os.path.dirname(sys.argv[0]) - jar = os.path.normpath(dir + "/../lib/jmx-1.0.jar") - if not os.path.exists(jar): - print >>sys.stderr, "WTF?! Can't run, %s doesn't exist" % jar - return 13 - classpath = [jar] - for jar in CLASSPATH: - if os.path.exists(jar): - classpath.append(jar) - classpath = ":".join(classpath) - - jpid = "DataNode" - jps = subprocess.check_output("jps").split("\n") - for item in jps: - vals = item.split(" ") - if len(vals) == 2: - if vals[1] == "DataNode": - jpid = vals[0] - break - - jmx = subprocess.Popen( - ["java", "-enableassertions", "-enablesystemassertions", # safe++ - "-Xmx64m", # Low RAM limit, to avoid stealing too much from prod. - "-cp", classpath, "com.stumbleupon.monitoring.jmx", - "--watch", INTERVAL, "--long", "--timestamp", - jpid, # Name of the process. - # The remaining arguments are pairs (mbean_regexp, attr_regexp). - # The first regexp is used to match one or more MBeans, the 2nd - # to match one or more attributes of the MBeans matched. - "hadoop", "", # All HBase / hadoop metrics. - "Threading", "Count|Time$", # Number of threads and CPU time. - "OperatingSystem", "OpenFile", # Number of open files. - "GarbageCollector", "Collection", # GC runs and time spent GCing. - ], stdout=subprocess.PIPE, bufsize=1) - do_on_signal(signal.SIGINT, kill, jmx) - do_on_signal(signal.SIGPIPE, kill, jmx) - do_on_signal(signal.SIGTERM, kill, jmx) - try: - prev_timestamp = 0 - while True: - line = jmx.stdout.readline() - - if not line and jmx.poll() is not None: - break # Nothing more to read and process exited. - elif len(line) < 4: - print >>sys.stderr, "invalid line (too short): %r" % line - continue - - timestamp, metric, value, mbean = line.split("\t", 3) - # Sanitize the timestamp. 
- try: - timestamp = int(timestamp) - if timestamp < time.time() - 600: - raise ValueError("timestamp too old: %d" % timestamp) - if timestamp < prev_timestamp: - raise ValueError("timestamp out of order: prev=%d, new=%d" - % (prev_timestamp, timestamp)) - except ValueError, e: - print >>sys.stderr, ("Invalid timestamp on line: %r -- %s" - % (line, e)) - continue - prev_timestamp = timestamp - - if metric in IGNORED_METRICS: - continue - - tags = "" - # The JMX metrics have per-request-type metrics like so: - # metricNameNumOps - # metricNameMinTime - # metricNameMaxTime - # metricNameAvgTime - # Group related metrics together in the same metric name, use tags - # to separate the different request types, so we end up with: - # numOps op=metricName - # avgTime op=metricName - # etc, which makes it easier to graph things with the TSD. - if metric.endswith("MinTime"): # We don't care about the minimum - continue # time taken by operations. - elif metric.endswith("NumOps"): - tags = " op=" + metric[:-6] - metric = "numOps" - elif metric.endswith("AvgTime"): - tags = " op=" + metric[:-7] - metric = "avgTime" - elif metric.endswith("MaxTime"): - tags = " op=" + metric[:-7] - metric = "maxTime" - - # mbean is of the form "domain:key=value,...,foo=bar" - # some tags can have spaces, so we need to fix that. - mbean_domain, mbean_properties = mbean.rstrip().replace(" ", "_").split(":", 1) - if mbean_domain not in ("hadoop", "java.lang"): - print >>sys.stderr, ("Unexpected mbean domain = %r on line %r" - % (mbean_domain, line)) - continue - mbean_properties = dict(prop.split("=", 1) - for prop in mbean_properties.split(",")) - if mbean_domain == "hadoop": - # jmx_service is HBase by default, but we can also have - # RegionServer or Replication and such. 
- jmx_service = mbean_properties.get("service", "HBase") - if jmx_service == "HBase": - jmx_service = "regionserver" - elif mbean_domain == "java.lang": - jmx_service = mbean_properties.pop("type", "jvm") - if mbean_properties: - tags += " " + " ".join(k + "=" + v for k, v in - mbean_properties.iteritems()) - else: - assert 0, "Should never be here" - - jmx_service = JMX_SERVICE_RENAMING.get(jmx_service, jmx_service) - metric = jmx_service.lower() + "." + metric - - sys.stdout.write("hadoop.%s %d %s%s\n" - % (metric, timestamp, value, tags)) - sys.stdout.flush() - finally: - kill(jmx) - time.sleep(300) - return 0 # Ask the tcollector to re-spawn us. - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/collectors/0/hadoop_namenode.py b/collectors/0/hadoop_namenode.py new file mode 100644 index 00000000..06b8f86d --- /dev/null +++ b/collectors/0/hadoop_namenode.py @@ -0,0 +1,68 @@ +#!/usr/bin/python +# This file is part of tcollector. +# Copyright (C) 2010 The tcollector Authors. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. This program is distributed in the hope that it +# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +# General Public License for more details. You should have received a copy +# of the GNU Lesser General Public License along with this program. If not, +# see . 
+ +import sys +import time + +try: + import json +except ImportError: + json = None + +from collectors.lib import utils +from collectors.lib.hadoop_http import HadoopHttp + + +REPLACEMENTS = { + "rpcdetailedactivityforport": ["rpc_activity"], + "rpcactivityforport": ["rpc_activity"] +} + + +class HadoopNameNode(HadoopHttp): + """ + Class that will retrieve metrics from an Apache Hadoop NameNode's jmx page. + + This requires Apache Hadoop 1.0+ or Hadoop 2.0+. + Anything that has the jmx page will work but the best results will come from Hadoop 2.1.0+ + """ + + def __init__(self): + super(HadoopNameNode, self).__init__('hadoop', 'namenode', 'localhost', 50070) + + def emit(self): + current_time = int(time.time()) + metrics = self.poll() + for context, metric_name, value in metrics: + for k, v in REPLACEMENTS.iteritems(): + if any(c.startswith(k) for c in context): + context = v + self.emit_metric(context, current_time, metric_name, value) + + +def main(args): + utils.drop_privileges() + if json is None: + utils.err("This collector requires the `json' Python module.") + return 13 # Ask tcollector not to respawn us + name_node_service = HadoopNameNode() + while True: + name_node_service.emit() + time.sleep(90) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) + diff --git a/collectors/0/haproxy.py b/collectors/0/haproxy.py index b414d1e1..4eae3a37 100755 --- a/collectors/0/haproxy.py +++ b/collectors/0/haproxy.py @@ -35,9 +35,6 @@ COLLECTION_INTERVAL = 15 -def err(e): - print >>sys.stderr, e - def haproxy_pid(): """Finds out the pid of haproxy process""" try: @@ -51,7 +48,7 @@ def find_conf_file(pid): try: output = subprocess.check_output(["ps", "--no-headers", "-o", "cmd", pid]) except subprocess.CalledProcessError, e: - err("HAProxy (pid %s) went away? %s" % (pid, e)) + utils.err("HAProxy (pid %s) went away? 
%s" % (pid, e)) return None return output.split("-f")[1].split()[0] @@ -60,7 +57,7 @@ def find_sock_file(conf_file): try: fd = open(conf_file) except IOError, e: - err("Error: %s. Config file path is relative: %s" % (e, conf_file)) + utils.err("Error: %s. Config file path is relative: %s" % (e, conf_file)) return None try: for line in fd: @@ -93,7 +90,7 @@ def collect_stats(sock): def main(): pid = haproxy_pid() if not pid: - err("Error: HAProxy is not running") + utils.err("Error: HAProxy is not running") return 13 # Ask tcollector to not respawn us. conf_file = find_conf_file(pid) @@ -102,7 +99,7 @@ def main(): sock_file = find_sock_file(conf_file) if sock_file is None: - err("Error: HAProxy is not listening on any unix domain socket") + utils.err("Error: HAProxy is not listening on any unix domain socket") return 13 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) diff --git a/collectors/0/hbase_master.py b/collectors/0/hbase_master.py new file mode 100755 index 00000000..0f435a09 --- /dev/null +++ b/collectors/0/hbase_master.py @@ -0,0 +1,63 @@ +#!/usr/bin/python +# This file is part of tcollector. +# Copyright (C) 2010 The tcollector Authors. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. This program is distributed in the hope that it +# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +# General Public License for more details. You should have received a copy +# of the GNU Lesser General Public License along with this program. If not, +# see . 
+ +import sys +import time + +try: + import json +except ImportError: + json = None + +from collectors.lib import utils +from collectors.lib.hadoop_http import HadoopHttp + + +EXCLUDED_CONTEXTS = ('regionserver', 'regions', ) + + +class HBaseMaster(HadoopHttp): + """ + Class to get metrics from Apache HBase's master + + Require HBase 0.96.0+ + """ + + def __init__(self): + super(HBaseMaster, self).__init__('hbase', 'master', 'localhost', 60010) + + def emit(self): + current_time = int(time.time()) + metrics = self.poll() + for context, metric_name, value in metrics: + if any(c in EXCLUDED_CONTEXTS for c in context): + continue + self.emit_metric(context, current_time, metric_name, value) + + +def main(args): + utils.drop_privileges() + if json is None: + utils.err("This collector requires the `json' Python module.") + return 13 # Ask tcollector not to respawn us + hbase_service = HBaseMaster() + while True: + hbase_service.emit() + time.sleep(90) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) + diff --git a/collectors/0/hbase_regionserver.py b/collectors/0/hbase_regionserver.py new file mode 100755 index 00000000..9a2168e5 --- /dev/null +++ b/collectors/0/hbase_regionserver.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# This file is part of tcollector. +# Copyright (C) 2010 The tcollector Authors. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. This program is distributed in the hope that it +# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +# General Public License for more details. You should have received a copy +# of the GNU Lesser General Public License along with this program. If not, +# see . 
+ +import time + +try: + import json +except ImportError: + json = None + +from collectors.lib import utils +from collectors.lib.hadoop_http import HadoopHttp + +EMIT_REGION = True + +EXCLUDED_CONTEXTS = ("master") + +class HBaseRegionserver(HadoopHttp): + def __init__(self): + super(HBaseRegionserver, self).__init__("hbase", "regionserver", "localhost", 60030) + + def emit_region_metric(self, context, current_time, full_metric_name, value): + split_metric = full_metric_name.split("_") + if len(split_metric) < 7: + utils.err("Error splitting %s" % full_metric_name) + return + + namespace = split_metric[1] + table = split_metric[3] + region = split_metric[5] + metric_name = "_".join(split_metric[7:]) + tag_dict = {"namespace": namespace, "table": table, "region": region} + + if any( not v for k,v in tag_dict.iteritems()): + utils.err("Error splitting %s" % full_metric_name) + else: + self.emit_metric(context, current_time, metric_name, value, tag_dict) + + def emit(self): + """ + Emit metrics from a HBase regionserver. 
+ + This will only emit per region metrics if EMIT_REGION is set to true + """ + current_time = int(time.time()) + metrics = self.poll() + for context, metric_name, value in metrics: + if any( c in EXCLUDED_CONTEXTS for c in context): + continue + + if any(c == "regions" for c in context): + if EMIT_REGION: + self.emit_region_metric(context, current_time, metric_name, value) + else: + self.emit_metric(context, current_time, metric_name, value) + + +def main(args): + utils.drop_privileges() + if json is None: + utils.err("This collector requires the `json' Python module.") + return 13 # Ask tcollector not to respawn us + hbase_service = HBaseRegionserver() + while True: + hbase_service.emit() + time.sleep(15) + return 0 + + +if __name__ == "__main__": + import sys + sys.exit(main(sys.argv)) + diff --git a/collectors/0/hbase_regionserver_jmx.py b/collectors/0/hbase_regionserver_jmx.py deleted file mode 100755 index ff2daa65..00000000 --- a/collectors/0/hbase_regionserver_jmx.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/python -# This file is part of tcollector. -# Copyright (C) 2010 The tcollector Authors. -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. This program is distributed in the hope that it -# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser -# General Public License for more details. You should have received a copy -# of the GNU Lesser General Public License along with this program. If not, -# see . - -import os -import re -import signal -import subprocess -import sys -import time -from collectors.lib import utils - -# How oftent to poll -INTERVAL="60" - -# If this user doesn't exist, we'll exit immediately. 
-# If we're running as root, we'll drop privileges using this user. -USER = "hadoop" - -# Use JAVA_HOME env variable if set -JAVA_HOME = os.getenv('JAVA_HOME', '/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64') -JAVA = "%s/bin/java" % JAVA_HOME - -# We add those files to the classpath if they exist. -CLASSPATH = [ - "%s/lib/tools.jar" % JAVA_HOME, -] - -# We shorten certain strings to avoid excessively long metric names. -JMX_SERVICE_RENAMING = { - "GarbageCollector": "gc", - "OperatingSystem": "os", - "Threading": "threads", - # New in 0.92.1, from HBASE-5325: - "org.apache.hbase": "hbase", -} - -IGNORED_METRICS = set(["revision", "hdfsUser", "hdfsDate", "hdfsUrl", "date", - "hdfsRevision", "user", "hdfsVersion", "url", "version", - "Version", "RpcPort", "HttpPort","HeapMemoryUsage", - "NonHeapMemoryUsage", "tag.Context", "tag.Hostname", - "tag.SessionId" - ]) - -REPORTED_OPS = frozenset([ - "append", - "get", - "put", - "multi", - "balance", -]) - - -def kill(proc): - """Kills the subprocess given in argument.""" - # Clean up after ourselves. - proc.stdout.close() - rv = proc.poll() - if rv is None: - os.kill(proc.pid, 15) - rv = proc.poll() - if rv is None: - os.kill(proc.pid, 9) # Bang bang! - rv = proc.wait() # This shouldn't block too long. - print >>sys.stderr, "warning: proc exited %d" % rv - return rv - - -def do_on_signal(signum, func, *args, **kwargs): - """Calls func(*args, **kwargs) before exiting when receiving signum.""" - def signal_shutdown(signum, frame): - print >>sys.stderr, "got signal %d, exiting" % signum - func(*args, **kwargs) - sys.exit(128 + signum) - signal.signal(signum, signal_shutdown) - - -def main(argv): - utils.drop_privileges(user=USER) - # Build the classpath. - dir = os.path.dirname(sys.argv[0]) - jar = os.path.normpath(dir + "/../lib/jmx-1.0.jar") - if not os.path.exists(jar): - print >>sys.stderr, "WTF?! 
Can't run, %s doesn't exist" % jar - return 13 - classpath = [jar] - for jar in CLASSPATH: - if os.path.exists(jar): - classpath.append(jar) - classpath = ":".join(classpath) - - jpid = "HRegionServer" - jps = subprocess.check_output("jps").split("\n") - for item in jps: - vals = item.split(" ") - if len(vals) == 2: - if vals[1] == "HRegionServer": - jpid = vals[0] - break - - # in HBase 0.94 the mbean domain is hadoop - # in HBase 0.96 it is Hadoop (captical H) - jmx = subprocess.Popen( - [JAVA, "-enableassertions", "-enablesystemassertions", # safe++ - "-Xmx64m", # Low RAM limit, to avoid stealing too much from prod. - "-cp", classpath, "com.stumbleupon.monitoring.jmx", - "--watch", INTERVAL , "--long", "--timestamp", - jpid, # Name of the process. - # The remaining arguments are pairs (mbean_regexp, attr_regexp). - # The first regexp is used to match one or more MBeans, the 2nd - # to match one or more attributes of the MBeans matched. - "[Hh]adoop", "", # All HBase / hadoop metrics. - "Memory$", "", # Heap stats - "Threading", "Count|Time$", # Number of threads and CPU time. - "OperatingSystem", "OpenFile", # Number of open files. - "GarbageCollector", "Collection", # GC runs and time spent GCing. - ], stdout=subprocess.PIPE, bufsize=1) - do_on_signal(signal.SIGINT, kill, jmx) - do_on_signal(signal.SIGPIPE, kill, jmx) - do_on_signal(signal.SIGTERM, kill, jmx) - try: - prev_timestamp = 0 - while True: - line = jmx.stdout.readline() - - if not line and jmx.poll() is not None: - break # Nothing more to read and process exited. - elif len(line) < 4: - print >>sys.stderr, "invalid line (too short): %r" % line - continue - - try: - timestamp, metric, value, mbean = line.split("\t", 3) - except ValueError, e: - # Temporary workaround for jmx.jar not printing these lines we - # don't care about anyway properly. - if "java.lang.String" not in line: - print >>sys.stderr, "Can't split line: %r" % line - continue - - # Sanitize the timestamp. 
- try: - timestamp = int(timestamp) - if timestamp < time.time() - 600: - raise ValueError("timestamp too old: %d" % timestamp) - if timestamp < prev_timestamp: - raise ValueError("timestamp out of order: prev=%d, new=%d" - % (prev_timestamp, timestamp)) - except ValueError, e: - print >>sys.stderr, ("Invalid timestamp on line: %r -- %s" - % (line, e)) - continue - prev_timestamp = timestamp - - if metric in IGNORED_METRICS: - continue - - tags = "" - # The JMX metrics have per-request-type metrics like so: - # metricNameNumOps - # metricNameMinTime - # metricNameMaxTime - # metricNameAvgTime - # Group related metrics together in the same metric name, use tags - # to separate the different request types, so we end up with: - # numOps op=metricName - # avgTime op=metricName - # etc, which makes it easier to graph things with the TSD. - if metric.endswith("MinTime"): # We don't care about the minimum - continue # time taken by operations. - elif metric.startswith("tbl."): # Per-table/region/cf metrics - continue # ignore for now, too much spam - elif "BlockedSeconds" in metric or "LatencyHistogram" in metric: - continue # ignore for now, too much spam - elif metric.endswith("KB"): - metric = metric[:-2] - # Try converting to bytes - try: - value = float(value) * 1024 - except ValueError, e: - value = 0 - elif metric.endswith("MB"): - metric = metric[:-2] - # Try converting to bytes - try: - value = float(value) * 1024 * 1024 - except ValueError, e: - value = 0 - elif metric.endswith("NumOps"): - op = metric[:-6] - if op not in REPORTED_OPS: - continue - tags = " op=" + op - metric = "numOps" - elif metric.endswith("AvgTime"): - op = metric[:-7] - if op not in REPORTED_OPS: - continue - tags = " op=" + op - metric = "avgTime" - elif metric.endswith("MaxTime"): - op = metric[:-7] - if op not in REPORTED_OPS: - continue - tags = " op=" + op - metric = "maxTime" - - # mbean is of the form "domain:key=value,...,foo=bar" - mbean_domain, mbean_properties = 
mbean.rstrip().split(":", 1) - if mbean_domain not in ("Hadoop", "hadoop", "java.lang"): - print >>sys.stderr, ("Unexpected mbean domain = %r on line %r" - % (mbean_domain, line)) - continue - mbean_properties = dict(prop.split("=", 1) - for prop in mbean_properties.split(",")) - if mbean_domain in ("Hadoop", "hadoop"): - # Ignore metrics from RegionServerDynamicStatistics (0.94) or - # Regions (0.96 and later), which contain some metrics on - # per-region or per-column-family bases - if (mbean_properties.get("name") == "RegionServerDynamicStatistics" - or mbean_properties.get("sub") == "Regions"): - continue - - # jmx_service is HBase by default, but we can also have - # RegionServer or Replication and such. - jmx_service = mbean_properties.get("service", "HBase") - if jmx_service == "HBase": - jmx_service = "regionserver" - elif mbean_domain == "java.lang": - jmx_service = mbean_properties.pop("type", "jvm") - if mbean_properties: - tags += " " + " ".join(k + "=" + v for k, v in - mbean_properties.iteritems()) - else: - assert 0, "Should never be here" - - # Hack. Right now, the RegionServer is printing stats for its own - # replication queue, but when another RegionServer dies, this one - # may take over the replication queue of the dead one. When this - # happens, we'll get the same metrics multiple times, because - # internally the RegionServer has multiple queues (although only - # only one is actively used, the other ones get flushed and - # discarded). The following `if' statement is simply discarding - # stats for "recovered" replication queues, because we can't keep - # track of them properly in TSDB, because there is no sensible - # tag we can use to differentiate queues. - if jmx_service == "Replication": - attr_name = mbean_properties.get("name", "") - # Normally the attribute will look this: - # ReplicationSource for - # Where is the ID of the destination cluster. 
- # But when this is the recovered queue of a dead RegionServer: - # ReplicationSource for -%2C%2C - # Where , and relate to the dead RS. - # So we discriminate those entries by looking for a dash. - if "ReplicationSource" in attr_name and "-" in attr_name: - continue - - jmx_service = JMX_SERVICE_RENAMING.get(jmx_service, jmx_service) - jmx_service, repl_count = re.subn("[^a-zA-Z0-9]+", ".", - jmx_service) - if repl_count: - print >>sys.stderr, ("Warning: found malformed" - " jmx_service=%r on line=%r" - % (mbean_properties["service"], line)) - metric = jmx_service.lower() + "." + metric - - sys.stdout.write("hbase.%s %d %s%s\n" - % (metric, timestamp, value, tags)) - sys.stdout.flush() - finally: - kill(jmx) - time.sleep(300) - return 0 # Ask the tcollector to re-spawn us. - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/collectors/0/mysql.py b/collectors/0/mysql.py index b3bda4a0..489e7d8c 100755 --- a/collectors/0/mysql.py +++ b/collectors/0/mysql.py @@ -43,9 +43,6 @@ "/var/lib/mysql", ] -def err(msg): - print >>sys.stderr, msg - class DB(object): """Represents a MySQL server (as we can monitor more than 1 MySQL).""" @@ -142,7 +139,7 @@ def get_dbname(sockfile): return "default" m = re.search("/mysql-(.+)/[^.]+\.sock$", sockfile) if not m: - err("error: couldn't guess the name of the DB for " + sockfile) + utils.err("error: couldn't guess the name of the DB for " + sockfile) return None return m.group(1) @@ -193,7 +190,7 @@ def find_databases(dbs=None): cursor.execute("SELECT VERSION()") except (EnvironmentError, EOFError, RuntimeError, socket.error, MySQLdb.MySQLError), e: - err("Couldn't connect to %s: %s" % (sockfile, e)) + utils.err("Couldn't connect to %s: %s" % (sockfile, e)) continue version = cursor.fetchone()[0] dbs[dbname] = DB(sockfile, dbname, db, cursor, version) @@ -355,7 +352,7 @@ def main(args): if not find_sockfiles(): # Nothing to monitor. return 13 # Ask tcollector to not respawn us. 
if MySQLdb is None: - err("error: Python module `MySQLdb' is missing") + utils.err("error: Python module `MySQLdb' is missing") return 1 last_db_refresh = now() @@ -376,7 +373,7 @@ def main(args): # Exit on a broken pipe. There's no point in continuing # because no one will read our stdout anyway. return 2 - err("error: failed to collect data from %s: %s" % (db, e)) + utils.err("error: failed to collect data from %s: %s" % (db, e)) errs.append(dbname) for dbname in errs: diff --git a/collectors/0/postgresql.py b/collectors/0/postgresql.py index f64fb4bb..3b533a94 100755 --- a/collectors/0/postgresql.py +++ b/collectors/0/postgresql.py @@ -44,9 +44,6 @@ "/tmp", # custom compilation ]) -def err(msg): - print >>sys.stderr, msg - def find_sockdir(): """Returns a path to PostgreSQL socket file to monitor.""" for dir in SEARCH_DIRS: @@ -67,7 +64,7 @@ def postgres_connect(sockdir): % (sockdir, user, password, CONNECT_TIMEOUT)) except (EnvironmentError, EOFError, RuntimeError, socket.error), e: - err("Couldn't connect to DB :%s" % (e)) + utils.err("Couldn't connect to DB :%s" % (e)) def collect(db): """ @@ -123,18 +120,18 @@ def collect(db): # exit on a broken pipe. There is no point in continuing # because no one will read our stdout anyway. 
return 2 - err("error: failed to collect data: %s" % e) + utils.err("error: failed to collect data: %s" % e) def main(args): """Collects and dumps stats from a PostgreSQL server.""" if psycopg2 is None: - err("error: Python module 'psycopg2' is missing") + utils.err("error: Python module 'psycopg2' is missing") return 13 # Ask tcollector to not respawn us sockdir = find_sockdir() if not sockdir: # Nothing to monitor - err("error: Can't find postgresql socket file") + utils.err("error: Can't find postgresql socket file") return 13 # Ask tcollector to not respawn us db = postgres_connect(sockdir) diff --git a/collectors/lib/hadoop_http.py b/collectors/lib/hadoop_http.py new file mode 100644 index 00000000..0584c691 --- /dev/null +++ b/collectors/lib/hadoop_http.py @@ -0,0 +1,77 @@ +#!/usr/bin/python +# This file is part of tcollector. +# Copyright (C) 2011-2013 The tcollector Authors. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. This program is distributed in the hope that it +# will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty +# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +# General Public License for more details. You should have received a copy +# of the GNU Lesser General Public License along with this program. If not, +# see . 
+ +import httplib +import json +from collections import OrderedDict +from collectors.lib.utils import is_numeric + +EXCLUDED_KEYS = ( + "Name", + "name" +) + +class HadoopHttp(object): + def __init__(self, service, daemon, host, port, uri="/jmx"): + self.service = service + self.daemon = daemon + self.port = port + self.host = host + self.uri = uri + self.server = httplib.HTTPConnection(self.host, self.port) + self.server.connect() + + def request(self): + self.server.request('GET', self.uri) + resp = self.server.getresponse() + return json.loads(resp.read()) + + def poll(self): + """ + Get metrics from the http server's /jmx page, and transform them into normalized tupes + + @return: array of tuples ([u'Context', u'Array'], u'metricName', value) + """ + json_arr = self.request()['beans'] + kept = [] + for bean in json_arr: + if (not bean['name']) or (not "name=" in bean['name']): + continue + #split the name string + context = bean['name'].split("name=")[1].split(",sub=") + # Create a set that keeps the first occurrence + context = OrderedDict.fromkeys(context).keys() + # lower case and replace spaces. 
+ context = [c.lower().replace(" ", "_") for c in context] + # don't want to include the service or daemon twice + context = [c for c in context if c != self.service and c != self.daemon] + + for key, value in bean.iteritems(): + if key in EXCLUDED_KEYS: + continue + if not is_numeric(value): + continue + kept.append((context, key, value)) + return kept + + def emit_metric(self, context, current_time, metric_name, value, tag_dict=None): + if not tag_dict: + print "%s.%s.%s.%s %d %d" % (self.service, self.daemon, ".".join(context), metric_name, current_time, value) + else: + tag_string = " ".join([k + "=" + v for k, v in tag_dict.iteritems()]) + print "%s.%s.%s.%s %d %d %s" % \ + (self.service, self.daemon, ".".join(context), metric_name, current_time, value, tag_string) + + def emit(self): + pass diff --git a/collectors/lib/jmx-1.0.jar b/collectors/lib/jmx-1.0.jar deleted file mode 100644 index 8f02782a2a4fc1f86bd516d96e6773e39da69303..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10400 zcmbuFV{~TSvhQQtR>$es=96@6+qP}n>Nt5~+h)gB$LZMV80YPK&e{7tdyISb9pl#g zvOZM($EaB~=3HylPeB?S0s{o*uVrKAEd=s6g9U*Akp-v-(@V;UGk#2ffPjN2NJGQ? 
z@c{cTH--OeFvedV{|J^9mXj0*D625YiiajJ!w)c_gg%OVii$(xTcwJ0mMe$LRo5sj zR6-h0Y)aG>ScJ1WvH74ftVK@7rMf;G$f)z&RP~{X9 zX=dhpyxV*|Ios)ieqbKaX^3&q9AEjGZeZfM)od28LyS|6bM|pXZK;$J*G$Vgw$!02 ztK-QBbvTu7N#tKx_%Yry`1lsb>%WbA9l3??iG6>GR4^hzFOT^w5lkuIMu|iK>Uj}{ zP0@7`!?WXTLnh~vHOxgrNo91Ey*&HpZ;I7#Gi0NSm+~r~T|ptZ$sbu#3j&5Uej7Ty zR0z#Z6072zeN_tfwd^_h5_g28mw%EEy3r4%=VZgxvrFG)l%$bCK<2mf!7Mr{sjK!@w>f=aTi;7mf>0s zgePGa(Dy)0EC;JaN_N%YQaZY?7`#x-drA_DPzZCRRBNOj!h`(KC#68$Bs$Eag%V zc1D;-t1T7R)TK-#aTxJ1B4>h!Vb_BM>h_w0D5hZ&>(S(-a}VXw<5YYe$V1&_K@b2L zDNX0AcSavPHluD;n+LtZn!1)LG?<)m5uJivW*|n!2{nEET^m(0qaov7>;~rzwTuNf zai;{BpSE>oQz4VADeZOn94SgMt}j_;d<>G&U>~L8%U7I{BkOxBfv`0T_%3Ugu-!+d z2oV9FOGHya>knV!@o{ul;NaKRP{Qq_6mwcQExhPBH*g>3`@{~B)lNlJTN^Gz2s5o34->QLLCis0YJRUc233`kl6lQV68$IJy-5HGT zGoFfxy)eghKTZnQwA}G1xiF$WbxxKSarzr#)VPIly0fw&A%pouL7j3gpH^*|CK)A1 zEh&Rxbk3fgVTAJK`7Ks;kqVc$wmP+~G8DG)+sZoW^E!R6-1_Z8ws!B1jdG0wkvlg4 zSKgD>5NigGKiu<_1?!qX-eL;N5}PU3)Xh-+H}aqAyEc~rD*mcI(f?X~>VJP+NU6#G zSN+w_obi`1KM8v;we--{<<3`eB)H`ET17;NS3`Dipb5x%Bhj%-X{OfKznxXN4O^vh zS$Z}ZkRT6qvK4em_4A3;qkk>^EhF07#Clzy_dLXDhVuam^|+ZzuomS|H22;6G1uAe z$k)C1X_xpDq7Ly+U^8}5z-G7>l1S|upE$~~n1Fwl95>lybCjGc|8-{&HLwUw5t5mh z*6`aDEOA^vMg+WufXaRyn05`UBvIlJ>NhD;b>WHdeJ;hg4d(Kl;vi)>Ke}sPVq${C z14YPqNt3HxM@ac^-P`bQ!u`o$@-!Jq(DHO1;$ZxA4q5_viVkZ1+>kRgT(vu%miLg5 zjIhuo3A~d zQWReM<3T*L&UbX_OYRhEt)65c{C8hD#v`>IvQOA3Yjc7fFEpP)w+Y-8cYGFz(D#!h zzp~X(Qq0=@nGoRjeQ;p81bgF3B(QktW89H{7q2B`ZXfp1M956{w5Pxb`cknT{juUW z)dWQ}M3xitO979$Q)cYT=E+COA)cczZe!E7qm9J6pYpJ_NA?%>8>A1BRUW(1;PDhp z7uyXPDE9Op81N5XKk_e-O3`4of7#+{*!oqwl_0Pd^6HHJ#51QwbGQ(K`gW<(72PX! 
zn;R8gXFl#fh=hcoE1`NL0E9hETEcAH-y|h2dqN>Df5LfmnxiDB5vsS0sZZ4j<6+m- zVt~U}Pd6}aR?$gXDxYB&?n6h^Xu;>QDy@o$k}qaOB=E-`-A)C+*yAy0sok@GN~tSx zh$GhGv!|Kd`&eMIvIK6Z z!I%DM8f9;sY5D5A*_^XAnO~VPY$~Gar>d%xsH>UWNHo`-MdW(U6|O|E+V2v)q8W(J zjCE^dHkC@s`M5^IYGmsOF3BB~yMNQw;BqrnZ%)2RwsF83b_v7AYUzn!e1pdJ<}r>@=rR~%tf*3vBF z(Z{zMrwN>SR3Nc^JS6cvegFI?c{t3O7My;lE#$N)BnrQwqYkU`%d8?Sr@|A*B$s4c zWkUC?EW|+|Ia(Y{$_?M>!iTv7^2U=DcA5hE+cZNhndf&==eDyrI522G?dxC3s z*;JpS>l&#ZHxmEuEiI_M6%pcv*Qar#aWp;7=BZ$wdFqaU2}q3i7wKP$Uey=N+$qhj zlfq4|)54ji{bcv7wjm(YQD{*GK?qev#${CK_|$P`4RzV36~fUP;b?)ck~^Rucck^| z;F=q)H&R_`Exut5(nO7dXC%j{K;HV&fU=BnbXjI!l!B^b)VR`!mDsNpZ{hs%MLwWC ziqBl}QJJ$~v=Z;FO*6(*>}usgh*kw=`xU}8;JG3B$^o}V&tF983)lFMU z6|Z{PW5XQOf})k3aa);xbN*_)a+46~j5R*HcP~MGBhYTIe!7XX{H~Q4 zXInLN)ckQ#NpUgFXXdU6RoSimCRoQqig`LwW4ca`elBlX*GPSoq~^@{)a}Z9dd=R` z-)fOcS8h>Cv@Y5C<+Cx*7cFSs(+8artglwvdB_- z39u_bF3%A#du2Uez+4A3E=r`Y1xuUEQn3N@8W>s6S1@^Iy09}I-Lr~jXk_(gRcjl+ zXcUD&J`iE8KJw#1JbVT?XC`9e5TS8h*Y)*}0OTZ5h$~O=h~e zpG!u!8pd`R$M4w1ZmpSki_M?b6YuyMR0{LPC$5i+Z=63FS-eR*?`kIXkXU4knCbME zQp8*N%xYSXf+lRO&3K*p?=sUVI^GM11L+#dLcu6X>e7f06X zCszCv{BuiFkVRW5K!bpAA^mUfXypI)mj2Hhx>^&~M`dYA@W__s4izOS zArycb4q%2rAP0qwwooNXmSADZW<|%^=$HrCRxVY9E?X($S8EB|!W6b$HLF!?H@8^n zp4wHb(Pv!wUuI{@jDb9_|AM~y>7MR9^CEb(<@t8T>yAEWVq2AMS2zQItbZRG-Q%*L zesBN0x<6uEE2rOCHY&@L9K9xsUH@y3k27>_*Qhj`;g?U2YmTlRo=9G28;HGIrzI1% z)Q^Nrz&x$n4GA`~+YQReZJS3~cF&K&gCa4a!Jj5KFr35#TOvf`CG(sUveIo;3CM?o z2fGW;4t1kNrUN*cAj{@=s0>6@(W5^kJW16ZGN;kuEYZ}loe)=~tP7`;H~BJ4+n&D( zOvp;ysBjWbErg6QGd%p5KNFw(I z9z7Yj%PlMR=ceomytjJ$6SiH4aF#mj;gtUJkO`X@nOYZv2_>3ZkfqrEK*w5a=?cA# zT}`cagTu{91unAqI*=7}3GgMd7*~>kRg@5`jD0G1 zMZw+cN769j)Lq7{ImDO#gRkeRdi62mirBWp#lo@u5lfq>dZxqa8I>Z(j&9_W9G4sq=f2kdcQBjos(->K!)hJt^=LgD1m z4@a4O_R8@uai&V{^b_riq6kKTO(L&K!2HHRXrFoOX;edKOUbzyHU+batet35owaQ= zydqqxXu&~p090SK=1uBXxJYU<4HTxX&8Jw!VooM>{N~sHne`F#VU1m*75qG9wwjg8 zE@;=RiEo9SIbMtzFOx^%)9e;vZ1n|b@jOn2oDv;;wVF3}PA;aql-PtvR$0VEY6LM2 zOJYbnU6<%o)6VUALZ#XQ)#!EZX$6mMLkLI7GQ!{TCa8FS?yFbP^Z&0QMHgvVcP;PT1IT*W%>ri3h 
zx&y`>LF#j}^#J}z5KaW+;xiNp=>iZmF(;9|oHPld^yUP7&0@Ll$&fat;7j}0uU@9N zY3B6`*odL?W*TYCuRX@BP-95c$F^x@7z`>(dWoW;GE?m@pU#9JeD$Wu{3gv6)#2$! zEJT}c;yM1Y2!l;bqcNWYNGLu`Z9>Ocfo}bpb%8P=D!~}w>b8+?L*UDs`6Qk2XI3Uu z!FEL(u07*B+0R(h+dV2M+gA_IO#SMFw}(&Y@RUAg)CbYpD2s4L192bb17rQRFDVEkB)wcvHzrOusfQa#;O|#Y#UURX*ZByg|+|WJyp2^3-NCvwy;t!cJQ0p{tk=L3mj|7US z;7^lUJ+sF~MX8F#E{iT&u4-_WxQ9S8=IiOf$>eT|ysxCQ)(i&!bvv$q2 z{;aBX)yR|^zH|$hY+KkdPeCw{Yuyu6bf_4KlOnBOWrnM|m2eg4laCWkt(K&Q^?-u= zr#-hzFww>KQG{4(1J!0EZH0v68WIhK6^x$RSp@f~B2IJAf|3|WsC4{kWL0S;7Un@d zrcxLC7Z)k32>6~AM^z}YB|BiGT5D?J22~5V z%AtT3ZFm`mTQ-q+7V@v7TKxuy*(o%n@Ty7j zz=v`ylTh3e%4oTE_V*q#5TsXBZd5Bw<}-jf@|S1@yE||$7Ef3oP6C{CS|XzCGaFD9tMNOp=#s7VG3=Ai+JJa{!$o*&=Y>eer$ga6t11uN9c*a zT6qQyCO!yv5NCZd^c%{@KA{LEJ{)z>V|{Y;8`#;tNlw@YF>Sy(LJIbmZ^ggo^_t%) zeS)>H)!&y68TK`IiTFYmh4>K+^T8%q6jfrL8TODSMV&mdERl2*%avKt+V6ir=(mD?;+qR7|F3@EOWG#;M|R|&h?QQt#}9bF zxpGHFx_m6|_}`jC`ODQ7FEP(kF*Ue8?w?a5&yuau&Bu3naKqYAigy?_Ga2BUU`r*Y@JXH;|yRKFIPQM|~Gx zu`%4P^j=6dV8L%c!AYU)ZWE06c4OiJ#gdxT`JWM&({?O+7eCR3K6#pG8!2zS|v*+h)tD< zK@9T~@oQMv(MT7~S|c)H92#{_PxD(o3AoAZ(9Rv>4lh9h!YMq&xc3a*=Ch;sL^3(Y z5sNjN*iau73lLWF5tK=Y)eg9Q-wn=peCM#CnMCmmJrQiF1(UbPUb`SKoly0#E-9=n z2Aj}C@u!JT*I`6?#qLhmzlqW<5nL16rqrYg(<9oVQlKdLYdMMXs_@FJQnlFV zFv16p$mWV?CJDH`-$`+*Z8b93SCq{qe9Ssz2)h`4E`lvH;iu}~^XIXEZhi}rxQb4s zR?{0q4hv@*h}Xu-)kK-z%{qaB_cY)gw8nU>0hYc&P>}X6-M+g#W*~a~E*=bzlR5(5 z{`!aS4QW%K8|xB!NvE+2eJ&_aUj0 zx4k~_s1pCu@bJCVD2@)dc|)d%WCFD8uo4ZakYN7EjONS=-WerWdY=>+wfOy=tEtVJ z$$(yWGXj7PwfF2S>sd%N5ALS^%jslCn;;=|5^3iEMN#9@p<X7uji1ZKhIO5G@l*6r}-xKcftJGzs5SKuX@mRMN8y%-~yI z^(=e0d;&>xpHPBBVCbQ^mI|*%>Cvp|GT{2dL>@EbBYvE-qmH4Uq|Mi zH6=y;bNWDGJH;lw)|zvl9)_9@t1mB5fuNGExtf(tidP<<_=S82*Mk9nD4(s$V$oCz&@_xZU9e+gUccoUR)D+FaV^% zfrCV!`1~b(KO>m1Q#5gg1cO9MH=kysK7_Ye;@`l$w678X$EI`yV~FL=fgqG65KJ|( z_kvh|^6uM3j*Vl8ECIHrlTU3O`m$7v>=g>yM@zoc>~%Vd)P%84uT$-ZHkrofYl^72 zF%&#PI26oML7WyXtH7+i%NMG!TF8YY;TU;g`T8!r-#5$Hdat=l)~W#`2Vypl5G5}4 zNGvOq?*lsQn$~7lr#i0~=RyK(apqWH=3bZsx$QjAJD#4NrzsD9XO+JCrhj2tODzLm 
zDn#GA)SyWRm){ZpNb9m0z;fhrlev?rCb-7RL4eUH4ar*PCb@~A@o}j9UuT;Zs@gH}79;SBq%nN}MC$Amsznjg zTH?^`i+B9@4mEGn(5D_q-m+Rq8;C)r)R|a`!6aFyH@>`1Dc*AuzW!y>-=aCC@e3t> zYV?;I!J^$ELk+iqZSGeEW_`Oms8#mCXkNbAlu8+PWMr}8^Y+JTM(e?!l`2xQx@B zrNveL zh@!4b1V0Q*4*!aGmt{w2oMy$D(#)7k`fkRlg0FdCq;(mL3=dz4NL#k}b53r~a>=an9>k`u2r(Ksk z!fXpnoX4ASVN8AfS(mW#6M&h+U;AT1igGp$fNlc7R<#8>K3k4g09?*hl2Q4)DeWt? z%~mdpBUd|TwKLYwwC9z)WpDBIjHzT>9_ep8x&XYEQQ{v)Cjv?x@wdpgSnhKwp9WVu zKb6A|WPhi4NQK#yeCZCB=1YDT?G7fQ=a?l8dp;lN%VAXN+LW?v>;f)6Gql2a5`J&n5i?z= zKfC6Fm?PR-=!`mYM2a^pp`rtls`m|f7I2UA9zawYeL~X_@*yS&Tscwl3T7ycKY@Em zsVVMm%Dl6L$G>N-Da^Z@bHOH-?ma-apkpkw_N4DYUmQO>hWeoSIkqxa`>fUx={`60 zM)q@nc-H<&x+C>*b}CD})l#lQ*Li_(-K0bMPPx^M;9?Q)Sgsm&Tl(0}VxNItsM8Xg z?Zz8VQ_F+k=r;)3cOH%b;<<93pSo(N_Xvbd;Yf3q?#TChaZPwP9eKxe zw$D<`h5bKhNraF$@8x!m7xr*lg= zxtpeQ>6bRIKhA2D zopQZx)2Ri_U)DYcHPz-`2tr0Gk)_{R&-D2uyMw} zJ#W3eT5Or|jA#2ZPTie(D(jLh!pd-j+z%|JVCX#m{32bceDjryYjPn#BJzj0R&8hY zFMB7_C*reWvdIvw+qq``FJ7^G2i(PRN2D+CV;+oTwcTRIsA#|MS*n5R)MB+{5T8K~ z-m>Ke1l%b{k(pP@0(Tj5GspK;a{f&}cj0-&>5h%>kA(CO*+PVhmZ%^7H8nq#;R7luE|Y?)Rergqwo!p= z=4^==WOQ33XG6B;6vm+wR_ivkDLWaXhi*L|ZLcY}3m8b(6 zEl_f=`XoM(FuSD(Pxw9hq5H~^EkS+7sCIqve8HMtFv{+56Mi|rpsF}P`obc-NR`__ z9A1?$^K|b#Wp(k;8+`=%3S1`6M8evm|>t;21-TJr2cfdZ5ff#`+!pknk3edX-l+~$q;4dCg$r8F5Df0Ne^ ztBstc`fF1v{IW2WZk`}xwSMwpg|O)~7uNJA#&O}Y_xLSuS6n#F_StG$ZfA$P9lAq) z`zfjbw#r!+z;v2$@t3<$n5a8i9Oc;Ov`Ul8H)-ME!e(L0uL9fDYG{8 zXocaZtIDBkK9rPbpo@rnu03$-wR$p&tQLIfsFif6MXrQ9`&@G^P9et;;wx7bZr)Mx zQI0ioUOJu|ScJqiM~0rLTiT@HU=2tDq|_4(&Ib$;qdiX~<00LS$ZZdLzCq{w``Fcx zsN_D{yeAhf9~kXJ;RCMZr~}Q^4ogUMyw;O~)cadEpw55cvXe0Ztjs2(e~j(nNscWeSg z)jWFB&K?vijl<5*V0Jq{JW%!f;h&`J4hSu_$7Q>sv!99d#%At}{4%vu&Lk|3_3)4h z#Po2&HdnyEk15wh$UMaq_jjU}n$pU@VUlQurnIMrRpLHgQS?fs&zu)H<>+OHe}9I= zv@oXn4dU8RHhU9M6ChWc4m5g03|Zeko>DPt$6=1^E5?E(p*i3pR@)#S3$$5NNtGj3 zwUn@f+9{ENuN)aR?=jZqQia)Dom;!}U?f0-MnYjM1rvx2yD3*0E}b*dD^F4Q%HnAY z+FNKzL%9u>Y!5DEo01(x)Q>*H@WSBsz6CaP^?36cG@)nFcylQ1bR+5z5xy%EZ(@p` zeY!3DTUV&Qa6-<6iSYgCq7YvFy4z(A=|!0Hy$)6B^INg_gH4SbY;OY(#oMr7-wp*G 
zxrSd?FvD+Ns&5ebzh-Lhjrx?p1rtL&+pSy|FnNzB7+y}CV7I1}KFGXxEq<+mQH;+! z^LW>+BN16AvKHZD=Scx}u3_xjX_n3SIc% zw~uarNEI!G#3)uQyz=^sAt_QdRw5j4p*t>ANK{BL>2a@>yGSgQmW%09MHHG04Mp=%m)0E0 ze&#c=0ipUYgcb5CnqlH1X*R!gHUKL$;A<5w)H|=@dEI`g#MlfCaNNm26S^MfGo3E;l?$>-Y}moR&1bvUJac zaM#`#2ahLY9|;V*Gsi3+5e!H1mYnO4H-Z5?dGoJt+`dDfO2A6e6(U*%ak^xZt?J3& zkwHQHDpLT(oDbCHlLf|dC7bk?4DnBu>X~D#E{VZhEUV~j#Vpx9shSDK{KHeC{?NM^ zuTSDXbDRPnJ2^L85RgT-KXV*VFbt6YiLd=PoAw7^`=9m?9_!-vm48+Jds+Sr(f(<_5&oU(e~#1?q#>dHL_z(vrvBQo?*EhUe*lbw BTl)Y2 diff --git a/collectors/lib/utils.py b/collectors/lib/utils.py index b2c30de7..daf9862c 100644 --- a/collectors/lib/utils.py +++ b/collectors/lib/utils.py @@ -18,6 +18,7 @@ import stat import pwd import errno +import sys # If we're running as root and this user exists, we'll drop privileges. USER = "nobody" @@ -38,12 +39,20 @@ def drop_privileges(user=USER): def is_sockfile(path): - """Returns whether or not the given path is a socket file.""" - try: - s = os.stat(path) - except OSError, (no, e): - if no == errno.ENOENT: - return False - err("warning: couldn't stat(%r): %s" % (path, e)) - return None - return s.st_mode & stat.S_IFSOCK == stat.S_IFSOCK + """Returns whether or not the given path is a socket file.""" + try: + s = os.stat(path) + except OSError, (no, e): + if no == errno.ENOENT: + return False + err("warning: couldn't stat(%r): %s" % (path, e)) + return None + return s.st_mode & stat.S_IFSOCK == stat.S_IFSOCK + + +def err(msg): + print >> sys.stderr, msg + + +def is_numeric(value): + return isinstance(value, (int, long, float)) \ No newline at end of file diff --git a/stumbleupon/monitoring/.gitignore b/stumbleupon/monitoring/.gitignore deleted file mode 100644 index 378eac25..00000000 --- a/stumbleupon/monitoring/.gitignore +++ /dev/null @@ -1 +0,0 @@ -build diff --git a/stumbleupon/monitoring/Makefile b/stumbleupon/monitoring/Makefile deleted file mode 100644 index 320da191..00000000 --- a/stumbleupon/monitoring/Makefile +++ /dev/null @@ -1,87 +0,0 @@ -# 
Copyright 2010 StumbleUpon, Inc. -# -# This library is free software: you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this library. If not, see . - -all: jmx - -top_builddir = build -package = com.stumbleupon.monitoring -spec_title = Monitoring Tools -spec_vendor = StumbleUpon, Inc. -spec_version = 1.0 -jmx_JAVA = \ - jmx.java \ - -jmx_LIBADD = \ - /usr/lib/jvm/java-6-sun/lib/tools.jar \ - -AM_JAVACFLAGS = -Xlint -source 6 -JVM_ARGS = -package_dir = $(subst .,/,$(package)) -jmx_classes=$(jmx_JAVA:%.java=$(top_builddir)/$(package_dir)/%.class) -jmx_jar = $(top_builddir)/jmx-$(spec_version).jar - -jmx: $(jmx_jar) - -jmx_get_dep_classpath = `echo $(jmx_LIBADD) | tr ' ' ':'` -$(top_builddir)/.javac-stamp: $(jmx_JAVA) - @mkdir -p $(top_builddir) - javac $(AM_JAVACFLAGS) -cp $(jmx_get_dep_classpath) \ - -d $(top_builddir) $(jmx_JAVA) - @touch "$@" - -classes_with_nested_classes = $(jmx_classes:$(top_builddir)/%.class=%*.class) - -pkg_version = \ - `git rev-list --pretty=format:%h HEAD --max-count=1 | sed 1d || echo unknown` -$(top_builddir)/manifest: $(top_builddir)/.javac-stamp ../../.git/HEAD - { echo "Specification-Title: $(spec_title)"; \ - echo "Specification-Version: $(spec_version)"; \ - echo "Specification-Vendor: $(spec_vendor)"; \ - echo "Implementation-Title: $(package)"; \ - echo "Implementation-Version: $(pkg_version)"; \ - echo "Implementation-Vendor: $(spec_vendor)"; } >"$@" - -$(jmx_jar): $(top_builddir)/manifest 
$(top_builddir)/.javac-stamp $(jmx_classes) - cd $(top_builddir) && jar cfm `basename $(jmx_jar)` manifest $(classes_with_nested_classes) \ - || { rv=$$? && rm -f `basename $(jar)` && exit $$rv; } -# ^^^^^^^^^^^^^^^^^^^^^^^ -# I've seen cases where `jar' exits with an error but leaves a partially built .jar file! - -doc: $(top_builddir)/api/index.html - -JDK_JAVADOC=http://download.oracle.com/javase/6/docs/api -$(top_builddir)/api/index.html: $(jmx_JAVA) $(BUILT_SOURCES) - javadoc -d $(top_builddir)/api -classpath $(get_dep_classpath) \ - -link $(JDK_JAVADOC) -link $(jmx_JAVA) $(BUILT_SOURCES) - -clean: - @rm -f $(top_builddir)/.javac-stamp - rm -f $(top_builddir)/manifest $(BUILT_SOURCES) - cd $(top_builddir) || exit 0 && rm -f $(classes_with_nested_classes) - cd $(top_builddir) || exit 0 \ - && test -d $(package_dir) || exit 0 \ - && find $(package_dir) -type d -depth -exec rmdir {} ';' \ - && dir=$(package_dir) && dir=$${dir%/*} \ - && while test x"$$dir" != x"$${dir%/*}"; do \ - rmdir "$$dir" && dir=$${dir%/*} || break; \ - done \ - && rmdir "$$dir" - -distclean: clean - rm -f $(jar) - rm -rf $(top_builddir)/api - test ! -d $(top_builddir) || rmdir $(top_builddir) - -.PHONY: all jmx clean distclean doc check diff --git a/stumbleupon/monitoring/jmx.java b/stumbleupon/monitoring/jmx.java deleted file mode 100644 index d9f751c7..00000000 --- a/stumbleupon/monitoring/jmx.java +++ /dev/null @@ -1,531 +0,0 @@ -// This file is part of OpenTSDB. -// Copyright (C) 2010 The tcollector Authors. -// -// This program is free software: you can redistribute it and/or modify it -// under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or (at your -// option) any later version. This program is distributed in the hope that it -// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty -// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser -// General Public License for more details. You should have received a copy -// of the GNU Lesser General Public License along with this program. If not, -// see . - -/** Quick CLI tool to get JMX MBean attributes. */ -package com.stumbleupon.monitoring; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; - -import javax.management.MBeanAttributeInfo; -import javax.management.MBeanInfo; -import javax.management.MBeanServerConnection; -import javax.management.ObjectName; -import javax.management.openmbean.TabularData; -import javax.management.remote.JMXConnector; -import javax.management.remote.JMXConnectorFactory; -import javax.management.remote.JMXServiceURL; - -// Composite Data -import javax.management.openmbean.CompositeDataSupport; -import javax.management.openmbean.CompositeType; -import javax.management.openmbean.OpenType; - -// Sun specific -import com.sun.tools.attach.AgentInitializationException; -import com.sun.tools.attach.AgentLoadException; -import com.sun.tools.attach.AttachNotSupportedException; -import com.sun.tools.attach.VirtualMachine; -import com.sun.tools.attach.VirtualMachineDescriptor; - -// Sun private -import sun.management.ConnectorAddressLink; -import sun.jvmstat.monitor.HostIdentifier; -import sun.jvmstat.monitor.MonitoredHost; -import sun.jvmstat.monitor.MonitoredVm; -import sun.jvmstat.monitor.MonitoredVmUtil; -import sun.jvmstat.monitor.VmIdentifier; - -final class jmx { - - private static final String LOCAL_CONNECTOR_ADDRESS = - "com.sun.management.jmxremote.localConnectorAddress"; - - private static void usage() { - System.out.println("Usage:\n" - + " jmx -l Lists all reachable VMs.\n" - 
+ " jmx Lists all MBeans for this JVM (PID or regexp).\n" - + " jmx Prints all the attributes of this MBean.\n" - + " jmx Prints the matching attributes of this MBean.\n" - + "\n" - + "You can pass multiple pairs to match multiple different\n" - + "attributes for different MBeans. For example:\n" - + " jmx --long JConsole Class Count Thread Total Garbage Collection\n" - + " LoadedClassCount 2808 java.lang:type=ClassLoading\n" - + " UnloadedClassCount 0 java.lang:type=ClassLoading\n" - + " TotalLoadedClassCount 2808 java.lang:type=ClassLoading\n" - + " CollectionCount 0 java.lang:type=GarbageCollector,name=ConcurrentMarkSweep\n" - + " CollectionTime 0 java.lang:type=GarbageCollector,name=ConcurrentMarkSweep\n" - + " CollectionCount 1 java.lang:type=GarbageCollector,name=ParNew\n" - + " CollectionTime 19 java.lang:type=GarbageCollector,name=ParNew\n" - + " TotalStartedThreadCount 43 java.lang:type=Threading\n" - + "The command above searched for a JVM with `JConsole' in its name, and then searched\n" - + "for MBeans with `Class' in the name and `Count' in the attribute (first 3 matches\n" - + "in this output), MBeans with `Thread' in the name and `Total' in the attribute (last\n" - + "line in the output) and MBeans matching `Garbage' with a `Collection' attribute.\n" - + "\n" - + "Other flags you can pass:\n" - + " --long Print a longer but more explicit output for each value.\n" - + " --timestamp Print a timestamp at the beginning of each line.\n" - + " --watch N Reprint the output every N seconds.\n" - + "\n" - + "Return value:\n" - + " 0: Everything OK.\n" - + " 1: Invalid usage or unexpected error.\n" - + " 2: No JVM matched.\n" - + " 3: No MBean matched.\n" - + " 4: No attribute matched for the MBean(s) selected."); - } - - private static void fatal(final int rv, final String errmsg) { - System.err.println(errmsg); - System.exit(rv); - throw new AssertionError("You should never see this, really."); - } - - public static void main(final String[] args) throws 
Exception { - if (args.length == 0 || "-h".equals(args[0]) || "--help".equals(args[0])) { - usage(); - System.exit(args.length == 0 ? 1 : 0); - return; - } - - int current_arg = 0; - int watch = 0; - boolean long_output = false; - boolean print_timestamps = false; - while (current_arg < args.length) { - if ("--watch".equals(args[current_arg])) { - current_arg++; - try { - watch = Integer.parseInt(args[current_arg]); - } catch (NumberFormatException e) { - fatal(1, "Invalid value for --watch: " + e.getMessage()); - return; - } - if (watch < 1) { - fatal(1, "Invalid value for --watch: " + watch); - } - current_arg++; - } else if ("--long".equals(args[current_arg])) { - long_output = true; - current_arg++; - } else if ("--timestamp".equals(args[current_arg])) { - print_timestamps = true; - current_arg++; - } else { - break; - } - } - - if (current_arg == args.length) { - usage(); - fatal(1, "error: Missing argument (-l or JVM specification)."); - return; - } - - HashMap vms = getJVMs(); - if ("-l".equals(args[current_arg])) { - printVmList(vms.values()); - return; - } - - final JVM jvm = selectJVM(args[current_arg++], vms); - vms = null; - final JMXConnector connection = JMXConnectorFactory.connect(jvm.jmxUrl()); - try { - final MBeanServerConnection mbsc = connection.getMBeanServerConnection(); - if (args.length == current_arg) { - for (final ObjectName mbean : listMBeans(mbsc)) { - System.out.println(mbean); - } - return; - } - - final TreeMap objects = selectMBeans(args, current_arg, mbsc); - if (objects.isEmpty()) { - fatal(3, "No MBean matched your query in " + jvm.name()); - return; - } - do { - boolean found = false; - for (final Map.Entry entry : objects.entrySet()) { - final ObjectName object = entry.getKey(); - final MBeanInfo mbean = mbsc.getMBeanInfo(object); - final Pattern wanted = entry.getValue(); - for (final MBeanAttributeInfo attr : mbean.getAttributes()) { - if (wanted == null || wanted.matcher(attr.getName()).find()) { - dumpMBean(long_output, 
print_timestamps, mbsc, object, attr); - found = true; - } - } - } - if (!found) { - fatal(4, "No attribute of " + objects.keySet() - + " matched your query in " + jvm.name()); - return; - } - System.out.flush(); - Thread.sleep(watch * 1000); - } while (watch > 0); - } finally { - connection.close(); - } - } - - private static TreeMap selectMBeans(final String[] args, - final int current_arg, - final MBeanServerConnection mbsc) throws IOException { - final TreeMap mbeans = new TreeMap(); - for (int i = current_arg; i < args.length; i += 2) { - final Pattern object_re = compile_re(args[i]); - final Pattern attr_re = i + 1 < args.length ? compile_re(args[i + 1]) : null; - for (final ObjectName o : listMBeans(mbsc)) { - if (object_re.matcher(o.toString()).find()) { - mbeans.put(o, attr_re); - } - } - } - return mbeans; - } - - private static void dumpMBean(final boolean long_output, - final boolean print_timestamps, - final MBeanServerConnection mbsc, - final ObjectName object, - final MBeanAttributeInfo attr) throws Exception { - final String name = attr.getName(); - Object value = null; - try { - value = mbsc.getAttribute(object, name); - } catch (Exception e) { - // Above may raise errors for some attributes like - // CollectionUsage - return; - } - if (value instanceof TabularData) { - final TabularData tab = (TabularData) value; - int i = 0; - for (final Object o : tab.keySet()) { - dumpMBeanValue(long_output, print_timestamps, object, name + "." + i, o); - i++; - } - } else if (value instanceof CompositeDataSupport){ - CompositeDataSupport cds = (CompositeDataSupport) value; - CompositeType ct = cds.getCompositeType(); - for (final String item: ct.keySet()){ - dumpMBeanValue(long_output, print_timestamps, object, name + "." 
+ item, cds.get(item)); - } - } else { - dumpMBeanValue(long_output, print_timestamps, object, name, value); - } - } - - private static void dumpMBeanValue(final boolean long_output, - final boolean print_timestamps, - final ObjectName object, - final String name, - final Object value) { - // Ignore non numeric values - if ((value instanceof String)|| - (value instanceof String[])|| - (value instanceof Boolean)) { - return; - } - final StringBuilder buf = new StringBuilder(); - final long timestamp = System.currentTimeMillis() / 1000; - if (print_timestamps) { - buf.append(timestamp).append('\t'); - } - if (value instanceof Object[]) { - for (final Object o : (Object[]) value) { - buf.append(o).append('\t'); - } - if (buf.length() > 0) { - buf.setLength(buf.length() - 1); - } - } else { - buf.append(name).append('\t').append(value); - } - if (long_output) { - buf.append('\t').append(object); - } - buf.append('\n'); - System.out.print(buf); - } - - private static ArrayList listMBeans(final MBeanServerConnection mbsc) throws IOException { - ArrayList mbeans = new ArrayList(mbsc.queryNames(null, null)); - Collections.sort(mbeans, new Comparator() { - public int compare(final ObjectName a, final ObjectName b) { - return a.toString().compareTo(b.toString()); - } - }); - return mbeans; - } - - private static Pattern compile_re(final String re) { - try { - return Pattern.compile(re); - } catch (PatternSyntaxException e) { - fatal(1, "Invalid regexp: " + re + ", " + e.getMessage()); - throw new AssertionError("Should never be here"); - } - } - - private static final String MAGIC_STRING = "this.is.jmx.magic"; - - private static JVM selectJVM(final String selector, - final HashMap vms) { - String error = null; - try { - final int pid = Integer.parseInt(selector); - if (pid < 2) { - throw new IllegalArgumentException("Invalid PID: " + pid); - } - final JVM jvm = vms.get(pid); - if (jvm != null) { - return jvm; - } - error = "Couldn't find a JVM with PID " + pid; - } catch 
(NumberFormatException e) { - /* Ignore. */ - } - if (error == null) { - try { - final Pattern p = compile_re(selector); - final ArrayList matches = new ArrayList(2); - for (final JVM jvm : vms.values()) { - if (p.matcher(jvm.name()).find()) { - matches.add(jvm); - } - } - // Exclude ourselves from the matches. - System.setProperty(MAGIC_STRING, - "LOL Java processes can't get their own PID"); - final String me = jmx.class.getName(); - final Iterator it = matches.iterator(); - while (it.hasNext()) { - final JVM jvm = it.next(); - final String name = jvm.name(); - // Ignore other long running jmx clients too. - if (name.contains("--watch") && name.contains(me)) { - it.remove(); - continue; - } - final VirtualMachine vm = VirtualMachine.attach(String.valueOf(jvm.pid())); - try { - if (vm.getSystemProperties().containsKey(MAGIC_STRING)) { - it.remove(); - continue; - } - } finally { - vm.detach(); - } - } - System.clearProperty(MAGIC_STRING); - if (matches.size() == 0) { - error = "No JVM matched your regexp " + selector; - } else if (matches.size() > 1) { - printVmList(matches); - error = matches.size() + " JVMs matched your regexp " + selector - + ", it's too ambiguous, please refine it."; - } else { - return matches.get(0); - } - } catch (PatternSyntaxException e) { - error = "Invalid pattern: " + selector + ", " + e.getMessage(); - } catch (Exception e) { - e.printStackTrace(); - error = "Unexpected Exception: " + e.getMessage(); - } - } - fatal(2, error); - return null; - } - - private static void printVmList(final Collection vms) { - final ArrayList sorted_vms = new ArrayList(vms); - Collections.sort(sorted_vms, new Comparator() { - public int compare(final JVM a, final JVM b) { - return a.pid() - b.pid(); - } - }); - for (final JVM jvm : sorted_vms) { - System.out.println(jvm.pid() + "\t" + jvm.name()); - } - } - - private static final class JVM { - final int pid; - final String name; - String address; - - public JVM(final int pid, final String name, final 
String address) { - if (name.isEmpty()) { - throw new IllegalArgumentException("empty name"); - } - this.pid = pid; - this.name = name; - this.address = address; - } - - public int pid() { - return pid; - } - - public String name() { - return name; - } - - public JMXServiceURL jmxUrl() { - if (address == null) { - ensureManagementAgentStarted(); - } - try { - return new JMXServiceURL(address); - } catch (Exception e) { - throw new RuntimeException("Error", e); - } - } - - public void ensureManagementAgentStarted() { - if (address != null) { // already started - return; - } - VirtualMachine vm; - try { - vm = VirtualMachine.attach(String.valueOf(pid)); - } catch (AttachNotSupportedException e) { - throw new RuntimeException("Failed to attach to " + this, e); - } catch (IOException e) { - throw new RuntimeException("Failed to attach to " + this, e); - } - try { - // java.sun.com/javase/6/docs/technotes/guides/management/agent.html#gdhkz - // + code mostly stolen from JConsole's code. - final String home = vm.getSystemProperties().getProperty("java.home"); - - // Normally in ${java.home}/jre/lib/management-agent.jar but might - // be in ${java.home}/lib in build environments. 
- - String agent = home + File.separator + "jre" + File.separator - + "lib" + File.separator + "management-agent.jar"; - File f = new File(agent); - if (!f.exists()) { - agent = home + File.separator + "lib" + File.separator - + "management-agent.jar"; - f = new File(agent); - if (!f.exists()) { - throw new RuntimeException("Management agent not found"); - } - } - - agent = f.getCanonicalPath(); - try { - vm.loadAgent(agent, "com.sun.management.jmxremote"); - } catch (AgentLoadException e) { - throw new RuntimeException("Failed to load the agent into " + this, e); - } catch (AgentInitializationException e) { - throw new RuntimeException("Failed to initialize the agent into " + this, e); - } - address = (String) vm.getAgentProperties().get(LOCAL_CONNECTOR_ADDRESS); - } catch (IOException e) { - throw new RuntimeException("Error while loading agent into " + this, e); - } finally { - try { - vm.detach(); - } catch (IOException e) { - throw new RuntimeException("Failed to detach from " + vm + " = " + this, e); - } - } - if (address == null) { - throw new RuntimeException("Couldn't start the management agent."); - } - } - - public String toString() { - return "JVM(" + pid + ", \"" + name + "\", " - + (address == null ? null : '"' + address + '"') + ')'; - } - } - - /** - * Returns a map from PID to JVM. 
- */ - private static HashMap getJVMs() throws Exception { - final HashMap vms = new HashMap(); - getMonitoredVMs(vms); - getAttachableVMs(vms); - return vms; - } - - private static void getMonitoredVMs(final HashMap out) throws Exception { - final MonitoredHost host = - MonitoredHost.getMonitoredHost(new HostIdentifier((String) null)); - @SuppressWarnings("unchecked") - final Set vms = host.activeVms(); - for (final Integer pid : vms) { - try { - final VmIdentifier vmid = new VmIdentifier(pid.toString()); - final MonitoredVm vm = host.getMonitoredVm(vmid); - out.put(pid, new JVM(pid, MonitoredVmUtil.commandLine(vm), - ConnectorAddressLink.importFrom(pid))); - vm.detach(); - } catch (Exception x) { - System.err.println("Ignoring exception:"); - x.printStackTrace(); - } - } - } - - private static void getAttachableVMs(final HashMap out) { - for (final VirtualMachineDescriptor vmd : VirtualMachine.list()) { - int pid; - try { - pid = Integer.parseInt(vmd.id()); - } catch (NumberFormatException e) { - System.err.println("Ignoring invalid vmd.id(): " + vmd.id() - + ' ' + e.getMessage()); - continue; - } - if (out.containsKey(pid)) { - continue; - } - try { - final VirtualMachine vm = VirtualMachine.attach(vmd); - out.put(pid, new JVM(pid, String.valueOf(pid), - (String) vm.getAgentProperties().get(LOCAL_CONNECTOR_ADDRESS))); - vm.detach(); - } catch (AttachNotSupportedException e) { - System.err.println("VM not attachable: " + vmd.id() - + ' ' + e.getMessage()); - } catch (IOException e) { - System.err.println("Could not attach: " + vmd.id() - + ' ' + e.getMessage()); - } - } - } - -} diff --git a/tcollector.py b/tcollector.py index 146d4dc4..2fd47faa 100755 --- a/tcollector.py +++ b/tcollector.py @@ -686,7 +686,7 @@ def send_data(self): def setup_logging(logfile=DEFAULT_LOG, max_bytes=None, backup_count=None): """Sets up logging and associated handlers.""" - LOG.setLevel(logging.INFO) + LOG.setLevel(logging.DEBUG) if backup_count is not None and max_bytes is not None: 
assert backup_count > 0 assert max_bytes > 0