Skip to content

Commit

Permalink
Adding more docs and some code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
pwendell committed Sep 8, 2013
1 parent 8de8ee5 commit c190b48
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 20 deletions.
36 changes: 35 additions & 1 deletion conf/metrics.properties.template
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
# 1. To add a new sink, set the "class" option to a fully qualified class
# name (see examples below).
# 2. Some sinks involve a polling period. The minimum allowed polling period
# is 1 second.
# is 1 second.
# 3. Wild card properties can be overridden by more specific properties.
# For example, master.sink.console.period takes precedence over
# *.sink.console.period.
Expand All @@ -47,6 +47,40 @@
# instance master and applications. MetricsServlet may not be configured by self.
#

## List of available sinks and their properties.

# org.apache.spark.metrics.sink.ConsoleSink
# Name: Default: Description:
# period 10 Poll period
# unit seconds Units of poll period

# org.apache.spark.metrics.sink.CSVSink
# Name: Default: Description:
# period 10 Poll period
# unit seconds Units of poll period
# directory /tmp Where to store CSV files

# org.apache.spark.metrics.sink.GangliaSink
# Name: Default: Description:
# host NONE Hostname or multicast group of Ganglia server
# port NONE Port of Ganglia server(s)
# period 10 Poll period
# unit seconds Units of poll period
# ttl 1 TTL of messages sent by Ganglia
# mode multicast Ganglia network mode ('unicast' or 'multicast')

# org.apache.spark.metrics.sink.JmxSink

# org.apache.spark.metrics.sink.MetricsServlet
# Name: Default: Description:
# path VARIES* Path prefix from the web server root
# sample false Whether to show entire set of samples for histograms ('false' or 'true')
#
# * Default path is /metrics/json for all instances except the master. The master has two paths:
# /metrics/applications/json # App information
# /metrics/master/json # Master information

## Examples
# Enable JmxSink for all instances by class name
#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ private[spark] class MetricsConfig(val configFile: Option[String]) extends Loggi

/**
 * Seeds `prop` with the built-in defaults for the metrics servlet sink.
 *
 * Every instance (`*`) is given the `MetricsServlet` sink class and a default endpoint of
 * `/metrics/json`; the `master` and `applications` instances get their own endpoints.
 *
 * NOTE(review): this listing sets both the `uri`/`sample` keys and the `path` keys. That
 * looks like removed and added diff lines shown together -- confirm which key set is current.
 *
 * @param prop the properties object that receives the defaults (mutated in place)
 */
private def setDefaultProperties(prop: Properties) {
// Wildcard entry: applies to all instances.
prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
prop.setProperty("*.sink.servlet.uri", "/metrics/json")
prop.setProperty("*.sink.servlet.sample", "false")
// Instance-specific endpoints for `master` and `applications`.
prop.setProperty("master.sink.servlet.uri", "/metrics/master/json")
prop.setProperty("applications.sink.servlet.uri", "/metrics/applications/json")
prop.setProperty("*.sink.servlet.path", "/metrics/json")
prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
}

def initialize() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,21 @@ import org.eclipse.jetty.server.Handler
import org.apache.spark.ui.JettyUtils

class MetricsServlet(val property: Properties, val registry: MetricRegistry) extends Sink {
val SERVLET_KEY_URI = "uri"
val SERVLET_KEY_PATH = "path"
val SERVLET_KEY_SAMPLE = "sample"

val servletURI = property.getProperty(SERVLET_KEY_URI)
val SERVLET_DEFAULT_SAMPLE = false

val servletShowSample = property.getProperty(SERVLET_KEY_SAMPLE).toBoolean
val servletPath = property.getProperty(SERVLET_KEY_PATH)

val servletShowSample = Option(property.getProperty(SERVLET_KEY_SAMPLE)).map(_.toBoolean)
.getOrElse(SERVLET_DEFAULT_SAMPLE)

val mapper = new ObjectMapper().registerModule(
new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample))

def getHandlers = Array[(String, Handler)](
(servletURI, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json"))
(servletPath, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json"))
)

def getMetricsSnapshot(request: HttpServletRequest): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,37 +30,34 @@ class MetricsConfigSuite extends FunSuite with BeforeAndAfter {
val conf = new MetricsConfig(Option("dummy-file"))
conf.initialize()

assert(conf.properties.size() === 5)
assert(conf.properties.size() === 4)
assert(conf.properties.getProperty("test-for-dummy") === null)

val property = conf.getInstance("random")
assert(property.size() === 3)
assert(property.size() === 2)
assert(property.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
assert(property.getProperty("sink.servlet.uri") === "/metrics/json")
assert(property.getProperty("sink.servlet.sample") === "false")
assert(property.getProperty("sink.servlet.path") === "/metrics/json")
}

// Loads a MetricsConfig from `filePath` and checks the properties resolved for the
// "master" and "worker" instances.
// NOTE(review): several assertions below appear in pairs that cannot both hold (e.g. size 6
// and size 5, `uri` and `path` keys). This looks like removed and added diff lines shown
// together -- confirm which set is current.
test("MetricsConfig with properties set") {
val conf = new MetricsConfig(Option(filePath))
conf.initialize()

// master: console sink period/unit and a master-specific servlet endpoint are expected.
val masterProp = conf.getInstance("master")
assert(masterProp.size() === 6)
assert(masterProp.size() === 5)
assert(masterProp.getProperty("sink.console.period") === "20")
assert(masterProp.getProperty("sink.console.unit") === "minutes")
assert(masterProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource")
assert(masterProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
assert(masterProp.getProperty("sink.servlet.uri") === "/metrics/master/json")
assert(masterProp.getProperty("sink.servlet.sample") === "false")
assert(masterProp.getProperty("sink.servlet.path") === "/metrics/master/json")

// worker: expects different console period/unit values and the generic servlet endpoint.
val workerProp = conf.getInstance("worker")
assert(workerProp.size() === 6)
assert(workerProp.size() === 5)
assert(workerProp.getProperty("sink.console.period") === "10")
assert(workerProp.getProperty("sink.console.unit") === "seconds")
assert(workerProp.getProperty("source.jvm.class") === "org.apache.spark.metrics.source.JvmSource")
assert(workerProp.getProperty("sink.servlet.class") === "org.apache.spark.metrics.sink.MetricsServlet")
assert(workerProp.getProperty("sink.servlet.uri") === "/metrics/json")
assert(workerProp.getProperty("sink.servlet.sample") === "false")
assert(workerProp.getProperty("sink.servlet.path") === "/metrics/json")
}

test("MetricsConfig with subProperties") {
Expand All @@ -84,6 +81,6 @@ class MetricsConfigSuite extends FunSuite with BeforeAndAfter {
assert(consoleProps.size() === 2)

val servletProps = sinkProps("servlet")
assert(servletProps.size() === 3)
assert(servletProps.size() === 2)
}
}
9 changes: 9 additions & 0 deletions docs/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ set of sinks to which metrics are reported. The following instances are currentl
* `executor`: A Spark executor.
* `driver`: The Spark driver process (the process in which your SparkContext is created).

Each instance can report to zero or more _sinks_. Sinks are contained in the
`org.apache.spark.metrics.sink` package:

* `ConsoleSink`: Logs metrics information to the console.
* `CSVSink`: Exports metrics data to CSV files at regular intervals.
* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
* `JmxSink`: Registers metrics for viewing in a JMX console.
* `MetricsServlet`: Adds a servlet within the existing Spark UI to serve metrics data as JSON data.

The syntax of the metrics configuration file is defined in an example configuration file,
`$SPARK_HOME/conf/metrics.properties.template`.

Expand Down

0 comments on commit c190b48

Please sign in to comment.