Skip to content

Commit

Permalink
Add Metrics to Specialization codepath (Azure#4973)
Browse files Browse the repository at this point in the history
Added Metrics to specialization and i/o codepaths.
  • Loading branch information
yojagad authored Oct 15, 2019
1 parent ad82e4a commit 3543a35
Show file tree
Hide file tree
Showing 36 changed files with 743 additions and 376 deletions.
16 changes: 14 additions & 2 deletions src/WebJobs.Script.WebHost/Diagnostics/MetricsEventManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -62,6 +63,7 @@ public object BeginEvent(string eventName, string functionName = null, string da
FunctionName = functionName,
EventName = eventName.ToLowerInvariant(),
Timestamp = DateTime.UtcNow,
StopWatch = Stopwatch.StartNew(),
Data = data
};
}
Expand All @@ -76,8 +78,18 @@ public void EndEvent(object eventHandle)
SystemMetricEvent evt = eventHandle as SystemMetricEvent;
if (evt != null)
{
evt.Duration = DateTime.UtcNow - evt.Timestamp;
long latencyMS = (long)evt.Duration.TotalMilliseconds;
long latencyMS = 0;
evt.StopWatch.Stop();
if (evt.StopWatch != null)
{
evt.Duration = evt.StopWatch.Elapsed;
latencyMS = evt.StopWatch.ElapsedMilliseconds;
}
else
{
evt.Duration = DateTime.UtcNow - evt.Timestamp;
latencyMS = (long)evt.Duration.TotalMilliseconds;
}

// event aggregation is based on this key
// for each unique key, there will be only 1
Expand Down
56 changes: 37 additions & 19 deletions src/WebJobs.Script.WebHost/Standby/StandbyManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Threading.Tasks;
using Microsoft.AspNetCore.Hosting;
using Microsoft.Azure.WebJobs.Script.Description;
using Microsoft.Azure.WebJobs.Script.Diagnostics;
using Microsoft.Azure.WebJobs.Script.Rpc;
using Microsoft.Azure.WebJobs.Script.WebHost.Properties;
using Microsoft.Extensions.Configuration;
Expand All @@ -31,6 +32,7 @@ public class StandbyManager : IStandbyManager, IDisposable
private readonly IWebHostLanguageWorkerChannelManager _languageWorkerChannelManager;
private readonly IConfigurationRoot _configuration;
private readonly ILogger _logger;
private readonly IMetricsLogger _metricsLogger;
private readonly HostNameProvider _hostNameProvider;
private readonly IDisposable _changeTokenCallbackSubscription;
private readonly TimeSpan _specializationTimerInterval;
Expand All @@ -42,17 +44,18 @@ public class StandbyManager : IStandbyManager, IDisposable
private static SemaphoreSlim _semaphore = new SemaphoreSlim(1, 1);

public StandbyManager(IScriptHostManager scriptHostManager, IWebHostLanguageWorkerChannelManager languageWorkerChannelManager, IConfiguration configuration, IScriptWebHostEnvironment webHostEnvironment,
IEnvironment environment, IOptionsMonitor<ScriptApplicationHostOptions> options, ILogger<StandbyManager> logger, HostNameProvider hostNameProvider, IApplicationLifetime applicationLifetime)
: this(scriptHostManager, languageWorkerChannelManager, configuration, webHostEnvironment, environment, options, logger, hostNameProvider, applicationLifetime, TimeSpan.FromMilliseconds(500))
IEnvironment environment, IOptionsMonitor<ScriptApplicationHostOptions> options, ILogger<StandbyManager> logger, HostNameProvider hostNameProvider, IApplicationLifetime applicationLifetime, IMetricsLogger metricsLogger)
: this(scriptHostManager, languageWorkerChannelManager, configuration, webHostEnvironment, environment, options, logger, hostNameProvider, applicationLifetime, TimeSpan.FromMilliseconds(500), metricsLogger)
{
}

public StandbyManager(IScriptHostManager scriptHostManager, IWebHostLanguageWorkerChannelManager languageWorkerChannelManager, IConfiguration configuration, IScriptWebHostEnvironment webHostEnvironment,
IEnvironment environment, IOptionsMonitor<ScriptApplicationHostOptions> options, ILogger<StandbyManager> logger, HostNameProvider hostNameProvider, IApplicationLifetime applicationLifetime, TimeSpan specializationTimerInterval)
IEnvironment environment, IOptionsMonitor<ScriptApplicationHostOptions> options, ILogger<StandbyManager> logger, HostNameProvider hostNameProvider, IApplicationLifetime applicationLifetime, TimeSpan specializationTimerInterval, IMetricsLogger metricsLogger)
{
_scriptHostManager = scriptHostManager ?? throw new ArgumentNullException(nameof(scriptHostManager));
_options = options ?? throw new ArgumentNullException(nameof(options));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
_metricsLogger = metricsLogger ?? throw new ArgumentNullException(nameof(metricsLogger));
_specializationTask = new Lazy<Task>(SpecializeHostCoreAsync, LazyThreadSafetyMode.ExecutionAndPublication);
_webHostEnvironment = webHostEnvironment ?? throw new ArgumentNullException(nameof(webHostEnvironment));
_environment = environment ?? throw new ArgumentNullException(nameof(environment));
Expand All @@ -68,6 +71,7 @@ public StandbyManager(IScriptHostManager scriptHostManager, IWebHostLanguageWork

public Task SpecializeHostAsync()
{
IDisposable latencyEvent = _metricsLogger.LatencyEvent(MetricEventNames.SpecializationSpecializeHost);
return _specializationTask.Value.ContinueWith(t =>
{
if (t.IsFaulted)
Expand All @@ -77,6 +81,7 @@ public Task SpecializeHostAsync()
_logger.LogError(t.Exception, $"Specialization failed. Shutting down.");
_applicationLifetime.StopApplication();
}
latencyEvent.Dispose();
});
}

Expand All @@ -102,12 +107,22 @@ public async Task SpecializeHostCoreAsync()
// user dependencies
FunctionAssemblyLoadContext.ResetSharedContext();

await _languageWorkerChannelManager.SpecializeAsync();
using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationLanguageWorkerChannelManagerSpecialize))
{
await _languageWorkerChannelManager.SpecializeAsync();
}

NotifyChange();

await _scriptHostManager.RestartHostAsync();
await _scriptHostManager.DelayUntilHostReady();
using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationRestartHost))
{
await _scriptHostManager.RestartHostAsync();
}

using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationDelayUntilHostReady))
{
await _scriptHostManager.DelayUntilHostReady();
}
}

public void NotifyChange()
Expand Down Expand Up @@ -141,21 +156,24 @@ internal static void ResetChangeToken()

public async Task InitializeAsync()
{
if (await _semaphore.WaitAsync(timeout: TimeSpan.FromSeconds(30)))
using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationStandbyManagerInitialize))
{
try
{
await CreateStandbyWarmupFunctions();

// start a background timer to identify when specialization happens
// specialization usually happens via an http request (e.g. scale controller
// ping) but this timer is started as well to handle cases where we
// might not receive a request
_specializationTimer = new Timer(OnSpecializationTimerTick, null, _specializationTimerInterval, _specializationTimerInterval);
}
finally
if (await _semaphore.WaitAsync(timeout: TimeSpan.FromSeconds(30)))
{
_semaphore.Release();
try
{
await CreateStandbyWarmupFunctions();

// start a background timer to identify when specialization happens
// specialization usually happens via an http request (e.g. scale controller
// ping) but this timer is started as well to handle cases where we
// might not receive a request
_specializationTimer = new Timer(OnSpecializationTimerTick, null, _specializationTimerInterval, _specializationTimerInterval);
}
finally
{
_semaphore.Release();
}
}
}
}
Expand Down
102 changes: 59 additions & 43 deletions src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using Microsoft.ApplicationInsights.AspNetCore.Extensions;
using Microsoft.ApplicationInsights.Extensibility.Implementation.ApplicationId;
using Microsoft.Azure.WebJobs.Logging;
using Microsoft.Azure.WebJobs.Script.Diagnostics;
using Microsoft.Azure.WebJobs.Script.Scale;
using Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.Extensions;
using Microsoft.Extensions.DependencyInjection;
Expand All @@ -27,6 +28,7 @@ public class WebJobsScriptHostService : IHostedService, IScriptHostManager, IDis
private readonly IServiceProvider _rootServiceProvider;
private readonly ILogger _logger;
private readonly IEnvironment _environment;
private readonly IMetricsLogger _metricsLogger;
private readonly HostPerformanceManager _performanceManager;
private readonly IOptions<HostHealthMonitorOptions> _healthMonitorOptions;
private readonly SlidingWindow<bool> _healthCheckWindow;
Expand All @@ -44,7 +46,7 @@ public class WebJobsScriptHostService : IHostedService, IScriptHostManager, IDis

public WebJobsScriptHostService(IOptionsMonitor<ScriptApplicationHostOptions> applicationHostOptions, IScriptHostBuilder scriptHostBuilder, ILoggerFactory loggerFactory, IServiceProvider rootServiceProvider,
IServiceScopeFactory rootScopeFactory, IScriptWebHostEnvironment scriptWebHostEnvironment, IEnvironment environment,
HostPerformanceManager hostPerformanceManager, IOptions<HostHealthMonitorOptions> healthMonitorOptions)
HostPerformanceManager hostPerformanceManager, IOptions<HostHealthMonitorOptions> healthMonitorOptions, IMetricsLogger metricsLogger)
{
if (loggerFactory == null)
{
Expand All @@ -54,6 +56,7 @@ public WebJobsScriptHostService(IOptionsMonitor<ScriptApplicationHostOptions> ap
// This will no-op if already initialized.
InitializeApplicationInsightsRequestTracking();

_metricsLogger = metricsLogger;
_applicationHostOptions = applicationHostOptions ?? throw new ArgumentNullException(nameof(applicationHostOptions));
_scriptWebHostEnvironment = scriptWebHostEnvironment ?? throw new ArgumentNullException(nameof(scriptWebHostEnvironment));
_rootServiceProvider = rootServiceProvider;
Expand Down Expand Up @@ -141,20 +144,23 @@ public async Task StartAsync(CancellationToken cancellationToken)

private async Task StartHostAsync(CancellationToken cancellationToken, int attemptCount = 0, JobHostStartupMode startupMode = JobHostStartupMode.Normal)
{
try
using (_metricsLogger.LatencyEvent(MetricEventNames.ScriptHostManagerStartService))
{
await _hostStartSemaphore.WaitAsync();
try
{
await _hostStartSemaphore.WaitAsync();

// Now that we're inside the semaphore, set this task as completed. This prevents
// restarts from being invoked (via the PlaceholderSpecializationMiddleware) before
// the IHostedService has ever started.
_hostStartedSource.TrySetResult(true);
// Now that we're inside the semaphore, set this task as completed. This prevents
// restarts from being invoked (via the PlaceholderSpecializationMiddleware) before
// the IHostedService has ever started.
_hostStartedSource.TrySetResult(true);

await UnsynchronizedStartHostAsync(cancellationToken, attemptCount, startupMode);
}
finally
{
_hostStartSemaphore.Release();
await UnsynchronizedStartHostAsync(cancellationToken, attemptCount, startupMode);
}
finally
{
_hostStartSemaphore.Release();
}
}
}

Expand Down Expand Up @@ -185,7 +191,11 @@ private async Task UnsynchronizedStartHostAsync(CancellationToken cancellationTo
// If we're in a non-transient error state or offline, skip host initialization
bool skipJobHostStartup = isOffline || hasNonTransientErrors;

localHost = BuildHost(skipJobHostStartup, skipHostJsonConfiguration: startupMode == JobHostStartupMode.HandlingConfigurationParsingError);
using (_metricsLogger.LatencyEvent(MetricEventNames.ScriptHostManagerBuildScriptHost))
{
localHost = BuildHost(skipJobHostStartup, skipHostJsonConfiguration: startupMode == JobHostStartupMode.HandlingConfigurationParsingError);
}

ActiveHost = localHost;

var scriptHost = (ScriptHost)ActiveHost.Services.GetService<ScriptHost>();
Expand All @@ -206,7 +216,10 @@ private async Task UnsynchronizedStartHostAsync(CancellationToken cancellationTo

cancellationToken.ThrowIfCancellationRequested();

await ActiveHost.StartAsync(cancellationToken);
using (_metricsLogger.LatencyEvent(MetricEventNames.ScriptHostManagerStartScriptHost))
{
await ActiveHost.StartAsync(cancellationToken);
}

if (!startupMode.HasFlag(JobHostStartupMode.HandlingError))
{
Expand Down Expand Up @@ -344,44 +357,47 @@ public async Task StopAsync(CancellationToken cancellationToken)

public async Task RestartHostAsync(CancellationToken cancellationToken)
{
// Do not invoke a restart if the host has not yet been started. This can lead
// to invalid state.
if (!_hostStarted.IsCompleted)
using (_metricsLogger.LatencyEvent(MetricEventNames.ScriptHostManagerRestartService))
{
_logger.RestartBeforeStart();
await _hostStarted;
}
// Do not invoke a restart if the host has not yet been started. This can lead
// to invalid state.
if (!_hostStarted.IsCompleted)
{
_logger.RestartBeforeStart();
await _hostStarted;
}

_logger.EnteringRestart();
_logger.EnteringRestart();

// If anything is mid-startup, cancel it.
_startupLoopTokenSource?.Cancel();
// If anything is mid-startup, cancel it.
_startupLoopTokenSource?.Cancel();

try
{
await _hostStartSemaphore.WaitAsync();

if (State == ScriptHostState.Stopping || State == ScriptHostState.Stopped)
try
{
_logger.SkipRestart(State.ToString());
return;
}
await _hostStartSemaphore.WaitAsync();

if (State == ScriptHostState.Stopping || State == ScriptHostState.Stopped)
{
_logger.SkipRestart(State.ToString());
return;
}

State = ScriptHostState.Default;
_logger.Restarting();
State = ScriptHostState.Default;
_logger.Restarting();

var previousHost = ActiveHost;
ActiveHost = null;
Task startTask = UnsynchronizedStartHostAsync(cancellationToken);
Task stopTask = Orphan(previousHost, cancellationToken);
var previousHost = ActiveHost;
ActiveHost = null;
Task startTask = UnsynchronizedStartHostAsync(cancellationToken);
Task stopTask = Orphan(previousHost, cancellationToken);

await startTask;
await startTask;

_logger.Restarted();
}
finally
{
_hostStartSemaphore.Release();
_logger.Restarted();
}
finally
{
_hostStartSemaphore.Release();
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/WebJobs.Script.WebHost/WebScriptHostBuilderExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public static IHostBuilder AddWebScriptHost(this IHostBuilder builder, IServiceP
{
ILoggerFactory configLoggerFactory = rootServiceProvider.GetService<ILoggerFactory>();
IDependencyValidator validator = rootServiceProvider.GetService<IDependencyValidator>();
IMetricsLogger metricsLogger = rootServiceProvider.GetService<IMetricsLogger>();

builder.UseServiceProviderFactory(new JobHostScopedServiceProviderFactory(rootServiceProvider, rootScopeFactory, validator))
.ConfigureServices(services =>
Expand All @@ -38,7 +39,7 @@ public static IHostBuilder AddWebScriptHost(this IHostBuilder builder, IServiceP
services.ConfigureOptions<CustomHttpHeadersOptionsSetup>();
services.ConfigureOptions<HostHstsOptionsSetup>();
})
.AddScriptHost(webHostOptions, configLoggerFactory, webJobsBuilder =>
.AddScriptHost(webHostOptions, configLoggerFactory, metricsLogger, webJobsBuilder =>
{
webJobsBuilder
.AddAzureStorageCoreServices();
Expand Down
Loading

0 comments on commit 3543a35

Please sign in to comment.