Skip to content

Commit

Permalink
Add deseasonality in SrCnnEntireAnomalyDetect (dotnet#5202)
Browse files Browse the repository at this point in the history
* add seasonality detect and stl decompose in srcnn

* optimizations in performance

* Add parameter period in SrCnn interface, remove inner period detect logic

* add periodical data

* add test

* Remove unused files

* further remove unused codes

* remove unused functions

* update

* optimize WeightedRegression; clean code; add null checks

* recover

* reduce file numbers

* restore

* move stl related codes to a subfolder

* fix sln file

* update code style

* fix members initialization outside the constructor

* remove unused using

* refactor InnerStl

* use contract exception

* remove unused class

* update stl

* remove unused usings

* add readonly

* fix bug

* add deseasonality

* update deseasonality

* update

* add options

* refine code style

* refine code

* update

* updates

* remove max neighbor number constraint

* remove the max neighbor count constraint

* update SrCnnEntireDetectOptions, move input/output column names out; fix unit tests

* refactor the constructor of Loess

* remove unused imports

* refactor and optimization

* optimize

* unfold pow(x, 2) to x * x for performance optimization

* refactor polynomial model class and deseasonality functions, refine comments

* refine

* update comment

* updates

* update some wordings

* update comments

* update some comments

* wording

Co-authored-by: [email protected] <[email protected]>
  • Loading branch information
guinao and [email protected] authored Jun 29, 2020
1 parent 45a16dc commit 33f5f32
Show file tree
Hide file tree
Showing 15 changed files with 9,758 additions and 72 deletions.
132 changes: 132 additions & 0 deletions src/Microsoft.ML.TimeSeries/Deseasonality.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// Contract for strategies that strip the seasonal (periodic) component out of a time-series.
/// </summary>
internal interface IDeseasonality
{
/// <summary>
/// Remove the seasonality component from the given time-series.
/// </summary>
/// <param name="values">An array representing the input time-series.</param>
/// <param name="period">The period value of the time-series.</param>
/// <param name="results">The de-seasonalized time-series.</param>
/// <remarks>
/// Both arrays are passed by reference. The implementations visible in this file write into
/// <paramref name="results"/> by index and do not allocate or resize it, so the caller is
/// presumably expected to supply a buffer at least as long as <paramref name="values"/>
/// (TODO confirm against the caller).
/// </remarks>
public abstract void Deseasonality(ref double[] values, int period, ref double[] results);
}

/// <summary>
/// Removes seasonality by subtracting, from every point, the mean of all points that share
/// the same position inside the period.
/// </summary>
internal sealed class MeanDeseasonality : IDeseasonality
{
    // Seasonal estimate for each position in the period. Held in a field and resized on every call.
    private double[] _circularComponent;

    /// <summary>
    /// Writes <c>values[i] - mean(position of i in the period)</c> into <paramref name="results"/>.
    /// </summary>
    /// <param name="values">An array representing the input time-series. It is only read.</param>
    /// <param name="period">The period value of the time-series. It is used as a modulus and as an array size.</param>
    /// <param name="results">
    /// The de-seasonalized time-series. The first <c>values.Length</c> elements are overwritten;
    /// the array is not resized, so it must already be at least that long.
    /// </param>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        Array.Resize(ref _circularComponent, period);

        // Array.Resize keeps old contents when the size is unchanged, so reset the accumulators explicitly.
        Array.Clear(_circularComponent, 0, period);

        var length = values.Length;

        // Sum up values that are located at the same position in one period.
        for (int i = 0; i < length; ++i)
        {
            _circularComponent[i % period] += values[i];
        }

        // Turn the sums into means. Every position occurs once per cycle that precedes the cycle
        // of the last element, plus once more when it is not past the last element's position.
        var precedingCycles = (length - 1) / period;
        var lastPosition = (length - 1) % period;
        for (int i = 0; i < period; ++i)
        {
            var occurrences = precedingCycles + (i <= lastPosition ? 1 : 0);
            _circularComponent[i] /= occurrences;
        }

        // Subtract the circular component from the original series.
        // The result is computed from values rather than accumulated into results, so the output
        // does not depend on what the results buffer held before the call.
        for (int i = 0; i < length; ++i)
        {
            results[i] = values[i] - _circularComponent[i % period];
        }
    }
}

/// <summary>
/// Removes seasonality by subtracting, from every point, the median of all points that share
/// the same position inside the period.
/// </summary>
internal sealed class MedianDeseasonality : IDeseasonality
{
    // One list per position in the period, holding every value observed at that position.
    private List<double>[] _subSeries;

    // Seasonal estimate (median) for each position in the period.
    private double[] _circularComponent;

    /// <summary>
    /// Writes <c>values[i] - median(position of i in the period)</c> into <paramref name="results"/>.
    /// </summary>
    /// <param name="values">An array representing the input time-series. It is only read.</param>
    /// <param name="period">The period value of the time-series. It is used as a modulus and as an array size.</param>
    /// <param name="results">
    /// The de-seasonalized time-series. The first <c>values.Length</c> elements are overwritten;
    /// the array is not resized, so it must already be at least that long.
    /// </param>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        Array.Resize(ref _circularComponent, period);
        Array.Resize(ref _subSeries, period);

        var length = values.Length;

        // Fresh lists on every call so values from a previous series never leak into this one.
        for (int i = 0; i < period; ++i)
        {
            // No position can receive more than length / period + 1 values.
            _subSeries[i] = new List<double>(length / period + 1);
        }

        // Split the original series into #period subseries.
        for (int i = 0; i < length; ++i)
        {
            _subSeries[i % period].Add(values[i]);
        }

        // Calculate the median value as circular component.
        // NOTE(review): when length < period some subseries are empty; the result then depends on
        // how MathUtility.QuickMedian treats an empty list. Those positions are never read below.
        for (int i = 0; i < period; ++i)
        {
            _circularComponent[i] = MathUtility.QuickMedian(_subSeries[i]);
        }

        // Subtract the circular component from the original series.
        // The result is computed from values rather than accumulated into results, so the output
        // does not depend on what the results buffer held before the call.
        for (int i = 0; i < length; ++i)
        {
            results[i] = values[i] - _circularComponent[i % period];
        }
    }
}

/// <summary>
/// Deseasonality strategy backed by an STL decomposition: the residual component of the
/// decomposition is reported as the de-seasonalized series.
/// </summary>
internal sealed class StlDeseasonality : IDeseasonality
{
    private readonly InnerStl _stl;

    public StlDeseasonality()
    {
        _stl = new InnerStl(true);
    }

    /// <summary>
    /// Runs the decomposition and copies its residual into <paramref name="results"/>.
    /// When the decomposition reports failure, the input is copied through unchanged.
    /// </summary>
    /// <param name="values">An array representing the input time-series.</param>
    /// <param name="period">The period value of the time-series.</param>
    /// <param name="results">The de-seasonalized time-series.</param>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        if (!_stl.Decomposition(values, period))
        {
            // Decomposition failed: fall back to the raw series.
            for (int k = 0; k < values.Length; ++k)
            {
                results[k] = values[k];
            }

            return;
        }

        var residual = _stl.Residual;
        var residualCount = residual.Count;
        for (int k = 0; k < residualCount; ++k)
        {
            results[k] = residual[k];
        }
    }
}
}
31 changes: 30 additions & 1 deletion src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,36 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
/// </example>
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
    double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
{
    // Bundle the scalar settings into an options object and defer to the options-based overload,
    // so both entry points construct the detector through a single code path.
    // Options members that are not set here keep whatever defaults the options type declares.
    var options = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = threshold,
        BatchSize = batchSize,
        Sensitivity = sensitivity,
        DetectMode = detectMode,
    };

    return DetectEntireAnomalyBySrCnn(catalog, input, outputColumnName, inputColumnName, options);
}

/// <summary>
/// Create a <see cref="SrCnnEntireAnomalyDetector"/>, which runs SRCNN anomaly detection over the entire input
/// time series, configured through an options object.
/// </summary>
/// <param name="catalog">The AnomalyDetectionCatalog.</param>
/// <param name="input">Input DataView.</param>
/// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
/// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on
/// <see cref="SrCnnEntireAnomalyDetectorOptions.DetectMode"/> of <paramref name="options"/>.</param>
/// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
/// <param name="options">Defines the settings of the detection operation.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, SrCnnEntireAnomalyDetectorOptions options)
{
    // The detector is built against the host environment that the catalog belongs to.
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new SrCnnEntireAnomalyDetector(environment, input, outputColumnName, inputColumnName, options);
}

/// <summary>
/// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
Expand Down
104 changes: 104 additions & 0 deletions src/Microsoft.ML.TimeSeries/STL/FastLoess.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// This is the fast version of Loess. There are several alternatives to improve the performance. This one is an approximation approach.
/// When the series is longer than the sample size, the smoother is fitted on an evenly spaced sample only;
/// values for all points are then obtained by evaluating that smoother at each x.
/// </summary>
internal class FastLoess
{
    /// <summary>
    /// This class is a sampling based method, so here specifies the sample size.
    /// Series with at most this many points are fitted in full.
    /// </summary>
    private const int _sampleSize = 100;

    /// <summary>
    /// The minimum length of a valid time series. Shorter series are rejected by the constructor
    /// because they are too trivial to smooth.
    /// </summary>
    public const int MinTimeSeriesLength = 3;

    private readonly IReadOnlyList<double> _x;
    private readonly IReadOnlyList<double> _y;
    private readonly int _length;

    private readonly Loess _smoother;

    /// <summary>
    /// Initializes a new instance of the <see cref="FastLoess"/> class.
    /// The fast version of the Loess method. When the time series is too long, the sampling will be conducted first to improve the performance.
    /// </summary>
    /// <param name="xValues">The input x-axis values</param>
    /// <param name="yValues">The input y-axis values</param>
    /// <param name="isTemporal">If the regression is considered to take temporal information into account. In general, this is true if we are regressing a time series, and false if we are regressing scatter plot data</param>
    /// <param name="r">The smoothing ratio forwarded to <see cref="Loess"/>. The value -1 means "not specified": the smoother is then constructed without it.</param>
    public FastLoess(IReadOnlyList<double> xValues, IReadOnlyList<double> yValues, bool isTemporal = true, int r = -1)
    {
        Contracts.CheckValue(xValues, nameof(xValues));
        Contracts.CheckValue(yValues, nameof(yValues));

        // Estimate() produces exactly one value per input point, so reserve that much up front.
        Y = new List<double>(yValues.Count);

        if (yValues.Count < MinTimeSeriesLength)
            throw Contracts.Except($"input time series must contain at least {MinTimeSeriesLength} points: lowess");

        _x = xValues;
        _y = yValues;
        _length = _y.Count;

        if (_length <= _sampleSize)
        {
            // Short enough: fit on the full series.
            if (r == -1)
                _smoother = new Loess(_x, _y, isTemporal);
            else
                _smoother = new Loess(_x, _y, isTemporal, r);
        }
        else
        {
            // Conduct sampling based strategy, to boost the performance.
            // A fractional step spreads the _sampleSize picks evenly over the series, starting at index 0.
            double step = _length * 1.0 / _sampleSize;
            var sampleX = new double[_sampleSize];
            var sampleY = new double[_sampleSize];
            for (int i = 0; i < _sampleSize; i++)
            {
                int index = (int)(i * step);
                sampleX[i] = _x[index];
                sampleY[i] = _y[index];
            }

            if (r == -1)
                _smoother = new Loess(sampleX, sampleY, isTemporal);
            else
                _smoother = new Loess(sampleX, sampleY, isTemporal, r);
        }
    }

    /// <summary>
    /// The estimated y values. Empty until <see cref="Estimate"/> has been called.
    /// </summary>
    public List<double> Y { get; }

    /// <summary>
    /// Assign the smoothing values to all the data points, not only the sampled ones.
    /// After the call, <see cref="Y"/> holds one estimate per input point, in input order.
    /// </summary>
    public void Estimate()
    {
        // Start from an empty list so that calling Estimate() again replaces the
        // previous estimates instead of appending a second copy.
        Y.Clear();

        for (int i = 0; i < _length; i++)
        {
            double yValue = _smoother.EstimateY(_x[i]);
            Y.Add(yValue);
        }
    }

    /// <summary>
    /// Estimate a y value by giving an x value, even if the x value is not one of the input points.
    /// </summary>
    public double EstimateY(double xValue)
    {
        return _smoother.EstimateY(xValue);
    }
}
}
Loading

0 comments on commit 33f5f32

Please sign in to comment.