Skip to content

Commit

Permalink
additional threshold for some normal records
Browse files Browse the repository at this point in the history
  • Loading branch information
Pranab Ghosh committed Sep 23, 2018
1 parent 8e8449a commit c396d6c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
1 change: 1 addition & 0 deletions resource/and.conf
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ statsBasedOutlierPredictor {
id.fieldOrdinals = [0]
attr.ordinals = [3]
score.threshold = 0.94
score.thresholdNorm = 0.92
zscore {
attr.weights = [1.0]
stats.file.path = "/Users/pranab/Projects/bin/beymani/other/olp/stats.txt"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ object StatsBasedOutlierPredictor extends JobConfiguration with SeasonalUtility
None
}


val thresholdNorm = this.getOptionalDoubleParam(appConfig, "score.thresholdNorm")
val debugOn = appConfig.getBoolean("debug.on")
val saveOutput = appConfig.getBoolean("save.output")

Expand Down Expand Up @@ -171,6 +171,7 @@ object StatsBasedOutlierPredictor extends JobConfiguration with SeasonalUtility
marker.equals("O")
})
if (remOutliers) {
//additionally save the input data with the tagged outlier records removed
taggedData = taggedData.map(line => {
val items = line.split(fieldDelimIn, -1)
val ar = items.slice(0, items.length - 2)
Expand All @@ -181,7 +182,19 @@ object StatsBasedOutlierPredictor extends JobConfiguration with SeasonalUtility
val cleanData = data.subtract(taggedData)
cleanData.saveAsTextFile(cleanDataDirPath)
}
}
} else {
//keep all records, or only those scoring above score.thresholdNorm when it is configured
taggedData = thresholdNorm match {
case Some(threshold:Double) => {
taggedData.filter(line => {
val items = line.split(fieldDelimIn, -1)
val score = items(items.length - 2).toDouble
score > threshold
})
}
case None => taggedData
}
}

if (debugOn) {
val records = taggedData.collect
Expand Down Expand Up @@ -222,15 +235,16 @@ object StatsBasedOutlierPredictor extends JobConfiguration with SeasonalUtility
val expConst :java.lang.Double = getDoubleParamOrElse(appConfig, "exp.const", 1.0)
configParams.put("exp.const", expConst);

val isHdfsFile = getBooleanParamOrElse(appConfig, "hdfs.file", false)
configParams.put("hdfs.file", new java.lang.Boolean(isHdfsFile))

predictorStrategy match {
case `predStrategyZscore` => {
val attWeightList = getMandatoryDoubleListParam(appAlgoConfig, "attr.weights", "missing attribute weights")
val attrWeights = BasicUtils.fromListToDoubleArray(attWeightList)
configParams.put("attr.weights", attrWeights)
val statsFilePath = getMandatoryStringParam(appAlgoConfig, "stats.file.path", "missing stat file path")
configParams.put("stats.filePath", statsFilePath)
val isHdfsFile = getBooleanParamOrElse(appAlgoConfig, "hdfs.file", false)
configParams.put("hdfs.file", new java.lang.Boolean(isHdfsFile))
}
case `predStrategyExtremeValueProb` => {
val attWeightList = getMandatoryDoubleListParam(appAlgoConfig, "attr.weights", "missing attribute weights")
Expand All @@ -240,25 +254,18 @@ object StatsBasedOutlierPredictor extends JobConfiguration with SeasonalUtility
configParams.put("stats.medFilePath", medStatsFilePath)
val madStatsFilePath = getMandatoryStringParam(appAlgoConfig, "stats.madFilePath", "missing mad stat file path")
configParams.put("stats.madFilePath", madStatsFilePath)
val isHdfsFile = getBooleanParamOrElse(appAlgoConfig, "hdfs.file", false)
configParams.put("hdfs.file", new java.lang.Boolean(isHdfsFile))
}
case `predStrategyEstProb` => {
val distrFilePath = getMandatoryStringParam(appAlgoConfig, "distr.file.path", "missing distr file path")
configParams.put("distr.filePath", distrFilePath)
val isHdfsFile = getBooleanParamOrElse(appAlgoConfig, "hdfs.file", false)
configParams.put("hdfs.file", new java.lang.Boolean(isHdfsFile))
val schemaFilePath = getMandatoryStringParam(appAlgoConfig, "schema.file.path", "missing schema file path")
configParams.put("schema.filePath", schemaFilePath)
}
case `predStrategyEstAttrProb` => {
val distrFilePath = getMandatoryStringParam(appAlgoConfig, "distr.file.path", "missing distr file path")
configParams.put("distr.filePath", distrFilePath)
val isHdfsFile = getBooleanParamOrElse(appAlgoConfig, "hdfs.file", false)
configParams.put("hdfs.file", new java.lang.Boolean(isHdfsFile))
val schemaFilePath = getMandatoryStringParam(appAlgoConfig, "schema.file.path", "missing schema file path")
configParams.put("schema.filePath", schemaFilePath)

}
}

Expand Down

0 comments on commit c396d6c

Please sign in to comment.