Skip to content

Commit

Permalink
PDIO-232 Add the rest of ItemRec and ItemSim algo code
Browse files Browse the repository at this point in the history
  • Loading branch information
Donald Szeto committed Oct 10, 2013
1 parent d3dce91 commit aa73447
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 149 deletions.
34 changes: 7 additions & 27 deletions bin/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,35 +30,16 @@ fi
echo "Going to build PredictionIO..."
BASE_TARGETS="update publish"

# Build commons
#echo "Going to build PredictionIO Commons..."
#cd $BASE/commons
#$SBT $CLEAN update +publish

# Build output
#echo "Going to build PredictionIO Output..."
#cd $BASE/output
#$SBT $CLEAN update +publish

# Build process commons
#echo "Going to build PredictionIO Process Commons..."
#cd $BASE/process/commons/hadoop/scalding
#$SBT $CLEAN update +publish

if test "$SKIP_PROCESS" = "1" ; then
echo "Skip building process assemblies."
else
# Build process itemrec algo assembly
echo "+ Assemble Process ItemRec Hadoop Scalding Algorithms"
BASE_TARGETS="$BASE_TARGETS processItemRecAlgoHadoopScalding/assembly"
# $SBT $CLEAN processItemRecAlgoHadoopScalding/assembly
# cd $BASE/process/engines/itemrec/algorithms/hadoop/scalding
# $SBT $CLEAN update assembly
#
# echo "Going to build PredictionIO Process ItemRec Scala Mahout Algorithms Assembly..."
# cd $BASE/process/engines/itemrec/algorithms/scala/mahout
# $SBT $CLEAN update assembly
#

echo "+ Assemble Process ItemRec Scala Mahout Algorithms"
BASE_TARGETS="$BASE_TARGETS processItemRecAlgoScalaMahout/assembly"

# # Build process itemrec eval assembly
# echo "Going to build PredictionIO Process ItemRec Evaluations Assembly..."
# cd $BASE/process/engines/itemrec/evaluations/hadoop/scalding
Expand All @@ -79,10 +60,9 @@ else
# cd $BASE/process/engines/itemrec/evaluations/scala/topkitems
# $SBT $CLEAN update assembly
#
# # Build process itemsim algo assembly
# echo "Going to build PredictionIO Process ItemSim Algorithms Assembly..."
# cd $BASE/process/engines/itemsim/algorithms/hadoop/scalding
# $SBT $CLEAN update assembly
# Build process itemsim algo assembly
echo "+ Assemble Process ItemSim Hadoop Scalding Algorithms"
BASE_TARGETS="$BASE_TARGETS processItemSimAlgoHadoopScalding/assembly"
#
# # Build process itemsim eval assembly
# echo "Going to build PredictionIO Process ItemSim Evaluations Assembly..."
Expand Down
4 changes: 2 additions & 2 deletions bin/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ cp $BASE/bin/quiet.sh $PACKAGE_DIR/bin
cp -R $DIST_DIR/conf $PACKAGE_DIR

cp "$BASE/process/engines/itemrec/algorithms/hadoop/scalding/target/scala-2.10/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemrec/algorithms/scala/mahout/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
cp "$BASE/process/engines/itemrec/algorithms/scala/mahout/target/scala-2.10/predictionio-process-itemrec-algorithms-scala-mahout-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemrec/evaluations/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemrec/evaluations/scala/topkitems/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-TopKItems-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemrec/evaluations/scala/trainingtestsplit/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-Scala-TrainingTestSplitTime-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemrec/evaluations/scala/paramgen/target/scala-2.10/PredictionIO-Process-ItemRec-Evaluations-ParamGen-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemsim/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
cp "$BASE/process/engines/itemsim/algorithms/hadoop/scalding/target/scala-2.10/predictionio-process-itemsim-algorithms-hadoop-scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemsim/evaluations/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Evaluations-Hadoop-Scalding-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
#cp "$BASE/process/engines/itemsim/evaluations/scala/topkitems/target/scala-2.10/PredictionIO-Process-ItemSim-Evaluations-TopKItems-assembly-$VERSION.jar" "$PACKAGE_DIR/lib"
cp -n $BASE/tools/conncheck/target/pack/lib/* $PACKAGE_DIR/lib
Expand Down
90 changes: 88 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,33 @@ lazy val root = project.in(file(".")).aggregate(
output,
processCommonsHadoopScalding,
processItemRecAlgoHadoopScalding,
processItemRecAlgoScalaMahout,
processItemSimAlgoHadoopScalding,
toolsConncheck,
toolsSettingsInit,
toolsSoftwareManager,
toolsUsers)

// Commons and Output

// sbt project rooted at the "commons" directory; no inter-project dependencies are declared on it here.
lazy val commons = project.in(file("commons"))

// sbt project rooted at the "output" directory; the commons project is on its classpath.
lazy val output = (project in file("output")).dependsOn(commons)

// Process Assemblies

// sbt project for process/commons/hadoop/scalding; the commons project is on its classpath.
lazy val processCommonsHadoopScalding =
  (project in file("process/commons/hadoop/scalding")).dependsOn(commons)

lazy val processItemRecAlgoHadoopScalding = project
.in(file("process/engines/itemrec/algorithms/hadoop/scalding")).aggregate(
.in(file("process/engines/itemrec/algorithms/hadoop/scalding"))
.aggregate(
processItemRecAlgoHadoopScaldingGeneric,
processItemRecAlgoHadoopScaldingKnnitembased,
processItemRecAlgoHadoopScaldingRandomrank,
processItemRecAlgoHadoopScaldingLatestrank,
processItemRecAlgoHadoopScaldingMahout).dependsOn(
processItemRecAlgoHadoopScaldingMahout)
.dependsOn(
processItemRecAlgoHadoopScaldingGeneric,
processItemRecAlgoHadoopScaldingKnnitembased,
processItemRecAlgoHadoopScaldingRandomrank,
Expand All @@ -66,6 +74,84 @@ lazy val processItemRecAlgoHadoopScaldingMahout = project
.in(file("process/engines/itemrec/algorithms/hadoop/scalding/mahout"))
.dependsOn(processCommonsHadoopScalding)

// Parent sbt project for process/engines/itemrec/algorithms/scala/mahout.
// The same seven sub-projects appear in both lists below: `aggregate` makes a
// task run on this project also run on each sub-project, and `dependsOn` puts
// each sub-project on this project's classpath.
lazy val processItemRecAlgoScalaMahout =
  (project in file("process/engines/itemrec/algorithms/scala/mahout"))
    .aggregate(
      processItemRecAlgoScalaMahoutCommons,
      processItemRecAlgoScalaMahoutALSWR,
      processItemRecAlgoScalaMahoutKNNUserBased,
      processItemRecAlgoScalaMahoutSlopeOne,
      processItemRecAlgoScalaMahoutSVDPlusPlus,
      processItemRecAlgoScalaMahoutSVDSGD,
      processItemRecAlgoScalaMahoutThresholdUserBased)
    .dependsOn(
      processItemRecAlgoScalaMahoutCommons,
      processItemRecAlgoScalaMahoutALSWR,
      processItemRecAlgoScalaMahoutKNNUserBased,
      processItemRecAlgoScalaMahoutSlopeOne,
      processItemRecAlgoScalaMahoutSVDPlusPlus,
      processItemRecAlgoScalaMahoutSVDSGD,
      processItemRecAlgoScalaMahoutThresholdUserBased)

// sbt project for the mahout/commons directory; the top-level commons project is on its classpath.
lazy val processItemRecAlgoScalaMahoutCommons =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/commons"))
    .dependsOn(commons)

// sbt project for the mahout/alswr directory; depends on the Scala Mahout commons project.
lazy val processItemRecAlgoScalaMahoutALSWR =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/alswr"))
    .dependsOn(processItemRecAlgoScalaMahoutCommons)

// sbt project for the mahout/knnuserbased directory; depends on the Scala Mahout commons project.
lazy val processItemRecAlgoScalaMahoutKNNUserBased =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/knnuserbased"))
    .dependsOn(processItemRecAlgoScalaMahoutCommons)

// sbt project for the mahout/slopeone directory; depends on the Scala Mahout commons project.
lazy val processItemRecAlgoScalaMahoutSlopeOne =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/slopeone"))
    .dependsOn(processItemRecAlgoScalaMahoutCommons)

// sbt project for the mahout/svdplusplus directory; depends on the Scala Mahout commons project.
lazy val processItemRecAlgoScalaMahoutSVDPlusPlus =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/svdplusplus"))
    .dependsOn(processItemRecAlgoScalaMahoutCommons)

// sbt project for the mahout/svdsgd directory; depends on the Scala Mahout commons project.
lazy val processItemRecAlgoScalaMahoutSVDSGD =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/svdsgd"))
    .dependsOn(processItemRecAlgoScalaMahoutCommons)

// sbt project for the mahout/thresholduserbased directory; depends on the Scala Mahout commons project.
lazy val processItemRecAlgoScalaMahoutThresholdUserBased =
  (project in file("process/engines/itemrec/algorithms/scala/mahout/thresholduserbased"))
    .dependsOn(processItemRecAlgoScalaMahoutCommons)

// Parent sbt project for process/engines/itemsim/algorithms/hadoop/scalding.
// The same four sub-projects appear in both lists below: `aggregate` makes a
// task run on this project also run on each sub-project, and `dependsOn` puts
// each sub-project on this project's classpath.
lazy val processItemSimAlgoHadoopScalding =
  (project in file("process/engines/itemsim/algorithms/hadoop/scalding"))
    .aggregate(
      processItemSimAlgoHadoopScaldingItemSimCF,
      processItemSimAlgoHadoopScaldingLatestRank,
      processItemSimAlgoHadoopScaldingMahout,
      processItemSimAlgoHadoopScaldingRandomRank)
    .dependsOn(
      processItemSimAlgoHadoopScaldingItemSimCF,
      processItemSimAlgoHadoopScaldingLatestRank,
      processItemSimAlgoHadoopScaldingMahout,
      processItemSimAlgoHadoopScaldingRandomRank)

// sbt project for the scalding/itemsimcf directory; depends on the shared Hadoop Scalding commons project.
lazy val processItemSimAlgoHadoopScaldingItemSimCF =
  (project in file("process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf"))
    .dependsOn(processCommonsHadoopScalding)

// sbt project for the scalding/latestrank directory; depends on the shared Hadoop Scalding commons project.
lazy val processItemSimAlgoHadoopScaldingLatestRank =
  (project in file("process/engines/itemsim/algorithms/hadoop/scalding/latestrank"))
    .dependsOn(processCommonsHadoopScalding)

// sbt project for the scalding/mahout directory; depends on the shared Hadoop Scalding commons project.
lazy val processItemSimAlgoHadoopScaldingMahout =
  (project in file("process/engines/itemsim/algorithms/hadoop/scalding/mahout"))
    .dependsOn(processCommonsHadoopScalding)

// sbt project for the scalding/randomrank directory; depends on the shared Hadoop Scalding commons project.
lazy val processItemSimAlgoHadoopScaldingRandomRank =
  (project in file("process/engines/itemsim/algorithms/hadoop/scalding/randomrank"))
    .dependsOn(processCommonsHadoopScalding)

// Tools Section

// sbt project rooted at tools/conncheck; the commons project is on its classpath.
lazy val toolsConncheck =
  (project in file("tools/conncheck")).dependsOn(commons)

Expand Down
30 changes: 15 additions & 15 deletions dist/conf/predictionio.conf
Original file line number Diff line number Diff line change
Expand Up @@ -75,28 +75,28 @@ io.prediction.commons.modeldata.training.db.host=localhost
io.prediction.commons.modeldata.training.db.port=27017

# PredictionIO Algorithms
pdio-knnitembased.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-knnitembased.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-latestrank.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-randomrank.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-itembased.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-parallelals.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-knnuserbased.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-thresholduserbased.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-slopeone.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-alswr.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-svdsgd.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-svdplusplus.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar

pdio-itemsimcf.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-itemsimlatestrank.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-itemsimrandomrank.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-itemsimcf.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-itembased.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-parallelals.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-knnuserbased.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-thresholduserbased.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-slopeone.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-alswr.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-svdsgd.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-svdplusplus.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar

pdio-itemsimcf.jar=${io.prediction.base}/lib/predictionio-process-itemsim-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-itemsimlatestrank.jar=${io.prediction.base}/lib/predictionio-process-itemsim-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
pdio-itemsimrandomrank.jar=${io.prediction.base}/lib/predictionio-process-itemsim-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar
mahout-itemsimcf.jar=${io.prediction.base}/lib/predictionio-process-itemsim-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar

# PredictionIO generic scalding job
io.prediction.algorithms.scalding.itemrec.generic.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-hadoop-scalding-assembly-0.7.0-SNAPSHOT.jar

# Itemrec Scala Mahout Algorithms
io.prediction.algorithms.mahout.itemrec.jar=${io.prediction.base}/lib/PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout-assembly-0.7.0-SNAPSHOT.jar
io.prediction.algorithms.mahout.itemrec.jar=${io.prediction.base}/lib/predictionio-process-itemrec-algorithms-scala-mahout-assembly-0.7.0-SNAPSHOT.jar

# Mahout core job
io.prediction.algorithms.mahout-core-job.jar=${io.prediction.base}/vendors/mahout-distribution-0.8/mahout-core-0.8-job.jar
Expand Down
11 changes: 2 additions & 9 deletions process/engines/itemrec/algorithms/scala/mahout/build.sbt
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
import AssemblyKeys._

name := "PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout"
name := "predictionio-process-itemrec-algorithms-scala-mahout"

packageOptions += Package.ManifestAttributes(java.util.jar.Attributes.Name.MAIN_CLASS -> "io.prediction.commons.mahout.itemrec.MahoutJob")

version in ThisBuild:= "0.7.0-SNAPSHOT"

scalaVersion in ThisBuild:= "2.10.2"

scalacOptions in ThisBuild ++= Seq("-deprecation")

parallelExecution in Test := false

resolvers in ThisBuild ++= Seq(
"Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/.m2/repository",
resolvers ++= Seq(
"Concurrent Maven Repo" at "http://conjars.org/repo",
"Clojars Repository" at "http://clojars.org/repo")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
name := "PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout-Commons"

libraryDependencies ++= Seq(
"io.prediction" %% "predictionio-commons" % "0.7.0-SNAPSHOT",
"org.apache.mahout" % "mahout-core" % "0.8"
)
libraryDependencies ++= Seq("org.apache.mahout" % "mahout-core" % "0.8")

This file was deleted.

This file was deleted.

4 changes: 2 additions & 2 deletions process/engines/itemsim/algorithms/hadoop/scalding/build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import AssemblyKeys._ // put this at the top of the file
import AssemblyKeys._

name := "PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding"
name := "predictionio-process-itemsim-algorithms-hadoop-scalding"

packageOptions += Package.ManifestAttributes(java.util.jar.Attributes.Name.MAIN_CLASS -> "com.twitter.scalding.Tool")

Expand Down

This file was deleted.

This file was deleted.

0 comments on commit aa73447

Please sign in to comment.