Commit 75ed805

Merge branch 'master' of https://github.com/0xdata/h2o

dearirenelang committed Feb 20, 2014
2 parents 32361b4 + e9f46e6
Showing 27 changed files with 214 additions and 189 deletions.
3 changes: 2 additions & 1 deletion R/Makefile
@@ -128,7 +128,8 @@ endif
# Build binary for each OS
rm -rf $(TMP_BUILD_DIR)
mkdir -p $(TMP_BUILD_DIR)
-R CMD INSTALL -l $(TMP_BUILD_DIR) --build h2oRClient-package
+[ -x "`which gnutar 2>/dev/null`" ] || echo 'Note: gnutar not found; package install in R may fail in the next step'
+R CMD INSTALL -l $(TMP_BUILD_DIR) --build h2oRClient-package || echo 'If you got an error like "Dependency foo is not available for package h2oRClient" you need to install the required R package by running R and executing the R command: install.packages("foo")'

ifneq ($(OS),Windows_NT)
# rm -rf h2oRClient
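The two hints added above point at common failure modes on OS X. A sketch of the corresponding fixes (the Homebrew formula name and the R package name are illustrative assumptions, not taken from this repository):

    $ brew install gnu-tar        # installs GNU tar as 'gtar'; link or alias it as 'gnutar' if needed
    $ R -e 'install.packages("foo", repos="http://cran.r-project.org")'    # replace "foo" with the dependency named in the error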
29 changes: 21 additions & 8 deletions h2o-docs/source/developuser/quickstart_mac.rst
@@ -60,7 +60,7 @@ Downloading and Building H\ :sub:`2`\ O

$ git clone https://github.com/0xdata/h2o.git

-9. Build H\ :sub:`2`\ O from source. After the build finishes, some JUnit tests will run automatically.
+9. Build H\ :sub:`2`\ O from source. After the build finishes, some JUnit tests will run automatically. Note that if you normally compile a different way (e.g., with an IDE), you may not have built the Hadoop driver jars that are created when building with make:

::

@@ -86,7 +86,11 @@ Installing Hadoop on a Mac

12. Configure Hadoop (modify the file paths or version number if applicable):

-Modify ``/usr/local/Cellar/hadoop/1.2.1/libexec/conf/core-site.xml`` to contain the following:
+Note:
+In Hadoop 1.x these files are found in, e.g., ``/usr/local/Cellar/hadoop/1.2.1/libexec/conf/``.
+In Hadoop 2.x these files are found in, e.g., ``/usr/local/Cellar/hadoop/2.2.0/libexec/etc/hadoop/``.

+Modify ``core-site.xml`` to contain the following:

::

@@ -96,8 +100,8 @@ Installing Hadoop on a Mac
<value>hdfs://localhost:8020</value>
</property>
</configuration>
-Modify ``/usr/local/Cellar/hadoop/1.2.1/libexec/conf/mapred-site.xml`` to contain the following:

+Modify ``mapred-site.xml`` to contain the following (NOTE: you may need to create the file from ``mapred-site.xml.template``; see the example after this block):

::

@@ -112,7 +116,7 @@ Installing Hadoop on a Mac
</property>
</configuration>
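If ``mapred-site.xml`` does not exist yet, a typical way to create it from the shipped template (the path assumes the Hadoop 2.x Homebrew layout noted above; adjust for your version)::

   $ cd /usr/local/Cellar/hadoop/2.2.0/libexec/etc/hadoop
   $ cp mapred-site.xml.template mapred-site.xml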

-Modify ``/usr/local/Cellar/hadoop/1.2.1/libexec/conf/hdfs-site.xml`` to contain the following:
+Modify ``hdfs-site.xml`` to contain the following:

::

@@ -123,18 +127,27 @@ Installing Hadoop on a Mac
</property>
</configuration>

-13. Optional: Enable password-less SSH from localhost to localhost for convenience
+13. Optional: Enable password-less SSH from localhost to localhost for convenience.

+First enable Remote Login in the Sharing pane of System Preferences, and then:

::

$ brew install ssh-copy-id
$ ssh-keygen
$ ssh-copy-id -i ~/.ssh/id_rsa.pub localhost
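
A quick way to confirm that password-less login works before moving on (a verification step assumed here, not part of the original instructions)::

   $ ssh localhost        # should log in without prompting for a password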

-14. Start Hadoop MapReduce services
+14. Start Hadoop MapReduce services, e.g.:

::

$ /usr/local/Cellar/hadoop/1.2.1/bin/start-all.sh
+or

+::

+$ /usr/local/Cellar/hadoop/2.2.0/sbin/start-dfs.sh
+$ /usr/local/Cellar/hadoop/2.2.0/sbin/start-yarn.sh

15. Verify that Hadoop is up and running by checking the output of ``jps`` (look for NameNode, DataNode, JobTracker, TaskTracker)

@@ -148,7 +161,7 @@ Installing Hadoop on a Mac
81655 DataNode
81928 TaskTracker

-16. Format HDFS and leave the safe mode
+16. Format HDFS and leave the safe mode.

::
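The commands for this step are collapsed in this view; a typical sequence under the Homebrew Hadoop 1.x layout used above (an assumed illustration, not the documented text) would be::

   $ /usr/local/Cellar/hadoop/1.2.1/bin/hadoop namenode -format
   $ /usr/local/Cellar/hadoop/1.2.1/bin/hadoop dfsadmin -safemode leave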
4 changes: 2 additions & 2 deletions h2o-samples/src/main/java/samples/Frames.java
@@ -20,7 +20,7 @@ public static void main(String[] args) throws Exception {
}

@Override
-protected Status exec() {
+protected JobState exec() {
// From file
parse(new File(VM.h2oFolder(), "smalldata/iris/iris.csv"));

@@ -35,7 +35,7 @@ protected Status exec() {
Key key = Key.make("MyFrame");
UKV.put(key, frame);

-return Status.Done;
+return JobState.DONE;
}

/**
4 changes: 2 additions & 2 deletions h2o-samples/src/main/java/samples/LoadDatasets.java
@@ -293,8 +293,8 @@ void load() {
TestUtil.parseFromH2OFolder("smalldata/./zipcodes");
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
load();
-return Status.Running;
+return JobState.RUNNING;
}
}
4 changes: 2 additions & 2 deletions h2o-samples/src/main/java/samples/MapReduce.java
@@ -24,7 +24,7 @@ public static void main(String[] args) throws Exception {
}

@Override
-protected Status exec() {
+protected JobState exec() {
// Parse a dataset into a Frame, H2O's distributed table-like data structure
File file = new File(VM.h2oFolder(), "smalldata/iris/iris.csv");
Frame frame = Frames.parse(file);
@@ -44,7 +44,7 @@ protected Status exec() {
// At this point, all task instances have been merged by their 'reduce' method. We
// are back to a state where only one instance exist, and it contains the overall sum.
System.out.println("Sum is " + sum.value);
-return Status.Done;
+return JobState.DONE;
}

static class Sum extends MRTask2<Sum> {
4 changes: 2 additions & 2 deletions h2o-samples/src/main/java/samples/MapReduceKMeans.java
@@ -16,7 +16,7 @@ public static void main(String[] args) throws Exception {
samples.launchers.CloudProcess.launch(MapReduceKMeans.class, 2);
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
// Load and parse a file. Data is distributed to other nodes in a round-robin way
Key file = NFSFileVec.make(new File("../lib/resources/datasets/gaussian.csv"));
Frame frame = ParseDataset2.parse(Key.make("test"), new Key[] { file });
@@ -62,7 +62,7 @@ public static void main(String[] args) throws Exception {
System.out.print(df.format(clusters[c][v]) + ", ");
System.out.println("");
}
-return Status.Done;
+return JobState.DONE;
}

/**
4 changes: 2 additions & 2 deletions h2o-samples/src/main/java/samples/NeuralNetMnist.java
@@ -110,7 +110,7 @@ protected void startTraining(Layer[] ls) {
_trainer.start();
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
final double fraction = 1.0;
final long seed = 0xC0FFEE;
load(fraction, seed);
@@ -160,7 +160,7 @@ protected void startTraining(Layer[] ls) {
}
}, 0, 10);
startTraining(ls);
-return Status.Running;
+return JobState.RUNNING;
}

// Remaining code was used to shuffle & convert to CSV
22 changes: 11 additions & 11 deletions src/main/java/hex/GridSearch.java
@@ -19,7 +19,7 @@ public class GridSearch extends Job {
public GridSearch(){

}
-@Override protected Status exec() {
+@Override protected JobState exec() {
UKV.put(destination_key, this);
int max = jobs[0].gridParallelism();
int head = 0, tail = 0;
@@ -34,7 +34,7 @@ public GridSearch(){
}
}
}
-return Status.Done;
+return JobState.DONE;
}

@Override protected void onCancelled() {
@@ -96,8 +96,8 @@ public static class GridSearchProgress extends Progress2 {
for( Job job : jobs ) {
JobInfo info = new JobInfo();
info._job = job;
-if(job.destination_key != null){
-Object value = UKV.get(job.destination_key);
+if(job.dest() != null){
+Object value = UKV.get(job.dest());
info._model = value instanceof Model ? (Model) value : null;
if( info._model != null ) {
info._cm = info._model.cm();
@@ -138,18 +138,18 @@ public static class GridSearchProgress extends Progress2 {
if( perf != null )
sb.append("<td>").append(speed).append("</td>");
String link = "";
-if( info._job.start_time != 0 && DKV.get(info._job.destination_key) != null ) {
-link = info._job.destination_key.toString();
+if( info._job.start_time != 0 && DKV.get(info._job.dest()) != null ) {
+link = info._job.dest().toString();
if( info._model instanceof GBMModel )
-link = GBMModelView.link(link, info._job.destination_key);
+link = GBMModelView.link(link, info._job.dest());
else if( info._model instanceof DRFModel )
-link = DRFModelView.link(link, info._job.destination_key);
+link = DRFModelView.link(link, info._job.dest());
else if( info._model instanceof NeuralNetModel )
-link = NeuralNetModelView.link(link, info._job.destination_key);
+link = NeuralNetModelView.link(link, info._job.dest());
if( info._model instanceof KMeans2Model )
-link = KMeans2ModelView.link(link, info._job.destination_key);
+link = KMeans2ModelView.link(link, info._job.dest());
else
-link = Inspect.link(link, info._job.destination_key);
+link = Inspect.link(link, info._job.dest());
}
sb.append("<td>").append(link).append("</td>");

6 changes: 3 additions & 3 deletions src/main/java/hex/KMeans2.java
@@ -43,7 +43,7 @@ public KMeans2() {
description = "K-means";
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
source.read_lock(self());
String sourceArg = input("source");
Key sourceKey = null;
@@ -107,7 +107,7 @@ public KMeans2() {
clusters = Utils.append(clusters, sampler._sampled);

if( !isRunning(self()) )
-return Status.Done;
+return JobState.DONE;
model.centers = normalize ? denormalize(clusters, vecs) : clusters;
model.total_within_SS = sqr._sqr;
model.iterations++;
@@ -154,7 +154,7 @@ public KMeans2() {
}
model.unlock(self());
source.unlock(self());
-return Status.Done;
+return JobState.DONE;
}

@Override protected Response redirect() {
5 changes: 3 additions & 2 deletions src/main/java/hex/drf/DRF.java
@@ -145,16 +145,17 @@ public static String link(Key k, String content) {
Log.info(" seed: " + _seed);
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
logStart();
buildModel();
-return Status.Done;
+return JobState.DONE;
}

@Override protected Response redirect() {
return DRFProgressPage.redirect(this, self(), dest());
}

+@SuppressWarnings("unused")
@Override protected void init() {
super.init();
// Initialize local variables
4 changes: 2 additions & 2 deletions src/main/java/hex/gbm/GBM.java
@@ -115,10 +115,10 @@ public static String link(Key k, String content) {
Log.info(" learn_rate: " + learn_rate);
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
logStart();
buildModel();
-return Status.Done;
+return JobState.DONE;
}

@Override public int gridParallelism() {
2 changes: 1 addition & 1 deletion src/main/java/hex/glm/GLM2.java
@@ -193,7 +193,7 @@ public static Job gridSearch(Key jobKey, Key destinationKey, DataInfo dinfo, GLM
if(destination_key == null)destination_key = Key.make("GLMGridModel_"+Key.make());
if(job_key == null)job_key = Key.make("GLMGridJob_"+Key.make());
Job j = gridSearch(self(),destination_key, _dinfo, _glm, lambda, alpha,n_folds);
-return GLMGridView.redirect(this,j.destination_key);
+return GLMGridView.redirect(this,j.dest());
} else {
if(destination_key == null)destination_key = Key.make("GLMModel_"+Key.make());
if(job_key == null)job_key = Key.make("GLM2Job_"+Key.make());
4 changes: 2 additions & 2 deletions src/main/java/hex/pca/PCA.java
@@ -56,7 +56,7 @@ public PCA(String desc, Key dest, Frame src, int max_pc, double tolerance, boole
this.standardize = standardize;
}

-@Override protected Status exec() {
+@Override protected JobState exec() {
Frame fr = selectFrame(source);
Vec[] vecs = fr.vecs();

@@ -81,7 +81,7 @@ public PCA(String desc, Key dest, Frame src, int max_pc, double tolerance, boole
PCAModel myModel = buildModel(dinfo, tsk);
myModel.delete_and_lock(self());
myModel.unlock(self());
-return Status.Done;
+return JobState.DONE;
}

@Override protected void init() {
4 changes: 2 additions & 2 deletions src/main/java/hex/pca/PCAImpute.java
@@ -21,10 +21,10 @@ public class PCAImpute extends FrameJob {
@API(help = "Scale columns by their standard deviations", filter = Default.class)
boolean scale = true;

-@Override protected Status exec() {
+@Override protected JobState exec() {
Frame fr = source;
new Frame(destination_key,fr._names.clone(),fr.vecs().clone()).delete_and_lock(null).unlock(null);
-return Status.Done;
+return JobState.DONE;
}

@Override protected void init() {
9 changes: 3 additions & 6 deletions src/main/java/hex/pca/PCAScore.java
@@ -2,11 +2,8 @@

import hex.FrameTask;
import hex.FrameTask.DataInfo;

-import java.util.Arrays;
-
-import water.Job.*;
import water.*;
+import water.Job.FrameJob;
import water.api.DocGen;
import water.fvec.*;
import water.util.RString;
@@ -31,7 +28,7 @@ public class PCAScore extends FrameJob {
@API(help = "Number of principal components to return", filter = Default.class, lmin = 1, lmax = 10000)
int num_pc = 1;

-@Override protected Status exec() {
+@Override protected JobState exec() {
// Note: Source data MUST contain all features (matched by name) used to build PCA model!
// If additional columns exist in source, they are automatically ignored in scoring
new Frame(destination_key, new String[0], new Vec[0]).delete_and_lock(self());
@@ -47,7 +44,7 @@ public class PCAScore extends FrameJob {
domains[i] = null;
}
tsk.outputFrame(destination_key, names, domains).unlock(self());
-return Status.Done;
+return JobState.DONE;
}

@Override protected void init() {
6 changes: 3 additions & 3 deletions src/main/java/hex/rf/ConfusionTask.java
@@ -103,10 +103,10 @@ public static void remove(RFModel model, Key datakey, int classcol, boolean comp
/**Apply a model to a dataset to produce a Confusion Matrix. To support
incremental & repeated model application, hash the model & data and look
for that Key to already exist, returning a prior CM if one is available.*/
-static public CMJob make(RFModel model, Key datakey, int classcol, double[] classWt, boolean computeOOB) {
+static public Job make(RFModel model, Key datakey, int classcol, double[] classWt, boolean computeOOB) {
return make(model, model.size(), datakey, classcol, classWt, computeOOB);
}
-static public CMJob make(final RFModel model, final int modelSize, final Key datakey, final int classcol, final double[] classWt, final boolean computeOOB) {
+static public Job make(final RFModel model, final int modelSize, final Key datakey, final int classcol, final double[] classWt, final boolean computeOOB) {
// Create a unique key for CM regarding given RFModel, validation data and parameters
final Key cmKey = keyForCM(model._key, modelSize, datakey, classcol, computeOOB);
// Start a new job if CM is not yet computed
@@ -145,7 +145,7 @@ static public CMJob make(final RFModel model, final int modelSize, final Key dat
return cmJob;
} else {
// We should return Job which is/was computing the CM with given cmKey
-return (CMJob) Job.findJobByDest(cmKey);
+return Job.findJobByDest(cmKey);
}
}
