forked from spotify/luigi
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New hadoop_jvm module for Java/Scala Hadoop jobs.
* Added the new JvmHadoopJobTask and extracted some common functionality from HadoopJobTask into BaseHadoopJobTask. * Moved the jobconf logic into BaseHadoopJobTask. * Added a bare-bones JvmHadoopJobRunner and extracted the common functionality of HadoopJobRunner (submitting/tracking a job) into static methods.
- Loading branch information
Showing
3 changed files
with
104 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
|
||
import logging | ||
import os | ||
|
||
import luigi | ||
import luigi.hdfs | ||
from luigi.hadoop import BaseHadoopJobTask, HadoopJobRunner, JobRunner | ||
|
||
logger = logging.getLogger('luigi-interface') | ||
|
||
|
||
class JvmHadoopJobRunner(JobRunner):
    """Job runner that launches a JVM (Java/Scala) Hadoop job via ``hadoop jar``.

    The command line is assembled from the job's ``jar()``, ``main()``,
    ``jobconfs()`` and ``args()`` and handed to
    ``HadoopJobRunner.run_and_track_hadoop_job``.
    """

    def __init__(self):
        pass

    def run_job(self, job):
        """Build the ``hadoop jar`` command line for ``job`` and run it.

        The resulting argument list has the form
        ``hadoop jar <jar> [<main>] -D<jobconf>... <args>...``.

        :param job: object providing ``jar()``, ``main()``, ``jobconfs()``
            and ``args()``.
        :raises Exception: if ``job.jar()`` is unset or does not point to an
            existing path.
        """
        # TODO(jcrobak): libjars, files, etc. Can refactor out of
        # hadoop.HadoopJobRunner
        jar = job.jar()
        if not jar or not os.path.exists(jar):
            # An unset jar (None / empty) must not reach os.path.*, which
            # would raise an unrelated TypeError instead of the error below.
            full_path = os.path.abspath(jar) if jar else None
            logger.error("Can't find jar: {0}, full path {1}".format(jar, full_path))
            raise Exception("job jar does not exist")

        arglist = ['hadoop', 'jar', jar]

        # The main class is optional for `hadoop jar` (the jar manifest may
        # name it), so only pass it when the job defines one; a literal None
        # in the argument list could never be executed.
        main = job.main()
        if main:
            arglist.append(main)

        arglist.extend('-D' + jc for jc in job.jobconfs())
        arglist.extend(job.args())

        HadoopJobRunner.run_and_track_hadoop_job(arglist)

        # TODO support temp output locations?
        self.finish()

    def finish(self):
        """Hook called after a job has run; does nothing in this runner."""
        pass

    def __del__(self):
        # Also invoke the finish hook when the runner is garbage collected.
        self.finish()
|
||
|
||
class JvmHadoopJobTask(BaseHadoopJobTask):
    """Base task for Hadoop jobs packaged as a jar (Java/Scala).

    Subclasses are expected to override the hooks below; the defaults
    describe no jar, no main class and no extra arguments.
    """

    def job_runner(self):
        """Return the runner used to execute this task.

        We recommend that you define a subclass, override this method and
        set up your own config.
        """
        return JvmHadoopJobRunner()

    def jar(self):
        """Return the path of the job jar; ``None`` unless overridden."""
        return None

    def main(self):
        """Return the main class to run from the jar; ``None`` unless overridden."""
        return None

    def args(self):
        """Return a list of args to pass to the job (after hadoop jar <jar> <main>)."""
        return []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters