Skip to content

Commit

Permalink
Added a Hadoop streaming example
Browse files Browse the repository at this point in the history
  • Loading branch information
Zachary Radtka committed Sep 20, 2015
1 parent ee57e79 commit c535b79
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 0 deletions.
8 changes: 8 additions & 0 deletions python/MapReduce/HadoopStreaming/mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/python
"""Hadoop Streaming word-count mapper.

Reads text lines from standard input and writes one ``word<TAB>1`` record
to standard output for every whitespace-separated word.
"""
import sys


def map_words(lines):
    """Yield a ``(word, 1)`` pair for each whitespace-separated word.

    ``lines`` is any iterable of strings. Lines that are empty or contain
    only whitespace yield nothing, because ``str.split()`` with no
    arguments returns an empty list for them.
    """
    for line in lines:
        for word in line.split():
            yield word, 1


def main():
    """Run the mapper over stdin, emitting tab-separated records on stdout."""
    # sys.stdout.write is used instead of the print statement so the script
    # parses on both Python 2 and Python 3; the bytes written are the same.
    for word, count in map_words(sys.stdin):
        sys.stdout.write('{0}\t{1}\n'.format(word, count))


if __name__ == '__main__':
    main()
21 changes: 21 additions & 0 deletions python/MapReduce/HadoopStreaming/reducer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/python
"""Hadoop Streaming word-count reducer.

Reads ``word<TAB>count`` records from standard input, sums the counts of
consecutive records that share the same word, and writes one
``word<TAB>total`` record per run to standard output.
"""
import sys


def reduce_counts(lines):
    """Yield ``(word, total)`` for each run of consecutive equal words.

    ``lines`` is any iterable of ``word<TAB>count`` strings; a trailing
    newline on the count is tolerated by ``int()``. Only *adjacent* records
    are merged, so the input is assumed to arrive grouped by word (as a
    streaming reducer normally receives it); a word that appears in two
    separate runs is yielded twice.

    Raises ``ValueError`` when a line does not contain exactly one tab or
    when the count is not an integer.
    """
    curr_word = None
    curr_count = 0

    for line in lines:
        word, count = line.split('\t')
        count = int(count)

        if word == curr_word:
            curr_count += count
            continue

        # Compare against None rather than relying on truthiness: an
        # empty-string key is still a real key and its total must be flushed.
        if curr_word is not None:
            yield curr_word, curr_count

        curr_word = word
        curr_count = count

    # Flush the final run only if at least one record was read; otherwise
    # an empty input would produce a spurious "None\t0" record.
    if curr_word is not None:
        yield curr_word, curr_count


def main():
    """Run the reducer over stdin, emitting tab-separated totals on stdout."""
    # sys.stdout.write is used instead of the print statement so the script
    # parses on both Python 2 and Python 3; the bytes written are the same.
    for word, total in reduce_counts(sys.stdin):
        sys.stdout.write('{0}\t{1}\n'.format(word, total))


if __name__ == '__main__':
    main()
File renamed without changes.

0 comments on commit c535b79

Please sign in to comment.