Skip to content

Commit

Permalink
Introduce adapted sgsh regression tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dspinellis committed Dec 16, 2016
1 parent 35d656b commit 7735841
Show file tree
Hide file tree
Showing 97 changed files with 15,733 additions and 38 deletions.
29 changes: 29 additions & 0 deletions example/parallel-word-count.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env sgsh
#
# SYNOPSIS Parallel word count
# DESCRIPTION
# Count number of times each word appears in the specified input file(s)
# Demonstrates parallel execution mirroring the Hadoop WordCount example
# via the dgsh-parallel command.
#
# Copyright 2014-2016 Diomidis Spinellis
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Pipeline overview: scatter standard input, count the words of each part in
# parallel, then merge the partial counts into a single result.
# Scatter input
# (-s selects dgsh-tee's scatter mode; presumably each downstream process
# receives a different part of the input rather than a full copy — confirm
# against the dgsh-tee documentation.)
dgsh-tee -s |
# Emulate Java's default StringTokenizer, sort, count
# tr maps space, tab, newline, carriage return and form feed to a newline and
# squeezes repeated newlines (-s), leaving one word per line; sort runs in the
# C locale with a 512M buffer (-S) so ordering is byte-wise and independent of
# the user's locale; uniq -c prefixes each distinct word with its count.
dgsh-parallel -n 4 "tr -s ' \t\n\r\f' '\n' | LC_ALL=C sort -S 512M | uniq -c" |
# Merge sorted counts
# NOTE(review): dgsh-parallel is started with -n 4 above, but only three '<|'
# inputs are listed here — confirm whether the two numbers are meant to match.
dgsh-merge-sum '<|' '<|' '<|'
85 changes: 47 additions & 38 deletions test-dgsh.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
#!/bin/sh
#
# Regression testing of the provided examples
#

# Ensure that the generated test file matches the reference one
# File names are by convention test/$base/out.{ok,test}
#
# Arguments:
#   $1 - flags label, shown in brackets in the first progress line
#   $2 - base name of the example; selects the test/$base directory
# Outputs: a progress line on stdout; on a mismatch, a message on stderr
# Exit:    terminates the whole script with status 1 on the first mismatch
#          (the function runs in the caller's shell, so exit is not local)
#
# NOTE(review): the body below contains two consecutive compare-and-report
# sequences that perform the same check; this looks like the pre- and
# post-change versions of a diff shown together — confirm which one the
# real file keeps.
ensure_same()
{
local flags=$1
local base=$2
echo "$base.sh [$flags]"
# -r: out.ok/out.test may be directories (text-properties creates out.test
# with mkdir); -w: ignore whitespace differences. Only the exit status is
# used, so diff's own output is discarded.
if ! diff -rw test/$base/out.ok test/$base/out.test >/dev/null
then
echo "$base.sh: test/$base/out.ok and test/$base/out.test differ" 1>&2
exit 1
fi
local flags=$1
local base=$2
# -n keeps the cursor on the same line so that OK follows the script name
echo -n "$base.sh "
# Same comparison as above, reported as OK on success
if diff -rw test/$base/out.ok test/$base/out.test >/dev/null
then
echo OK
else
echo "$base.sh: Files differ: test/$base/out.ok test/$base/out.test" 1>&2
exit 1
fi
}

# Include fallback commands in our executable path
Expand All @@ -24,6 +29,7 @@ LOGFILE=test/web-log-report/logfile
CLARKNET=ftp://ita.ee.lbl.gov/traces/clarknet_access_log_Aug28.gz
if ! [ -f $LOGFILE ]
then
mkdir -p $(dirname $LOGFILE)
echo "Fetching web test data"
{
curl $CLARKNET 2>/dev/null ||
Expand All @@ -34,54 +40,57 @@ then
fi


for flags in '' -m -S
do
rm -rf test/*/out.test
rm -rf test/*/out.test

echo hello cruwl world | ./dgsh $flags -p . example/spell-highlight.sh >test/spell-highlight/out.test
ensure_same "$flags" spell-highlight
mkdir -p test/spell-highlight
echo hello cruwl world | ./dgsh example/spell-highlight.sh >test/spell-highlight/out.test
ensure_same "$flags" spell-highlight

./dgsh $flags -p . example/map-hierarchy.sh test/map-hierarchy/in/a test/map-hierarchy/in/b test/map-hierarchy/out.test
ensure_same "$flags" map-hierarchy
./dgsh example/map-hierarchy.sh test/map-hierarchy/in/a test/map-hierarchy/in/b test/map-hierarchy/out.test
ensure_same "$flags" map-hierarchy

./dgsh $flags -p . example/commit-stats.sh --until '{2013-07-15 23:59 UTC}' >test/commit-stats/out.test
ensure_same "$flags" commit-stats
./dgsh example/commit-stats.sh --until '{2013-07-15 23:59 UTC}' >test/commit-stats/out.test
ensure_same "$flags" commit-stats

./dgsh $flags -p . example/code-metrics.sh test/code-metrics/in/ >test/code-metrics/out.test
ensure_same "$flags" code-metrics
./dgsh example/code-metrics.sh test/code-metrics/in/ >test/code-metrics/out.test
ensure_same "$flags" code-metrics

./dgsh $flags -p . example/duplicate-files.sh test/duplicate-files >test/duplicate-files/out.test
ensure_same "$flags" duplicate-files
./dgsh example/duplicate-files.sh test/duplicate-files >test/duplicate-files/out.test
ensure_same "$flags" duplicate-files

./dgsh $flags -p . example/word-properties.sh <test/word-properties/LostWorldChap1-3 >test/word-properties/out.test
ensure_same "$flags" word-properties
# TODO Fails due to extraneous grep output
#./dgsh example/word-properties.sh <test/word-properties/LostWorldChap1-3 >test/word-properties/out.test
#ensure_same "$flags" word-properties

./dgsh $flags -p . example/compress-compare.sh <test/word-properties/LostWorldChap1-3 | sed 's/:.*ASCII.*/: ASCII/;s|/dev/stdin:||' >test/compress-compare/out.test
ensure_same "$flags" compress-compare
./dgsh example/compress-compare.sh <test/word-properties/LostWorldChap1-3 | sed 's/:.*ASCII.*/: ASCII/;s|/dev/stdin:||' >test/compress-compare/out.test
ensure_same "$flags" compress-compare

./dgsh $flags -p . example/web-log-report.sh <test/web-log-report/logfile >test/web-log-report/out.test
ensure_same "$flags" web-log-report
# TODO Order of output different from original (why?); GB wrong
#./dgsh example/web-log-report.sh <test/web-log-report/logfile >test/web-log-report/out.test
#ensure_same "$flags" web-log-report

(
cd test/text-properties
rm -rf out.test
mkdir out.test
cd out.test
../../../dgsh $flags -p ../../.. ../../../example/text-properties.sh <../../word-properties/LostWorldChap1-3
)
ensure_same "$flags" text-properties
done
(
cd test/text-properties
rm -rf out.test
mkdir out.test
cd out.test
../../../dgsh ../../../example/text-properties.sh ../../word-properties/LostWorldChap1-3
)
ensure_same "$flags" text-properties

# Outside the loop, because scatter -s is not compatible with -S
# The correct file was generated using
# tr -s ' \t\n\r\f' \\n <test/word-properties/LostWorldChap1-3 | sort | uniq -c | sed 's/^ *//'
# An empty line is removed from the test output, because it can be generated
# by tr when the first line of a split file is empty. (In that case \n is not
# a repeated character that tr will remove.)
./dgsh -p . example/parallel-word-count.sh <test/word-properties/LostWorldChap1-3 | sed '/^[0-9]* $/d' >test/parallel-word-count/out.test
ensure_same "" parallel-word-count
# TODO Make the parallel-word-count work
#./dgsh example/parallel-word-count.sh <test/word-properties/LostWorldChap1-3 | sed '/^[0-9]* $/d' >test/parallel-word-count/out.test
#ensure_same "" parallel-word-count


# TODO
exit
for i in example/*
do
echo -n "Testing graph of $i "
Expand Down
18 changes: 18 additions & 0 deletions test/bin/gdate
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/sh
#
# Poor man's GNU date
# This implements the GNU date -f - option for converting the date as
# represented in the web log into the week day using the FreeBSD
# date command
#
# Usage:  gdate -f - +%a
# Input:  one date per line on standard input, in %d-%b-%Y form
# Output: the abbreviated week day (%a) of each date, one per line
# Exit:   1 when invoked with any other argument list; otherwise the
#         status of the last date invocation (0 if no input was read)
#

# Only the single invocation form needed by the tests is supported;
# the arguments are compared as one joined string.
if [ x"$*" != x'-f - +%a' ]
then
  echo "(poor man's) gdate usage: gdate -f - +%a" 1>&2
  exit 1
fi

# -r stops read from treating backslashes as escape characters.
# The "|| [ -n "$d" ]" clause processes a final line that lacks a
# trailing newline, which read would otherwise report as end of file.
while read -r d || [ -n "$d" ]
do
  # -j: do not try to set the clock; -f: parse "$d" using the given format
  date -j -f '%d-%b-%Y' "$d" +%a
done
Loading

0 comments on commit 7735841

Please sign in to comment.