Skip to content

Commit

Permalink
Update for demo word accuracy script, however the utility is seg faul…
Browse files Browse the repository at this point in the history
…ting atm
  • Loading branch information
dav committed Aug 17, 2013
1 parent 410255e commit 92c6614
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 15 deletions.
34 changes: 27 additions & 7 deletions scripts/demo-word-accuracy.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
make
if [ ! -e text8 ]; then
wget http://mattmahoney.net/dc/text8.zip -O text8.gz
gzip -d text8.gz -f
# Paths are relative to the directory the script is run from.
# DATA_DIR holds the text8 corpus, the trained vectors and questions-words.txt;
# BIN_DIR is where make is run and where the word2vec tools are invoked from.
# NOTE(review): demo-word.sh sets BIN_DIR=../src -- confirm which directory
# actually holds the Makefile and the built binaries.
DATA_DIR=../data
BIN_DIR=../bin

TEXT_DATA=$DATA_DIR/text8
VECTOR_DATA=$DATA_DIR/text8-vector.bin

# Build the tools. make and popd only run when pushd succeeded, so a missing
# BIN_DIR no longer triggers a stray popd on an empty directory stack.
if pushd "$BIN_DIR"; then
  make
  popd
fi


# Train the vectors only when no vector file exists yet, so re-running the
# script reuses the previous training result.
if [ ! -e "$VECTOR_DATA" ]; then

  # Fetch the text8 corpus only when it is not already present.
  if [ ! -e "$TEXT_DATA" ]; then
    # The archive is a .zip saved under a .gz name so that gzip -d leaves the
    # extracted corpus at $DATA_DIR/text8. Only decompress when the download
    # succeeded; -f overwrites any existing output file.
    wget http://mattmahoney.net/dc/text8.zip -O "$DATA_DIR/text8.gz" &&
      gzip -d -f "$DATA_DIR/text8.gz"
  fi
  echo -----------------------------------------------------------------------------------------------------
  echo -- Training vectors...
  time "$BIN_DIR/word2vec" -train "$TEXT_DATA" -output "$VECTOR_DATA" -cbow 0 -size 200 -window 5 -negative 0 -hs 1 -sample 1e-3 -threads 12 -binary 1

fi
time ./word2vec -train text8 -output vectors.bin -cbow 0 -size 200 -window 5 -negative 0 -hs 1 -sample 1e-3 -threads 12 -binary 1
./compute-accuracy vectors.bin 30000 < questions-words.txt
# to compute accuracy with the full vocabulary, use: ./compute-accuracy vectors.bin < questions-words.txt

echo -----------------------------------------------------------------------------------------------------
echo -- accuracy...

# to compute accuracy with the full vocabulary, use: compute-accuracy $VECTOR_DATA < $DATA_DIR/questions-words.txt
# Enable command tracing so the exact compute-accuracy invocation is printed.
set -x
"$BIN_DIR/compute-accuracy" "$VECTOR_DATA" 30000 < "$DATA_DIR/questions-words.txt"

23 changes: 15 additions & 8 deletions scripts/demo-word.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# Paths are relative to the directory the script is run from.
# DATA_DIR holds the text8 corpus and the trained vectors; BIN_DIR is where
# make is run and where the word2vec and distance tools are invoked from.
DATA_DIR=../data
BIN_DIR=../src

TEXT_DATA=$DATA_DIR/text8
VECTOR_DATA=$DATA_DIR/text8-vector.bin

# Build the tools. make and popd only run when pushd succeeded, so a missing
# BIN_DIR no longer triggers a stray popd on an empty directory stack.
if pushd "$BIN_DIR"; then
  make
  popd
fi

if [ ! -e $DATA_DIR/text8 ]; then
wget http://mattmahoney.net/dc/text8.zip -O $DATA_DIR/text8.gz
gzip -d $DATA_DIR/text8.gz -f

# Train the vectors only when no vector file exists yet, so re-running the
# script goes straight to the distance tool.
if [ ! -e "$VECTOR_DATA" ]; then

  # Fetch the text8 corpus only when it is not already present.
  if [ ! -e "$TEXT_DATA" ]; then
    # The archive is a .zip saved under a .gz name so that gzip -d leaves the
    # extracted corpus at $DATA_DIR/text8. Only decompress when the download
    # succeeded; -f overwrites any existing output file.
    wget http://mattmahoney.net/dc/text8.zip -O "$DATA_DIR/text8.gz" &&
      gzip -d -f "$DATA_DIR/text8.gz"
  fi
  echo -----------------------------------------------------------------------------------------------------
  echo -- Training vectors...
  time "$BIN_DIR/word2vec" -train "$TEXT_DATA" -output "$VECTOR_DATA" -cbow 0 -size 200 -window 5 -negative 0 -hs 1 -sample 1e-3 -threads 12 -binary 1

fi

echo -----------------------------------------------------------------------------------------------------
echo -- Training vectors...
time $BIN_DIR/word2vec -train $DATA_DIR/text8 -output $DATA_DIR/text8-vector.bin -cbow 0 -size 200 -window 5 -negative 0 -hs 1 -sample 1e-3 -threads 12 -binary 1

echo -----------------------------------------------------------------------------------------------------
echo -- distance...

$BIN_DIR/distance $DATA_DIR/text8-vector.bin
# VECTOR_DATA already carries the DATA_DIR prefix, so pass it as-is; prefixing
# DATA_DIR again yields a wrong path whenever DATA_DIR is absolute.
"$BIN_DIR/distance" "$VECTOR_DATA"

0 comments on commit 92c6614

Please sign in to comment.