-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_stats
executable file
·32 lines (28 loc) · 1.17 KB
/
data_stats
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/bin/bash
# Collect per-candidate text statistics and write them to ./data_stats.txt.
#
# Usage: data_stats DATA_DIR
#
# The data folder is expected to look like this (the sub-directory names are
# looked up exactly as spelled here):
#   DATA_DIR
#     /candidate_name
#       /qa
#         *.txt
#       /personality
#         *.txt
#
# All *.txt files below a candidate's qa directory are concatenated; odd lines
# of that stream are counted as questions and even lines as answers.
#
# Exit status: 0 on success, 2 when DATA_DIR is missing from the command line,
# 1 when DATA_DIR is not a directory.

# Report file, created (or emptied) in the current working directory.
readonly STATS_FILE=data_stats.txt

# Print the contents of every *.txt file below directory $1 (recursively).
# Prints nothing when the directory does not exist.
cat_txt_files() {
  [[ -d "$1" ]] || return 0
  find "$1" -type f -name '*.txt' -exec cat {} +
}

# Print the number of non-hidden entries directly inside directory $1.
count_entries() {
  local entry total=0
  for entry in "$1"/*; do
    # An unmatched glob stays literal, so confirm the entry really exists.
    if [[ -e "$entry" || -L "$entry" ]]; then
      total=$((total + 1))
    fi
  done
  printf '%s\n' "$total"
}

# Print the statistics paragraph for the candidate directory $1 to stdout.
report_candidate() {
  local candidate=$1
  local p_files w_c questions q_words a_words
  local avg_q_length=0 avg_a_length=0

  p_files=$(count_entries "$candidate/personality")
  # Wrapping wc in $(( ... )) normalises its output to a bare integer
  # (some wc implementations pad the number with blanks).
  w_c=$(( $(cat_txt_files "$candidate/personality" | wc -w) ))
  # Each question/answer pair occupies two lines.
  questions=$(( $(cat_txt_files "$candidate/qa" | wc -l) / 2 ))
  q_words=$(( $(cat_txt_files "$candidate/qa" | sed -n 'p;n' | wc -w) ))
  a_words=$(( $(cat_txt_files "$candidate/qa" | sed -n 'n;p' | wc -w) ))

  # Guard against division by zero for candidates without a complete
  # question/answer pair; their averages are reported as 0.
  if (( questions > 0 )); then
    avg_q_length=$(( q_words / questions ))
    avg_a_length=$(( a_words / questions ))
  fi

  printf '%s\n' "${candidate##*/}"
  printf 'personality files: %s\n' "$p_files"
  printf 'word count: %s\n' "$w_c"
  printf 'question count: %s\n' "$questions"
  printf 'average question length: %s\n' "$avg_q_length"
  printf 'average answer length: %s\n' "$avg_a_length"
  printf '\n'
}

# Entry point: $1 is the path to the data folder.
main() {
  local data_dir=${1:-}
  local candidate

  # Without this check an empty argument would make the loop walk "/*".
  if [[ -z "$data_dir" ]]; then
    printf 'Usage: %s DATA_DIR\n' "${0##*/}" >&2
    return 2
  fi
  if [[ ! -d "$data_dir" ]]; then
    printf '%s: not a directory: %s\n' "${0##*/}" "$data_dir" >&2
    return 1
  fi

  # The redirection truncates any previous stats file before the first write.
  for candidate in "$data_dir"/*; do
    # Only directories are candidates; stray files are skipped.
    [[ -d "$candidate" ]] || continue
    report_candidate "$candidate"
  done > "$STATS_FILE"
}

main "$@"