Skip to content

Commit

Permalink
Merge pull request steemit#3665 from steemit/fix_health_check
Browse files Browse the repository at this point in the history
update health check process
  • Loading branch information
ety001 authored May 17, 2021
2 parents 3db9b9d + a3581ad commit 1113bf0
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 12 deletions.
10 changes: 10 additions & 0 deletions contrib/healthcheck.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
#!/bin/bash

# While the blockchain data is still being downloaded, the marker file
# /tmp/isdownloading exists. The download takes longer than the 1 hour
# maximum rolling-update timeout, so report the node as healthy for the
# whole download phase instead of letting the update time out.
if [[ -e /tmp/isdownloading ]]; then
  # CGI-style response: status line, content type, blank separator, body.
  printf '%s\n' \
    'Status: 200' \
    'Content-type:text/plain' \
    '' \
    'The blockchain data is downloading.'
  exit 0
fi

# if this is a syncing node, it will regularly be down
# so all we need to know is that this healthcheck script is up and responding
if [[ -e /tmp/issyncnode ]]; then
Expand Down
42 changes: 30 additions & 12 deletions contrib/startpaassteemd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,29 @@ else
STEEMD="/usr/local/steemd-full/bin/steemd"
fi

# When SYNC_TO_S3 is set (non-empty), flag this machine as a sync node by
# creating /tmp/issyncnode; contrib/healthcheck.sh tests for that file.
# The marker is handed to www-data (presumably the user the health-check CGI
# runs as -- confirm).
if [[ -n "${SYNC_TO_S3}" ]]; then
  printf '%s\n' "[info] create issyncnode file."
  touch /tmp/issyncnode
  chown www-data:www-data /tmp/issyncnode
fi

# Create the marker that tells contrib/healthcheck.sh the download phase is
# in progress; it is removed again once the download step has finished.
echo "[info] create downloading tag file."
touch /tmp/isdownloading

# Bring nginx up *before* the backup download starts: the maximum rolling
# update timeout is 1 hour, but the download takes longer than that, so the
# health-check endpoint has to be reachable during the download.
cp /etc/nginx/healthcheck.conf.template /etc/nginx/healthcheck.conf

# Finish the config by appending the upstream server and a closing brace
# (presumably the template ends inside an open block -- confirm against the
# template). Until the local steemd is running, traffic is pointed at
# ahnode.steemit.com, on the assumption that the auto scaling group already
# contains healthy nodes.
printf '%s\n' 'server ahnode.steemit.com;' '}' >> /etc/nginx/healthcheck.conf

# Install the generated config as the default site and (re)start the services.
rm /etc/nginx/sites-enabled/default
cp /etc/nginx/healthcheck.conf /etc/nginx/sites-enabled/default
/etc/init.d/fcgiwrap restart
service nginx restart
echo "[info] nginx started."

# Give the steemd user ownership of everything under $HOME. The expansion is
# quoted so a path containing whitespace or glob characters is passed to
# chown as a single argument.
chown -R steemd:steemd "$HOME"

# clean out data dir since it may be semi-persistent block storage on the ec2 with stale data
Expand Down Expand Up @@ -109,6 +132,11 @@ else
fi
done
fi

# The download step is over: remove the marker so contrib/healthcheck.sh stops
# short-circuiting with "The blockchain data is downloading.".
# -f keeps this quiet when the marker is already gone; a real failure is
# reported, because a leftover marker would make the health check answer 200
# indefinitely.
if rm -f /tmp/isdownloading; then
  echo "[info] remove /tmp/isdownloading."
else
  echo "[warn] could not remove /tmp/isdownloading; health check will keep reporting a download in progress." >&2
fi

if [[ $finished == 0 ]]; then
if [[ ! "$SYNC_TO_S3" ]]; then
echo notifyalert steemd: unable to pull blockchain state from S3 - exiting
Expand Down Expand Up @@ -136,21 +164,9 @@ ARGS+=" --tags-skip-startup-update"

# Work from the steemd home directory. Quoted so a path with whitespace is
# not word-split.
cd "$HOME"

# NOTE(review): this sync-node marker block repeats the one near the top of
# the script (minus the log line). The commit summary reports 12 deletions in
# this file, so this is presumably the pre-change copy -- confirm before
# relying on it.
if [[ "$SYNC_TO_S3" ]]; then
touch /tmp/issyncnode
chown www-data:www-data /tmp/issyncnode
fi

# Hand everything directly under $HOME (non-hidden entries, recursively) to
# the steemd user. Only the variable is quoted; the glob must stay unquoted.
chown -R steemd:steemd "$HOME"/*

# let's get going
# Regenerate the nginx health-check config from its template, this time with
# the local endpoint 127.0.0.1:8091 as the upstream server, install it as the
# default site and restart fcgiwrap and nginx.
# NOTE(review): this repeats the nginx set-up done near the top of the script
# (which uses ahnode.steemit.com as the upstream). The commit summary reports
# 12 deletions in this file, so these lines are presumably the pre-change
# version that the commit removed -- confirm before relying on them.
cp /etc/nginx/healthcheck.conf.template /etc/nginx/healthcheck.conf
# Append the upstream server entry and the closing brace to the copied template.
echo server 127.0.0.1:8091\; >> /etc/nginx/healthcheck.conf
echo } >> /etc/nginx/healthcheck.conf
# Replace the default site with the generated config.
rm /etc/nginx/sites-enabled/default
cp /etc/nginx/healthcheck.conf /etc/nginx/sites-enabled/default
/etc/init.d/fcgiwrap restart
service nginx restart
exec chpst -usteemd \
$STEEMD \
--webserver-ws-endpoint=127.0.0.1:8091 \
Expand All @@ -160,6 +176,8 @@ exec chpst -usteemd \
$ARGS \
$STEEMD_EXTRA_OPTS \
2>&1&
# steemd has just been launched in the background with its websocket endpoint
# on 127.0.0.1:8091, so switch the nginx upstream from the external fallback
# host to the local node. The dots are escaped so that only the literal host
# name matches.
sed -i 's/ahnode\.steemit\.com/127.0.0.1:8091/' /etc/nginx/healthcheck.conf
# The start-up code installed a *copy* of healthcheck.conf as
# sites-enabled/default, so editing healthcheck.conf alone leaves the active
# site untouched. Re-install the edited file so the restart picks it up.
# NOTE(review): assumes nginx loads sites-enabled/default rather than
# healthcheck.conf directly -- confirm against nginx.conf.
cp /etc/nginx/healthcheck.conf /etc/nginx/sites-enabled/default
service nginx restart
# Look up the PID(s) of any process whose full command line contains
# "p2p-endpoint" (presumably the steemd instance started above -- confirm).
SAVED_PID=$(pgrep -f p2p-endpoint)
# Append the result to /tmp/steemdpid. The expansion is intentionally left
# unquoted so that several PIDs end up on one space-separated line.
echo $SAVED_PID >> /tmp/steemdpid
# Make sure the steemd service directory exists.
mkdir -p /etc/service/steemd
Expand Down
8 changes: 8 additions & 0 deletions contrib/sync-sv-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ BLOCKCHAIN_TIME=$(
--data '{"jsonrpc":"2.0","id":39,"method":"database_api.get_dynamic_global_properties"}' \
localhost:8090 | jq -r .result.time
)
echo "[info] BLOCKCHAIN_TIME=$BLOCKCHAIN_TIME"

if [[ "$IS_BROADCAST_NODE" ]]; then
FILE_TYPE=broadcast
Expand All @@ -46,8 +47,10 @@ if [[ ! -z "$BLOCKCHAIN_TIME" ]]; then

# if we're within 10 seconds of current time, call it synced and begin the upload
BLOCK_AGE=$((${CURRENT_SECS} - ${BLOCKCHAIN_SECS}))
echo "[info] BLOCK_AGE=$BLOCK_AGE"
if [[ ${BLOCK_AGE} -le 10 ]]; then
LAST_BACKUP_TIME=`aws s3 ls s3://steemit-dev-blockchainstate/${FILE_TYPE}-${CHECKSUM_BLOCKCHAIN_TAR_FILE} | awk '{print $1}'`
echo "[info] LAST_BACKUP_TIME=$LAST_BACKUP_TIME"
if [[ ! -z $LAST_BACKUP_TIME ]]; then
LAST_BACKUP_TIMESTAMP=`date -d $LAST_BACKUP_TIME +%s`
BACKUP_INTERVAL=$((${CURRENT_SECS} - ${LAST_BACKUP_TIMESTAMP}))
Expand Down Expand Up @@ -138,6 +141,11 @@ if [[ ! -z "$BLOCKCHAIN_TIME" ]]; then
else
echo warning: last backup file is later less than 22 hours.
fi
else
# No previous checksum object was found (as far as the visible nesting shows,
# this branch runs when the `aws s3 ls` lookup returned nothing): create an
# empty checksum file locally and upload it as a placeholder.
# NOTE(review): the lookup earlier in this script lists
# s3://steemit-dev-blockchainstate/..., while this upload targets
# s3://$S3_BUCKET/...; confirm both refer to the same bucket, otherwise the
# placeholder will never be found by that lookup.
echo "[info] Create empty checksum file, $CHECKSUM_BLOCKCHAIN_TAR_FILE"
touch "${CHECKSUM_BLOCKCHAIN_TAR_FILE}"
# All expansions are quoted (matching the touch above) so that the source path
# and the destination URL are each passed to aws as exactly one argument.
aws s3 cp "${CHECKSUM_BLOCKCHAIN_TAR_FILE}" "s3://${S3_BUCKET}/${FILE_TYPE}-${CHECKSUM_BLOCKCHAIN_TAR_FILE}"
fi
fi
fi
Expand Down

0 comments on commit 1113bf0

Please sign in to comment.