-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
292 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#! /bin/bash

# This placeholder script only prints its own embedded documentation —
# the text between the ":'" and closing "'" markers below — word-wrapped
# for the terminal.
#
# Extracting by marker (sed address range) instead of a hard-coded line
# offset (the old `tail -n +9 | head -n -1`) keeps this working if lines
# above the doc block are added or removed, and avoids `head -n -1`,
# which is a GNU-only extension.
sed -n "/^:'$/,/^'$/{/^:'$/d;/^'$/d;p;}" "${BASH_SOURCE[0]}" | fold -s

exit 0

# Everything below this line is simple documentation
:'
This is a placeholder step. There is at least one step here in which *.multihetsep.txt files were generated. This needs to be fleshed out.
'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
#! /bin/bash

# LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE

# Some handy variables
#${SLURM_MEM_PER_CPU}
#${SLURM_MEM_PER_NODE}
#${SLURM_JOB_NAME}
#${SLURM_NTASKS}
#${SLURM_JOB_NUM_NODES}
#${SLURM_JOB_ID}
#${SLURM_ARRAY_JOB_ID}
#${SLURM_ARRAY_TASK_ID}
#${SLURM_ARRAY_TASK_COUNT}
#${SLURM_ARRAY_TASK_MIN}
#${SLURM_ARRAY_TASK_MAX}

# Derive per-task and per-job memory figures from whichever of SLURM's
# two (mutually exclusive) memory variables is set.
if [ -n "$SLURM_JOB_ID" ] # basically, if this is managed by slurm vs being run locally
then
	if [ -n "$SLURM_JOB_NUM_NODES" ] && [ "$SLURM_JOB_NUM_NODES" -ne 1 ]
	then
		printf "%s\n" "This job is meant to be run with a single node" 1>&2
		exit 1
	elif [ -n "$SLURM_MEM_PER_CPU" ]
	then
		MEM_TASK_IN_MB=${SLURM_MEM_PER_CPU}
		MEM_JOB_IN_MB=$((MEM_TASK_IN_MB * SLURM_NTASKS))
		MEM_JOB_IN_GB=$((MEM_JOB_IN_MB / 1024))
	elif [ -n "$SLURM_MEM_PER_NODE" ]
	then
		MEM_JOB_IN_MB=$((SLURM_MEM_PER_NODE * SLURM_JOB_NUM_NODES))
		MEM_JOB_IN_GB=$((MEM_JOB_IN_MB / 1024))
		# Plain integer arithmetic: same truncating division the old
		# `bc` call performed, and consistent with the branch above.
		MEM_TASK_IN_MB=$((MEM_JOB_IN_MB / SLURM_NTASKS))
	else
		printf "%s\n" '$SLURM_MEM_PER_NODE and $SLURM_MEM_PER_CPU not specified.' 1>&2
		exit 1
	fi
fi

# Move into the directory the job was submitted from; when running
# outside SLURM, stay put and record "." so later paths still resolve.
if [ -n "${SLURM_SUBMIT_DIR}" ]
then
	cd "$SLURM_SUBMIT_DIR" || exit 1
else
	SLURM_SUBMIT_DIR=.
fi
|
||
# manage job cleanup
#
# $1 - subdirectory of job_files/ to file the .err/.out logs under
#      (defaults to "success")
cleanup()
{
	# Remove this job's scratch space under /tmp, keyed on the SLURM job
	# ID when available, otherwise on this shell's PID.
	#
	# NB: the -n test MUST be quoted. The original unquoted form
	# `[ -n $SLURM_JOB_ID ]` is TRUE when the variable is unset (it
	# degenerates to `[ -n ]`), which made local runs evaluate
	# `[ -e /tmp/ ]` and then attempt `rm -rf /tmp/`.
	if [ -n "$SLURM_JOB_ID" ] && [ -e "/tmp/${SLURM_JOB_ID}" ]
	then
		rm -rf "/tmp/${SLURM_JOB_ID}" &> /dev/null
	elif [ -e "/tmp/${$}" ]
	then
		rm -rf "/tmp/${$}" &> /dev/null
	fi

	rm -rf "/tmp/${SLURM_ARRAY_JOB_ID}-${SLURM_ARRAY_TASK_ID}" &> /dev/null

	# move successful/failed job files to the correct place
	local SUCCESS_FAIL_STATUS_SUBDIR
	# ':-' (use default), not ':=' — bash refuses to assign to $1
	# ("cannot assign in this way"), so the old form errored whenever
	# cleanup was called with no argument.
	SUCCESS_FAIL_STATUS_SUBDIR="${1:-success}"

	# One of the two mv calls matches depending on whether this is an
	# array task or a plain job; the other fails silently.
	mv "${SLURM_SUBMIT_DIR}/job_files/${SLURM_JOB_NAME}__${SLURM_ARRAY_JOB_ID}-${SLURM_ARRAY_TASK_ID}".{err,out} "${SLURM_SUBMIT_DIR}/job_files/${SUCCESS_FAIL_STATUS_SUBDIR}" &> /dev/null
	mv "${SLURM_SUBMIT_DIR}/job_files/${SLURM_JOB_NAME}__${SLURM_JOB_ID}".{err,out} "${SLURM_SUBMIT_DIR}/job_files/${SUCCESS_FAIL_STATUS_SUBDIR}" &> /dev/null

	# Cleanup is best-effort: don't propagate rm/mv failures to callers.
	return 0
}
|
||
# Interrupt handler: forward SIGINT to any background children, file the
# job logs under 'failed', and exit non-zero.
control_c()
{
	local CHILD_PIDS
	CHILD_PIDS=$(jobs -p)
	# Guard: `kill -SIGINT` with no PIDs prints a usage error.
	# CHILD_PIDS is intentionally unquoted so each PID is its own word.
	if [ -n "${CHILD_PIDS}" ]
	then
		kill -SIGINT ${CHILD_PIDS}
	fi
	cleanup "failed"
	exit 1
}

trap control_c SIGHUP SIGINT SIGTERM SIGQUIT
|
||
# Walltime handler: SLURM delivers USR1 shortly before the time limit
# (see the sbatch --signal option in the submit script). Stop the
# worker, keep whatever output it produced, and exit 10 (== SIGUSR1)
# so the failure mode is recognizable.
outOfTime()
{
	printf "%s\n" "This job ran out of time! SLURM sent signal USR1 and now we're trying to quit gracefully. (fingers crossed!)" 1>&2
	local CHILD_PIDS
	CHILD_PIDS=$(jobs -p)
	# Guard: `kill -SIGINT` with no PIDs prints a usage error.
	[ -n "${CHILD_PIDS}" ] && kill -SIGINT ${CHILD_PIDS}

	printf "%s\n" "Now using 'cleanup' function with status 'success'. Be advised: this process ran out of time- you will need to run this again with more time (and/or more RAM)." 1>&2
	cleanup "success"

	exit 10 # SIGUSR1 == 10
}

trap outOfTime USR1
|
||
|
||
# load modules
module purge
module load msmc/1.1.0

# needed input things
# $1      - output prefix for all msmc files
# $2...   - *.multihetsep.txt input files
OUTPUT_MSMC_PFX="${1}"
shift 1
INPUT_MULTIHETSEP_FILES=("${@}")

# $( ) instead of backticks: the nested double quotes around the dirname
# argument now actually protect paths containing spaces (inside
# backticks they were lost to word splitting).
OUTPUT_DIR=$(readlink -n -m "$(dirname "${OUTPUT_MSMC_PFX}")")
OUTPUT_FILES=("${OUTPUT_MSMC_PFX}".{log,{final,loop}.txt})

# check for existence of input file(s)
# We assume msmc is capable of recognizing whether the files
# it requires exist.

# check for existence of expected output file(s)
declare -a ALREADY_EXIST_OUTPUT_FILES
for OUTPUT_FILE in "${OUTPUT_FILES[@]}"
do
	if [ -e "${OUTPUT_FILE}" ]
	then
		ALREADY_EXIST_OUTPUT_FILES+=("${OUTPUT_FILE}")
	fi
done

if [ "${#ALREADY_EXIST_OUTPUT_FILES[@]}" -gt 0 ]
then
	printf "%s\n" "INFO: one or more output files already exist! We assume this means we can quit this process without running the intended command. Bye!" 1>&2
	cleanup
	exit 0
fi
unset ALREADY_EXIST_OUTPUT_FILES

# create output directory, if needed
mkdir -p "${OUTPUT_DIR}" &> /dev/null
unset OUTPUT_DIR

# Run the program of interest in the background so the USR1/INT traps
# can fire while we wait. Capture the PID directly via $! rather than
# re-querying the job table.
time msmc \
	-t "${SLURM_NTASKS}" \
	-R \
	-o "${OUTPUT_MSMC_PFX}" \
	"${INPUT_MULTIHETSEP_FILES[@]}" &
MSMC_PID=$!

wait "${MSMC_PID}"
EXIT_CODE=$?

# cleanup and exit: lock output files read-only on success, remove
# partial output on failure
if [ "${EXIT_CODE}" -eq 0 ]
then
	chmod 444 "${OUTPUT_FILES[@]}" &> /dev/null
	cleanup "success"
else
	rm -f "${OUTPUT_FILES[@]}" &> /dev/null
	cleanup "failed"
fi

exit "${EXIT_CODE}"

#This is MSMC Version 1.1.0. Usage: msmc [options] <datafiles>
#  Options:
#    -i, --maxIterations=<size_t> : number of EM-iterations [default=20]
#    -o, --outFilePrefix=<string> : file prefix to use for all output files
#    -r, --rhoOverMu=<double>: ratio of recombination rate over mutation rate. Default=0.25.
#    -t, --nrThreads=<size_t> : nr of threads to use (defaults to nr of CPUs)
#    -p, --timeSegmentPattern=<string> : pattern of fixed time segments [default=10*1+15*2]
#    -P, --subpopLabels=<string> comma-separated subpopulation labels (assume one single population by default, with
#          number of haplotypes inferred from first input file). For cross-population analysis with 4 haplotypes, 2
#          coming from each subpopulation, set this to 0,0,1,1
#    -R, --fixedRecombination : keep recombination rate fixed [recommended, but not set by default]
#    -I, --indices: indices (comma-separated) of alleles in the data file to run over
#    -s, --skipAmbiguous: skip sites with ambiguous phasing. Recommended for gene flow analysis
#    --unboundedCrossCoal: do not bound the relative cross coalescence rate to be <=1.
#    --loBoundLambda: Give a lower bound for lambda rates (default=0)
#    --hiBoundLambda: Give an upper bound for lambda rates (default=infinity)
#    -h, --help: show this message
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#! /bin/bash | ||
|
||
# Ensure we're running from the correct location: the caller must be in
# the project root, and this script must live in <root>/scripts*.
CWD_check()
{
	#local SCRIPTS_DIR
	# NOTE: SCRIPTS_DIR is deliberately NOT declared local (hence the
	# commented-out line above) — it is consumed later by submitJob to
	# locate the sibling 09-msmc.slurm script.
	local MAIN_DIR
	local RUN_DIR

	# Absolute path of the directory containing this script.
	SCRIPTS_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"`)
	# Parent of SCRIPTS_DIR: dirname first strips the trailing slash,
	# then removes the last path component.
	MAIN_DIR=$(readlink -f `dirname "${SCRIPTS_DIR}/"`)
	# Directory the user invoked the script from.
	RUN_DIR=$(readlink -f .)

	# Reject unless cwd == project root AND the script really sits under
	# <root>/scripts* (the quoted "${MAIN_DIR}" in the regex is matched
	# literally; the unquoted tail is a pattern).
	if [ "${RUN_DIR}" != "${MAIN_DIR}" ] || ! [[ "${SCRIPTS_DIR}" =~ ^"${MAIN_DIR}"/scripts.* ]]
	then
		printf "\n\t%s\n\t%s\n\n" "Script must be run from ${MAIN_DIR}" "You are currently at: ${RUN_DIR}" 1>&2
		exit 1
	fi
}
CWD_check
|
||
# Print the sbatch command line that launches the msmc step.
#
# NOTE(review): the leading 'echo' makes this a dry run — the command is
# printed, not executed. That may be a leftover from testing (the
# trailing comment in this file records a real job ID); remove 'echo' to
# actually submit. Kept as-is here to preserve behavior.
#
# $1   - SLURM job name
# $2   - msmc output prefix (first arg to 09-msmc.slurm)
# $3.. - *.multihetsep.txt input files
#
# Reads the global SCRIPTS_DIR set by CWD_check.
submitJob()
{
	local JOB_NAME OUT_MSMC_PFX INPUT_MULTIHETSEPS
	JOB_NAME="${1}"
	OUT_MSMC_PFX="${2}"
	shift 2
	INPUT_MULTIHETSEPS=("${@}")

	# All expansions quoted so names/paths containing spaces survive.
	echo sbatch \
		-J "${JOB_NAME}" \
		--signal=B:USR1@300 \
		--time=0-01:00:00 \
		--ntasks=4 \
		--nodes=1 \
		--mem=4G \
		-o job_files/%x__%j.out \
		-e job_files/%x__%j.err \
		"${SCRIPTS_DIR}/09-msmc.slurm" \
		"${OUT_MSMC_PFX}" \
		"${INPUT_MULTIHETSEPS[@]}"
}
|
||
# ###################################### #
# sanity check on input and output files #
# ###################################### #

# define key variables
SPECIES="bft"
PROJECT="${SPECIES}-msmc"
ASSEMBLY_FAI="data/assembly/asm_long.fa.fai" # only necessary for presence of input data
ASSEMBLY_SEQIDS="${ASSEMBLY_FAI}" # only necessary for presence of input data
INPUT_DIR="data/multihetseps"
INPUT_SFX=".multihetsep.txt"
OUTPUT_PFX="data/msmc/msmc"

EXIT_EARLY=0

# The seqid list is the source of truth for which input files must
# exist. Without this check, a missing list made `cut` fail and left
# INPUT_FILES silently empty, so the per-file loop below passed
# vacuously and a job with NO inputs could be submitted.
if [ ! -e "${ASSEMBLY_SEQIDS}" ]
then
	printf "%s\n" "ERROR: Required sequence ID list does not exist: ${ASSEMBLY_SEQIDS}" 1>&2
	EXIT_EARLY=1
fi

#declare -a INPUT_FILES=($(find "${INPUT_DIR}" -type f -name '*'"${INPUT_SFX}" -printf '%f ')) # <-- get input files from `find' command (assumes all needed files exist)
# Word splitting of the $( ) is intentional: one expected input path per
# sequence ID listed in the .fai (IDs contain no whitespace).
declare -a INPUT_FILES=( $(cut -d ' ' -f 1 "${ASSEMBLY_SEQIDS}" 2> /dev/null | sed -r 's,^(.+)$,'"${INPUT_DIR}"'/\1'"${INPUT_SFX}"',' | tr '\n' ' ') ) # <-- get input files from list of sequence IDs (enables checking of input files existence)
declare -a OUTPUT_FILES=("${OUTPUT_PFX}".{log,{final,loop}.txt})
OUTPUT_DIR=$(readlink -n -m "$(dirname "${OUTPUT_PFX}")")

# check for existence of needed input files
for INPUT_FILE in "${INPUT_FILES[@]}"
do
	if [ ! -e "${INPUT_FILE}" ]
	then
		printf "%s\n" "ERROR: Required input file does not exist: ${INPUT_FILE}" 1>&2
		EXIT_EARLY=1
	fi
done

# check for the existence of output files
declare -a ALREADY_EXIST_OUTPUT_FILES
for OUTPUT_FILE in "${OUTPUT_FILES[@]}"
do
	if [ -e "${OUTPUT_FILE}" ]
	then
		ALREADY_EXIST_OUTPUT_FILES+=("${OUTPUT_FILE}")
	fi
done

if [ "${#ALREADY_EXIST_OUTPUT_FILES[@]}" -gt 0 ]
then
	printf "%s\n\trm -f" "ERROR: Output file(s) already exist(s). To run this step, first remove it/them:" 1>&2
	printf " %s" "${ALREADY_EXIST_OUTPUT_FILES[@]}" 1>&2
	printf "\n" 1>&2
	EXIT_EARLY=1
fi

# exit without submitting the job, if needed
if [ "${EXIT_EARLY}" -ne 0 ]
then
	exit "${EXIT_EARLY}"
fi
unset EXIT_EARLY

# create output dir (if needed)
mkdir -p "${OUTPUT_DIR}" &> /dev/null

# ####################### #
# actually submit the job #
# ####################### #
HPC_JOB_NAME="${PROJECT}_msmc"
submitJob \
	"${HPC_JOB_NAME}" \
	"${OUTPUT_PFX}" \
	"${INPUT_FILES[@]}"

# Bonefish
# 39828452. 0-00:20:33. 80% (3.2/4) CPU. 5% (3/64 GB) RAM.
|