Skip to content

Commit

Permalink
Fix release commit/date labeling + remove singleton in calibration co…
Browse files Browse the repository at this point in the history
…mputations
  • Loading branch information
odelaneau committed Aug 30, 2022
1 parent 8d5a513 commit 4bb1244
Show file tree
Hide file tree
Showing 35 changed files with 84 additions and 29 deletions.
2 changes: 1 addition & 1 deletion docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ make -j static_exe
cp bin/SHAPEIT5_ligate_static ../static_bins/.

#Buld docker image
LAB=shapeit5_$(date +'%d%m%Y')\_$(git rev-parse --short HEAD)
LAB=shapeit5_$(git log -1 --format=%cd --date=short)\_$(git rev-parse --short HEAD)

cd ../docker/
mkdir -p resources
Expand Down
Binary file added docker/resources/SHAPEIT5_ligate_static
Binary file not shown.
Binary file added docker/resources/SHAPEIT5_phase_common_static
Binary file not shown.
Binary file added docker/resources/SHAPEIT5_phase_rare_static
Binary file not shown.
Binary file added docker/resources/SHAPEIT5_switch_static
Binary file not shown.
3 changes: 2 additions & 1 deletion docker/upload.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash

#Upload docker image onto DNA Nexus
LAB=shapeit5_$(date +'%d%m%Y')\_$(git rev-parse --short HEAD)
LAB=shapeit5_$(git log -1 --format=%cd --date=short)\_$(git rev-parse --short HEAD)

dx upload $LAB\.tar.gz --path "/docker/"
Binary file added ligate/bin/SHAPEIT5_ligate_static
Binary file not shown.
2 changes: 1 addition & 1 deletion ligate/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ LDFLAG=-O3

#COMMIT TRACING
COMMIT_VERS=$(shell git rev-parse --short HEAD)
COMMIT_DATE=$(shell date +'%d%m%Y')
COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
CXXFLAG+= -D__COMMIT_DATE__=\"$(COMMIT_DATE)\"

Expand Down
2 changes: 1 addition & 1 deletion phase_common/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ LDFLAG=-O3

#COMMIT TRACING
COMMIT_VERS=$(shell git rev-parse --short HEAD)
COMMIT_DATE=$(shell date +'%d%m%Y')
COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
CXXFLAG+= -D__COMMIT_DATE__=\"$(COMMIT_DATE)\"

Expand Down
2 changes: 1 addition & 1 deletion phase_rare/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ LDFLAG=-O3

#COMMIT TRACING
COMMIT_VERS=$(shell git rev-parse --short HEAD)
COMMIT_DATE=$(shell date +'%d%m%Y')
COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
CXXFLAG+= -D__COMMIT_DATE__=\"$(COMMIT_DATE)\"

Expand Down
4 changes: 2 additions & 2 deletions phase_rare/src/containers/genotype_set/genotype_set_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ class rare_genotype {
float p11 = max(prb1, numeric_limits<float>::min());
float p10 = max(1.0f - prb1, numeric_limits<float>::min());

/*
if (het) {
gprobs[0] = 0.0f;
gprobs[1] = (p00*ee + p01*ed) * (p10*ed + p11*ee);
Expand All @@ -115,8 +114,8 @@ class rare_genotype {
gprobs[2] = (p00*ed + p01*ee) * (p10*ee + p11*ed);
gprobs[3] = (p00*ed + p01*ee) * (p10*ed + p11*ee);
}
*/

/*
if (het) {
gprobs[0] = 0.0f;
gprobs[1] = p00 * p11;
Expand All @@ -128,6 +127,7 @@ class rare_genotype {
gprobs[2] = p01 * p10;
gprobs[3] = p01 * p11;
}
*/

//cout << stb.str(gprobs, 3) << endl;
int maxg = alg.imax(gprobs);
Expand Down
4 changes: 2 additions & 2 deletions phase_rare/src/models/hmm_scaffold/hmm_scaffold_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ void hmm_scaffold::backward(vector < vector < unsigned int > > & cevents) {

//Impute from full conditioning set
for (int vr = 0 ; vr < cevents[vs+1].size() ; vr ++) {
G.phaseLiAndStephens(cevents[vs+1][vr], hap, alphaXbeta_prev, alphaXbeta_curr, C.indexes_pbwt_neighbour[hap], 0.6000f);
G.phaseLiAndStephens(cevents[vs+1][vr], hap, alphaXbeta_prev, alphaXbeta_curr, C.indexes_pbwt_neighbour[hap], 0.5001f);
}
}

Expand All @@ -196,7 +196,7 @@ void hmm_scaffold::backward(vector < vector < unsigned int > > & cevents) {

//Impute from full conditioning set
for (int vr = 0 ; vr < cevents[0].size() ; vr ++) {
G.phaseLiAndStephens(cevents[0][vr], hap, alphaXbeta_curr, alphaXbeta_curr, C.indexes_pbwt_neighbour[hap], 0.6000f);
G.phaseLiAndStephens(cevents[0][vr], hap, alphaXbeta_curr, alphaXbeta_curr, C.indexes_pbwt_neighbour[hap], 0.5001f);
}
}
}
Expand Down
Binary file added static_bins/SHAPEIT5_ligate_static
Binary file not shown.
Binary file added static_bins/SHAPEIT5_phase_common_static
Binary file not shown.
Binary file added static_bins/SHAPEIT5_phase_rare_static
Binary file not shown.
Binary file added static_bins/SHAPEIT5_switch_static
Binary file not shown.
2 changes: 1 addition & 1 deletion switch/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ LDFLAG=-O3

#COMMIT TRACING
COMMIT_VERS=$(shell git rev-parse --short HEAD)
COMMIT_DATE=$(shell date +'%d%m%Y')
COMMIT_DATE=$(shell git log -1 --format=%cd --date=short)
CXXFLAG+= -D__COMMIT_ID__=\"$(COMMIT_VERS)\"
CXXFLAG+= -D__COMMIT_DATE__=\"$(COMMIT_DATE)\"

Expand Down
2 changes: 1 addition & 1 deletion switch/src/io/haplotype_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ void haplotype_reader::readHaplotypes(string ftruth, string festi, string ffreq,

//4. Probabilities
rPP = bcf_get_format_float(sr->readers[1].header, line_e, "PP", &vPP, &nPP);
if (rPP == n_samples_estimated) {
if (rPP == n_samples_estimated && H.MAC.back() != 1) {
for(int i = 0 ; i < n_samples_estimated ; i ++) {
int index = mapping[i];
if (index >= 0) {
Expand Down
21 changes: 10 additions & 11 deletions switch/src/models/haplotype_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ haplotype_checker::~haplotype_checker() {

void haplotype_checker::check() {
vrb.title("Check phasing discordances"); tac.clock();
unsigned long int n_missed = 0;
unsigned long int n_missed = 0, n_incorrect = 0 ;
for (int i = 0 ; i < H.IDXesti.size() ; i++) {
for (int l_curr = 0, l_prev = -1 ; l_curr < H.n_variants ; l_curr ++) {
bool curr_t0 = H.Htrue[2*H.IDXesti[i]+0][l_curr];
Expand All @@ -35,19 +35,18 @@ void haplotype_checker::check() {
Checked[i][l_curr] = true;

//Calibration
if (H.Hprob[H.IDXesti[i]][l_curr]) {
if (H.Hprob[H.IDXesti[i]][l_curr] && H.MAC[l_curr] > 1) {
string key = stb.str(l_curr) + "_" + stb.str(H.IDXesti[i]);
map < string, float > :: iterator itM = H.Vprob.find(key);
if (itM != H.Vprob.end()) {
//cout << "Found [" << key << "]" << endl;
int bin = itM->second * (Calib.size()-1);
Calib[bin][0] += itM->second;
Calib[bin][1] += Errors[i][l_curr];
Calib[bin][2] += Checked[i][l_curr];

} else {
n_missed ++;
}
if (itM->second >= 0.0f && itM->second <= 1.0f) {
int bin = itM->second * (Calib.size()-1);
Calib[bin][0] += itM->second;
Calib[bin][1] += Errors[i][l_curr];
Calib[bin][2] += Checked[i][l_curr];
} else n_incorrect ++;
} else n_missed ++;
}
}
l_prev = l_curr;
Expand All @@ -61,7 +60,7 @@ void haplotype_checker::check() {
n_phased_hets += Checked[i][l];
}
vrb.bullet("#Phasing switch error rate = " + stb.str(n_phasing_errors * 100.0f / n_phased_hets, 5));
vrb.bullet("#missed = " + stb.str(n_missed));
vrb.bullet("#missed = " + stb.str(n_missed) + " / #incorrect = " + stb.str(n_incorrect));
vrb.bullet("Timing: " + stb.str(tac.rel_time()*1.0/1000, 2) + "s");
}

Expand Down
Binary file added tasks/phasingUKB/plots/PDFs/figure2d_147754.pdf
Binary file not shown.
Binary file added tasks/phasingUKB/plots/calibration.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion tasks/phasingUKB/plots/figure2.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ for (s in nSIZE:nSIZE) {
#SHPser1 = freqSER(prefix, suffix, s);
#BQ2[s] = SHPser1[2]

pdf(paste("PDFs/figure2c_", SIZE[s], ".pdf", sep=""), 10,5)
pdf(paste("PDFs/figure2d_", SIZE[s], ".pdf", sep=""), 10,5)
par(mfrow=c(1,2))


Expand Down
42 changes: 42 additions & 0 deletions tasks/phasingUKB/plots/figure4.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
###### COLORS ######

library(RColorBrewer)
COLpair = brewer.pal(12,"Paired")
COLdiff = brewer.pal(8,"Set1")

###### VECTORS ######
REG=read.table("/home/olivier/Dropbox/Repository/shapeit5/tasks/phasingUKB/step2_wgs/step2_splitchunks/chr20.size4Mb.txt", head=FALSE)
SIZE=c(2000,5000,10000,20000,50000,100000,147754)
BIN=c(0,1,5,10,20,50,100,200,500,1000,2000,5000,10000,20000,50000,100000, 150000)
PAR=c("default", "scaffold")
TYPE=c("fqa", "fqc", "fqr", "fqf")
nSIZE=length(SIZE)
nTYPE=length(TYPE)
nPAR=length(PAR)
nREG=length(REG)
nBIN=length(BIN)
options(scipen=999)

#########################################################################################
# FIGURE1A #
# Plot switch error rates vs sample size #
#########################################################################################

#LOAD BEAGLE SER BETWEEN ALL VARIANTS
error = rep(0, 20)
freq = rep(0, 20)
total = rep(0, 20)
for (s in nSIZE:nSIZE) {
for (r in 1:nrow(REG)) {
str1="~/Fuse/PhasingUKB/Phasing/PhasingWGS/step5_runshapeit_phase2/N";
str2="/benchmark_ukb23352_c20_qc_v1.subset.N";
tmp=paste(str1, SIZE[s], str2, SIZE[s], ".", REG$V3[r], ".shapeit5.default.fqf.calibration.switch.txt.gz", sep="");
cat (tmp, "\n")
DATA = read.table(tmp, head=FALSE)
error = error + DATA$V5
total = total + DATA$V6
freq = freq + DATA$V4

}
}

14 changes: 14 additions & 0 deletions tasks/phasingUKB/step2_wgs/step2_splitchunks/chr20.size4Mb2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
0 chr20 chr20:60061-4293150 chr20:60061-4043148 3983088 861265
1 chr20 chr20:3793152-8202604 chr20:4043152-7952602 3909451 861263
2 chr20 chr20:7702567-12266861 chr20:7952603-12016861 4064259 861264
3 chr20 chr20:11766860-16186937 chr20:12016862-15936931 3920070 861263
4 chr20 chr20:15686932-20128744 chr20:15936935-19878738 3941804 861264
5 chr20 chr20:19628749-23940686 chr20:19878749-23690686 3811938 861262
8 chr20 chr20:33209734-37755751 chr20:33459734-37505746 4046013 861264
9 chr20 chr20:37255734-41801423 chr20:37505749-41551423 4045675 861263
10 chr20 chr20:41301424-45796623 chr20:41551424-45546623 3995200 861264
11 chr20 chr20:45296627-49754422 chr20:45546636-49504423 3957788 861262
12 chr20 chr20:49254423-53599712 chr20:49504425-53349713 3845289 861264
13 chr20 chr20:53099714-57367594 chr20:53349714-57117595 3767882 861262
14 chr20 chr20:56867597-61158685 chr20:57117596-60908683 3791088 861263
15 chr20 chr20:60658672-64334127 chr20:60908687-64334127 3425441 861262
2 changes: 1 addition & 1 deletion tasks/phasingUKB/step2_wgs/step4_runshapeit5_phase1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
MAP=/mnt/project/data/shapeit_maps/chr20.b38.gmap.gz
SCA=/mnt/project/Phasing/PhasingSNParray/step5_benchmark/benchmark_c20_b0_v2.b38.sorted.N480853.phased.bcf

DOCKER=shapeit5_$(date +'%d%m%Y')\_$(git rev-parse --short HEAD)\.tar.gz
DOCKER=shapeit5_$(git log -1 --format=%cd --date=short)\_$(git rev-parse --short HEAD)\.tar.gz

for N in 2000 5000 10000 20000 50000 100000 147754; do

Expand Down
7 changes: 3 additions & 4 deletions tasks/phasingUKB/step2_wgs/step5_runshapeit5_phase2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

MAP=/mnt/project/data/shapeit_maps/chr20.b38.gmap.gz

DOCKER=shapeit5_$(date +'%d%m%Y')\_$(git rev-parse --short HEAD)\.tar.gz

DOCKER=shapeit5_$(git log -1 --format=%cd --date=short)\_$(git rev-parse --short HEAD)\.tar.gz

#for N in 2000 5000 10000 20000 50000 100000 147754; do
for N in 147754; do
Expand All @@ -20,14 +19,14 @@ for N in 147754; do
TIM=benchmark_ukb23352_c20_qc_v1.subset.N$N\.$SREG\.shapeit5.default.time
LOG=benchmark_ukb23352_c20_qc_v1.subset.N$N\.$SREG\.shapeit5.default.log

dx run app-swiss-army-knife -iimage_file="/docker/$DOCKER" --folder="/Phasing/PhasingWGS/step5_runshapeit_phase2/N$N/" -icmd="/usr/bin/time -vo $TIM SHAPEIT5_phase2_static --input $BCF --scaffold $SCA --map $MAP --output $OUT --log $LOG --scaffold-region $SREG --input-region $IREG --thread 32 && bcftools index -f $OUT --threads 8" --tag benchWGS --tag shapeit5 --tag phase2 --tag $SREG --instance-type mem3_ssd1_v2_x32 --priority normal --name benchWGS_shapeit5_phase2a_$SREG -y
dx run app-swiss-army-knife -iimage_file="/docker/$DOCKER" --folder="/Phasing/PhasingWGS/step5_runshapeit_phase2/N$N/" -icmd="/usr/bin/time -vo $TIM SHAPEIT5_phase_rare_static --input $BCF --scaffold $SCA --map $MAP --output $OUT --log $LOG --scaffold-region $SREG --input-region $IREG --thread 32 && bcftools index -f $OUT --threads 8" --tag benchWGS --tag shapeit5 --tag phase2 --tag $SREG --instance-type mem3_ssd1_v2_x32 --priority normal --name benchWGS_shapeit5_phase2a_$SREG -y

#SCAFFOLDED
# SCA=/mnt/project/Phasing/PhasingWGS/step4_runshapeit_phase1/N$N/benchmark_ukb23352_c20_qc_v1.subset.N$N\.$SREG\.shapeit5.scaffold.bcf
# OUT=benchmark_ukb23352_c20_qc_v1.subset.N$N\.$SREG\.shapeit5.scaffold.bcf
# TIM=benchmark_ukb23352_c20_qc_v1.subset.N$N\.$SREG\.shapeit5.scaffold.time
# LOG=benchmark_ukb23352_c20_qc_v1.subset.N$N\.$SREG\.shapeit5.scaffold.log
# dx run app-swiss-army-knife -iimage_file="/docker/$DOCKER" --folder="/Phasing/PhasingWGS/step5_runshapeit_phase2/N$N/" -icmd="/usr/bin/time -vo $TIM SHAPEIT5_phase2_static --input $BCF --scaffold $SCA --map $MAP --output $OUT --log $LOG --scaffold-region $SREG --input-region $IREG --thread 32 && bcftools index -f $OUT --threads 8" --tag benchWGS --tag shapeit5 --tag phase2 --tag $SREG --instance-type mem3_ssd1_v2_x32 --priority normal --name benchWGS_shapeit5_phase2b_$SREG -y
# dx run app-swiss-army-knife -iimage_file="/docker/$DOCKER" --folder="/Phasing/PhasingWGS/step5_runshapeit_phase2/N$N/" -icmd="/usr/bin/time -vo $TIM SHAPEIT5_phase_rare_static --input $BCF --scaffold $SCA --map $MAP --output $OUT --log $LOG --scaffold-region $SREG --input-region $IREG --thread 32 && bcftools index -f $OUT --threads 8" --tag benchWGS --tag shapeit5 --tag phase2 --tag $SREG --instance-type mem3_ssd1_v2_x32 --priority normal --name benchWGS_shapeit5_phase2b_$SREG -y

done < /home/olivier/Dropbox/Repository/shapeit5/tasks/phasingUKB/step2_wgs/step2_splitchunks/chr20.size4Mb.txt

Expand Down
2 changes: 1 addition & 1 deletion tasks/phasingUKB/step2_wgs/step6_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ BCF=/mnt/project/Phasing/PhasingWGS/step1_preparedata/benchmark_ukb23352_c20_qc_
VAL=/mnt/project/Phasing/PhasingWGS/step1_preparedata/validation_ukb23352_c20_qc_v1.bcf
PED=/mnt/project/Phasing/PhasingSNParray/step1_dataqc/samples.families.txt

DOCKER=shapeit5_$(date +'%d%m%Y')\_$(git rev-parse --short HEAD)\.tar.gz
DOCKER=shapeit5_$(git log -1 --format=%cd --date=short)\_$(git rev-parse --short HEAD)\.tar.gz

#####################################################################################################
# BENCHMARCH BEAGLE5 #
Expand Down
Binary file modified test/10k/msprime.rare.test1.bcf
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.bcf.csi
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.block.switch.txt.gz
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.calibration.switch.txt.gz
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.flipsAndSwitches.txt.gz
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.frequency.switch.txt.gz
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.sample.switch.txt.gz
Binary file not shown.
Binary file modified test/10k/msprime.rare.test1.variant.switch.txt.gz
Binary file not shown.

0 comments on commit 4bb1244

Please sign in to comment.