Skip to content

Commit

Permalink
feat: new fields in AIRR export
Browse files Browse the repository at this point in the history
feat: AIRR export added to integration tests
feat: minor gradle script upgrades.
  • Loading branch information
dbolotin committed Feb 16, 2022
1 parent cb413f0 commit fcc7e1c
Show file tree
Hide file tree
Showing 8 changed files with 256 additions and 47 deletions.
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ plugins {
`java-library`
application
`maven-publish`
id("com.palantir.git-version") version "0.12.3"
id("com.palantir.git-version") version "0.13.0"
id("com.github.johnrengelman.shadow") version "7.1.2"
}

Expand Down
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.1.1-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
2 changes: 1 addition & 1 deletion gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ case "`uname`" in
Darwin* )
darwin=true
;;
MINGW* )
MSYS* | MINGW* )
msys=true
;;
NONSTOP* )
Expand Down
2 changes: 2 additions & 0 deletions itests/case1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@
set -euxo pipefail

mixcr align -s hs -OvParameters.geneFeatureToAlign=VGeneWithP -OsaveOriginalReads=true test_R1.fastq test_R2.fastq case1.vdjca
mixcr exportAirr case1.vdjca case1.vdjca.airr.tsv
mixcr assemble case1.vdjca case1.clns
mixcr exportAirr case1.clns case1.clns.airr.tsv
4 changes: 4 additions & 0 deletions itests/case4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,9 @@ set -euxo pipefail
# Checking generic pipeline with relatively big input files
mixcr analyze amplicon --receptor-type tra --impute-germline-on-export -s hs --starting-material rna --contig-assembly --5-end v-primers --3-end j-primers --adapters adapters-present CD4M1_test_R1.fastq.gz CD4M1_test_R2.fastq.gz case4

# Checking AIRR export on big files
mixcr exportAirr case4.vdjca case4.vdjca.airr.tsv
mixcr exportAirr case4.clna case4.clna.airr.tsv

# Checking skip steps behaviour
mixcr analyze amplicon --receptor-type tra --impute-germline-on-export -s hs --starting-material rna --contig-assembly --5-end v-primers --3-end j-primers --adapters adapters-present CD4M1_test_R1.fastq.gz CD4M1_test_R2.fastq.gz case4
107 changes: 74 additions & 33 deletions src/main/java/com/milaboratory/mixcr/cli/CommandExportAirr.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

import cc.redberry.pipe.CUtils;
import cc.redberry.pipe.OutputPortCloseable;
import cc.redberry.pipe.util.CountingOutputPort;
import com.milaboratory.mixcr.basictypes.*;
import com.milaboratory.mixcr.export.AirrVDJCObjectWrapper;
import com.milaboratory.mixcr.export.FieldExtractor;
import io.repseq.core.GeneFeature;
import com.milaboratory.util.CanReportProgress;
import com.milaboratory.util.SmartProgressReporter;
import io.repseq.core.GeneType;
import io.repseq.core.VDJCLibraryRegistry;

Expand All @@ -18,6 +20,9 @@

import static com.milaboratory.mixcr.basictypes.IOUtil.*;
import static com.milaboratory.mixcr.export.AirrColumns.*;
import static io.repseq.core.GeneFeature.*;
import static io.repseq.core.GeneType.*;
import static io.repseq.core.ReferencePoint.*;
import static picocli.CommandLine.*;

@Command(name = "exportAirr",
Expand All @@ -41,65 +46,84 @@ public String getType() {
return info0.fileType;
}

@SuppressWarnings("UnnecessaryLocalVariable")
private List<FieldExtractor<AirrVDJCObjectWrapper>> CommonExtractors() {
ComplexReferencePoint vnpEnd = new Leftmost(VEndTrimmed, VEnd);
ComplexReferencePoint dnpBegin = new Rightmost(DBegin, DBeginTrimmed);
ComplexReferencePoint dnpEnd = new Leftmost(DEnd, DEndTrimmed);
ComplexReferencePoint jnpBegin = new Rightmost(JBegin, JBeginTrimmed);

ComplexReferencePoint np1Begin = vnpEnd;
ComplexReferencePoint np1End = new Leftmost(dnpBegin, jnpBegin);

ComplexReferencePoint np2Begin = dnpEnd;
ComplexReferencePoint np2End = jnpBegin;

List<FieldExtractor<AirrVDJCObjectWrapper>> ret = new ArrayList<>(Arrays.asList(
new Sequence(targetId),
new RevComp(),
new Productive(),

new VDJCCalls(GeneType.Variable),
new VDJCCalls(GeneType.Diversity),
new VDJCCalls(GeneType.Joining),
new VDJCCalls(GeneType.Constant),
new VDJCCalls(Variable),
new VDJCCalls(Diversity),
new VDJCCalls(Joining),
new VDJCCalls(Constant),

new SequenceAlignment(targetId),
new GermlineAlignment(targetId),

new CompleteVDJ(targetId),

new NFeature(GeneFeature.CDR3, "junction"),
new AAFeature(GeneFeature.CDR3, "junction_aa"),
new NFeature(CDR3, "junction"),
new AAFeature(CDR3, "junction_aa"),

new NFeature(targetId, np1Begin, np1End, "np1"),
new NFeature(targetId, np2Begin, np2End, "np2"),

new NFeature(CDR1, "cdr1"),
new AAFeature(CDR1, "cdr1_aa"),

new NFeature(GeneFeature.CDR1, "cdr1"),
new AAFeature(GeneFeature.CDR1, "cdr1_aa"),
new NFeature(CDR2, "cdr2"),
new AAFeature(CDR2, "cdr2_aa"),

new NFeature(GeneFeature.CDR2, "cdr2"),
new AAFeature(GeneFeature.CDR2, "cdr2_aa"),
new NFeature(ShortCDR3, "cdr3"),
new AAFeature(ShortCDR3, "cdr3_aa"),

new NFeature(GeneFeature.ShortCDR3, "cdr3"),
new AAFeature(GeneFeature.ShortCDR3, "cdr3_aa"),
new NFeature(FR1, "fwr1"),
new AAFeature(FR1, "fwr1_aa"),

new NFeature(GeneFeature.FR1, "fwr1"),
new AAFeature(GeneFeature.FR1, "fwr1_aa"),
new NFeature(FR2, "fwr2"),
new AAFeature(FR2, "fwr2_aa"),

new NFeature(GeneFeature.FR2, "fwr2"),
new AAFeature(GeneFeature.FR2, "fwr2_aa"),
new NFeature(FR3, "fwr3"),
new AAFeature(FR3, "fwr3_aa"),

new NFeature(GeneFeature.FR3, "fwr3"),
new AAFeature(GeneFeature.FR3, "fwr3_aa"),
new NFeature(FR4, "fwr4"),
new AAFeature(FR4, "fwr4_aa"),

new NFeature(GeneFeature.FR4, "fwr4"),
new AAFeature(GeneFeature.FR4, "fwr4_aa"),
new AlignmentScoring(targetId, Variable),
new AlignmentCigar(targetId, Variable),

new AlignmentScoring(targetId, GeneType.Variable),
new AlignmentCigar(targetId, GeneType.Variable),
new AlignmentScoring(targetId, Diversity),
new AlignmentCigar(targetId, Diversity),

new AlignmentScoring(targetId, GeneType.Diversity),
new AlignmentCigar(targetId, GeneType.Diversity),
new AlignmentScoring(targetId, Joining),
new AlignmentCigar(targetId, Joining),

new AlignmentScoring(targetId, GeneType.Joining),
new AlignmentCigar(targetId, GeneType.Joining),
new AlignmentScoring(targetId, Constant),
new AlignmentCigar(targetId, Constant),

new AlignmentScoring(targetId, GeneType.Constant),
new AlignmentCigar(targetId, GeneType.Constant)
new NFeatureLength(CDR3, "junction_length"),
new NFeatureLength(targetId, np1Begin, np1End, "np1_length"),
new NFeatureLength(targetId, np2Begin, np2End, "np2_length")
));

for (GeneType gt : GeneType.VDJC_REFERENCE)
for (GeneType gt : VDJC_REFERENCE)
for (boolean start : new boolean[]{true, false})
for (boolean germline : new boolean[]{true, false})
ret.add(new SequenceAlignmentBoundary(targetId, gt, start, germline));

for (GeneType gt : GeneType.VDJC_REFERENCE)
for (GeneType gt : VDJC_REFERENCE)
for (boolean start : new boolean[]{true, false})
ret.add(new AirrAlignmentBoundary(targetId, gt, start));

Expand Down Expand Up @@ -129,34 +153,51 @@ public void run0() throws Exception {

Path inPath = Paths.get(in);
VDJCLibraryRegistry libraryRegistry = VDJCLibraryRegistry.getDefault();
CountingOutputPort cPort;
CanReportProgress progressReporter = null;

switch (getType()) {
case MAGIC_CLNA:
extractors = CloneExtractors();
ClnAReader clnaReader = new ClnAReader(inPath, libraryRegistry, 4);
//noinspection unchecked,rawtypes
port = (OutputPortCloseable) clnaReader.readClones();
cPort = new CountingOutputPort<>((OutputPortCloseable) clnaReader.readClones());
port = cPort;
closeable = clnaReader;
progressReporter = SmartProgressReporter.extractProgress(cPort, clnaReader.numberOfClones());
break;
case MAGIC_CLNS:
extractors = CloneExtractors();
ClnsReader clnsReader = new ClnsReader(inPath, libraryRegistry);

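// Extra pass over the clones just to count them, so progress can be reported against a known total.
// Yes, this reads the clones twice, but writing the AIRR output is still much slower than this pass.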
int maxCount = 0;
try (OutputPortCloseable<Clone> p = clnsReader.readClones()) {
for (Clone ignore : CUtils.it(p))
++maxCount;
}

//noinspection unchecked,rawtypes
port = (OutputPortCloseable) clnsReader.readClones();
cPort = new CountingOutputPort<>((OutputPortCloseable) clnsReader.readClones());
port = cPort;
closeable = clnsReader;
progressReporter = SmartProgressReporter.extractProgress(cPort, maxCount);
break;
case MAGIC_VDJC:
extractors = AlignmentsExtractors();
VDJCAlignmentsReader alignmentsReader = new VDJCAlignmentsReader(inPath, libraryRegistry);
//noinspection unchecked,rawtypes
port = (OutputPortCloseable) alignmentsReader;
closeable = alignmentsReader;
progressReporter = alignmentsReader;
break;
default:
throwValidationException("Unexpected file type.");
return;
}

SmartProgressReporter.startProgressReport("Exporting to AIRR format", progressReporter);

try (PrintStream output = new PrintStream(out);
AutoCloseable c = closeable; OutputPortCloseable<VDJCObject> p = port) {
boolean first = true;
Expand Down
Loading

0 comments on commit fcc7e1c

Please sign in to comment.