Skip to content

Commit

Permalink
Feature/rebase 3.2.2 january28 (#482)
Browse files Browse the repository at this point in the history
* Implement the feature and add unit tests (#374)

* Feature/affected positions 4061 (#379)

* adding covered position to annotated transcript output

* fixing crash for SVs

* Trim the covered amino acids as well

* Fix the bug where the covered protein position was not updated when the CDS start or end is not -1

* Refactoring

* Feature/hgvsg 4082 (#380)

* Initial implementation of HGVS g. notation and updated reference files.

* Some SonarQube updates.

* Updated the version number

* Removed a line that prevented non-MT variants from annotating HGVS g. notation.

* Updated the version number
  • Loading branch information
Stromberg, Michael authored and rajatshuvro committed Feb 13, 2020
1 parent 82adcf0 commit 24d753e
Show file tree
Hide file tree
Showing 193 changed files with 2,326 additions and 2,248 deletions.
1 change: 1 addition & 0 deletions CacheUtils/CacheUtils.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<ItemGroup>
<ProjectReference Include="..\CommandLine\CommandLine.csproj" />
<ProjectReference Include="..\Compression\Compression.csproj" />
<ProjectReference Include="..\ReferenceUtils\ReferenceUtils.csproj" />
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" />
<ProjectReference Include="..\VariantAnnotation\VariantAnnotation.csproj" />
</ItemGroup>
Expand Down
38 changes: 5 additions & 33 deletions CacheUtils/Commands/ExtractTranscripts/ExtractTranscriptsMain.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
using System.IO;
using CacheUtils.MiniCache;
using CacheUtils.PredictionCache;
using CacheUtils.Sequence;
using CacheUtils.TranscriptCache;
using CommandLine.Builders;
using CommandLine.NDesk.Options;
Expand All @@ -18,7 +17,6 @@
using VariantAnnotation.IO.Caches;
using VariantAnnotation.Logger;
using VariantAnnotation.Providers;
using VariantAnnotation.Sequence;

namespace CacheUtils.Commands.ExtractTranscripts
{
Expand All @@ -41,13 +39,12 @@ private static ExitCodes ProgramExecution()
var chromosome = ReferenceNameUtilities.GetChromosome(bundle.SequenceReader.RefNameToChromosome, _referenceName);
bundle.Load(chromosome);

string outputStub = GetOutputStub(chromosome, bundle.Source);
var interval = new ChromosomeInterval(chromosome, _referencePosition, _referenceEndPosition);
var transcripts = GetTranscripts(logger, bundle, interval);
string outputStub = GetOutputStub(chromosome, bundle.Source);
var interval = new ChromosomeInterval(chromosome, _referencePosition, _referenceEndPosition);
var transcripts = GetTranscripts(logger, bundle, interval);

var sift = GetPredictionStaging(logger, "SIFT", transcripts, chromosome, bundle.SiftPredictions, bundle.SiftReader, x => x.SiftIndex, numRefSeqs);
var polyphen = GetPredictionStaging(logger, "PolyPhen", transcripts, chromosome, bundle.PolyPhenPredictions, bundle.PolyPhenReader, x => x.PolyPhenIndex, numRefSeqs);
string referenceBases = GetReferenceBases(logger, bundle.SequenceReader, interval);
var sift = GetPredictionStaging(logger, "SIFT", transcripts, chromosome, bundle.SiftPredictions, bundle.SiftReader, x => x.SiftIndex, numRefSeqs);
var polyphen = GetPredictionStaging(logger, "PolyPhen", transcripts, chromosome, bundle.PolyPhenPredictions, bundle.PolyPhenReader, x => x.PolyPhenIndex, numRefSeqs);

var regulatoryRegionIntervalArrays = GetRegulatoryRegionIntervalArrays(logger, bundle.TranscriptCache, interval, numRefSeqs);
var transcriptIntervalArrays = PredictionUtilities.UpdateTranscripts(transcripts, bundle.SiftPredictions,
Expand All @@ -58,8 +55,6 @@ private static ExitCodes ProgramExecution()
WriteCache(logger, FileUtilities.GetCreateStream(CacheConstants.TranscriptPath(outputStub)), transcriptStaging, "transcript");
WriteCache(logger, FileUtilities.GetCreateStream(CacheConstants.SiftPath(outputStub)), sift.Staging, "SIFT");
WriteCache(logger, FileUtilities.GetCreateStream(CacheConstants.PolyPhenPath(outputStub)), polyphen.Staging, "PolyPhen");
WriteReference(logger, CacheConstants.BasesPath(outputStub), bundle.SequenceReader, chromosome,
referenceBases, interval.Start);

return ExitCodes.Success;
}
Expand All @@ -69,19 +64,6 @@ private static TranscriptCacheStaging GetTranscriptStaging(CacheHeader header,
IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays) =>
TranscriptCacheStaging.GetStaging(header, transcriptIntervalArrays, regulatoryRegionIntervalArrays);

private static void WriteReference(ILogger logger, string outputPath, CompressedSequenceReader reader,
IChromosome chromosome, string referenceBases, int offset)
{
logger.Write("- writing reference bases... ");
var cytogeneticBands = new CytogeneticBands(reader.CytogeneticBands);

using (var writer = new CompressedSequenceWriter(FileUtilities.GetCreateStream(outputPath),
reader.ReferenceMetadataList, cytogeneticBands, reader.Assembly))
{
writer.Write(chromosome.EnsemblName, referenceBases, offset);
}
logger.WriteLine("finished.");
}

private static void WriteCache(ILogger logger, Stream stream, IStaging staging, string description)
{
Expand All @@ -96,16 +78,6 @@ private static string GetOutputStub(IChromosome chromosome, Source source) => Pa
private static string GetSource(Source source) =>
source != Source.BothRefSeqAndEnsembl ? source.ToString() : "Both";

private static string GetReferenceBases(ILogger logger, CompressedSequenceReader reader, IChromosomeInterval interval)
{
logger.Write("- retrieving reference bases... ");
reader.GetCompressedSequence(interval.Chromosome);
string referenceBases = reader.Sequence.Substring(interval.Start, interval.End - interval.Start + 1);
logger.WriteLine($"{referenceBases.Length} bases extracted.");

return referenceBases;
}

private static (PredictionCacheStaging Staging, Prediction[] Predictions) GetPredictionStaging(ILogger logger,
string description, IEnumerable<ITranscript> transcripts, IChromosome chromosome, IReadOnlyList<Prediction> oldPredictions,
PredictionCacheReader reader, Func<ITranscript, int> indexFunc, int numRefSeqs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
using CommandLine.NDesk.Options;
using CommandLine.Utilities;
using Compression.FileHandling;
using Compression.Utilities;
using ErrorHandling;
using Genome;
using IO;
using Microsoft.Extensions.Configuration;
using VariantAnnotation.Interface;
Expand Down Expand Up @@ -66,28 +64,18 @@ private static (GeneInfoData GeneInfoData, AssemblyDataStore Assembly37, Assembl
logger.Write("- loading datastores... ");
var loadBenchmark = new Benchmark();

var dicts = GetSequenceDictionaries(filePaths.GRCh38.ReferencePath, ExternalFiles.AssemblyFile37.FilePath, ExternalFiles.AssemblyFile38.FilePath);
var (_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(filePaths.GRCh38.ReferencePath);

var geneInfoData = GeneInfoData.Create(ExternalFiles.GeneInfoFile.FilePath);
var dataStore37 = AssemblyDataStore.Create("GRCh37", logger, filePaths.GRCh37, dicts.RefNameToChromosome, dicts.Accession37, true);
var dataStore38 = AssemblyDataStore.Create("GRCh38", logger, filePaths.GRCh38, dicts.RefNameToChromosome, dicts.Accession38, false);
var hgnc = Hgnc.Create(ExternalFiles.HgncFile.FilePath, dicts.RefNameToChromosome);
var dataStore37 = AssemblyDataStore.Create("GRCh37", logger, filePaths.GRCh37, refNameToChromosome, true);
var dataStore38 = AssemblyDataStore.Create("GRCh38", logger, filePaths.GRCh38, refNameToChromosome, false);
var hgnc = Hgnc.Create(ExternalFiles.HgncFile.FilePath, refNameToChromosome);

logger.WriteLine($"{Benchmark.ToHumanReadable(loadBenchmark.GetElapsedTime())}");

return (geneInfoData, dataStore37, dataStore38, hgnc);
}

private static (IDictionary<string, IChromosome> RefNameToChromosome, IDictionary<string, IChromosome>
Accession37, IDictionary<string, IChromosome> Accession38) GetSequenceDictionaries(string referencePath,
string assemblyInfo37Path, string assemblyInfo38Path)
{
var (_, refNameToChromosome, _) = SequenceHelper.GetDictionaries(referencePath);
var accession37Dict = AssemblyReader.GetAccessionToChromosome(GZipUtilities.GetAppropriateStreamReader(assemblyInfo37Path), refNameToChromosome);
var accession38Dict = AssemblyReader.GetAccessionToChromosome(GZipUtilities.GetAppropriateStreamReader(assemblyInfo38Path), refNameToChromosome);
return (refNameToChromosome, accession37Dict, accession38Dict);
}

private static UgaGene[] CombineGenomeAssemblies(ILogger logger, Dictionary<ushort, List<UgaGene>> genesByRef37, Dictionary<ushort, List<UgaGene>> genesByRef38)
{
logger.WriteLine();
Expand Down
4 changes: 2 additions & 2 deletions CacheUtils/Genes/DataStores/AssemblyDataStore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ private AssemblyDataStore(string description, ILogger logger, EnsemblGtf ensembl

public static AssemblyDataStore Create(string description, ILogger logger,
FilePaths.AssemblySpecificPaths paths, IDictionary<string, IChromosome> refNameToChromosome,
IDictionary<string, IChromosome> accessionToChromosome, bool useGrch37)
bool useGrch37)
{
string ensemblGtfPath = useGrch37 ? ExternalFiles.EnsemblGtfFile37.FilePath : ExternalFiles.EnsemblGtfFile38.FilePath;
string refseqGffPath = useGrch37 ? ExternalFiles.RefSeqGffFile37.FilePath : ExternalFiles.RefSeqGffFile38.FilePath;
string refseqGenomeGffPath = useGrch37 ? ExternalFiles.RefSeqGenomeGffFile37.FilePath : ExternalFiles.RefSeqGenomeGffFile38.FilePath;

var ensemblGtf = EnsemblGtf.Create(ensemblGtfPath, refNameToChromosome);
var refSeqGff = RefSeqGff.Create(refseqGffPath, refseqGenomeGffPath, accessionToChromosome);
var refSeqGff = RefSeqGff.Create(refseqGffPath, refseqGenomeGffPath, refNameToChromosome);

var (refIndexToChromosome, _, _) = SequenceHelper.GetDictionaries(paths.ReferencePath);
var globalCache = GlobalCache.Create(paths.RefSeqCachePath, paths.EnsemblCachePath, refIndexToChromosome, refNameToChromosome);
Expand Down
36 changes: 0 additions & 36 deletions CacheUtils/Genes/IO/AssemblyReader.cs

This file was deleted.

167 changes: 0 additions & 167 deletions CacheUtils/Sequence/CompressedSequenceWriter.cs

This file was deleted.

Loading

0 comments on commit 24d753e

Please sign in to comment.