-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature/heteroplasmy tsv 4696 (#524)
* Make the TSV output * Initial implementation of the sample level heteroplasmy provider * output null in the JSON output if heteroplasmyPercentile is not available * Fix broken unit tests * Add proper unit tests * Bugfix * Use gzipped file to reduce the size of compiled file * format change * Fix broken unit tests
- Loading branch information
1 parent
c48c1e8
commit a5a16f7
Showing
37 changed files
with
390 additions
and
196 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Genome; | ||
|
||
namespace MitoHeteroplasmy | ||
{ | ||
/// <summary>
/// Contract for translating a sample's VRF values into percentiles.
/// NOTE(review): "VRF" presumably stands for variant read frequency - confirm.
/// </summary>
public interface IMitoHeteroplasmyProvider
{
    /// <summary>
    /// Computes percentiles for the supplied VRF values at a given position.
    /// </summary>
    /// <param name="genotype">Genotype string of the sample (format is defined by the implementation).</param>
    /// <param name="chrome">Chromosome on which the variant lies.</param>
    /// <param name="position">Position of the variant on the chromosome.</param>
    /// <param name="altAllele">Alternate alleles observed at the position.</param>
    /// <param name="vrfs">VRF values to be converted into percentiles.</param>
    /// <returns>
    /// An array of nullable percentiles; whether the array itself or individual
    /// entries may be null is decided by the implementation.
    /// </returns>
    double?[] GetVrfPercentiles(
        string genotype,
        IChromosome chrome,
        int position,
        string[] altAllele,
        double[] vrfs);
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<!-- Project definition for the MitoHeteroplasmy library. -->
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<!-- Build settings: target framework and C# language version. -->
<PropertyGroup> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
<LangVersion>latest</LangVersion> | ||
</PropertyGroup> | ||
<!-- The gzipped heteroplasmy table is compiled into the assembly as an embedded resource. -->
<ItemGroup> | ||
<EmbeddedResource Include="Resources\MitoHeteroplasmy.tsv.gz" /> | ||
</ItemGroup> | ||
<!-- Sibling projects this library is built against. -->
<ItemGroup> | ||
<ProjectReference Include="..\OptimizedCore\OptimizedCore.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" /> | ||
</ItemGroup> | ||
|
||
</Project> | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Genome; | ||
|
||
namespace MitoHeteroplasmy | ||
{ | ||
/// <summary>
/// In-memory lookup of VRF distributions keyed by mitochondrial position and
/// single-base alternate allele. Given a sample's VRF it reports the fraction of
/// recorded allele depth whose VRF is smaller than or equal to the sample's.
/// </summary>
public sealed class MitoHeteroplasmyProvider : IMitoHeteroplasmyProvider
{
    private const string MitoChromUcscName = "chrM";

    // Only single-base A/C/G/T alternate alleles can be stored and looked up.
    private static readonly Dictionary<string, int> AlleleToInt = new Dictionary<string, int> { { "A", 0 }, { "C", 1 }, { "G", 2 }, { "T", 3 } };

    // Each allele code owns its own band of SequenceLengthMax keys, so a
    // (position, allele) pair packs into a single int for allele codes 0..3.
    private const int SequenceLengthMax = int.MaxValue / 4;

    private readonly Dictionary<int, (int[] Vrfs, int[] AlleleDepths)> _alleleToVrf = new Dictionary<int, (int[], int[])>();

    /// <summary>
    /// Registers (or overwrites) the VRF distribution for a position and alternate allele.
    /// </summary>
    /// <param name="position">Position on the mitochondrial chromosome.</param>
    /// <param name="altAllele">Alternate allele; must be one of A, C, G or T.</param>
    /// <param name="vrfs">VRF values; each is stored as an integer after scaling by 1000.</param>
    /// <param name="alleleDepths">Allele depths, index-aligned with <paramref name="vrfs"/>.</param>
    /// <exception cref="KeyNotFoundException">
    /// <paramref name="altAllele"/> is not one of the supported single-base alleles.
    /// </exception>
    public void Add(int position, string altAllele, IEnumerable<double> vrfs, int[] alleleDepths)
    {
        var vrfsInt = vrfs.Select(ToIntVrfForm).ToArray();
        _alleleToVrf[EncodeMitoPositionAndAltAllele(position, altAllele)] = (vrfsInt, alleleDepths);
    }

    /// <summary>
    /// Computes a percentile for each VRF of a sample at a mitochondrial position.
    /// </summary>
    /// <param name="genotypes">Genotype string whose allele indices are separated by '|' or '/'.</param>
    /// <param name="chrom">Chromosome of the variant; only "chrM" yields results.</param>
    /// <param name="position">Position of the variant.</param>
    /// <param name="altAlleles">Alternate alleles; genotype index k refers to element k - 1.</param>
    /// <param name="vrfs">VRF values, paired in order with the non-reference genotype alleles.</param>
    /// <returns>
    /// One nullable percentile per paired (VRF, allele); an entry is null when no
    /// percentile can be derived for it. Returns null when <paramref name="vrfs"/> or
    /// <paramref name="genotypes"/> is null, when the chromosome is not "chrM", or when
    /// every entry would be null.
    /// </returns>
    public double?[] GetVrfPercentiles(string genotypes, IChromosome chrom, int position, string[] altAlleles, double[] vrfs)
    {
        if (vrfs == null || genotypes == null) return null;
        if (chrom.UcscName != MitoChromUcscName) return null;

        // Reference alleles ("0") carry no VRF, so they are dropped before pairing.
        // Unresolvable indices (e.g. ".") keep their slot as a null allele so the
        // pairing with vrfs is not shifted.
        var sampleAlleles = genotypes.Split('|', '/').Where(x => x != "0")
            .Select(x => GetAlleleByGenotype(x, altAlleles));

        var percentiles = vrfs.Zip(sampleAlleles, (vrf, allele) => GetVrfPercentile(position, allele, vrf)).ToArray();
        return percentiles.All(x => x == null) ? null : percentiles;
    }

    /// <summary>
    /// Resolves a 1-based genotype index to its alternate allele, or null when the
    /// index is not a number, is out of range, or no alternate alleles were supplied.
    /// </summary>
    private static string GetAlleleByGenotype(string genotypeIndex, string[] altAlleles)
    {
        if (altAlleles == null) return null;
        if (!int.TryParse(genotypeIndex, out var alleleNumber)) return null;

        var altIndex = alleleNumber - 1;
        return altIndex >= 0 && altIndex < altAlleles.Length ? altAlleles[altIndex] : null;
    }

    /// <summary>
    /// Returns the fraction of the total recorded allele depth that precedes the first
    /// stored VRF greater than the given one, or null when no percentile is available.
    /// </summary>
    private double? GetVrfPercentile(int position, string altAllele, double vrf)
    {
        if (string.IsNullOrEmpty(altAllele)) return null;

        // Alleles outside A/C/G/T (indels, MNVs, symbolic alleles) have no stored data.
        if (!AlleleToInt.TryGetValue(altAllele, out var alleleCode)) return null;

        var positionAndAltAlleleIntForm = EncodeKey(position, alleleCode);
        if (!_alleleToVrf.TryGetValue(positionAndAltAlleleIntForm, out (int[] Vrfs, int[] AlleleDepths) data)) return null;

        // Stored VRFs are scaled by 1000, so the query is scaled the same way.
        var scaledVrf = vrf * 1000;
        int nearestBiggerVrfIndex;
        for (nearestBiggerVrfIndex = 0; nearestBiggerVrfIndex < data.Vrfs.Length; nearestBiggerVrfIndex++)
        {
            if (data.Vrfs[nearestBiggerVrfIndex] > scaledVrf) break;
        }

        var numSmallerOrEqualAlleles = 0.0;
        var numAllAlleles = 0;
        for (var i = 0; i < data.AlleleDepths.Length; i++)
        {
            if (i < nearestBiggerVrfIndex) numSmallerOrEqualAlleles += data.AlleleDepths[i];
            numAllAlleles += data.AlleleDepths[i];
        }

        // Without any recorded depth the ratio is undefined; report "unavailable" instead of NaN.
        if (numAllAlleles == 0) return null;

        return numSmallerOrEqualAlleles / numAllAlleles;
    }

    private static int ToIntVrfForm(double vrf) => Convert.ToInt32(vrf * 1000);

    // Throws KeyNotFoundException for unsupported alleles; used when loading data.
    private static int EncodeMitoPositionAndAltAllele(int position, string altAllele) => EncodeKey(position, AlleleToInt[altAllele]);

    private static int EncodeKey(int position, int alleleCode) => SequenceLengthMax * alleleCode + position;
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
using System; | ||
using System.IO; | ||
using System.IO.Compression; | ||
using System.Linq; | ||
using System.Reflection; | ||
using OptimizedCore; | ||
|
||
namespace MitoHeteroplasmy | ||
{ | ||
/// <summary>
/// Loads the embedded, gzip-compressed heteroplasmy TSV into a
/// <see cref="MitoHeteroplasmyProvider"/>.
/// </summary>
public static class MitoHeteroplasmyReader
{
    // Zero-based column indices of the TSV fields that are consumed.
    private const int PositionIndex = 0;
    private const int AltIndex = 2;
    private const int VrfIndex = 3;
    private const int AlleleDepthIndex = 4;

    private const string ResourceName = "MitoHeteroplasmy.Resources.MitoHeteroplasmy.tsv.gz";

    /// <summary>
    /// Reads the embedded resource and builds a provider from its rows.
    /// </summary>
    /// <returns>A provider populated with one entry per data row.</returns>
    /// <exception cref="NullReferenceException">The embedded resource could not be found.</exception>
    public static MitoHeteroplasmyProvider GetData()
    {
        var assembly = Assembly.GetExecutingAssembly();
        using var stream = assembly.GetManifestResourceStream(ResourceName);

        // NOTE(review): throwing NullReferenceException directly is discouraged; the type
        // is kept so that existing callers observing it are not broken.
        if (stream == null) throw new NullReferenceException("Unable to read from the Mitochondrial Heteroplasmy file");

        using var gzStream = new GZipStream(stream, CompressionMode.Decompress);
        using var reader = new StreamReader(gzStream);

        // The first line is skipped (treated as a header).
        reader.ReadLine();

        // The resource is machine-written, so numbers are parsed with the invariant
        // culture; the current culture could misread '.' as a group separator.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        var mitoHeteroplasmyData = new MitoHeteroplasmyProvider();
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Tolerate empty lines (e.g. a trailing newline) instead of failing to parse them.
            if (line.Length == 0) continue;

            var fields = line.OptimizedSplit('\t');
            var position = int.Parse(fields[PositionIndex], invariant);
            var vrfs = fields[VrfIndex].Split(',').Select(x => double.Parse(x, invariant));
            var alleleDepths = fields[AlleleDepthIndex].Split(',').Select(x => int.Parse(x, invariant)).ToArray();
            mitoHeteroplasmyData.Add(position, fields[AltIndex], vrfs, alleleDepths);
        }

        return mitoHeteroplasmyData;
    }
}
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.