-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Michael Stromberg
committed
Aug 1, 2016
1 parent
fd88eef
commit 79a4d63
Showing
1,755 changed files
with
242,951 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -250,3 +250,6 @@ paket-files/ | |
# JetBrains Rider | ||
.idea/ | ||
*.sln.iml | ||
|
||
# .NET Core | ||
project.lock.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
using System.Collections.Generic; | ||
using ErrorHandling.Exceptions; | ||
using VariantAnnotation.DataStructures; | ||
|
||
namespace CacheUtils.Archive | ||
{ | ||
public static class BioTypeUtilities | ||
{ | ||
#region members | ||
|
||
private static readonly Dictionary<string, BioType> StringToBioType = new Dictionary<string, BioType>(); | ||
|
||
private const string AmbiguousOrfBiotypeKey = "ambiguous_orf"; | ||
private const string AntisenseBiotypeKey = "antisense"; | ||
private const string AntisenseRnaBiotypeKey = "antisense_RNA"; | ||
private const string BidirectionalPromoterLncRnaKey = "bidirectional_promoter_lncrna"; | ||
private const string GuideRnaBiotypeKey = "guide_RNA"; | ||
private const string IgCGeneBiotypeKey = "IG_C_gene"; | ||
private const string IgCPseudoGeneBiotypeKey = "IG_C_pseudogene"; | ||
private const string IgDGeneBiotypeKey = "IG_D_gene"; | ||
private const string IgJGeneBiotypeKey = "IG_J_gene"; | ||
private const string IgJPseudoGeneBiotypeKey = "IG_J_pseudogene"; | ||
private const string IgVGeneBiotypeKey = "IG_V_gene"; | ||
private const string IgVPseudoGeneBiotypeKey = "IG_V_pseudogene"; | ||
private const string LongIntergenicNonCodingRnaBiotypeKey = "lincRNA"; | ||
private const string LongNonCodingRnaBiotypeKey = "lncRNA"; | ||
private const string MacroLongNonCodingRnaBiotypeKey = "macro_lncRNA"; | ||
private const string MessengerRnaBiotypeKey = "mRNA"; | ||
private const string MicroRnaBiotypeKey = "miRNA"; | ||
private const string MiscRnaBiotypeKey = "misc_RNA"; | ||
private const string MitochondrialRibosomalRnaBiotypeKey = "Mt_rRNA"; | ||
private const string MitochondrialTransferRnaBiotypeKey = "Mt_tRNA"; | ||
private const string NonCodingBiotypeKey = "non_coding"; | ||
private const string NonsenseMediatedDecayBiotypeKey = "nonsense_mediated_decay"; | ||
private const string NonStopDecayBiotypeKey = "non_stop_decay"; | ||
private const string PolymorphicPseudoGene = "polymorphic_pseudogene"; | ||
private const string ProcessedPseudoGeneBiotypeKey = "processed_pseudogene"; | ||
private const string ProcessedTranscriptBiotypeKey = "processed_transcript"; | ||
private const string ProteinCodingBiotypeKey = "protein_coding"; | ||
private const string PseudoGeneBiotypeKey = "pseudogene"; | ||
private const string RetainedIntronBiotypeKey = "retained_intron"; | ||
private const string RetrotransposedBiotypeKey = "retrotransposed"; | ||
private const string RibonucleaseMrpBiotypeKey = "RNase_MRP_RNA"; | ||
private const string RibonucleasePBiotypeKey = "RNase_P_RNA"; | ||
private const string RibosomalRnaBiotypeKey = "rRNA"; | ||
private const string RibozymeBiotypeKey = "ribozyme"; | ||
private const string SenseIntronicBiotypeKey = "sense_intronic"; | ||
private const string SenseOverlappingBiotypeKey = "sense_overlapping"; | ||
private const string SmallRnaBiotypeKey = "sRNA"; | ||
private const string SmallCytoplasmicRnaBiotypeKey = "scRNA"; | ||
private const string SmallCajalBodySpecificRnaBiotypeKey = "scaRNA"; | ||
private const string SmallNuclearRnaBiotypeKey = "snRNA"; | ||
private const string SmallNucleolarRnaBiotypeKey = "snoRNA"; | ||
private const string SignalRecognitionParticleRnaBiotypeKey = "SRP_RNA"; | ||
private const string TelomeraseRnaBiotypeKey = "telomerase_RNA"; | ||
private const string ThreePrimeOverlappingNcRnaBiotypeKey = "3prime_overlapping_ncrna"; | ||
private const string TranscribedProcessedPseudoGeneBiotypeKey = "transcribed_processed_pseudogene"; | ||
private const string TranscribedUnitaryPseudoGeneBiotypeKey = "transcribed_unitary_pseudogene"; | ||
private const string TranscribedUnprocessedPseudoGeneBiotypeKey = "transcribed_unprocessed_pseudogene"; | ||
private const string TranscriptionElongationComplexBiotypeKey = "TEC"; | ||
private const string TranslatedProcessedPseudogeneKey = "translated_processed_pseudogene"; | ||
private const string TranslatedUnprocessedPseudogeneKey = "translated_unprocessed_pseudogene"; | ||
private const string TransferRnaBiotypeKey = "tRNA"; | ||
private const string TrCGeneBiotypeKey = "TR_C_gene"; | ||
private const string TrDGeneBiotypeKey = "TR_D_gene"; | ||
private const string TrJGeneBiotypeKey = "TR_J_gene"; | ||
private const string TrJPseudoGeneBiotypeKey = "TR_J_pseudogene"; | ||
private const string TrVGeneBiotypeKey = "TR_V_gene"; | ||
private const string TrVPseudoGeneBiotypeKey = "TR_V_pseudogene"; | ||
private const string UnitaryPseudoGeneBiotypeKey = "unitary_pseudogene"; | ||
private const string UnprocessedPseudoGeneBiotypeKey = "unprocessed_pseudogene"; | ||
private const string VaultRnaBiotypeKey = "vaultRNA"; | ||
private const string YRnaBiotypeKey = "Y_RNA"; | ||
|
||
#endregion | ||
|
||
/// <summary>
/// static constructor: registers every known biotype string together with its
/// BioType enumeration value in the lookup dictionary
/// </summary>
static BioTypeUtilities()
{
    var mappings = new[]
    {
        new KeyValuePair<string, BioType>(AmbiguousOrfBiotypeKey, BioType.AmbiguousOrf),
        new KeyValuePair<string, BioType>(AntisenseBiotypeKey, BioType.Antisense),
        new KeyValuePair<string, BioType>(AntisenseRnaBiotypeKey, BioType.AntisenseRNA),
        new KeyValuePair<string, BioType>(BidirectionalPromoterLncRnaKey, BioType.BidirectionalPromoterLncRNA),
        new KeyValuePair<string, BioType>(GuideRnaBiotypeKey, BioType.GuideRNA),
        new KeyValuePair<string, BioType>(IgCGeneBiotypeKey, BioType.IgCGene),
        new KeyValuePair<string, BioType>(IgCPseudoGeneBiotypeKey, BioType.IgCPseudoGene),
        new KeyValuePair<string, BioType>(IgDGeneBiotypeKey, BioType.IgDGene),
        new KeyValuePair<string, BioType>(IgJGeneBiotypeKey, BioType.IgJGene),
        new KeyValuePair<string, BioType>(IgJPseudoGeneBiotypeKey, BioType.IgJPseudoGene),
        new KeyValuePair<string, BioType>(IgVGeneBiotypeKey, BioType.IgVGene),
        new KeyValuePair<string, BioType>(IgVPseudoGeneBiotypeKey, BioType.IgVPseudoGene),
        new KeyValuePair<string, BioType>(LongIntergenicNonCodingRnaBiotypeKey, BioType.LongIntergenicNonCodingRna),
        new KeyValuePair<string, BioType>(LongNonCodingRnaBiotypeKey, BioType.lncRNA),
        new KeyValuePair<string, BioType>(MacroLongNonCodingRnaBiotypeKey, BioType.macroLncRNA),
        new KeyValuePair<string, BioType>(MessengerRnaBiotypeKey, BioType.mRNA),
        new KeyValuePair<string, BioType>(MicroRnaBiotypeKey, BioType.miRNA),
        new KeyValuePair<string, BioType>(MiscRnaBiotypeKey, BioType.RNA),
        new KeyValuePair<string, BioType>(MitochondrialRibosomalRnaBiotypeKey, BioType.MitochondrialRibosomalRna),
        new KeyValuePair<string, BioType>(MitochondrialTransferRnaBiotypeKey, BioType.MitochondrialTransferRna),
        new KeyValuePair<string, BioType>(NonCodingBiotypeKey, BioType.NonCoding),
        new KeyValuePair<string, BioType>(NonsenseMediatedDecayBiotypeKey, BioType.NonsenseMediatedDecay),
        new KeyValuePair<string, BioType>(NonStopDecayBiotypeKey, BioType.NonStopDecay),
        new KeyValuePair<string, BioType>(PolymorphicPseudoGene, BioType.PolymorphicPseudoGene),
        new KeyValuePair<string, BioType>(ProcessedPseudoGeneBiotypeKey, BioType.ProcessedPseudoGene),
        new KeyValuePair<string, BioType>(ProcessedTranscriptBiotypeKey, BioType.ProcessedTranscript),
        new KeyValuePair<string, BioType>(ProteinCodingBiotypeKey, BioType.ProteinCoding),
        new KeyValuePair<string, BioType>(PseudoGeneBiotypeKey, BioType.PseudoGene),
        new KeyValuePair<string, BioType>(RetainedIntronBiotypeKey, BioType.RetainedIntron),
        new KeyValuePair<string, BioType>(RetrotransposedBiotypeKey, BioType.Retrotransposed),
        new KeyValuePair<string, BioType>(RibonucleaseMrpBiotypeKey, BioType.RibonucleaseMrpRna),
        new KeyValuePair<string, BioType>(RibonucleasePBiotypeKey, BioType.RibonucleasePRna),
        new KeyValuePair<string, BioType>(RibosomalRnaBiotypeKey, BioType.RibosomalRna),
        new KeyValuePair<string, BioType>(RibozymeBiotypeKey, BioType.Ribozyme),
        new KeyValuePair<string, BioType>(SenseIntronicBiotypeKey, BioType.SenseIntronic),
        new KeyValuePair<string, BioType>(SenseOverlappingBiotypeKey, BioType.SenseOverlapping),
        new KeyValuePair<string, BioType>(SignalRecognitionParticleRnaBiotypeKey, BioType.SignalRecognitionParticleRNA),
        new KeyValuePair<string, BioType>(SmallRnaBiotypeKey, BioType.sRNA),
        new KeyValuePair<string, BioType>(SmallCytoplasmicRnaBiotypeKey, BioType.scRNA),
        new KeyValuePair<string, BioType>(SmallCajalBodySpecificRnaBiotypeKey, BioType.scaRNA),
        new KeyValuePair<string, BioType>(SmallNuclearRnaBiotypeKey, BioType.snRNA),
        new KeyValuePair<string, BioType>(SmallNucleolarRnaBiotypeKey, BioType.snoRNA),
        new KeyValuePair<string, BioType>(TelomeraseRnaBiotypeKey, BioType.TelomeraseRNA),
        new KeyValuePair<string, BioType>(ThreePrimeOverlappingNcRnaBiotypeKey, BioType.ThreePrimeOverlappingNcRna),
        new KeyValuePair<string, BioType>(TranscribedProcessedPseudoGeneBiotypeKey, BioType.TranscribedProcessedPseudoGene),
        new KeyValuePair<string, BioType>(TranscribedUnitaryPseudoGeneBiotypeKey, BioType.TranscribedUnitaryPseudoGene),
        new KeyValuePair<string, BioType>(TranscribedUnprocessedPseudoGeneBiotypeKey, BioType.TranscribedUnprocessedPseudoGene),
        new KeyValuePair<string, BioType>(TranscriptionElongationComplexBiotypeKey, BioType.TranscriptionElongationComplex),
        new KeyValuePair<string, BioType>(TransferRnaBiotypeKey, BioType.tRNA),
        new KeyValuePair<string, BioType>(TranslatedProcessedPseudogeneKey, BioType.TranslatedProcessedPseudogene),
        new KeyValuePair<string, BioType>(TranslatedUnprocessedPseudogeneKey, BioType.TranslatedUnprocessedPseudogene),
        new KeyValuePair<string, BioType>(TrCGeneBiotypeKey, BioType.TrCGene),
        new KeyValuePair<string, BioType>(TrDGeneBiotypeKey, BioType.TrDGene),
        new KeyValuePair<string, BioType>(TrJGeneBiotypeKey, BioType.TrJGene),
        new KeyValuePair<string, BioType>(TrJPseudoGeneBiotypeKey, BioType.TrJPseudoGene),
        new KeyValuePair<string, BioType>(TrVGeneBiotypeKey, BioType.TrVGene),
        new KeyValuePair<string, BioType>(TrVPseudoGeneBiotypeKey, BioType.TrVPseudoGene),
        new KeyValuePair<string, BioType>(UnitaryPseudoGeneBiotypeKey, BioType.UnitaryPseudoGene),
        new KeyValuePair<string, BioType>(UnprocessedPseudoGeneBiotypeKey, BioType.UnprocessedPseudoGene),
        new KeyValuePair<string, BioType>(VaultRnaBiotypeKey, BioType.VaultRNA),
        new KeyValuePair<string, BioType>(YRnaBiotypeKey, BioType.YRNA)
    };

    // register the entries in declaration order
    foreach (var mapping in mappings)
    {
        AddBioType(mapping.Key, mapping.Value);
    }
}
|
||
/// <summary>
/// registers a biotype string in the lookup dictionary; an entry that already
/// exists under the same key is overwritten
/// </summary>
/// <param name="s">the string representation of the biotype</param>
/// <param name="bioType">the enumeration value associated with the string</param>
private static void AddBioType(string s, BioType bioType) => StringToBioType[s] = bioType;
|
||
/// <summary>
/// returns the biotype given the string representation
/// </summary>
/// <param name="s">the biotype string; the lookup uses the dictionary's default (case-sensitive) comparer</param>
/// <returns>the BioType registered for <paramref name="s"/></returns>
/// <exception cref="System.ArgumentNullException">thrown when <paramref name="s"/> is null</exception>
/// <exception cref="GeneralException">thrown when the string has not been registered</exception>
public static BioType GetBiotypeFromString(string s)
{
    // report the caller's parameter name rather than the dictionary's internal "key"
    if (s == null) throw new System.ArgumentNullException(nameof(s));

    BioType bioType;
    if (StringToBioType.TryGetValue(s, out bioType)) return bioType;

    throw new GeneralException($"Unable to find the specified BioType ({s}) in the BioType dictionary.");
}
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
using System; | ||
using System.IO; | ||
using System.IO.Compression; | ||
using ErrorHandling.Exceptions; | ||
using VariantAnnotation.Compression; | ||
using VariantAnnotation.Utilities; | ||
|
||
namespace CacheUtils.Archive | ||
{ | ||
public class BlockQuickLzStream : Stream | ||
{ | ||
#region members | ||
|
||
private readonly bool _isCompressor; | ||
private readonly bool _leaveStreamOpen; | ||
|
||
private Stream _stream; | ||
|
||
private readonly string _filePath; | ||
private readonly QuickLzBlock _qlzBlock; | ||
private long _fileOffset; | ||
private bool _foundEof; | ||
|
||
#endregion | ||
|
||
#region Stream | ||
|
||
/// <summary>
/// true only for an open decompression stream whose underlying stream is readable;
/// Read always throws on a compression stream, so it must not be advertised there
/// </summary>
public override bool CanRead => _stream != null && !_isCompressor && _stream.CanRead;
|
||
/// <summary>
/// true only for an open compression stream whose underlying stream is writable;
/// Write always throws on a decompression stream, so it must not be advertised there
/// </summary>
public override bool CanWrite => _stream != null && _isCompressor && _stream.CanWrite;
|
||
/// <summary>
/// always false: Seek, SetLength, Length and the Position setter of this stream
/// throw NotSupportedException regardless of what the underlying stream supports
/// </summary>
public override bool CanSeek => false;
|
||
/// <summary>
/// not supported by this stream; the getter always throws
/// </summary>
/// <exception cref="NotSupportedException">always thrown</exception>
public override long Length
{
    get
    {
        throw new NotSupportedException();
    }
}
|
||
/// <summary>
/// gets the current position, computed as the accumulated file offset plus the
/// offset reported by the current QuickLZ block; setting the position is not supported
/// </summary>
/// <exception cref="NotSupportedException">thrown by the setter</exception>
public override long Position
{
    get
    {
        var blockOffset = _qlzBlock.Offset;
        return _fileOffset + blockOffset;
    }
    set
    {
        throw new NotSupportedException();
    }
}
|
||
/// <summary>
/// not supported by this stream; always throws
/// </summary>
/// <param name="offset">ignored</param>
/// <param name="origin">ignored</param>
/// <exception cref="NotSupportedException">always thrown</exception>
public override long Seek(long offset, SeekOrigin origin)
{
    var unsupported = new NotSupportedException();
    throw unsupported;
}
|
||
/// <summary>
/// not supported by this stream; always throws
/// </summary>
/// <param name="value">ignored</param>
/// <exception cref="NotSupportedException">always thrown</exception>
public override void SetLength(long value)
{
    var unsupported = new NotSupportedException();
    throw unsupported;
}
|
||
/// <summary>
/// Flushes the underlying stream. Only the underlying stream is flushed here; a
/// partially filled QuickLZ block is written when the stream is disposed.
/// </summary>
/// <exception cref="ObjectDisposedException">thrown when the stream has already been disposed</exception>
public override void Flush()
{
    if (_stream == null)
    {
        // the single-string constructor takes an object name, not a message,
        // so the (objectName, message) overload is used to keep the text readable
        throw new ObjectDisposedException(nameof(BlockQuickLzStream), $"BlockQuickLzStream ({_filePath}) has already been disposed.");
    }

    _stream.Flush();
}
|
||
/// <summary>
/// Finishes the stream: a compression stream writes its pending block and the EOF
/// marker, then the underlying stream is disposed unless it was requested to be
/// left open. Calling Dispose more than once is safe.
/// </summary>
/// <param name="disposing">true when called from Dispose(); false when called from a finalizer</param>
protected override void Dispose(bool disposing)
{
    try
    {
        if (!disposing || _stream == null) return;

        try
        {
            if (_isCompressor)
            {
                // write the block that has not yet been written, then the EOF marker
                if (!_qlzBlock.IsFull) _qlzBlock.Write(_stream);
                _qlzBlock.WriteEof(_stream);
            }
        }
        finally
        {
            // release the underlying stream even if writing the final block failed
            if (!_leaveStreamOpen) _stream.Dispose();

            // always drop the reference: otherwise a second Dispose on a stream that
            // was left open would write the trailing block and the EOF marker again
            _stream = null;
        }
    }
    finally
    {
        base.Dispose(disposing);
    }
}
|
||
#endregion | ||
|
||
/// <summary>
/// constructor
/// </summary>
/// <param name="stream">The stream to compress or decompress.</param>
/// <param name="compressionMode">One of the enumeration values that indicates whether to compress or decompress the stream.</param>
/// <param name="leaveStreamOpen">true to leave the stream open after disposing the object; otherwise, false.</param>
/// <exception cref="ArgumentNullException">thrown when <paramref name="stream"/> is null</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown when <paramref name="compressionMode"/> is not a defined value</exception>
/// <exception cref="ArgumentException">thrown when <paramref name="stream"/> cannot be written (compression) or read (decompression)</exception>
public BlockQuickLzStream(Stream stream, CompressionMode compressionMode, bool leaveStreamOpen = false)
{
    if (stream == null) throw new ArgumentNullException(nameof(stream));

    if (compressionMode != CompressionMode.Compress && compressionMode != CompressionMode.Decompress)
    {
        throw new ArgumentOutOfRangeException(nameof(compressionMode));
    }

    _stream          = stream;
    _isCompressor    = compressionMode == CompressionMode.Compress;
    _leaveStreamOpen = leaveStreamOpen;
    _filePath        = FileUtilities.GetPath(stream);
    _qlzBlock        = new QuickLzBlock();

    // sanity check: make sure we can use the stream for reading or writing
    if (_isCompressor && !_stream.CanWrite)
    {
        throw new ArgumentException("A stream lacking write capability was provided to the block QuickLZ compressor.", nameof(stream));
    }

    if (!_isCompressor && !_stream.CanRead)
    {
        throw new ArgumentException("A stream lacking read capability was provided to the block QuickLZ decompressor.", nameof(stream));
    }
}
|
||
/// <summary>
/// Reads a number of decompressed bytes into the specified byte array.
/// </summary>
/// <param name="array">The array to store decompressed bytes.</param>
/// <param name="offset">The byte offset in <paramref name="array"/> at which the read bytes will be placed.</param>
/// <param name="count">The maximum number of decompressed bytes to read.</param>
/// <returns>The number of bytes that were read into the byte array; 0 once the end of the data has been reached.</returns>
/// <exception cref="CompressionException">thrown when the stream was opened for compression</exception>
/// <exception cref="ObjectDisposedException">thrown when the stream has already been disposed</exception>
public override int Read(byte[] array, int offset, int count)
{
    if (_foundEof) return 0;
    if (_isCompressor) throw new CompressionException("Tried to read data from a compression BlockQuickLzStream.");

    ValidateParameters(array, offset, count);

    if (_stream == null)
    {
        // the single-string constructor takes an object name, not a message
        throw new ObjectDisposedException(nameof(BlockQuickLzStream), $"BlockQuickLzStream ({_filePath}) has already been disposed.");
    }

    int numBytesRead = 0;
    int dataOffset   = offset;

    while (numBytesRead < count)
    {
        // load the next block once the current one has been consumed
        if (!_qlzBlock.HasMoreData)
        {
            var numBytes = _qlzBlock.Read(_stream);

            // the block reports -1 when no further block is available
            if (numBytes == -1)
            {
                _foundEof = true;
                return numBytesRead;
            }

            _fileOffset += numBytes;
        }

        int copyLength = _qlzBlock.CopyFrom(array, dataOffset, count - numBytesRead);

        dataOffset   += copyLength;
        numBytesRead += copyLength;
    }

    return numBytesRead;
}
|
||
/// <summary>
/// checks that the (array, offset, count) triple describes a valid region of the array
/// </summary>
/// <param name="array">the buffer being read from or written to</param>
/// <param name="offset">the zero-based start of the region within <paramref name="array"/></param>
/// <param name="count">the length of the region</param>
/// <exception cref="ArgumentNullException">thrown when <paramref name="array"/> is null</exception>
/// <exception cref="ArgumentOutOfRangeException">thrown when <paramref name="offset"/> or <paramref name="count"/> is negative</exception>
/// <exception cref="ArgumentException">thrown when the region extends past the end of <paramref name="array"/></exception>
private static void ValidateParameters(byte[] array, int offset, int count)
{
    if (array == null) throw new ArgumentNullException(nameof(array));
    if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset));
    if (count < 0) throw new ArgumentOutOfRangeException(nameof(count));

    // offset and count are non-negative here, so the subtraction cannot overflow
    if (array.Length - offset < count)
    {
        throw new ArgumentException("Offset plus count is larger than the length of the array.");
    }
}
|
||
/// <summary>
/// Writes compressed bytes to the underlying stream from the specified byte array.
/// The bytes are copied into the current QuickLZ block, which is written to the
/// underlying stream each time it becomes full.
/// </summary>
/// <param name="array">The buffer that contains the data to compress.</param>
/// <param name="offset">The byte offset in <paramref name="array"/> from which the bytes will be read.</param>
/// <param name="count">The maximum number of bytes to write.</param>
/// <exception cref="CompressionException">thrown when the stream was opened for decompression</exception>
/// <exception cref="ObjectDisposedException">thrown when the stream has already been disposed</exception>
public override void Write(byte[] array, int offset, int count)
{
    if (!_isCompressor) throw new CompressionException("Tried to write data to a decompression BlockQuickLzStream.");

    ValidateParameters(array, offset, count);

    if (_stream == null)
    {
        // the single-string constructor takes an object name, not a message
        throw new ObjectDisposedException(nameof(BlockQuickLzStream), $"BlockQuickLzStream ({_filePath}) has already been disposed.");
    }

    int numBytesWritten = 0;
    int dataOffset      = offset;

    while (numBytesWritten < count)
    {
        int copyLength = _qlzBlock.CopyTo(array, dataOffset, count - numBytesWritten);

        dataOffset      += copyLength;
        numBytesWritten += copyLength;

        // emit the block as soon as it is full so the next iteration can continue filling
        if (_qlzBlock.IsFull) _qlzBlock.Write(_stream);
    }
}
} | ||
} |
Oops, something went wrong.