Skip to content

Commit

Permalink
Initial import of Nirvana 1.4.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Stromberg committed Aug 1, 2016
1 parent fd88eef commit 79a4d63
Show file tree
Hide file tree
Showing 1,755 changed files with 242,951 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,6 @@ paket-files/
# JetBrains Rider
.idea/
*.sln.iml

# .NET Core
project.lock.json
166 changes: 166 additions & 0 deletions CacheUtils/Archive/BioTypeUtilities.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
using System.Collections.Generic;
using ErrorHandling.Exceptions;
using VariantAnnotation.DataStructures;

namespace CacheUtils.Archive
{
/// <summary>
/// Translates biotype strings into their corresponding <see cref="BioType"/> values.
/// </summary>
public static class BioTypeUtilities
{
    #region members

    /// <summary>
    /// lookup table from the biotype string to the enum value. The dictionary uses the
    /// default string comparer, so lookups are case-sensitive.
    /// </summary>
    private static readonly Dictionary<string, BioType> StringToBioType;

    #endregion

    // constructor
    static BioTypeUtilities()
    {
        // every supported biotype string is listed here exactly once
        StringToBioType = new Dictionary<string, BioType>
        {
            ["ambiguous_orf"]                      = BioType.AmbiguousOrf,
            ["antisense"]                          = BioType.Antisense,
            ["antisense_RNA"]                      = BioType.AntisenseRNA,
            ["bidirectional_promoter_lncrna"]      = BioType.BidirectionalPromoterLncRNA,
            ["guide_RNA"]                          = BioType.GuideRNA,
            ["IG_C_gene"]                          = BioType.IgCGene,
            ["IG_C_pseudogene"]                    = BioType.IgCPseudoGene,
            ["IG_D_gene"]                          = BioType.IgDGene,
            ["IG_J_gene"]                          = BioType.IgJGene,
            ["IG_J_pseudogene"]                    = BioType.IgJPseudoGene,
            ["IG_V_gene"]                          = BioType.IgVGene,
            ["IG_V_pseudogene"]                    = BioType.IgVPseudoGene,
            ["lincRNA"]                            = BioType.LongIntergenicNonCodingRna,
            ["lncRNA"]                             = BioType.lncRNA,
            ["macro_lncRNA"]                       = BioType.macroLncRNA,
            ["mRNA"]                               = BioType.mRNA,
            ["miRNA"]                              = BioType.miRNA,
            ["misc_RNA"]                           = BioType.RNA,
            ["Mt_rRNA"]                            = BioType.MitochondrialRibosomalRna,
            ["Mt_tRNA"]                            = BioType.MitochondrialTransferRna,
            ["non_coding"]                         = BioType.NonCoding,
            ["nonsense_mediated_decay"]            = BioType.NonsenseMediatedDecay,
            ["non_stop_decay"]                     = BioType.NonStopDecay,
            ["polymorphic_pseudogene"]             = BioType.PolymorphicPseudoGene,
            ["processed_pseudogene"]               = BioType.ProcessedPseudoGene,
            ["processed_transcript"]               = BioType.ProcessedTranscript,
            ["protein_coding"]                     = BioType.ProteinCoding,
            ["pseudogene"]                         = BioType.PseudoGene,
            ["retained_intron"]                    = BioType.RetainedIntron,
            ["retrotransposed"]                    = BioType.Retrotransposed,
            ["RNase_MRP_RNA"]                      = BioType.RibonucleaseMrpRna,
            ["RNase_P_RNA"]                        = BioType.RibonucleasePRna,
            ["rRNA"]                               = BioType.RibosomalRna,
            ["ribozyme"]                           = BioType.Ribozyme,
            ["sense_intronic"]                     = BioType.SenseIntronic,
            ["sense_overlapping"]                  = BioType.SenseOverlapping,
            ["SRP_RNA"]                            = BioType.SignalRecognitionParticleRNA,
            ["sRNA"]                               = BioType.sRNA,
            ["scRNA"]                              = BioType.scRNA,
            ["scaRNA"]                             = BioType.scaRNA,
            ["snRNA"]                              = BioType.snRNA,
            ["snoRNA"]                             = BioType.snoRNA,
            ["telomerase_RNA"]                     = BioType.TelomeraseRNA,
            ["3prime_overlapping_ncrna"]           = BioType.ThreePrimeOverlappingNcRna,
            ["transcribed_processed_pseudogene"]   = BioType.TranscribedProcessedPseudoGene,
            ["transcribed_unitary_pseudogene"]     = BioType.TranscribedUnitaryPseudoGene,
            ["transcribed_unprocessed_pseudogene"] = BioType.TranscribedUnprocessedPseudoGene,
            ["TEC"]                                = BioType.TranscriptionElongationComplex,
            ["tRNA"]                               = BioType.tRNA,
            ["translated_processed_pseudogene"]    = BioType.TranslatedProcessedPseudogene,
            ["translated_unprocessed_pseudogene"]  = BioType.TranslatedUnprocessedPseudogene,
            ["TR_C_gene"]                          = BioType.TrCGene,
            ["TR_D_gene"]                          = BioType.TrDGene,
            ["TR_J_gene"]                          = BioType.TrJGene,
            ["TR_J_pseudogene"]                    = BioType.TrJPseudoGene,
            ["TR_V_gene"]                          = BioType.TrVGene,
            ["TR_V_pseudogene"]                    = BioType.TrVPseudoGene,
            ["unitary_pseudogene"]                 = BioType.UnitaryPseudoGene,
            ["unprocessed_pseudogene"]             = BioType.UnprocessedPseudoGene,
            ["vaultRNA"]                           = BioType.VaultRNA,
            ["Y_RNA"]                              = BioType.YRNA
        };
    }

    /// <summary>
    /// looks up the biotype that corresponds to the supplied string
    /// </summary>
    /// <param name="s">the biotype string (matched case-sensitively)</param>
    /// <returns>the matching <see cref="BioType"/> value</returns>
    /// <exception cref="GeneralException">thrown when the string is not a known biotype</exception>
    /// <exception cref="System.ArgumentNullException">thrown by the dictionary lookup when <paramref name="s"/> is null</exception>
    public static BioType GetBiotypeFromString(string s)
    {
        BioType bioType;
        if (StringToBioType.TryGetValue(s, out bioType)) return bioType;

        throw new GeneralException($"Unable to find the specified BioType ({s}) in the BioType dictionary.");
    }
}

}
188 changes: 188 additions & 0 deletions CacheUtils/Archive/BlockQuickLzStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
using System;
using System.IO;
using System.IO.Compression;
using ErrorHandling.Exceptions;
using VariantAnnotation.Compression;
using VariantAnnotation.Utilities;

namespace CacheUtils.Archive
{
/// <summary>
/// A forward-only stream that compresses or decompresses data block by block,
/// delegating the block buffering and I/O to <see cref="QuickLzBlock"/>.
/// A given instance is either a compressor (write-only) or a decompressor (read-only).
/// </summary>
public class BlockQuickLzStream : Stream
{
    #region members

    private readonly bool _isCompressor;
    private readonly bool _leaveStreamOpen;

    // the underlying stream; null once this object has been disposed
    private Stream _stream;

    private readonly string _filePath;
    private readonly QuickLzBlock _qlzBlock;

    // running total of the values returned by QuickLzBlock.Read
    private long _fileOffset;

    // true once QuickLzBlock.Read has returned -1
    private bool _foundEof;

    #endregion

    #region Stream

    /// <summary>
    /// true for an open decompression stream whose underlying stream supports reading
    /// </summary>
    public override bool CanRead => !_isCompressor && _stream != null && _stream.CanRead;

    /// <summary>
    /// true for an open compression stream whose underlying stream supports writing
    /// </summary>
    public override bool CanWrite => _isCompressor && _stream != null && _stream.CanWrite;

    /// <summary>
    /// always false: Seek, SetLength, Length and the Position setter are not supported.
    /// Reporting true here would lead callers that trust CanSeek (e.g. Stream.CopyTo when
    /// sizing its buffer) straight into a NotSupportedException.
    /// </summary>
    public override bool CanSeek => false;

    /// <summary>
    /// not supported
    /// </summary>
    public override long Length
    {
        get { throw new NotSupportedException(); }
    }

    /// <summary>
    /// gets the accumulated block read total plus the offset within the current block.
    /// Setting the position is not supported.
    /// </summary>
    public override long Position
    {
        get { return _fileOffset + _qlzBlock.Offset; }
        set { throw new NotSupportedException(); }
    }

    /// <summary>
    /// not supported
    /// </summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// not supported
    /// </summary>
    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Flushes the underlying stream. Data still buffered in the current block is not
    /// written here; blocks are only written when they become full or on disposal.
    /// </summary>
    /// <exception cref="ObjectDisposedException">thrown when the stream has been disposed</exception>
    public override void Flush()
    {
        ThrowIfDisposed();
        _stream.Flush();
    }

    /// <summary>
    /// In compression mode, writes the pending block and the EOF marker. The underlying
    /// stream is disposed unless leaveStreamOpen was requested. Calling this method more
    /// than once is safe: subsequent calls do nothing.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        try
        {
            if (disposing && _stream != null)
            {
                try
                {
                    if (_isCompressor)
                    {
                        // NOTE(review): a full block is expected to have been written by
                        // Write() already - confirm that QuickLzBlock.Write resets IsFull
                        if (!_qlzBlock.IsFull) _qlzBlock.Write(_stream);
                        _qlzBlock.WriteEof(_stream);
                    }
                }
                finally
                {
                    // release the underlying stream even if the final write failed
                    if (!_leaveStreamOpen) _stream.Dispose();

                    // always drop the reference so that a second Dispose cannot emit
                    // another block/EOF marker and Read/Write fail after disposal
                    _stream = null;
                }
            }
        }
        finally
        {
            base.Dispose(disposing);
        }
    }

    #endregion

    /// <summary>
    /// constructor
    /// </summary>
    /// <param name="stream">The stream to compress or decompress.</param>
    /// <param name="compressionMode">One of the enumeration values that indicates whether to compress or decompress the stream.</param>
    /// <param name="leaveStreamOpen">true to leave the stream open after disposing the object; otherwise, false.</param>
    /// <exception cref="ArgumentNullException">thrown when <paramref name="stream"/> is null</exception>
    /// <exception cref="ArgumentOutOfRangeException">thrown when <paramref name="compressionMode"/> is not a defined value</exception>
    /// <exception cref="ArgumentException">thrown when the stream lacks the capability required by the mode</exception>
    public BlockQuickLzStream(Stream stream, CompressionMode compressionMode, bool leaveStreamOpen = false)
    {
        if (stream == null) throw new ArgumentNullException(nameof(stream));

        if ((compressionMode != CompressionMode.Compress) && (compressionMode != CompressionMode.Decompress))
            throw new ArgumentOutOfRangeException(nameof(compressionMode));

        _stream          = stream;
        _isCompressor    = compressionMode == CompressionMode.Compress;
        _leaveStreamOpen = leaveStreamOpen;
        _filePath        = FileUtilities.GetPath(stream);
        _qlzBlock        = new QuickLzBlock();

        // sanity check: make sure we can use the stream for reading or writing
        if (_isCompressor && !_stream.CanWrite) throw new ArgumentException("A stream lacking write capability was provided to the block QuickLZ compressor.");
        if (!_isCompressor && !_stream.CanRead) throw new ArgumentException("A stream lacking read capability was provided to the block QuickLZ decompressor.");
    }

    /// <summary>
    /// Reads a number of decompressed bytes into the specified byte array.
    /// </summary>
    /// <param name="array">The array to store decompressed bytes.</param>
    /// <param name="offset">The byte offset in <paramref name="array"/> at which the read bytes will be placed.</param>
    /// <param name="count">The maximum number of decompressed bytes to read.</param>
    /// <returns>The number of bytes that were read into the byte array (0 once the end of the data has been reached).</returns>
    /// <exception cref="CompressionException">thrown when called on a compression stream</exception>
    /// <exception cref="ObjectDisposedException">thrown when the stream has been disposed</exception>
    public override int Read(byte[] array, int offset, int count)
    {
        if (_isCompressor) throw new CompressionException("Tried to read data from a compression BlockQuickLzStream.");

        ValidateParameters(array, offset, count);
        ThrowIfDisposed();

        if (_foundEof) return 0;

        int numBytesRead = 0;
        int dataOffset   = offset;

        while (numBytesRead < count)
        {
            // fetch the next block once the current one has been consumed
            if (!_qlzBlock.HasMoreData)
            {
                var numBytes = _qlzBlock.Read(_stream);

                // -1 signals that no further blocks are available
                if (numBytes == -1)
                {
                    _foundEof = true;
                    return numBytesRead;
                }

                _fileOffset += numBytes;
            }

            int copyLength = _qlzBlock.CopyFrom(array, dataOffset, count - numBytesRead);

            dataOffset   += copyLength;
            numBytesRead += copyLength;
        }

        return numBytesRead;
    }

    /// <summary>
    /// checks that the offset and count describe a valid region of the array
    /// </summary>
    private void ValidateParameters(byte[] array, int offset, int count)
    {
        if (array == null) throw new ArgumentNullException(nameof(array));
        if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset));
        if (count < 0) throw new ArgumentOutOfRangeException(nameof(count));
        if (array.Length - offset < count) throw new ArgumentException("Invalid Argument Offset Count");
    }

    /// <summary>
    /// throws an ObjectDisposedException if this stream has already been disposed
    /// </summary>
    private void ThrowIfDisposed()
    {
        // the single-argument constructor expects an object name rather than a message,
        // so supply both explicitly
        if (_stream == null)
            throw new ObjectDisposedException(nameof(BlockQuickLzStream), $"BlockQuickLzStream ({_filePath}) has already been disposed.");
    }

    /// <summary>
    /// Writes compressed bytes to the underlying stream from the specified byte array.
    /// The data is buffered in the current block, which is written whenever it becomes full.
    /// </summary>
    /// <param name="array">The buffer that contains the data to compress.</param>
    /// <param name="offset">The byte offset in <paramref name="array"/> from which the bytes will be read.</param>
    /// <param name="count">The maximum number of bytes to write.</param>
    /// <exception cref="CompressionException">thrown when called on a decompression stream</exception>
    /// <exception cref="ObjectDisposedException">thrown when the stream has been disposed</exception>
    public override void Write(byte[] array, int offset, int count)
    {
        if (!_isCompressor) throw new CompressionException("Tried to write data to a decompression BlockQuickLzStream.");

        ValidateParameters(array, offset, count);
        ThrowIfDisposed();

        int numBytesWritten = 0;
        int dataOffset      = offset;

        while (numBytesWritten < count)
        {
            int copyLength = _qlzBlock.CopyTo(array, dataOffset, count - numBytesWritten);
            dataOffset      += copyLength;
            numBytesWritten += copyLength;

            if (_qlzBlock.IsFull) _qlzBlock.Write(_stream);
        }
    }
}
}
Loading

0 comments on commit 79a4d63

Please sign in to comment.