Skip to content

Commit

Permalink
link to github
Browse files Browse the repository at this point in the history
  • Loading branch information
Aaron Quinlan committed Aug 15, 2012
1 parent 554924b commit 0598b75
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 60 deletions.
81 changes: 33 additions & 48 deletions bioinformatics.bib
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
@BOOK{ostell2001,
AUTHOR = {Ostell, J. and Wheeln, A. and Kans, J.},
AUTHOR = {Ostell, J. and others},
TITLE = {Bioinformatics: A Practical Guide to the Analysis of Genes and
Proteins},
PUBLISHER = {Wiley Interscience},
Expand All @@ -8,16 +8,15 @@ @BOOK{ostell2001
}

@BOOK{kirk2010,
AUTHOR = {D. Kirk and W. Hwu},
AUTHOR = {Kirk, D. and Hwu, W.},
TITLE = {Programming Massively Parallel Processors: A Hands-On Approach},
PUBLISHER = {Elsevier},
YEAR = {2010},
}


@ARTICLE{aparicio2004,
AUTHOR = {Aparicio, O. and Geisberg, J. V. and Sekinger, E. and Yang, A.
and Moqtaderi, Z. and Struhl, K.},
AUTHOR = {Aparicio, O. and others},
TITLE = {Chromatin immunoprecipitation for determining the association of
proteins with specific genomic sequences in vivo.},
YEAR = {2005},
Expand All @@ -27,8 +26,7 @@ @ARTICLE{aparicio2004
}

@ARTICLE{johnson2007,
AUTHOR = {Johnson, D. S. and Mortazavi, A. and Myers, R. M. and
Wold, B.},
AUTHOR = {Johnson, D. S. and others},
TITLE = {Genome-wide mapping of in vivo protein-{DNA} interactions.},
YEAR = {2007},
JOURNAL = {Science},
Expand Down Expand Up @@ -61,9 +59,7 @@ @ARTICLE{alekseyenko2007
}

@ARTICLE{kent2002,
AUTHOR = {Kent, W. J. and Sugnet, C. W. and Furey, T. S. and
Roskin, K. M. and Pringle, T. H. and Zahler, A. M. and
Haussler, A. D.},
AUTHOR = {Kent, W. J. and others},
TITLE = {The Human Genome Browser at {UCSC}},
YEAR = {2002},
JOURNAL = {Genome Research},
Expand All @@ -85,8 +81,7 @@ @INPROCEEDINGS{ben-or1983
}
@INPROCEEDINGS{goodrich1993,
AUTHOR = {Goodrich, M. T. and Tsay, J.-J. and Vengroff, D. E. and
Vitter, J. S.},
AUTHOR = {Goodrich, M. T. and others},
TITLE = {External-memory computational geometry},
BOOKTITLE = {Proceedings of the 1993 {IEEE} 34th Annual Foundations of
Computer Science},
Expand All @@ -97,7 +92,7 @@ @INPROCEEDINGS{goodrich1993
}
@INPROCEEDINGS{kriegel1991,
AUTHOR = {Kriegel, H.-P. and Brinkhoff, T. and Schneider, R.},
AUTHOR = {Kriegel, H. P. and others},
TITLE = {The Combination of Spatial Access Methods and Computational
Geometry in Geographic Database Systems},
BOOKTITLE = {Data Structures and Efficient Algorithms, Final Report on
Expand All @@ -109,7 +104,7 @@ @INPROCEEDINGS{kriegel1991
}

@INPROCEEDINGS{mckenney2009,
AUTHOR = {McKenney, Mark and McGuire, Tynan},
AUTHOR = {McKenney, M. and McGuire, T.},
TITLE = {A parallel plane sweep algorithm for multi-core systems},
BOOKTITLE = {Proceedings of the 17th ACM SIGSPATIAL International
Conference on Advances in Geographic Information Systems},
Expand Down Expand Up @@ -142,8 +137,7 @@ @ARTICLE{merrill2011
}

@ARTICLE{robinson2011,
AUTHOR = {J. T. Robinson and H. Thorvaldsdóttir and W. Winckler and
M. Guttman and E. S. Lander and G. Getz and J. P. Mesirov},
AUTHOR = {Robinson, J. T. and others},
TITLE = {Integrative Genomics Viewer},
YEAR = {2011},
JOURNAL = {Nature Biotechnology},
Expand All @@ -152,18 +146,16 @@ @ARTICLE{robinson2011
}

@ARTICLE{li2009,
AUTHOR = {H. Li and B. Handsaker and A. Wysoker and T. Fennell and J. Ruan
and N. Homer and G. Marth and G. Abecasis and R. Durbin and 1000
Genome Project Data Processing Subgroup},
TITLE = {The Sequence alignment/map {(SAM)} format and SAMtools},
AUTHOR = {Li, H. and others},
TITLE = {The Sequence alignment/map {(SAM)} format and {SAMtools}},
YEAR = {2009},
JOURNAL = {Bioinformatics},
VOLUME = {25},
PAGES = {2078--2079}
}

@ARTICLE{li2011,
AUTHOR = {H. Li},
AUTHOR = {Li, H.},
TITLE = {Tabix: Fast Retrieval of Sequence Features from Generic
{TAB}-delimited Files},
YEAR = {2011},
Expand All @@ -173,10 +165,7 @@ @ARTICLE{li2011
}

@ARTICLE{neph2012,
AUTHOR = {S. Neph and M. S. Kuehn and A. P. Reynolds and E. Haugen and R.
E. Thurman and A. K. Johnson and E. Rynes and M. T. Maurano and J.
Vierstra and S. Thomas and R. Sandstrom and R. Humbert and J. A.
Stamatoyannopoulos},
AUTHOR = {Neph, S. and others},
TITLE = {{BEDOPS}: High performance genomic feature operations},
YEAR = {2012},
JOURNAL = {Bioinformatics},
Expand All @@ -185,11 +174,9 @@ @ARTICLE{neph2012
}

@ARTICLE{mckenna2010,
AUTHOR = { A. McKenna and M. Hanna and E. Banks and A. Sivachenko and K.
Cibulskis and A. Kernytsky and K. Garimella and D. Altshuler and S.
Gabriel and M. Daly and M. A. DePristo},
AUTHOR = {McKenna, A. and others},
TITLE = {The {Genome Analysis Toolkit}: a {MapReduce} framework for analyzing
next-generation DNA sequencing data},
next-generation {DNA} sequencing data},
YEAR = {2010},
JOURNAL = {Genome Research},
MONTH = sep,
Expand All @@ -199,10 +186,7 @@ @ARTICLE{mckenna2010
}

@ARTICLE{giardine2005,
AUTHOR = {B. Giardine and C. Riemer and R. C. Hardison and R. Burhans and
L. Elnitski L and P. Shah and Y. Zhang and D. Blankenberg and
I. Albert and J. Taylor and W. Miller and W. J. Kent and
A. Nekrutenko},
AUTHOR = {Giardine, B. and others},
TITLE = {Galaxy: a platform for interactive large-scale genome analysis},
YEAR = {2005},
JOURNAL = {Genome Research},
Expand All @@ -213,16 +197,16 @@ @ARTICLE{giardine2005
}

@ARTICLE{richardson2006,
AUTHOR = {J. E. Richardson},
TITLE = {Fjoin: simple and efficient computation of feature overlaps},
AUTHOR = {Richardson, J. E.},
TITLE = {{fjoin}: simple and efficient computation of feature overlaps},
YEAR = {2006},
JOURNAL = {Journal of Computational Biology},
VOLUME = {13},
PAGES = {1457--1464}
}

@ARTICLE{misra1982,
AUTHOR = {J. Mirsa and D. Gries},
AUTHOR = {Misra, J. and Gries, D.},
TITLE = {Finding Repeated Elements},
YEAR = {1982},
JOURNAL = {Science of Computer Programming},
Expand All @@ -231,30 +215,31 @@ @ARTICLE{misra1982
}

@ARTICLE{favorov2012,
AUTHOR = {A. Favorov, L. Mularoni, et al},
AUTHOR = {Favorov, A. and others},
TITLE = {Exploring Massive, Genome Scale Datasets with the
GenometriCorr Package},
{GenometriCorr} Package},
YEAR = {2012},
JOURNAL = {{PLoS} Computational Biology},
VOLUME = {8},
NUMBER = {5}
}

@ARTICLE{encode2007,
AUTHOR = {ENCODE Project Consortium, E. Birney, et al},
TITLE = {Identification and analysis of functional elements in 1% of
the human genome by the ENCODE pilot project},
AUTHOR = {{ENCODE Project Consortium}},
TITLE = {Identification and analysis of functional elements in 1\% of
the human genome by the {ENCODE} pilot project},
YEAR = {2007},
JOURNAL = {Nature},
VOLUME = {447},
NUMBER = {7146},
PAGES = {799--816}
}


@ARTICLE{gerstein2010,
AUTHOR = {M. B. Gerstein, Z. J. Lu, et al},
TITLE = {Integrative Analysis of the Caenorhabditis elegans Genome by
the modENCODE Project},
AUTHOR = {Gerstein, M. B. and others},
TITLE = {Integrative Analysis of the \emph{Caenorhabditis elegans} Genome by
the {modENCODE} Project},
YEAR = {2010},
JOURNAL = {Science},
VOLUME = {330},
Expand All @@ -263,7 +248,7 @@ @ARTICLE{gerstein2010
}

@ARTICLE{durbin2010,
AUTHOR = {R. M. Durbin, D. L. Altschuler, et al},
AUTHOR = {{The 1000 Genomes Project Consortium}},
TITLE = {A map of human genome variation from population-scale
sequencing},
YEAR = {2010},
Expand All @@ -274,8 +259,8 @@ @ARTICLE{durbin2010
}

@INPROCEEDINGS{satish2009,
AUTHOR = {N. Satish and M. Harris and M. Garland},
TITLE = {Designing efficient sorting algorithms for manycore GPUs},
AUTHOR = {Satish, N. and others},
TITLE = {Designing efficient sorting algorithms for manycore {GPUs}},
BOOKTITLE = {International Symposium on Parallel and Distributed Processing,
2009},
SERIES = {IPDPS '09},
Expand All @@ -285,8 +270,8 @@ @INPROCEEDINGS{satish2009
}

@INPROCEEDINGS{tzeng2008,
AUTHOR = {S. Tzeng and L.Y. Wei},
TITLE = {Parallel white noise generation on a GPU via cryptographic
AUTHOR = {Tzeng, S. and Wei, L. Y.},
TITLE = {Parallel white noise generation on a {GPU} via cryptographic
hash},
BOOKTITLE = {Proceedings of the 2008 Symposium on Interactive 3D Graphics
and Games},
Expand Down
24 changes: 12 additions & 12 deletions bioinformatics.tex
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ \section{Results:}
relationships between sets of genomic intervals.

\section{Availability:}
\href{http://bedtools.googlecode.com}{http://bedtools.googlecode.com}
\href{https://github.com/arq5x/bits}{https://github.com/arq5x/bits}

\section{Contact:} [email protected]
\end{abstract}
Expand Down Expand Up @@ -378,13 +378,13 @@ \subsection{Binary Interval Search (BITS) Algorithm}
integer lists $B_S = [b_1.start, b_2.start, \dots, b_M.start]$ and $B_E =
[b_1.end, b_2.end, \dots, b_M.end]$, which are each sorted numerically in
ascending order. Next, two binary searches are performed,
$last=\textsc{ BSearch}(B_E, a_i.start)$ and
$first=\textsc{ BSearch}(B_S, a_i.end)$. Since $B_E$ is a sorted list of each
interval end coordinate in $B$, the elements less than or equal to $last$ in
$B_E$ correspond to the set of intervals in $B$ that end \emph{before} $a_i$
starts (i.e., to the ``left'' of $a_i$). Similarly, the elements greater than
or equal to $first$ in $B_S$ correspond to the set of intervals in $B$ that
start \emph{after} $a_i$ ends (i.e., to the ``right'' of $a_i$). From these two
$last=\textsc{ BSearch}(B_E, a_i.start)$ and $first=\textsc{ BSearch}(B_S,
a_i.end)$. Since $B_E$ is a sorted list of each interval end coordinate in $B$,
the elements with indices less than or equal to $last$ in $B_E$ correspond to
the set of intervals in $B$ that end \emph{before} $a_i$ starts (i.e., to the
``left'' of $a_i$). Similarly, the elements with indices greater than or equal
to $first$ in $B_S$ correspond to the set of intervals in $B$ that start
\emph{after} $a_i$ ends (i.e., to the ``right'' of $a_i$). From these two
values, we can directly infer the size of the intersection set
$\mathcal{I}(B,a_i)$ (i.e., the \emph{count} of intersections in $B$ for $a_i$):
\vspace{-.75em}
Expand Down Expand Up @@ -435,8 +435,8 @@ \subsection{Binary Interval Search (BITS) Algorithm}
\BlankLine
\textbf{Function} \textsc{Counter}$(A,B)$
\Begin {
$B_S \gets [b_1.start, \dots, b_{|B|}.start]$;
$B_E \gets [b_1.end, \dots, b_{|B|}.end]$;
$B_S \gets [b_1.start, \dots, b_{|B|}.start]$\;
$B_E \gets [b_1.end, \dots, b_{|B|}.end]$\;
\textsc{Sort}($B_S$)\;
\textsc{Sort}($B_E$)\;
$c \gets 0$\;
Expand Down Expand Up @@ -1042,7 +1042,7 @@ \subsection{Uncovering novel genomic relationships.}
computational burden made feasible by the facility with which
the BITS algorithm could be applied to GPU architectures. Indeed, each
iteration of our Monte Carlo simulation tested for
intersections among 4,425,582,168 intervals among the 25,281 datasets,
intersections among 4 billion intervals among the 25 thousand datasets,
yielding over 44 trillion comparisons for the entire simulation. Whereas
this simulation took just over 6 days (9,069 minutes) on a single
computer with one GPU card, we estimate that it would take at least
Expand All @@ -1066,7 +1066,7 @@ \subsection{Uncovering novel genomic relationships.}
% CONCLUSION
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\vspace{-2em}
\section{Conclusion}
We have developed a novel algorithm for interval intersection that
is uniquely suited to scalable computing architectures such as GPUs.
Expand Down

0 comments on commit 0598b75

Please sign in to comment.