From fa4175c8c3b9f81249ef663cb59f9a096774d169 Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 08:20:51 +0200 Subject: [PATCH 01/29] Added references for construct_lcp algorithms --- extras/literature.bib | 70 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/extras/literature.bib b/extras/literature.bib index 599ae93cc..7f9f93046 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -24,6 +24,76 @@ @inproceedings{KAR:MAN:PUG:2009 bibsource = {DBLP, http://dblp.uni-trier.de} } +// used in include/sdsl/construct_lcp.hpp +@inproceedings{KAS:LEE:ARI:ARI:PAR:2001, +author = {Toru Kasai and Gunho Lee and Hiroki Arimura and Setsuo Arikawa and Kunsoo Park}, +title = {Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its Applications}, +booktitle = {Proceedings of the 12th Annual Symposium on Combinatorial Pattern Matching, + (CPM 2001)}, +year = {2001}, +pages = {181-192}, +ee = {http://link.springer.de/link/service/series/0558/bibs/2089/20890181.htm}, +crossref = {DBLP:conf/cpm/2001}, +bibsource = {DBLP, http://dblp.uni-trier.de} +} + +// used in include/sdsl/construct_lcp.hpp +@inproceedings{GOG:OHL:2011, +author = {Simon Gog and Enno Ohlebusch}, +title = {Fast and Lightweight LCP-Array Construction Algorithms}, +booktitle = {Proceedings of the 13th Workshop on Algorithm Engineering and Experiments, + (ALENEX 2011)}, +year = {2011}, +pages = {25-34}, +ee = {http://www.siam.org/proceedings/alenex/2011/alx11_03_gogs.pdf}, +crossref = {DBLP:conf/alenex/2011}, +bibsource = {DBLP, http://dblp.uni-trier.de} +} + +// used in include/sdsl/construct_lcp.hpp +@article{GOG:OHL:2010, +author = {Simon Gog and Enno Ohlebusch}, +title = {Lightweight LCP-Array Construction in Linear Time}, +journal = {CoRR}, +volume = {abs/1012.4263}, +year = {2010}, +ee = {http://arxiv.org/abs/1012.4263}, +bibsource = {DBLP, http://dblp.uni-trier.de} +} + +// used in include/sdsl/construct_lcp.hpp 
+@inproceedings{BEL:GOG:OHL:SCH:2011, +author = {Timo Beller and Simon Gog and Enno Ohlebusch and Thomas Schnattinger}, +title = {Computing the Longest Common Prefix Array Based on the Burrows-Wheeler Transform}, +booktitle = {Proceedings of String Processing and Information Retrieval, 18th International + Symposium, (SPIRE 2011)}, +year = {2011}, +pages = {197-208}, +ee = {http://dx.doi.org/10.1007/978-3-642-24583-1_20}, +crossref = {DBLP:conf/spire/2011}, +bibsource = {DBLP, http://dblp.uni-trier.de} +} + +// used in include/sdsl/construct_lcp.hpp +@article{BEL:GOG:OHL:SCH:2013, +author = {Timo Beller and Simon Gog and Enno Ohlebusch and Thomas Schnattinger}, +title = {Computing the Longest Common Prefix Array Based on the Burrows-Wheeler Transform}, +journal = {Journal of Discrete Algorithms}, +issue_date= {January, 2013}, +volume = {18}, +number = {0}, +month = {January}, +year = {2013}, +issn = {1570-8667}, +pages = {22-31}, +numpages = {10}, +url = {http://dx.doi.org/10.1016/j.jda.2012.07.007}, +doi = {10.1016/j.jda.2012.07.007}, +acmid = {2428912}, +publisher = {Elsevier Science Publishers B. V.}, +address = {Amsterdam, The Netherlands, The Netherlands}, +} + // used in include/sdsl/rrr_vector.hpp @inproceedings{RAM:RAM:RAO:2002, author = {Rajeev Raman and Venkatesh Raman and S. 
Srinivasa Rao}, From fdcb68b1109446e396880a8e6151bbd79934165d Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 08:24:33 +0200 Subject: [PATCH 02/29] Formating --- extras/literature.bib | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/extras/literature.bib b/extras/literature.bib index 7f9f93046..d1b8cfde3 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -314,20 +314,20 @@ @inproceedings{OHL:FIS:GOG:2010 // used in tutorial/document_listing/document_listing_sada.hpp @article{SAD:JDA:2007, -author = {Sadakane, Kunihiko}, -title = {Succinct data structures for flexible text retrieval systems}, -journal = {Journal of Discrete Algorithms}, +author = {Sadakane, Kunihiko}, +title = {Succinct data structures for flexible text retrieval systems}, +journal = {Journal of Discrete Algorithms}, issue_date = {March, 2007}, -volume = {5}, -number = {1}, -month = {March}, -year = {2007}, -issn = {1570-8667}, -pages = {12--22}, -numpages = {11}, -url = {http://dx.doi.org/10.1016/j.jda.2006.03.011}, -doi = {10.1016/j.jda.2006.03.011}, -acmid = {1224678}, -publisher = {Elsevier Science Publishers B. V.}, -address = {Amsterdam, The Netherlands, The Netherlands}, +volume = {5}, +number = {1}, +month = {March}, +year = {2007}, +issn = {1570-8667}, +pages = {12--22}, +numpages = {11}, +url = {http://dx.doi.org/10.1016/j.jda.2006.03.011}, +doi = {10.1016/j.jda.2006.03.011}, +acmid = {1224678}, +publisher = {Elsevier Science Publishers B. 
V.}, +address = {Amsterdam, The Netherlands, The Netherlands}, } From 87343f39e86ca544f8bb866051d2c053909dc479 Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 08:39:15 +0200 Subject: [PATCH 03/29] Fixed some inconsistency --- extras/literature.bib | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/extras/literature.bib b/extras/literature.bib index d1b8cfde3..5f49dd4ee 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -98,7 +98,7 @@ @article{BEL:GOG:OHL:SCH:2013 @inproceedings{RAM:RAM:RAO:2002, author = {Rajeev Raman and Venkatesh Raman and S. Srinivasa Rao}, title = {Succinct indexable dictionaries with applications to encoding k-ary trees and multisets}, -booktitle = {Proceedings of the Thirteenth Annual ACM-SIAM Symposium +booktitle = {Proceedings of the 13th Annual ACM-SIAM Symposium on Discrete Algorithms (SODA 2002)}, year = {2002}, pages = {233-242}, @@ -160,7 +160,7 @@ @inproceedings{VIG:2008 @inproceedings{GRO:VIT:2003, author = {Roberto Grossi and Ankur Gupta and Jeffrey Scott Vitter}, title = {High-order entropy-compressed text indexes}, -booktitle = {Proceedings of the Fourteenth Annual ACM-SIAM Symposium +booktitle = {Proceedings of the 14th Annual ACM-SIAM Symposium on Discrete Algorithms (SODA 2003)}, year = {2003}, pages = {841-850}, @@ -172,7 +172,7 @@ @inproceedings{GRO:VIT:2003 @inproceedings{MAK:NAV:2005, author = {Veli M{\"a}kinen and Gonzalo Navarro}, title = {Succinct Suffix Arrays Based on Run-Length Encoding}, -booktitle = {Combinatorial Pattern Matching, 16th Annual Symposium, +booktitle = {Proceedings of the 16th Annual Symposium on Combinatorial Pattern Matching, (CPM 2005)}, year = {2005}, pages = {45-56}, @@ -196,9 +196,9 @@ @inproceedings{CLA:NAV:2008 @inproceedings{BRI:LAD:NAV:2009, author = {Nieves R. 
Brisaboa and Susana Ladra and Gonzalo Navarro}, title = {Directly Addressable Variable-Length Codes}, -booktitle = {SPIRE}, -year = {Proceedings of String Processing and Information Retrieval, 16th International +booktitle = {Proceedings of String Processing and Information Retrieval, 16th International Symposium, (SPIRE 2009)}, +year = {2009}, pages = {122-130}, ee = {http://dx.doi.org/10.1007/978-3-642-03784-9_12}, bibsource = {DBLP, http://dblp.uni-trier.de} @@ -209,7 +209,7 @@ @inproceedings{SAD:2002 author = {Kunihiko Sadakane}, title = {Succinct representations of {LCP} information and improvements in the compressed suffix arrays}, -booktitle = {Proceedings of the Thirteenth Annual ACM-SIAM Symposium +booktitle = {Proceedings of the 13th Annual ACM-SIAM Symposium on Discrete Algorithms (SODA 2002)}, year = {2002}, pages = {225-232}, @@ -220,9 +220,9 @@ @inproceedings{SAD:2002 // used in include/sdsl/bp_support_sada.hpp @techreport{SAD:2008, author = {Kunihiko Sadakane}, -title = {The Ultimate Balanced Parentheses}, +title = {The Ultimate Balanced Parentheses}, institution = {Dept. of Computer Science and Communication Engineering, - Kyushu University, Japan}, + Kyushu University, Japan}, year = {2008} } @@ -242,8 +242,8 @@ @inproceedings{GOG:FIS:2010 @inproceedings{NAV:PRO:2012, author = {Gonzalo Navarro and Eliana Providel}, title = {Fast, Small, Simple Rank/Select on Bitmaps}, -booktitle = {Proceedings of the - 11th International Symposium on Experimental Algorithms (SEA 2013)}, +booktitle = {Proceedings of the 11th International Symposium on Experimental Algorithms + (SEA 2012)}, year = {2012}, pages = {295-306}, ee = {http://dx.doi.org/10.1007/978-3-642-30850-5_26}, @@ -254,7 +254,7 @@ @inproceedings{NAV:PRO:2012 @article{LAR:SAD:2007, author = {N. Jesper Larsson and Kunihiko Sadakane}, title = {Faster suffix sorting}, -journal = {Theor. Comput.
Sci.}, +journal = {Theoretical Computer Science}, volume = {387}, number = {3}, year = {2007}, @@ -267,8 +267,8 @@ @article{LAR:SAD:2007 @inproceedings{OKA:SAD:2007, author = {Daisuke Okanohara and Kunihiko Sadakane}, title = {Practical Entropy-Compressed Rank/Select Dictionary}, -booktitle = {Proceedings of the Nine Workshop on Algorithm Engineering - and Experiments (ALENEX 2007)}, +booktitle = {Proceedings of the 9th Workshop on Algorithm Engineering and Experiments + (ALENEX 2007)}, year = {2007}, ee = {http://www.siam.org/proceedings/alenex/2007/alx07_007okanoharad2.pdf}, bibsource = {DBLP, http://dblp.uni-trier.de} @@ -291,7 +291,7 @@ @article{SAD:2003 @article{SAD:2007, author = {Kunihiko Sadakane}, title = {Compressed Suffix Trees with Full Functionality}, -journal = {Theory Comput. Syst.}, +journal = {Theory of Computing Systems}, volume = {41}, number = {4}, year = {2007}, @@ -304,7 +304,7 @@ @article{SAD:2007 @inproceedings{OHL:FIS:GOG:2010, author = {Enno Ohlebusch and Johannes Fischer and Simon Gog}, title = {CST++}, -booktitle = {Proceedings of String Processing and Information Retrieval - 17th International +booktitle = {Proceedings of String Processing and Information Retrieval, 17th International Symposium, (SPIRE 2010)}, year = {2010}, pages = {322-333}, @@ -323,7 +323,7 @@ @article{SAD:JDA:2007 month = {March}, year = {2007}, issn = {1570-8667}, -pages = {12--22}, +pages = {12-22}, numpages = {11}, url = {http://dx.doi.org/10.1016/j.jda.2006.03.011}, doi = {10.1016/j.jda.2006.03.011}, From ffbae54c3c909c7b4683a32179424cee49f72a89 Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 08:47:03 +0200 Subject: [PATCH 04/29] Formating --- extras/literature.bib | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/extras/literature.bib b/extras/literature.bib index 5f49dd4ee..d52f466e8 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -76,22 +76,22 @@ 
@inproceedings{BEL:GOG:OHL:SCH:2011 // used in include/sdsl/construct_lcp.hpp @article{BEL:GOG:OHL:SCH:2013, -author = {Timo Beller and Simon Gog and Enno Ohlebusch and Thomas Schnattinger}, -title = {Computing the Longest Common Prefix Array Based on the Burrows-Wheeler Transform}, -journal = {Journal of Discrete Algorithms}, -issue_date= {January, 2013}, -volume = {18}, -number = {0}, -month = {January}, -year = {2013}, -issn = {1570-8667}, -pages = {22-31}, -numpages = {10}, -url = {http://dx.doi.org/10.1016/j.jda.2012.07.007}, -doi = {10.1016/j.jda.2012.07.007}, -acmid = {2428912}, -publisher = {Elsevier Science Publishers B. V.}, -address = {Amsterdam, The Netherlands, The Netherlands}, +author = {Timo Beller and Simon Gog and Enno Ohlebusch and Thomas Schnattinger}, +title = {Computing the Longest Common Prefix Array Based on the Burrows-Wheeler Transform}, +journal = {Journal of Discrete Algorithms}, +issue_date = {January, 2013}, +volume = {18}, +number = {0}, +month = {January}, +year = {2013}, +issn = {1570-8667}, +pages = {22-31}, +numpages = {10}, +url = {http://dx.doi.org/10.1016/j.jda.2012.07.007}, +doi = {10.1016/j.jda.2012.07.007}, +acmid = {2428912}, +publisher = {Elsevier Science Publishers B. V.}, +address = {Amsterdam, The Netherlands, The Netherlands}, } // used in include/sdsl/rrr_vector.hpp @@ -314,7 +314,7 @@ @inproceedings{OHL:FIS:GOG:2010 // used in tutorial/document_listing/document_listing_sada.hpp @article{SAD:JDA:2007, -author = {Sadakane, Kunihiko}, +author = {Kunihiko Sadakane}, title = {Succinct data structures for flexible text retrieval systems}, journal = {Journal of Discrete Algorithms}, issue_date = {March, 2007}, From 6a82107213396ddc9c6ae19885c461b81b150a75 Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 13:28:51 +0200 Subject: [PATCH 05/29] Added more references ... 
--- extras/literature.bib | 154 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 1 deletion(-) diff --git a/extras/literature.bib b/extras/literature.bib index d52f466e8..ad5000807 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -330,4 +330,156 @@ @article{SAD:JDA:2007 acmid = {1224678}, publisher = {Elsevier Science Publishers B. V.}, address = {Amsterdam, The Netherlands, The Netherlands}, -} +} + +// used in include/sdsl/lcp_dac.hpp +@article{TRA:SAN:2010, +author = {Frederik Transier and Peter Sanders}, +title = {Engineering Basic Algorithms of an In-Memory Text Search Engine}, +journal = {ACM Transactions on Information Systems}, +issue_date = {December 2010}, +volume = {29}, +number = {1}, +month = {December}, +year = {2010}, +issn = {1046-8188}, +pages = {2:1-2:37}, +articleno = {2}, +numpages = {37}, +url = {http://doi.acm.org/10.1145/1877766.1877768}, +doi = {10.1145/1877766.1877768}, +acmid = {1877768}, +publisher = {ACM}, +address = {New York, NY, USA}, +} + +// used in include/sdsl/lcp_dac.hpp +@article{WIL:ZOB:1999, +author = {Hugh E. 
Williams and Justin Zobel}, +title = {Compressing Integers for Fast File Access}, +journal = {The Computer Journal}, +volume = {42}, +number = {3}, +year = {1999}, +pages = {193-201}, +url = {http://comjnl.oxfordjournals.org/content/42/3/193.abstract}, +doi = {10.1093/comjnl/42.3.193}, +} + +// used in include/sdsl/sd_vector.hpp +@article{ELI:1974, +author = {Peter Elias}, +title = {Efficient Storage and Retrieval by Content and Address of Static Files}, +journal = {Journal of the ACM}, +issue_date = {April 1974}, +volume = {21}, +number = {2}, +month = {April}, +year = {1974}, +issn = {0004-5411}, +pages = {246-260}, +numpages = {15}, +url = {http://doi.acm.org/10.1145/321812.321820}, +doi = {10.1145/321812.321820}, +acmid = {321820}, +publisher = {ACM}, +address = {New York, NY, USA}, +} + +// used in include/sdsl/sd_vector.hpp +@book{FAN:1971, +title = {On the Number of Bits Required to Implement an Associative Memory}, +author = {Robert Mario Fano}, +series = {Computation Structures Group Memo}, +year = {1971}, +publisher = {Massachusetts Institute of Technology, Project MAC}, +} + +// used in include/sdsl/bp_support_g.hpp +// used in include/sdsl/bp_support_gg.hpp +@inproceedings{GEA:RAH:RAM:RAM:2004, +author = {Richard F.
Geary and Naila Rahman and Rajeev Raman and Venkatesh Raman}, +title = {A Simple Optimal Representation for Balanced Parentheses}, +booktitle = {Proceedings of the 15th Annual Symposium on Combinatorial Pattern Matching, + (CPM 2004)}, +year = {2004}, +pages = {159-172}, +ee = {http://dx.doi.org/10.1007/978-3-540-27801-6_12}, +crossref = {DBLP:conf/cpm/2004}, +bibsource = {DBLP, http://dblp.uni-trier.de}, +} + +// used in include/sdsl/suffix_array_algorithm.hpp +@inproceedings{FER:MAN:2000, +author = {Paolo Ferragina and Giovanni Manzini}, +title = {Opportunistic Data Structures with Applications}, +booktitle = {Proceedings of the 41st Annual Symposium on Foundations of Computer Science, + (FOCS 2000)}, +year = {2000}, +pages = {390-398}, +ee = {http://doi.ieeecomputersociety.org/10.1109/SFCS.2000.892127}, +crossref = {DBLP:conf/focs/2000}, +bibsource = {DBLP, http://dblp.uni-trier.de} +} + +// used in include/sdsl/suffix_array_algorithm.hpp +@article{SCH:OHL:GOG:2013, +author = {Thomas Schnattinger and Enno Ohlebusch and Simon Gog}, +title = {Bidirectional search in a string with wavelet trees and bidirectional matching statistics}, +journal = {Information and Computation}, +volume = {213}, +year = {2012}, +issn = {0890-5401}, +pages = {13-22}, +ee = {http://dx.doi.org/10.1016/j.ic.2011.03.007}, +bibsource = {DBLP, http://dblp.uni-trier.de}, +} + +// used in include/sdsl/lcp_byte.hpp +@article{ABO:KUR:OHL:2004, +author = {Mohamed Ibrahim Abouelhoda and Stefan Kurtz and Enno Ohlebusch}, +title = {Replacing suffix trees with enhanced suffix arrays}, +journal = {Journal of Discrete Algorithms}, +volume = {2}, +number = {1}, +year = {2004}, +issn = {1570-8667}, +pages = {53-86}, +ee = {http://dx.doi.org/10.1016/S1570-8667(03)00065-0}, +bibsource = {DBLP, http://dblp.uni-trier.de}, +} + + +// used in include/sdsl/rrr_vector.hpp + * References: + * - Rasmus Pagh + * Low redundancy in dictionaries with O(1) worst case lookup time + * Technical Report 1998. 
+ * ftp://ftp.cs.au.dk/BRICS/Reports/RS/98/28/BRICS-RS-98-28.pdf, +Section 2. +=> Rasmus Pagh ist nicht drin + + +// used in include/sdsl/bp_support_gg.hpp +@inproceedings{OHL:GOG:2009, +author = {Enno Ohlebusch and Simon Gog}, +title = {A Compressed Enhanced Suffix Array Supporting Fast String Matching}, +booktitle = {Proceedings of String Processing and Information Retrieval, 16th International + Symposium, (SPIRE 2009)}, +year = {2009}, +pages = {51-62}, +ee = {http://dx.doi.org/10.1007/978-3-642-03784-9_6}, +crossref = {DBLP:conf/spire/2009}, +bibsource = {DBLP, http://dblp.uni-trier.de}, +} + + + +// used in include/sdsl/select_support_mcl.hpp +@phdthesis{CLA:1996, +title = {Compact Pat Trees}, +author = {David Clark}, +year = {1996}, +school = {Department of Computer Science, University of Waterloo}, +ee = {http://www.nlc-bnc.ca/obj/s4/f2/dsk3/ftp04/nq21335.pdf}, +} \ No newline at end of file From 59530cb5e4d57179b9b4db759c11a05d23e279ab Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 13:30:57 +0200 Subject: [PATCH 06/29] Removed accidentally added comment --- extras/literature.bib | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/extras/literature.bib b/extras/literature.bib index ad5000807..f4b80c939 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -419,7 +419,7 @@ @inproceedings{FER:MAN:2000 pages = {390-398}, ee = {http://doi.ieeecomputersociety.org/10.1109/SFCS.2000.892127}, crossref = {DBLP:conf/focs/2000}, -bibsource = {DBLP, http://dblp.uni-trier.de} +bibsource = {DBLP, http://dblp.uni-trier.de}, } // used in include/sdsl/suffix_array_algorithm.hpp @@ -449,16 +449,18 @@ @article{ABO:KUR:OHL:2004 bibsource = {DBLP, http://dblp.uni-trier.de}, } - // used in include/sdsl/rrr_vector.hpp - * References: - * - Rasmus Pagh - * Low redundancy in dictionaries with O(1) worst case lookup time - * Technical Report 1998. 
- * ftp://ftp.cs.au.dk/BRICS/Reports/RS/98/28/BRICS-RS-98-28.pdf, -Section 2. -=> Rasmus Pagh ist nicht drin - +@inproceedings{PAG:1999, +author = {Rasmus Pagh}, +title = {Low Redundancy in Static Dictionaries with O(1) Worst Case Lookup Time}, +booktitle = {Proceedings of the 26th International Colloquium on Automata, Languages and Programming + (ICALP 1999)}, +year = {1999}, +pages = {595-604}, +ee = {http://dx.doi.org/10.1007/3-540-48523-6_56}, +crossref = {DBLP:conf/icalp/99}, +bibsource = {DBLP, http://dblp.uni-trier.de}, +} // used in include/sdsl/bp_support_gg.hpp @inproceedings{OHL:GOG:2009, @@ -473,8 +475,6 @@ @inproceedings{OHL:GOG:2009 bibsource = {DBLP, http://dblp.uni-trier.de}, } - - // used in include/sdsl/select_support_mcl.hpp @phdthesis{CLA:1996, title = {Compact Pat Trees}, From 2b0a2053b6dc669035fad6f83f0234ffda8fe9f3 Mon Sep 17 00:00:00 2001 From: Timo Beller Date: Wed, 7 Aug 2013 13:51:58 +0200 Subject: [PATCH 07/29] Deleted crossref values --- extras/literature.bib | 9 --------- 1 file changed, 9 deletions(-) diff --git a/extras/literature.bib b/extras/literature.bib index f4b80c939..0f58b3463 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -33,7 +33,6 @@ @inproceedings{KAS:LEE:ARI:ARI:PAR:2001 year = {2001}, pages = {181-192}, ee = {http://link.springer.de/link/service/series/0558/bibs/2089/20890181.htm}, -crossref = {DBLP:conf/cpm/2001}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -46,7 +45,6 @@ @inproceedings{GOG:OHL:2011 year = {2011}, pages = {25-34}, ee = {http://www.siam.org/proceedings/alenex/2011/alx11_03_gogs.pdf}, -crossref = {DBLP:conf/alenex/2011}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -70,7 +68,6 @@ @inproceedings{BEL:GOG:OHL:SCH:2011 year = {2011}, pages = {197-208}, ee = {http://dx.doi.org/10.1007/978-3-642-24583-1_20}, -crossref = {DBLP:conf/spire/2011}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -114,7 +111,6 @@ @inproceedings{FER:SIR:VEN:2011 year = {2011}, pages = {760-771}, ee =
{http://dx.doi.org/10.1007/978-3-642-23719-5_64}, -crossref = {DBLP:conf/esa/2011}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -127,7 +123,6 @@ @inproceedings{BEN:FAR:2000 year = {2000}, pages = {88-94}, ee = {http://dx.doi.org/10.1007/10719839_9}, -crossref = {DBLP:conf/latin/2000}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -405,7 +400,6 @@ @inproceedings{GEA:RAH:RAM:RAM:2004 year = {2004}, pages = {159-172}, ee = {http://dx.doi.org/10.1007/978-3-540-27801-6_12}, -crossref = {DBLP:conf/cpm/2004}, bibsource = {DBLP, http://dblp.uni-trier.de}, } @@ -418,7 +412,6 @@ @inproceedings{FER:MAN:2000 year = {2000}, pages = {390-398}, ee = {http://doi.ieeecomputersociety.org/10.1109/SFCS.2000.892127}, -crossref = {DBLP:conf/focs/2000}, bibsource = {DBLP, http://dblp.uni-trier.de}, } @@ -458,7 +451,6 @@ @inproceedings{PAG:1999 year = {1999}, pages = {595-604}, ee = {http://dx.doi.org/10.1007/3-540-48523-6_56}, -crossref = {DBLP:conf/icalp/99}, bibsource = {DBLP, http://dblp.uni-trier.de}, } @@ -471,7 +463,6 @@ @inproceedings{OHL:GOG:2009 year = {2009}, pages = {51-62}, ee = {http://dx.doi.org/10.1007/978-3-642-03784-9_6}, -crossref = {DBLP:conf/spire/2009}, bibsource = {DBLP, http://dblp.uni-trier.de}, } From 946ebfff8b9e828a80ae10eed986d97efb77d5d7 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Fri, 9 Aug 2013 21:45:51 +1000 Subject: [PATCH 08/29] Added more stuff. --- extras/literature.bib | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/extras/literature.bib b/extras/literature.bib index 715ed26ff..2f8fc271c 100644 --- a/extras/literature.bib +++ b/extras/literature.bib @@ -1,6 +1,18 @@ Literature list of implemented data structures in SDSL (not complete yet) +// used in include/sdsl/wt_int.hpp +@article{GOG:NAV:PUG:2012, +author = {Travis Gagie and Gonzalo Navarro and Simon J.
Puglisi}, +title = {New algorithms on wavelet trees and applications to information retrieval}, +journal = {Theoretical Computer Science}, +volume = {426}, +year = {2012}, +pages = {25-41}, +ee = {http://dx.doi.org/10.1016/j.tcs.2011.12.002}, +bibsource = {DBLP, http://dblp.uni-trier.de} +} + // used in include/sdsl/algorithms_for_compressed_suffix_trees.hpp @inproceedings{FIS:2010, author = {Johannes Fischer}, From b23e7014b45a729e3af40b913d06ec4cbe48a1ad Mon Sep 17 00:00:00 2001 From: Matthias Petri Date: Wed, 14 Aug 2013 18:03:13 +1000 Subject: [PATCH 09/29] removed depricated keyword 'register' --- include/sdsl/bits.hpp | 6 +++--- include/sdsl/coder_fibonacci.hpp | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/sdsl/bits.hpp b/include/sdsl/bits.hpp index abb588e7f..07c81fa3f 100644 --- a/include/sdsl/bits.hpp +++ b/include/sdsl/bits.hpp @@ -382,7 +382,7 @@ inline uint32_t bits::hi(uint64_t x) return 0; return 63 - __builtin_clzll(x); #else - register uint64_t t,tt; // temporaries + uint64_t t,tt; // temporaries if ((tt = x >> 32)) { // hi >= 32 if ((t = tt >> 16)) { // hi >= 48 return (tt = t >> 8) ? 
56 + lt_hi[tt] : 48 + lt_hi[t]; @@ -516,7 +516,7 @@ inline uint64_t bits::read_int_and_move(const uint64_t*& word, uint8_t& offset, inline uint64_t bits::read_unary(const uint64_t* word, uint8_t offset) { - register uint64_t w = *word >> offset; + uint64_t w = *word >> offset; if (w) { return bits::lo(w); } else { @@ -532,7 +532,7 @@ inline uint64_t bits::read_unary(const uint64_t* word, uint8_t offset) inline uint64_t bits::read_unary_and_move(const uint64_t*& word, uint8_t& offset) { - register uint64_t w = (*word) >> offset; // temporary variable is good for the performance + uint64_t w = (*word) >> offset; // temporary variable is good for the performance if (w) { uint8_t r = bits::lo(w); offset = (offset + r+1)&0x3F; diff --git a/include/sdsl/coder_fibonacci.hpp b/include/sdsl/coder_fibonacci.hpp index f4df4db45..716a7033b 100644 --- a/include/sdsl/coder_fibonacci.hpp +++ b/include/sdsl/coder_fibonacci.hpp @@ -166,7 +166,7 @@ template inline bool fibonacci::encode(const int_vector1& v, int_vector2& z) { uint64_t z_bit_size = 0; - register uint64_t w; + uint64_t w; const uint64_t zero_val = v.width() < 64 ? (1ULL)< Date: Thu, 15 Aug 2013 13:06:18 +1000 Subject: [PATCH 10/29] Added access to the first row of the M the first row of the bwt permutation matrix can now be accessed via the F[] structure such as auto sym = csa.F[i]. 
--- include/sdsl/csa_bitcompressed.hpp | 3 ++ include/sdsl/csa_sada.hpp | 3 ++ include/sdsl/csa_wt.hpp | 3 ++ include/sdsl/suffix_array_helper.hpp | 44 ++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/include/sdsl/csa_bitcompressed.hpp b/include/sdsl/csa_bitcompressed.hpp index 0d016b3e4..357dbda7b 100644 --- a/include/sdsl/csa_bitcompressed.hpp +++ b/include/sdsl/csa_bitcompressed.hpp @@ -72,6 +72,7 @@ class csa_bitcompressed typedef psi_of_sa_and_isa psi_type; typedef bwt_of_csa_psi bwt_type; typedef text_of_csa text_type; + typedef first_row_of_csa first_row_type; typedef _sa_order_sampling sa_sample_type; typedef int_vector<> isa_sample_type; typedef t_alphabet_strat alphabet_type; @@ -106,6 +107,8 @@ class csa_bitcompressed const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma; const psi_type& psi = m_psi; const bwt_type bwt = bwt_type(this); + const bwt_type L = bwt_type(this); + const first_row_type F = first_row_type(this); const text_type text = text_type(this); const sa_sample_type& sa_sample = m_sa; const isa_sample_type& isa_sample = m_isa; diff --git a/include/sdsl/csa_sada.hpp b/include/sdsl/csa_sada.hpp index c56f0cae3..f583e2e57 100644 --- a/include/sdsl/csa_sada.hpp +++ b/include/sdsl/csa_sada.hpp @@ -79,6 +79,7 @@ class csa_sada typedef psi_of_csa_psi psi_type; typedef bwt_of_csa_psi bwt_type; typedef text_of_csa text_type; + typedef first_row_of_csa first_row_type; typedef typename t_sa_sample_strat::template type::sample_type sa_sample_type; typedef t_isa isa_sample_type; typedef t_alphabet_strat alphabet_type; @@ -127,6 +128,8 @@ class csa_sada const typename alphabet_type::sigma_type& sigma = m_alphabet.sigma; const psi_type psi = psi_type(this); const bwt_type bwt = bwt_type(this); + const bwt_type L = bwt_type(this); + const first_row_type F = first_row_type(this); const text_type text = text_type(this); const sa_sample_type& sa_sample = m_sa_sample; const isa_sample_type& isa_sample = m_isa_sample; 
diff --git a/include/sdsl/csa_wt.hpp b/include/sdsl/csa_wt.hpp index 3ff376ef7..52d13af5a 100644 --- a/include/sdsl/csa_wt.hpp +++ b/include/sdsl/csa_wt.hpp @@ -84,6 +84,7 @@ class csa_wt typedef ptrdiff_t difference_type; typedef psi_of_csa_wt psi_type; typedef bwt_of_csa_wt bwt_type; + typedef first_row_of_csa first_row_type; typedef text_of_csa text_type; typedef t_wt wavelet_tree_type; typedef typename t_sa_sample_strat::template type::sample_type sa_sample_type; @@ -122,6 +123,8 @@ class csa_wt const psi_type psi = psi_type(this); const bwt_type bwt = bwt_type(this); const text_type text = text_type(this); + const first_row_type F = first_row_type(this); + const bwt_type L = bwt_type(this); const sa_sample_type& sa_sample = m_sa_sample; const isa_sample_type& isa_sample = m_isa_sample; const wavelet_tree_type& wavelet_tree = m_wavelet_tree; diff --git a/include/sdsl/suffix_array_helper.hpp b/include/sdsl/suffix_array_helper.hpp index e3c710d7f..7b7be701c 100644 --- a/include/sdsl/suffix_array_helper.hpp +++ b/include/sdsl/suffix_array_helper.hpp @@ -471,6 +471,50 @@ class bwt_of_csa_wt }; +template +class first_row_of_csa +{ + public: + typedef const typename t_csa::char_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator;// STL Container requirement + private: + const t_csa* m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree + first_row_of_csa() {}; // disable default constructor + public: + //! Constructor + first_row_of_csa(t_csa* csa) { + m_csa = csa; + } + //! Calculate F[i] + /*! \param i The index for which the \f$\F\f$ value should be calculated, \f$i\in [0..size()-1]\f$. + * \par Time complexity + * \f$ \Order{\log |\Sigma|} \f$ + */ + value_type operator[](size_type i)const { + assert(m_csa != nullptr); + assert(i < size()); + return first_row_symbol(i, *m_csa); + } + //! 
Returns the size of the F column. + size_type size()const { + return m_csa->size(); + } + //! Returns if the F column is empty. + size_type empty()const { + return m_csa->empty(); + } + //! Returns a const_iterator to the first element. + const_iterator begin()const { + return const_iterator(this, 0); + } + //! Returns a const_iterator to the element after the last element. + const_iterator end()const { + return const_iterator(this, size()); + } +}; + template class text_of_csa From aa60e4aeaa2e8c19dc08a0a3d9ff446f8f2fb0af Mon Sep 17 00:00:00 2001 From: Matthias Petri Date: Thu, 15 Aug 2013 13:40:23 +1000 Subject: [PATCH 11/29] Added tests for F[] access method added a test for integer and byte alphabets to check if the F[] access method is working properly. --- test/CsaByteTest.cpp | 17 +++++++++++++++++ test/CsaIntTest.cpp | 16 ++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/test/CsaByteTest.cpp b/test/CsaByteTest.cpp index 2fc156b4a..af2a56e6f 100644 --- a/test/CsaByteTest.cpp +++ b/test/CsaByteTest.cpp @@ -137,6 +137,23 @@ TYPED_TEST(CsaByteTest, BwtAccess) } } +TYPED_TEST(CsaByteTest, FAccess) +{ + if (test_case_file_map.find(constants::KEY_TEXT) != test_case_file_map.end()) { + TypeParam csa; + ASSERT_EQ(true, load_from_file(csa, temp_file)); + int_vector<8> text; + load_from_file(text, test_case_file_map[constants::KEY_TEXT]); + std::sort(begin(text),end(text)); + size_type n = text.size(); + ASSERT_EQ(n, csa.size()); + for (size_type j=0; j wt; + construct(wt, argv[1], 1); + + cout << "wt.size()="<< wt.size() << endl; + cout << "wt.sigma ="<< wt.sigma << endl; + if (wt.size() > 0) { + // access an element + cout << "wt[0]=" << wt[0] << endl; + // rank an element (exclude) + uint64_t r = wt.rank(wt.size(), wt[0]); + cout << "wt.rank(wt.size(), wt[0])=" << r << endl; + // select element () + cout << "wt.select(r, wt[0]) = " << wt.select(r, wt[0]) << endl; + } +} From 615a1c780fce5363719d79ea73f2cd9176668f8c Mon Sep 17 00:00:00 2001 From: Simon 
Gog Date: Tue, 27 Aug 2013 14:22:50 +1000 Subject: [PATCH 13/29] Replaced NULL by nullptr --- include/sdsl/nearest_neighbour_dictionary.hpp | 2 +- include/sdsl/structure_tree.hpp | 2 +- lib/memory_management.cpp | 6 +++--- lib/structure_tree.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/sdsl/nearest_neighbour_dictionary.hpp b/include/sdsl/nearest_neighbour_dictionary.hpp index 8b822390c..c95e51d2c 100644 --- a/include/sdsl/nearest_neighbour_dictionary.hpp +++ b/include/sdsl/nearest_neighbour_dictionary.hpp @@ -220,7 +220,7 @@ class nearest_neighbour_dictionary //! Serializes the nearest_neighbour_dictionary. /*! \param out Out-Stream to serialize the data to. */ - size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { + size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const { size_type written_bytes = 0; structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); written_bytes += m_abs_samples.serialize(out, child, "absolute_samples"); diff --git a/include/sdsl/structure_tree.hpp b/include/sdsl/structure_tree.hpp index 8b8365624..0e570509c 100644 --- a/include/sdsl/structure_tree.hpp +++ b/include/sdsl/structure_tree.hpp @@ -49,7 +49,7 @@ class structure_tree public: static structure_tree_node* add_child(structure_tree_node* v, const std::string& name, const std::string& type) { if (v) return v->add_child(name,type); - return NULL; + return nullptr; }; static void add_size(structure_tree_node* v, uint64_t value) { if (v) v->add_size(value); diff --git a/lib/memory_management.cpp b/lib/memory_management.cpp index a9a2b329d..3cd9e7b7f 100644 --- a/lib/memory_management.cpp +++ b/lib/memory_management.cpp @@ -32,8 +32,8 @@ sdsl::mm_initializer::mm_initializer() // initialize static members object here // mm::m_items.clear(); mm::m_items = mm::tMVecItem(); - mm::m_data = NULL; - mm::m_out = NULL; + mm::m_data = nullptr; + 
mm::m_out = nullptr; } } sdsl::mm_initializer::~mm_initializer() @@ -52,7 +52,7 @@ bool mm::map_hp() { #ifdef MAP_HUGETLB size_t hpgs= (m_total_memory+HUGE_LEN-1)/HUGE_LEN; // number of huge pages required to store the int_vectors - m_data = (uint64_t*)mmap(NULL, hpgs*HUGE_LEN, HUGE_PROTECTION, HUGE_FLAGS, 0, 0); + m_data = (uint64_t*)mmap(nullptr, hpgs*HUGE_LEN, HUGE_PROTECTION, HUGE_FLAGS, 0, 0); if (m_data == MAP_FAILED) { std::cout << "mmap was not successful" << std::endl; return false; diff --git a/lib/structure_tree.cpp b/lib/structure_tree.cpp index 23232aabb..11dd01121 100644 --- a/lib/structure_tree.cpp +++ b/lib/structure_tree.cpp @@ -266,7 +266,7 @@ void write_structure_tree(const structure_tree_node* v, std::ostrea template<> void write_structure_tree(const structure_tree_node* v, std::ostream& out, size_t level) { - /* if (NULL == v or (v->children.size()==0 and v->key_values.size()==0)) { + /* if (nullptr == v or (v->children.size()==0 and v->key_values.size()==0)) { return; } typedef structure_tree_node::tKeyValue::const_iterator const_iterator; From 377ab0a4236873dbe96babb87d142969503e84df Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Tue, 27 Aug 2013 15:29:15 +1000 Subject: [PATCH 14/29] Restructured code --- include/sdsl/algorithms.hpp | 674 ------------------ ...lgorithms_for_compressed_suffix_arrays.hpp | 61 -- ...algorithms_for_compressed_suffix_trees.hpp | 406 ----------- ...rentheses.hpp => bp_support_algorithm.hpp} | 6 +- include/sdsl/bp_support_g.hpp | 34 +- include/sdsl/bp_support_gg.hpp | 22 +- include/sdsl/bp_support_sada.hpp | 18 +- include/sdsl/csa_bitcompressed.hpp | 3 +- include/sdsl/csa_sada.hpp | 3 +- include/sdsl/csa_wt.hpp | 3 +- include/sdsl/cst_sada.hpp | 3 +- include/sdsl/cst_sct3.hpp | 3 +- include/sdsl/experimental/bp_support_j.hpp | 512 ------------- include/sdsl/experimental/gap_vector.hpp | 248 ------- .../sdsl/experimental/rank_support_jmc.hpp | 179 ----- include/sdsl/lcp_bitcompressed.hpp | 1 - 
include/sdsl/lcp_byte.hpp | 1 - include/sdsl/lcp_dac.hpp | 1 - include/sdsl/lcp_support_sada.hpp | 1 - include/sdsl/lcp_vlc.hpp | 1 - include/sdsl/lcp_wt.hpp | 1 - include/sdsl/suffix_array_helper.hpp | 20 + include/sdsl/suffix_tree_helper.hpp | 270 +++++++ include/sdsl/test_index_performance.hpp | 3 +- lib/algorithms.cpp | 66 -- 25 files changed, 336 insertions(+), 2204 deletions(-) delete mode 100644 include/sdsl/algorithms.hpp delete mode 100644 include/sdsl/algorithms_for_compressed_suffix_arrays.hpp delete mode 100644 include/sdsl/algorithms_for_compressed_suffix_trees.hpp rename include/sdsl/{algorithms_for_balanced_parentheses.hpp => bp_support_algorithm.hpp} (99%) delete mode 100644 include/sdsl/experimental/bp_support_j.hpp delete mode 100644 include/sdsl/experimental/gap_vector.hpp delete mode 100644 include/sdsl/experimental/rank_support_jmc.hpp delete mode 100644 lib/algorithms.cpp diff --git a/include/sdsl/algorithms.hpp b/include/sdsl/algorithms.hpp deleted file mode 100644 index 42c450e7e..000000000 --- a/include/sdsl/algorithms.hpp +++ /dev/null @@ -1,674 +0,0 @@ -/* sdsl - succinct data structures library - Copyright (C) 2008 Simon Gog - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see http://www.gnu.org/licenses/ . -*/ -/*! \file algorithms.hpp - \brief algorithms.hpp contains algorithms for suffixarrays. 
- \author Simon Gog -*/ - -#ifndef INCLUDED_SDSL_ALGORITHMS -#define INCLUDED_SDSL_ALGORITHMS - -#include "int_vector.hpp" - -#include // for exceptions -#include -#include -#include -#include - - -namespace sdsl -{ - -//! A helper class containing algorithms for succinct data structures. -/*! - \author Simon Gog - */ -namespace algorithm -{ - -// public: -//! Calculate the zero-order entropy for a text T -/*! - * \param c Pointer to a 0-terminated string. - * \return The zero-order entropy of the text. - */ -double H_0(const unsigned char* c); - -// Claculate the star entropy of T, see Manzini 2001 for details -double H_0s(const unsigned char* c); - - -//! Calculate the Inverse Suffix Array from a Suffix Array SA. -/*! - * - Time requirement: \f$ O( sa.size() ) \f$ i.e. linear. - * - Space requirement: No additional space needed. - * \param sa Suffix Array. - * \param isa Container to store the resulting inverse Suffix Array. - */ -template -static void sa2isa(const RandomAccessContainer1& sa, RandomAccessContainer2& isa); - -template -static void sa2isa(const RandomAccessContainer& sa, int_vector<>& isa); - -//! Calculate the Inverse Permutation of a Permutation from \f$0..sa.size()-1\f$ in-place. -/*! - * \param sa A reference to the permutation of the numbers \f$0..sa.size()-1\f$ stored in a random access container. - * \par Time complexity - * \f$ \Order{sa.size()}, i.e. linear time complexity \f$ - * \par Note - * If there is space for the visited bits in the random access container the procedure is really implemented inplace. - * Otherwise we use a additional bit_vector of size sa.size() bits to store the indicator bits for the procedure. - */ -template -static void inverse_permutation_inplace(RandomAccessContainer& sa); - -//! Calculate the previous smaller value (psv) array for a random access container a. -/*! - * \param a Container to calculate the psv array. - * \param psv Container that contains the result after the calculation. 
- * \pre The array \e a contains only non negative values and a.size() == psv.size(). - * \post \f[ psv[i] = \begin{array}{rl} a.size() &\mbox{ if} \min\{ a[j] \mid j -void calculate_psv(const RandomAccessContainer1& a, RandomAccessContainer2& psv); - - -//! Verify the result of the method caculate_psv -/*! \return True if the RandomAccessContainer psv is the previous smaller array of a. - */ -template -static bool verify_psv(const RandomAccessContainer1& a, RandomAccessContainer2& psv); - -template -static void calculate_psv2(const RandomAccessContainer1& a, RandomAccessContainer2& psv); - -//! Calculate the next smaller value (nsv) array for a random access container a. -/*! - * \param a Container to calculate the nsv array. - * \param nsv Container that contains the result after the calculation. - * \pre The array \e a contains only non negative values and a.size() == nsv.size(). - * \post \f[ nsv[i] = \begin{array}{rl} 0 &\mbox{ if} \min\{ a[j] \mid j>i \} \geq a[i] \\ - min\{j\mid a[j] < a[i] \wedge j>i\} &\mbox{ otherwise.}\end{array} \f] - */ -template -static void calculate_nsv(const RandomAccessContainer1& a, RandomAccessContainer2& nsv); - -//! Verify the result of the method of calculate_nsv -template -static bool verify_nsv(const RandomAccessContainer1& a, RandomAccessContainer2& nsv); - -//! TODO: Impelement -template -static void calculate_lcp(const RandomAccessContainer& sa, const Text& text, RandomAccessContainer& lcp); - -//! Verify that a Suffix Array sa is correct for a text c -/*! - * \param c Text (c-string) to check the suffix array for. - \param len Length of the text c. - \param sa Suffix array to check. - \return If the suffix array is correct for the text c. - The suffix array sa is correct for c if - - the values of sa lie in the range [0..len) - - all values are different - - c+sa[i] < c+sa[i+1] for all i -static bool verify_sa(const unsigned char* c, typename RandomAccessContainer::size_type len, const RandomAccessContainer& sa); - -//! 
Verify that a SelectSupport rs is correct for a int_vector<1>. -template -inline static bool verify_select_support(const SelectSupport& ss, const int_vector<1>& b); - -//! Verify that two Containers have the same number of elements and the all corresponding (i-th) elements (0<=i -static bool equal_container_values(const Container1& c1, Container2& c2); - -//! Calculate the Inverse Suffix Array inplace. -/*! \param sa RandomAccessContainer that contains the Suffix Array and is replaced by the Inverse Suffix Array. - * \par Time complexity - * \f$ \Order{ 2*SA.size() }\f$, i.e. linear. - * \par Space complexity - * Additional sa.size() bits. - */ -template -static void sa2isa_inplace(RandomAccessContainer& sa); - -//! Calculate the \f$\Psi\f$-function for a given Burrows and Wheeler Transformation. -/* \param bwt Burrows and Wheeler Transformation. -* \param len Length of the bwt. -* \param psi Container of size len for the result. -* \par Time complexity - * \f$\Order{2n}\f$ i.e. linear. -* \par Space complexity -* Space of bwt (\f$\Order{n}\f$ bits) + space of uncompressed \f$\Psi\f$-function (\f$\Order{4n}\f$ bits). -*/ -template -static void bwt2psi(const unsigned char* bwt, typename RandomAccessContainer::size_type len, RandomAccessContainer& psi); - -//! Calculate the \f$\Psi\f$-function for a given suffix array. -/*! \param sa Suffix Array to calculate the \f$\Psi\f$-function for. - * \param psi RandomAccessContainer that will contain the resulting \f$\Psi\f$-function. - * \par Time complexity - * \f$ \Order{3*SA.size() }\f$, i.e. linear. - * \par Space complexity - * Additional \f$ sa.size() \cdot \log(RandomAccessContainer::size_type)\f$ bits - */ -template -static void sa2psi(const RandomAccessContainer1& sa, RandomAccessContainer2& psi); - -template -static void sa2psi(const RandomAccessContainer& sa, int_vector<>& psi); - -//! Calculate the Longest Common Prefix Table (lcptab). -/*! Algorithm from Kasai et al. 
"Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its Applications" - * \param sa Suffix Array to calculate the lcptab for. - * \param text Text to calculate the lcptab for. - * \param lcp RandomAccessContainer that will contain the resulting lcptab. - * \par Time complexity - * \f$ \Order{ SA.size() } \f$, i.e. linear. - */ -template -static void calculate_lcp12(const RandomAccessContainer& sa, const Text& text, RandomAccessContainer& lcp); - -//! Calculate the suffix array SA out of the \f$\Psi\f$-function and \f$ SA^{-1}[0]\f$. -/*! \param psi A \f$\Psi-\f$ function. - * \param isa_0 \f$ SA^{-1}[0] \f$. If SA[0]=n \f$ SA^{-1}[0]=\Psi(0) \f$. - * \param sa A RandomAccessContainer that will contain the resulting suffix array. - * \par Time complexity - * \f$\Order{psi.size()}\f$, i.e. linear. - */ -template -static void psi2sa(const RandomAccessContainer1& psi, const typename RandomAccessContainer1::size_type isa_0, RandomAccessContainer2& sa); - -template -static void psi2sa(const RandomAccessContainer& psi, const typename RandomAccessContainer::size_type isa_0, int_vector<>& sa); -//! Calculate the inverse suffix array SA out of the \f$\Psi\f$-function and \f$ SA^{-1}[0]\f$. -/*! \param psi A \f$\Psi-\f$ function. - * \param isa_0 \f$ SA^{-1}[0] \f$. If SA[0]=n \f$ SA^{-1}[0]=\Psi(0) \f$. - * \param isa A RandomAccessContainer that will contain the resulting inverse suffix array. - * \par Time complexity - * \f$\Order{psi.size()}\f$, i.e. linear. 
- */ -template -static void psi2isa(const RandomAccessContainer& psi, const typename RandomAccessContainer::size_type isa_0, RandomAccessContainer& isa); - - -template -void calculate_psv(const RandomAccessContainer1& a, RandomAccessContainer2& psv) -{ - assert(psv.size() == a.size()); - if (a.empty()) - return; - psv[0] = psv.size(); - assert(psv[0] == psv.size()); - std::stack psv_index; - typename RandomAccessContainer1::value_type min_element = a[0]; - for (typename RandomAccessContainer1::size_type i=0; i < a.size(); ++i) { - if (a[i] <= min_element) { - while (!psv_index.empty()) - psv_index.pop(); - min_element = a[i]; - psv[i] = a.size(); - psv_index.push(i); - } else { // a[i] > min_element => stack will not be empty - while (a[psv_index.top()] >= a[i]) - psv_index.pop(); - psv[i] = psv_index.top(); - psv_index.push(i); - } - } -} - -template -bool verify_psv(const RandomAccessContainer1& a, RandomAccessContainer2& psv) -{ - if (a.size()!=psv.size()) - return false; - typename RandomAccessContainer1::value_type min_element = a[0]; - for (typename RandomAccessContainer1::size_type i=0; i=i) - return false; - if (a[psv[i]] >= a[i]) - return false; - for (typename RandomAccessContainer1::size_type j=psv[i]+1; j -void calculate_psv2(const RandomAccessContainer1& a, RandomAccessContainer2& psv) -{ - assert(psv.size() == a.size()); - if (a.empty()) - return; - psv[0] = psv.size(); - assert(psv[0] == psv.size()); - // TODO implementing the algorithm with use of a stack - psv[0] = psv.size(); - typedef std::pair tPII; - std::stack psv_stack; - typename RandomAccessContainer1::value_type min_element = a[0], ai; - for (typename RandomAccessContainer1::size_type i=0; i < a.size(); ++i) { - if ((ai=a[i]) <= min_element) { - while (!psv_stack.empty()) - psv_stack.pop(); - min_element = ai; - psv[i] = a.size(); - psv_stack.push(tPII(ai, i)); - } else { // a[i] > min_element => stack will not be empty - while (psv_stack.top().first >= ai) - psv_stack.pop(); - psv[i] = 
psv_stack.top().second; - psv_stack.push(tPII(ai, i)); - } - } -} - -template -void calculate_nsv(const RandomAccessContainer1& a, RandomAccessContainer2& nsv) -{ - assert(nsv.size() == a.size()); - if (a.empty()) - return; - nsv[nsv.size()-1] = 0; - std::stack nsv_index; - typename RandomAccessContainer1::value_type min_element = a[nsv.size()-1]; - for (typename RandomAccessContainer1::size_type i=nsv.size(); i > 0; --i) { - if (a[i-1] <= min_element) { - while (!nsv_index.empty()) - nsv_index.pop(); - min_element = a[i-1]; - nsv[i-1] = 0; - nsv_index.push(i-1); - } else { // a[i] > min_element => stack will not be empty - while (a[nsv_index.top()] >= a[i-1]) - nsv_index.pop(); - nsv[i-1] = nsv_index.top(); - nsv_index.push(i-1); - } - } -} - - -template -bool verify_nsv(const RandomAccessContainer1& a, RandomAccessContainer2& nsv) -{ - if (a.size() != nsv.size()) - return false; - typename RandomAccessContainer1::value_type min_element = a[a.size()-1]; - for (typename RandomAccessContainer1::size_type i=a.size(); i>0; --i) { - if (a[i-1] <= min_element) { - min_element = a[i-1]; - if (nsv[i-1] != 0) // see definition of calculate_nsv - return false; - } else { - if (nsv[i-1] <= i-1) - return false; - if (a[nsv[i-1]] >= a[i-1]) - return false; - for (typename RandomAccessContainer1::size_type j=i; j -void bwt2psi(const unsigned char* bwt, typename RandomAccessContainer::size_type len, RandomAccessContainer& psi) -{ - if (psi.size() != len) - psi.resize(len); - typename RandomAccessContainer::size_type C[256] = {0}, index_of_dollar = 0; - for (typename RandomAccessContainer::size_type i=0; i use additional - perm[0] = perm_0; - bit_vector is_inverse(perm.size(), 0); // indicator bit_vector! 
- for (size_type i=0, j,jj,t; i=0 and perm[i] -void sa2psi(const RandomAccessContainer1& sa, RandomAccessContainer2& psi) -{ - RandomAccessContainer2 isa; // temporary array for the inverse suffix array - sa2isa(sa, isa); - psi.resize(sa.size()); - typename RandomAccessContainer1::value_type tmp; // - typename RandomAccessContainer2::iterator psi_it = psi.begin(); - for (typename RandomAccessContainer1::const_iterator sa_it = sa.begin(), end = sa.end(); sa_it != end; ++sa_it, ++psi_it) { - if ((tmp = *sa_it+1) != sa.size()) - *psi_it = isa[tmp]; - else - *psi_it = isa[0]; - } -} - -template -void sa2psi(const RandomAccessContainer& sa, int_vector<>& psi) -{ - int_vector<> isa; // temporary array for the inverse suffix array - sa2isa(sa, isa); - psi.width(bits::hi(sa.size())+1); - psi.resize(sa.size()); - typename RandomAccessContainer::value_type tmp; // - int_vector<>::iterator psi_it = psi.begin(); - for (typename RandomAccessContainer::const_iterator sa_it = sa.begin(), end = sa.end(); sa_it != end; ++sa_it, ++psi_it) { - if ((tmp = *sa_it+1) != sa.size()) - *psi_it = isa[tmp]; - else - *psi_it = isa[0]; - } -} - -template -void calculate_lcp(const RandomAccessContainer& sa, const Text& text, RandomAccessContainer& lcp) -{ - lcp = sa; - RandomAccessContainer isa; - sa2isa(sa, isa); - - lcp[0] = 0; - typename RandomAccessContainer::size_type i=0,j,k,l=0; - for (typename RandomAccessContainer::const_iterator isa_it = isa.begin(), end = isa.end(); isa_it != end; ++isa_it, ++i) { - if ((j = *isa_it)) { - k = sa[j-1]; - while (text[k+l]==text[i+l]) - ++l; - lcp[j] = l; - l = (l==0)?0:l-1; - } - } -} - -/* -TODO: add implementation and definition -template -void algorithm::calculate_lps(){ - -} -*/ - -template -void psi2sa(const RandomAccessContainer1& psi, const typename RandomAccessContainer1::size_type isa_0, RandomAccessContainer2& sa) -{ - sa.resize(psi.size()); - if (psi.empty()) - return; - typename RandomAccessContainer1::value_type isa_k = isa_0; - for 
(typename RandomAccessContainer1::size_type k = 0, size=psi.size(); k < size; ++k, isa_k = psi[isa_k]) { - sa[isa_k] = k; - } -} - -template -void psi2sa(const RandomAccessContainer& psi, const typename RandomAccessContainer::size_type isa_0, int_vector<>& sa) -{ - sa.width(bits::hi(psi.size())+1); - sa.resize(psi.size()); - if (psi.empty()) - return; - typename RandomAccessContainer::value_type isa_k = isa_0; - for (typename RandomAccessContainer::size_type k = 0, size=psi.size(); k < size; ++k, isa_k = psi[isa_k]) { - sa[isa_k] = k; - } -} - -template -void psi2isa(const RandomAccessContainer& psi, const typename RandomAccessContainer::size_type isa_0, RandomAccessContainer& isa) -{ - isa = psi; - if (psi.empty()) - return; - typename RandomAccessContainer::value_type isa_k = isa_0; - for (typename RandomAccessContainer::size_type k=0, size=psi.size(); k < size; ++k, isa_k = psi[isa_k]) { - isa[k] = isa_k; - } -} - -template -bool verify_sa(const unsigned char* c, typename RandomAccessContainer::size_type len, const RandomAccessContainer& sa) -{ - typedef typename RandomAccessContainer::size_type size_type; - if (sa.size() != len) { // check length - std::cerr<<"sa.size()!=len"<c[j]) { // lex order is wrong! 
- std::cerr<<"lex order is wrong"< // for calculate_supercartesian_tree_bp - -namespace sdsl -{ - -namespace algorithm -{ - -template -void set_isa_samples(int_vector_buffer& sa_buf, typename Csa::isa_sample_type& isa_sample) -{ - typedef typename Csa::size_type size_type; - size_type n = sa_buf.size(); - - isa_sample.width(bits::hi(n)+1); - if (n >= 1) { // so n+Csa::isa_sample_dens >= 2 - isa_sample.resize((n-1+Csa::isa_sample_dens-1)/Csa::isa_sample_dens + 1); - } - util::set_to_value(isa_sample, 0); - - for (size_type i=0; i < n; ++i) { - size_type sa = sa_buf[i]; - if ((sa % Csa::isa_sample_dens) == 0) { - isa_sample[sa/Csa::isa_sample_dens] = i; - } else if (sa+1 == n) { - isa_sample[(sa+Csa::isa_sample_dens-1)/Csa::isa_sample_dens] = i; - } - } -} - -}// end namespace algorithm - -}// end namespace sdsl - -#endif - diff --git a/include/sdsl/algorithms_for_compressed_suffix_trees.hpp b/include/sdsl/algorithms_for_compressed_suffix_trees.hpp deleted file mode 100644 index 253060df8..000000000 --- a/include/sdsl/algorithms_for_compressed_suffix_trees.hpp +++ /dev/null @@ -1,406 +0,0 @@ -/* sdsl - succinct data structures library - Copyright (C) 2009 Simon Gog - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see http://www.gnu.org/licenses/ . -*/ -/*! \file algorithms_for_compressed_suffix_trees.hpp - \brief algorithms_for_compressed_suffix_trees.hpp contains algorithms for compressed suffix trees. 
- \author Simon Gog -*/ -#ifndef INCLUDED_SDSL_ALGORITHMS_FOR_COMPRESSED_SUFFIX_TREES -#define INCLUDED_SDSL_ALGORITHMS_FOR_COMPRESSED_SUFFIX_TREES - -#include "int_vector.hpp" // for bit_vector -#include "sorted_stack_support.hpp" // for construct_supercartesian_tree_bp -#include "sorted_multi_stack_support.hpp" // for first_p_index_construction -#include "util.hpp" -#include // for calculate_supercartesian_tree_bp - - -namespace sdsl -{ - -namespace algorithm -{ - -//! Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009). -/*! \param vec Random access container for which the Super-Cartesian tree representation should be calculated. - * The value_type of vec should be an unsigned integer type. - * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. - * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. - * \par Time complexity - * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() - * \par Space complexity - * \f$ \Order{n \cdot \log n } \f$ bits. 
- */ -template -void construct_supercartesian_tree_bp(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true) -{ - typedef typename RandomAccessContainer::size_type size_type; - bp.resize(2*vec.size()); // resize bit vector for balanaced parantheses to 2 n bits - util::set_to_value(bp, 0); - std::stack vec_stack; - - size_type k=0; - for (size_type i=0; i < vec.size(); ++i) { - typename RandomAccessContainer::value_type l = vec[i]; - if (minimum) { - while (vec_stack.size() > 0 and l < vec_stack.top()) { - vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis - } - } else { - while (vec_stack.size() > 0 and l > vec_stack.top()) { - vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis - } - } - vec_stack.push(l); - bp[k++] = 1; // writing an opening parenthesis - } - while (vec_stack.size() > 0) { - vec_stack.pop(); - bp[k++] = 0; // writing a closing parenthesis - } - assert(k == 2*vec.size()); -} - -//! Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009). -/*! \param vec Random access container for which the Super-Cartesian tree representation should be calculated. - * The value_type of vec should be an unsigned integer type. - * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. - * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. - * \par Time complexity - * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() - * \par Space complexity - * \f$\Order{n}\f$ bits, by the stack_support described in the paper "Optimal Succinctness For Range Minimum Queries" of Johannes Fischer. 
- */ -// TODO: sorted_multi_stack_support einbauen, RandomAccessContainer durch int_vector_buffer ersetzen -template -void construct_supercartesian_tree_bp_succinct(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true) -{ - typedef typename RandomAccessContainer::size_type size_type; - bp.resize(2*vec.size()); // resize bit vector for balanced parentheses to 2 n bits - if (vec.size() > 0) { - util::set_to_value(bp, 0); - sorted_stack_support vec_stack(vec.size()); // <- ist das ein Problem fuer int_vector_buffer - - size_type k=0; - if (minimum) { - bp[k++] = 1; - for (size_type i=1; i < vec.size(); ++i) { - if (vec[i] < vec[i-1]) { - ++k; - while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) { - vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zero - } - } else { - vec_stack.push(i-1); // "lazy stack" trick: speed-up ca. 25% - } - bp[k++] = 1; // writing an opening parenthesis - } - /* - vec_stack.push(0); - bp[k++] = 1; - for(size_type i=1,j, start_run=1; i < vec.size(); ++i){ - if( vec[i] < vec[i-1] ){ - j = i; - while( --j >= start_run and vec[i] < vec[j]) ++k; - while(start_run <= j){ // auf den stack pushen - vec_stack.push(start_run++); - } - while( vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()] ){ - vec_stack.pop(); ++k; - } - start_run = i; - } - bp[k++] = 1; - } - */ - } else { - // hier noch ohne "lazy stack" trick - for (size_type i=0; i < vec.size(); ++i) { - while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()]) { - vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis - } - vec_stack.push(i); - bp[k++] = 1; // writing an opening parenthesis - } - } -#ifdef SDSL_DEBUG - // not necessary as bp is already initialized to zero - while (!vec_stack.empty()) { - vec_stack.pop(); - bp[k++] = 0; // writing a closing parenthesis - } - assert(k == 2*vec.size()); -#endif - } -} - -//! 
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009). -/*! \param lcp_buf int_vector_buffer of the LCP Array for which the Super-Cartesian tree representation should be calculated. - * The value_type of vec should be an unsigned integer type. - * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. - * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. - * \par Time complexity - * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() - * \par Space complexity - * \f$\Order{2n}\f$ bits, by the multi_stack_support - */ -template -void construct_supercartesian_tree_bp_succinct(int_vector_buffer& lcp_buf, bit_vector& bp, const bool minimum=true) -{ - typedef int_vector_size_type size_type; - size_type n = lcp_buf.size(); - bp.resize(2*n); // resize bit vector for balanced parentheses to 2 n bits - if (n == 0) // if n == 0 we are done - return; - util::set_to_value(bp, 0); - sorted_multi_stack_support vec_stack(n); - - size_type k=0; - if (minimum) { - bp[k++] = 1; - size_type last = lcp_buf[0]; - for (size_type i=1, x; i < n; ++i) { - x = lcp_buf[i]; - if (x < last) { - ++k; // writing a closing parenthesis for last - while (!vec_stack.empty() and x < vec_stack.top()) { - vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zeros - } - } else { - vec_stack.push(last); // "lazy stack" trick: Beschleunigung: ca 25 % - } - bp[k++] = 1; // writing an opening parenthesis - last = x; - } - } else { - // hier noch ohne "lazy stack" trick - for (size_type i=0, x; i < n; ++i) { - x = lcp_buf[i]; - while (!vec_stack.empty() and x > vec_stack.top()) { - vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zeros - } - vec_stack.push(x); - bp[k++] = 1; // writing an opening parenthesis - } - } -} - -//! 
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009) and the first_child bit_vector -/*! \param lcp_buf int_vector_buffer for the lcp array for which the Super-Cartesian tree representation should be calculated. - * The value_type of vec should be an unsigned integer type. - * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. - * \param bp_fc Reference to the first child bit_vector of bp. - * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. - * \par Time complexity - * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() - * \par Space complexity - * \f$\Order{2n}\f$ bits, by the multi_stack_support - */ -template -int_vector_size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer& lcp_buf, bit_vector& bp, bit_vector& bp_fc, const bool minimum=true) -{ - typedef int_vector_size_type size_type; - size_type n = lcp_buf.size(); - bp.resize(2*n); // resize bit vector for balanaced parantheses to 2 n bits - bp_fc.resize(n); - if (n == 0) // if n == 0 we are done - return 0; - size_type fc_cnt=0; // first child counter - util::set_to_value(bp, 0); - util::set_to_value(bp_fc, 0); - sorted_multi_stack_support vec_stack(n); - - size_type k=0; - size_type k_fc=0; // first child index - if (minimum) { - // hier noch ohne "lazy stack" trick - for (size_type i=0, x; i < n; ++i) { - x = lcp_buf[i]; - while (!vec_stack.empty() and x < vec_stack.top()) { - if (vec_stack.pop()) { - bp_fc[k_fc] = 1; - ++fc_cnt; - } - ++k; // writing a closing parenthesis, bp is already initialized to zeros - ++k_fc; // write a bit in first_child - } - vec_stack.push(x); - bp[k++] = 1; // writing an opening parenthesis - } - - } else { - // hier noch ohne "lazy stack" trick - for (size_type i=0, x; i < n; ++i) { - x = lcp_buf[i]; - while (!vec_stack.empty() and x > vec_stack.top()) { - if (vec_stack.pop()) { - bp_fc[k_fc] = 1; - 
++fc_cnt; - } - ++k; // writing a closing parenthesis, bp is already initialized to zeros - ++k_fc; // write a bit in first_child - } - vec_stack.push(x); - bp[k++] = 1; // writing an opening parenthesis - } - } - while (!vec_stack.empty()) { - if (vec_stack.pop()) { - bp_fc[k_fc] = 1; - ++fc_cnt; - } - // writing a closing parenthesis in bp, not necessary as bp is initalized with zeros - ++k; - ++k_fc; - } -// assert( k == 2*vec.size() ); - return fc_cnt; -} - - -template -void construct_supercartesian_tree_bp_succinct2(const RandomAccessContainer& vec, bit_vector& bp, - SDSL_UNUSED const bool minimum=true) -{ - typedef typename RandomAccessContainer::size_type size_type; - bp.resize(2*vec.size()); // resize bit vector for balanced parentheses to 2 n bits - util::set_to_value(bp, 0); - sorted_stack_support vec_stack(vec.size()); // <- ist das ein Problem fuer int_vector_buffer - - size_type k=0; -// uint64_t wbuf=0; - for (size_type i=0/*, cnt64=0*/; i < vec.size(); ++i) { - while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) { - vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis - } - vec_stack.push(i); - bp[k++] = 1; // writing an opening parenthesis - while (i+1 < vec.size() and vec[i+1] >= vec[i]) { - vec_stack.push(++i); - bp[k++]; - } - } -#ifdef SDSL_DEBUG -// not neccessary as bp is already initialized to zero - while (vec_stack.size() > 0) { - vec_stack.pop(); - bp[k++] = 0; // writing a closing parenthesis - } - assert(k == 2*vec.size()); -#endif -} - -template -typename RandomAccessContainer::size_type construct_first_p_index(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true) -{ - typedef typename RandomAccessContainer::size_type size_type; - size_type nr_of_first_indices = 0; - bp = bit_vector(vec.size(), 0); -// std::cerr<<"bp.size()="< 0 and vec[i] < vec[vec_stack.top()]) { - t = vec[vec_stack.top()]; - vec_stack.pop(); - if (vec_stack.size() == 0 or t != 
vec[vec_stack.top()]) { - bp[k] = 1; - ++nr_of_first_indices; - } - ++k; - - } - } else { - while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()]) { - t = vec[vec_stack.top()]; - vec_stack.pop(); - if (vec_stack.size() == 0 or t != vec[vec_stack.top()]) { - bp[k] = 1; - ++nr_of_first_indices; - } - ++k; - } - } - vec_stack.push(i); - } - while (vec_stack.size() > 0) { - size_type t = vec[vec_stack.top()]; - vec_stack.pop(); - if (vec_stack.size() == 0 or t != vec[vec_stack.top()]) { - bp[k] = 1; - ++nr_of_first_indices; - } - ++k; - } - assert(k == vec.size()); - return nr_of_first_indices; -} - -template -bit_vector::size_type construct_first_p_index(int_vector_buffer& lcp_buf, bit_vector& bp, const bool minimum=true) -{ - typedef bit_vector::size_type size_type; - size_type nr_of_first_indices = 0; - size_type n = lcp_buf.size(); - - bp = bit_vector(n, 0); - sorted_multi_stack_support vec_stack(n); - size_type k=0; - - if (minimum) { - for (size_type i = 0, x; i < n; ++i) { - x = lcp_buf[i]; - while (!vec_stack.empty() and x < vec_stack.top()) { - if (vec_stack.pop()) { - bp[k] = 1; - ++nr_of_first_indices; - } - ++k; - } - vec_stack.push(x); - } - } else { - for (size_type i = 0, x; i < n; ++i) { - x = lcp_buf[i]; - while (!vec_stack.empty() and x > vec_stack.top()) { - if (vec_stack.pop()) { - bp[k] = 1; - ++nr_of_first_indices; - } - ++k; - } - vec_stack.push(x); - } - } - - while (!vec_stack.empty()) { - if (vec_stack.pop()) { - bp[k] = 1; - ++nr_of_first_indices; - } - ++k; - } -// assert( k == vec.size() ); - return nr_of_first_indices; -} - -}// end namespace algorithm - -}// end namespace sdsl - -#endif - diff --git a/include/sdsl/algorithms_for_balanced_parentheses.hpp b/include/sdsl/bp_support_algorithm.hpp similarity index 99% rename from include/sdsl/algorithms_for_balanced_parentheses.hpp rename to include/sdsl/bp_support_algorithm.hpp index c2e6f7507..6e261ab7d 100644 --- a/include/sdsl/algorithms_for_balanced_parentheses.hpp +++ 
b/include/sdsl/bp_support_algorithm.hpp @@ -15,11 +15,11 @@ along with this program. If not, see http://www.gnu.org/licenses/ . */ /*! \file algorithms_for_balanced_parentheses.hpp - \brief algorithms.hpp contains algorithms for balanced parentheses sequences. + \brief bp_support_algorithm.hpp contains algorithms for balanced parentheses sequences. \author Simon Gog */ -#ifndef INCLUDED_SDSL_ALGORITHMS_FOR_BALANCED_PARENTHESES -#define INCLUDED_SDSL_ALGORITHMS_FOR_BALANCED_PARENTHESES +#ifndef INCLUDED_SDSL_BP_SUPPORT_ALGORITHM +#define INCLUDED_SDSL_BP_SUPPORT_ALGORITHM #include "int_vector.hpp" // for bit_vector #include // for calculate_pioneers_bitmap method diff --git a/include/sdsl/bp_support_g.hpp b/include/sdsl/bp_support_g.hpp index f496a9ed5..0efa2f51f 100644 --- a/include/sdsl/bp_support_g.hpp +++ b/include/sdsl/bp_support_g.hpp @@ -26,7 +26,7 @@ #include "rmq_support.hpp" #include "rank_support.hpp" #include "select_support.hpp" -#include "algorithms.hpp" +#include "bp_support_algorithm.hpp" #include "util.hpp" #include #include @@ -138,20 +138,20 @@ class bp_support_g util::init_support(m_select_bp, bp); bit_vector pioneer; // calulate pioneers - algorithm::calculate_pioneers_bitmap(*m_bp, m_block_size, pioneer); + calculate_pioneers_bitmap(*m_bp, m_block_size, pioneer); m_nnd = nnd_type(pioneer); m_pioneer_bp.resize(m_nnd.ones()); for (size_type i=1; i<= m_nnd.ones(); ++i) // replace this by an iterator!!! see todo for the nnd data structure m_pioneer_bp[i-1] = (*m_bp)[m_nnd.select(i)]; util::init_support(m_rank_pioneer_bp, &m_pioneer_bp); - algorithm::calculate_pioneers_bitmap(m_pioneer_bp, m_block_size, pioneer); + calculate_pioneers_bitmap(m_pioneer_bp, m_block_size, pioneer); m_nnd2 = nnd_type(pioneer); bit_vector pioneer_bp2 = bit_vector(m_nnd2.ones()); for (size_type i=1; i<= m_nnd2.ones(); ++i) // replace this by an iterator!!! 
see todo for the nnd data structure pioneer_bp2[i-1] = m_pioneer_bp[m_nnd2.select(i)]; - algorithm::calculate_matches(pioneer_bp2, m_match); - algorithm::calculate_enclose(pioneer_bp2, m_enclose); + calculate_matches(pioneer_bp2, m_match); + calculate_enclose(pioneer_bp2, m_enclose); m_range_max_match = rmq_type(&m_match); } @@ -231,11 +231,11 @@ class bp_support_g return i; } size_type mi = 0; // match for i - if ((mi=algorithm::near_find_close(*m_bp, i, m_block_size))==i) { + if ((mi=near_find_close(*m_bp, i, m_block_size))==i) { const size_type i2 = m_nnd.rank(i+1)-1; // lemma that this gives us an opening pioneer assert(m_pioneer_bp[i2]==1); // assert that i2 is an opening parenthesis size_type mi2 = 0; // match for i2 - if ((mi2=algorithm::near_find_close(m_pioneer_bp, i2, m_block_size)) == i2) { + if ((mi2=near_find_close(m_pioneer_bp, i2, m_block_size)) == i2) { const size_type i3 = m_nnd2.rank(i2+1)-1; const size_type mi3 = m_match[i3]; assert(mi3>i3); // assert that i3 is an opening parenthesis mi2 = m_nnd2.select(mi3+1); // matching pioneer position in pioneer_bp @@ -279,7 +279,7 @@ class bp_support_g return i; } size_type mi = 0; // match for i - if ((mi=algorithm::near_find_open(*m_bp, i, m_block_size)) == i) { + if ((mi=near_find_open(*m_bp, i, m_block_size)) == i) { const size_type i2 = m_nnd.rank(i); // lemma that this gives us an closing pioneer assert(m_pioneer_bp[i2]==0); // assert that i2 is an opening parenthesis const size_type mi2 = find_open_in_pioneers(i2); assert(m_pioneer_bp[mi2]==1); @@ -302,7 +302,7 @@ class bp_support_g inline size_type find_open_in_pioneers(size_type i)const { size_type mi = 0; // match for i - if ((mi=algorithm::near_find_open(m_pioneer_bp, i, m_block_size))==i) { + if ((mi=near_find_open(m_pioneer_bp, i, m_block_size))==i) { const size_type i3 = m_nnd2.rank(i); const size_type mi3 = m_match[i3]; assert(mi31 ); // mi is at greater or equal than 1 // note: mi and r are not in the same block @@ -428,7 +428,7 @@ class 
bp_support_g if (r_ > l_) { size_type min_ex_pos_ = r_; if (l_/m_block_size == r_/m_block_size) { - min_ex_pos_ = algorithm::near_rmq_open(m_pioneer_bp, l_, r_); + min_ex_pos_ = near_rmq_open(m_pioneer_bp, l_, r_); } else if (r_ < m_pioneer_bp.size()) { size_type min_ex_ = excess_pioneer(r_)+2*(m_pioneer_bp[r_]==0); const size_type bl_ = (l_/m_block_size+1)*m_block_size; @@ -450,13 +450,13 @@ class bp_support_g } if (min_ex_pos_ == r_) { // 2.1 - k = algorithm::near_rmq_open(m_pioneer_bp, br_, r_); + k = near_rmq_open(m_pioneer_bp, br_, r_); if (k < r_ and (ex=excess_pioneer(k)) < min_ex_) { min_ex_ = ex; min_ex_pos_ = k; } } // 2.3 - k = algorithm::near_rmq_open(m_pioneer_bp, l_, bl_); + k = near_rmq_open(m_pioneer_bp, l_, bl_); if (k < bl_ and (ex=excess_pioneer(k)) < min_ex_) { min_ex_ = ex; min_ex_pos_ = k; } @@ -471,13 +471,13 @@ class bp_support_g } if (min_ex_pos == r) { // 1.1 - k = algorithm::near_rmq_open(*m_bp, br, r); + k = near_rmq_open(*m_bp, br, r); if (k < r and (ex=excess(k)) < min_ex) { min_ex = ex; min_ex_pos = k; } } // 1.3 - k = algorithm::near_rmq_open(*m_bp, l, bl); + k = near_rmq_open(*m_bp, l, bl); if (k < bl and (ex=excess(k)) < min_ex) { min_ex = ex; min_ex_pos = k; } diff --git a/include/sdsl/bp_support_gg.hpp b/include/sdsl/bp_support_gg.hpp index 915641c45..5851ea9ef 100644 --- a/include/sdsl/bp_support_gg.hpp +++ b/include/sdsl/bp_support_gg.hpp @@ -25,7 +25,7 @@ #include "nearest_neighbour_dictionary.hpp" #include "rank_support.hpp" #include "select_support.hpp" -#include "algorithms.hpp" +#include "bp_support_algorithm.hpp" #include "util.hpp" #include #include @@ -140,7 +140,7 @@ class bp_support_gg util::init_support(m_select_bp, bp); { bit_vector pioneer; - algorithm::calculate_pioneers_bitmap_succinct(*m_bp, m_block_size, pioneer); + calculate_pioneers_bitmap_succinct(*m_bp, m_block_size, pioneer); util::assign(m_nnd, nnd_type(pioneer)); } @@ -239,7 +239,7 @@ class bp_support_gg return i; } size_type mi = 0; // match for i - if 
((mi=algorithm::near_find_closing(*m_bp, i+1, 1, m_block_size))==i) { + if ((mi=near_find_closing(*m_bp, i+1, 1, m_block_size))==i) { const size_type i_ = m_nnd.rank(i+1)-1; // lemma that this gives us an opening pioneer assert(m_pioneer_bp[i_]==1); // assert that i2 is an opening parenthesis size_type mi_ = m_pioneer_bp_support->find_close(i_); assert(m_pioneer_bp[mi_]==0); @@ -249,7 +249,7 @@ class bp_support_gg size_type epb2 = excess(mi-1); // excess of first parenthesis in the pioneer block const size_type ei = excess(i); // excess at position i /* invariant: epb >= ei-1 */ //assert( epb+1 >= ei ); - return algorithm::near_find_closing(*m_bp, mi, epb2-ei+1, m_block_size); + return near_find_closing(*m_bp, mi, epb2-ei+1, m_block_size); } return mi; @@ -267,7 +267,7 @@ class bp_support_gg return i; } size_type mi = 0; // match for i - if ((mi=algorithm::near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) { + if ((mi=near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) { const size_type i_ = m_nnd.rank(i); // lemma that this gives us an closing pioneer assert(m_pioneer_bp[i_]==0); // assert that i' is an opening parenthesis const size_type mi_ = m_pioneer_bp_support->find_open(i_); assert(m_pioneer_bp[mi_]==1); @@ -277,7 +277,7 @@ class bp_support_gg size_type epb2 = excess(mi+1); // excess of last parenthesis in the pioneer block const size_type ei = excess(i); // excess at position i /*invariant: epb >= ei+1*/ //assert( epb >= ei+1 ); - return algorithm::near_find_opening(*m_bp, mi, epb2-ei+1-2*((*m_bp)[mi+1]), m_block_size); + return near_find_opening(*m_bp, mi, epb2-ei+1-2*((*m_bp)[mi+1]), m_block_size); } return mi; } @@ -296,7 +296,7 @@ class bp_support_gg if (exi == 1) // if i is not enclosed by a parentheses pair.. 
return size(); size_type ei; // enclose for i - if ((ei=algorithm::near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) { + if ((ei=near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) { const size_type i_ = m_nnd.rank(i); // next parenthesis in the pioneer bitmap size_type ei_; // enclose for i' ei_ = m_pioneer_bp_support->enclose(i_); @@ -306,7 +306,7 @@ class bp_support_gg // size_type epb = excess(ei); // excess of the last parenthesis in the pioneer block size_type epb2 = excess(ei+1); // excess of last parenthesis in the pioneer block /* invariant epb+1 >= exi */ //assert( epb+1 >= exi ); - return algorithm::near_find_opening(*m_bp, ei, epb2-exi+1+2*((*m_bp)[ei+1]==0), m_block_size); + return near_find_opening(*m_bp, ei, epb2-exi+1+2*((*m_bp)[ei+1]==0), m_block_size); } return ei; } @@ -342,7 +342,7 @@ class bp_support_gg size_type min_ex_pos = r; if (l/m_block_size == r/m_block_size) { - min_ex_pos = algorithm::near_rmq_open(*m_bp, l, r); + min_ex_pos = near_rmq_open(*m_bp, l, r); } else { // parentheses pair does not start in the same block // muss nicht sein: assert( l>=1 ); // l is at greater or equal than 1 // note: l and r are not in the same block @@ -360,14 +360,14 @@ class bp_support_gg min_ex = excess(k); min_ex_pos = k; } else { // 1.1 - k = algorithm::near_rmq_open(*m_bp, (r/m_block_size)*m_block_size, r); + k = near_rmq_open(*m_bp, (r/m_block_size)*m_block_size, r); if (k < r) { assert(excess(k) < min_ex); min_ex = excess(k); min_ex_pos = k; } } // 1.3 - k = algorithm::near_rmq_open(*m_bp, l, (l/m_block_size+1)*m_block_size); + k = near_rmq_open(*m_bp, l, (l/m_block_size+1)*m_block_size); if (k < (l/m_block_size+1)*m_block_size and (ex=excess(k)) < min_ex) { min_ex = ex; min_ex_pos = k; } diff --git a/include/sdsl/bp_support_sada.hpp b/include/sdsl/bp_support_sada.hpp index 1e49a4d45..1589f2bd0 100644 --- a/include/sdsl/bp_support_sada.hpp +++ b/include/sdsl/bp_support_sada.hpp @@ -25,7 +25,7 @@ #include "int_vector.hpp" #include 
"rank_support.hpp" #include "select_support.hpp" -#include "algorithms.hpp" +#include "bp_support_algorithm.hpp" #include "fast_cache.hpp" #include #include @@ -203,7 +203,7 @@ class bp_support_sada size_type fwd_excess(size_type i, difference_type rel)const { size_type j; // (1) search the small block for the answer - if ((j = algorithm::near_fwd_excess(*m_bp, i+1, rel, t_sml_blk)) > i) { + if ((j = near_fwd_excess(*m_bp, i+1, rel, t_sml_blk)) > i) { return j; } difference_type desired_excess = excess(i)+rel; @@ -251,7 +251,7 @@ class bp_support_sada return rel == 0 ? -1 : size(); } // (1) search the small block for the answer - if ((j = algorithm::near_bwd_excess(*m_bp, i-1, rel, t_sml_blk)) < i or j == (size_type)-1) { + if ((j = near_bwd_excess(*m_bp, i-1, rel, t_sml_blk)) < i or j == (size_type)-1) { return j; } difference_type desired_excess = excess(i)+rel; @@ -303,7 +303,7 @@ class bp_support_sada difference_type max_ex = ex + (m_sml_block_min_max[2*sml_block_idx+1] - 1); if (min_ex <= desired_excess and desired_excess <= max_ex) { - size_type j = algorithm::near_bwd_excess(*m_bp, (sml_block_idx+1)*t_sml_blk-1, desired_excess-excess((sml_block_idx+1)*t_sml_blk), t_sml_blk); + size_type j = near_bwd_excess(*m_bp, (sml_block_idx+1)*t_sml_blk-1, desired_excess-excess((sml_block_idx+1)*t_sml_blk), t_sml_blk); return j; } --sml_block_idx; @@ -326,7 +326,7 @@ class bp_support_sada difference_type min_ex = ex + (1 - ((difference_type)m_sml_block_min_max[2*sml_block_idx])); difference_type max_ex = ex + m_sml_block_min_max[2*sml_block_idx+1] - 1; if (min_ex <= desired_excess and desired_excess <= max_ex) { - size_type j = algorithm::near_fwd_excess(*m_bp, sml_block_idx*t_sml_blk, desired_excess-ex, t_sml_blk); + size_type j = near_fwd_excess(*m_bp, sml_block_idx*t_sml_blk, desired_excess-ex, t_sml_blk); return j; } ++sml_block_idx; @@ -654,13 +654,13 @@ class bp_support_sada size_type sbr = sml_block_idx(r); difference_type min_rel_ex = 0; if (sbl == sbr) { // if l 
and r are in the same small block - return algorithm::near_rmq(*m_bp, l, r, min_rel_ex); + return near_rmq(*m_bp, l, r, min_rel_ex); } else { difference_type min_ex = 0; // current minimal excess value size_type min_pos = 0; // current min pos enum min_pos_type {POS, SMALL_BLOCK_POS, MEDIUM_BLOCK_POS}; enum min_pos_type pos_type = POS; // current - min_pos = algorithm::near_rmq(*m_bp, l, (sbl+1)*t_sml_blk-1, min_rel_ex); // scan the leftmost small block of l + min_pos = near_rmq(*m_bp, l, (sbl+1)*t_sml_blk-1, min_rel_ex); // scan the leftmost small block of l assert(min_pos >= l); min_ex = excess(l) + min_rel_ex; @@ -744,7 +744,7 @@ class bp_support_sada pos_type = SMALL_BLOCK_POS; } // search in the small block of r - temp = algorithm::near_rmq(*m_bp, sbr*t_sml_blk, r, min_rel_ex); // scan the small block of r + temp = near_rmq(*m_bp, sbr*t_sml_blk, r, min_rel_ex); // scan the small block of r if ((excess(sbr*t_sml_blk) + min_rel_ex) <= min_ex) { // if it contains the minimum return its position assert(temp>=l and temp<=r); return temp; @@ -759,7 +759,7 @@ class bp_support_sada pos_type = SMALL_BLOCK_POS; } if (pos_type == SMALL_BLOCK_POS) { - min_pos = algorithm::near_rmq(*m_bp, min_pos*t_sml_blk, (min_pos+1)*t_sml_blk-1, min_rel_ex); + min_pos = near_rmq(*m_bp, min_pos*t_sml_blk, (min_pos+1)*t_sml_blk-1, min_rel_ex); assert(min_pos >=l and min_pos <= r); } return min_pos; diff --git a/include/sdsl/csa_bitcompressed.hpp b/include/sdsl/csa_bitcompressed.hpp index 357dbda7b..c3e40b5bf 100644 --- a/include/sdsl/csa_bitcompressed.hpp +++ b/include/sdsl/csa_bitcompressed.hpp @@ -24,7 +24,6 @@ #include "int_vector.hpp" #include "sdsl_concepts.hpp" #include "suffix_array_helper.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "util.hpp" #include "csa_sampling_strategy.hpp" @@ -137,7 +136,7 @@ class csa_bitcompressed sa_sample_type tmp_sample(config); m_sa.swap(tmp_sample); } - algorithm::set_isa_samples(sa_buf, m_isa); + set_isa_samples(sa_buf, m_isa); 
m_psi = psi_type(this); if (!store_to_file(m_isa, cache_file_name(constants::KEY_ISA,config), true)) { throw std::ios_base::failure("#csa_bitcompressed: Cannot store ISA to file system!"); diff --git a/include/sdsl/csa_sada.hpp b/include/sdsl/csa_sada.hpp index f583e2e57..9859493c1 100644 --- a/include/sdsl/csa_sada.hpp +++ b/include/sdsl/csa_sada.hpp @@ -23,7 +23,6 @@ #include "enc_vector.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "suffix_array_helper.hpp" #include "util.hpp" @@ -404,7 +403,7 @@ csa_sada(sa_buf, m_isa_sample); + set_isa_samples(sa_buf, m_isa_sample); mm::log("isa-sample-end"); } diff --git a/include/sdsl/csa_wt.hpp b/include/sdsl/csa_wt.hpp index 52d13af5a..0c98eb4d5 100644 --- a/include/sdsl/csa_wt.hpp +++ b/include/sdsl/csa_wt.hpp @@ -23,7 +23,6 @@ #include "wavelet_trees.hpp" #include "suffix_array_helper.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "util.hpp" #include "fast_cache.hpp" @@ -298,7 +297,7 @@ csa_wt::cs mm::log("sa-sample-end"); mm::log("isa-sample-begin"); - algorithm::set_isa_samples(sa_buf, m_isa_sample); + set_isa_samples(sa_buf, m_isa_sample); mm::log("isa-sample-end"); } diff --git a/include/sdsl/cst_sada.hpp b/include/sdsl/cst_sada.hpp index 8c1be2826..0905e57f4 100644 --- a/include/sdsl/cst_sada.hpp +++ b/include/sdsl/cst_sada.hpp @@ -23,7 +23,6 @@ #include "int_vector.hpp" #include "suffix_tree_helper.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "lcp_support_sada.hpp" #include "select_support_mcl.hpp" @@ -367,7 +366,7 @@ class cst_sada * \par Time complexity * \f$ \Order{1} \f$ * - * This method is used e.g. in the algorithm::count method. + * This method is used e.g. in the count method. 
*/ size_type size(node_type v)const { size_type r = m_bp_support.find_close(v); diff --git a/include/sdsl/cst_sct3.hpp b/include/sdsl/cst_sct3.hpp index 745d52e53..8d53bb4ca 100644 --- a/include/sdsl/cst_sct3.hpp +++ b/include/sdsl/cst_sct3.hpp @@ -23,7 +23,6 @@ #include "int_vector.hpp" #include "suffix_tree_helper.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "lcp.hpp" #include "bp_support.hpp" @@ -1079,7 +1078,7 @@ cst_sct3::cst_sct3(cache_config& config, boo { mm::log("bps-sct-begin"); int_vector_buffer<> lcp_buf(cache_file_name(constants::KEY_LCP, config)); - m_nodes = algorithm::construct_supercartesian_tree_bp_succinct_and_first_child(lcp_buf, m_bp, m_first_child) + m_bp.size()/2; + m_nodes = construct_supercartesian_tree_bp_succinct_and_first_child(lcp_buf, m_bp, m_first_child) + m_bp.size()/2; if (m_bp.size() == 2) { // handle special case, when the tree consists only of the root node m_nodes = 1; } diff --git a/include/sdsl/experimental/bp_support_j.hpp b/include/sdsl/experimental/bp_support_j.hpp deleted file mode 100644 index a52cf976e..000000000 --- a/include/sdsl/experimental/bp_support_j.hpp +++ /dev/null @@ -1,512 +0,0 @@ -/* sdsl - succinct data structures library - Copyright (C) 2009 Simon Gog - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see http://www.gnu.org/licenses/ . -*/ -/*! 
\file bp_support_j.hpp - \brief bp_support_j.hpp contains an implementation of a balanced parentheses support data structure. - \author Simon Gog -*/ -#ifndef INCLUDED_SDSL_BP_SUPPORT_J -#define INCLUDED_SDSL_BP_SUPPORT_J - -#include "int_vector.hpp" -#include "rank_support.hpp" -#include "select_support.hpp" -#include "algorithms.hpp" -#include -#include -#include -#include - -namespace sdsl -{ - -//! A class that provides support for bit_vectors that represent a balanced parentheses sequence. Implementation was proposed by Jacobson (1989) and Geary et al. (CPM 2004). -/*! An opening parenthesis is represented by a 1 in the bit_vector and a closing parenthesis by a 0. - * This class could be parametrized by a rank_support and select_support. - * @ingroup bps - */ -template, class SelectSupport = select_support_mcl<1> > -class bp_support_j -{ - public: - typedef bit_vector::size_type size_type; - private: - const bit_vector* m_bp; // the supported balanced parentheses sequence as bit_vector - RankSupport m_rank; // a rank dictionary for calculation of excess - SelectSupport m_select; // additional select dictionary - - bit_vector m_pioneer_bitmap; // bitmap for pioneer positions - RankSupport m_rank_pioneer_bitmap; // - int_vector<> m_pioneer; // - - bit_vector m_enclose_pioneer_bitmap; // bitmap for enclose pioneer positions - RankSupport m_rank_enclose_pioneer_bitmap; // - int_vector<> m_enclose_pioneer; // - - uint32_t m_block_size; - uint32_t m_blocks; // number of blocks - size_type m_size; - - - // TODO: implement this space efficient!!! with the sorted_stack_support!!! 2009-12-03 - //TODO: replace this by a call of algorithm::calculate_pioneer... 
- void calculate_pioneers_bitmap() { - std::map pioneer_matches; - m_pioneer_bitmap.resize(m_size); // resize pioneers bitmap - util::set_zero_bits(m_pioneer_bitmap); // initialize bitmap with zeros - - std::map pioneer_matches_for_enclose; - m_enclose_pioneer_bitmap.resize(m_size); // resize pioneers bitmap for enclose - util::set_zero_bits(m_enclose_pioneer_bitmap); // initialize bitmap with zeros - -// algorithm::calculate_pioneers_bitmap(*m_bp, m_block_size, m_pioneer_bitmap); -// algorithm::calculate_matches_for_pioneers(*m_bp, m_pioneer_bitmap, m_pioneer); - - std::stack opening_parenthesis; - - // calculate positions of findclose and findopen pioneers - for (size_type block_nr = 0; block_nr < m_blocks; ++block_nr) { - std::map block_and_position; // for find_open and find_close - std::map matching_position; // for find_open and find_close - std::map block_and_position_for_enclose; // for enclose - std::map matching_position_for_enclose; // for enclose - for (size_type i=0, j=block_nr*m_block_size; i < m_block_size and j < m_size; ++i, ++j) { - if ((*m_bp)[j]) {//opening parenthesis - if (!opening_parenthesis.empty()) { - size_type position = opening_parenthesis.top(); - size_type blockpos = position/m_block_size; -// if( block_and_position_for_enclose.find(blockpos) == block_and_position_for_enclose.end() ){ // smallest j is pioneer - block_and_position_for_enclose[blockpos] = position; - matching_position_for_enclose[blockpos] = j; -// } - } - opening_parenthesis.push(j); - } else { // closing parenthesis - size_type position = opening_parenthesis.top(); - size_type blockpos = position/m_block_size; - opening_parenthesis.pop(); - block_and_position[blockpos] = position; - matching_position[blockpos] = j; // greatest j is pioneer - } - } - for (std::map::const_iterator it = block_and_position.begin(), - end = block_and_position.end(), - mit = matching_position.begin(); it != end and it->first != block_nr; ++it, ++mit) { - // opening and closing pioneers are 
symmetric - m_pioneer_bitmap[it->second] = 1; - pioneer_matches[it->second] = mit->second; - m_pioneer_bitmap[mit->second] = 1; - pioneer_matches[mit->second] = it->second; - } - for (std::map::const_iterator it = block_and_position_for_enclose.begin(), - end = block_and_position_for_enclose.end(), - mit = matching_position_for_enclose.begin(); it != end and it->first != block_nr; ++it, ++mit) { - m_enclose_pioneer_bitmap[mit->second] = 1; - pioneer_matches_for_enclose[mit->second] = it->second ; - } - - } - // assert that the sequence is balanced - assert(opening_parenthesis.empty()); - // store matching positions of pioneers - m_pioneer.width(bit_magic::l1BP(m_size)+1); - m_pioneer.resize(pioneer_matches.size()); - size_type cnt=0; - for (std::map::const_iterator mit = pioneer_matches.begin(); mit!= pioneer_matches.end(); ++mit) { - m_pioneer[cnt++] = mit->second; - } - - // initialize the rank dictionary for the pioneer bitmap - util::init_support(m_rank_pioneer_bitmap, &m_pioneer_bitmap); - - // store matching positions of enclose pioneers - m_enclose_pioneer.width(bit_magic::l1BP(m_size)+1); - m_enclose_pioneer.resize(pioneer_matches_for_enclose.size()); - cnt = 0; - for (std::map::const_iterator mit = pioneer_matches_for_enclose.begin(); mit != pioneer_matches_for_enclose.end(); ++mit) { - m_enclose_pioneer[cnt++] = mit->second; - } - // initialize the rank dictionary for the enclose pioneer bitmap - util::init_support(m_rank_enclose_pioneer_bitmap, &m_enclose_pioneer_bitmap); - - /* if(m_size<120 and m_size>0){ - std::cerr<<"bp"<::const_iterator mit = pioneer_matches.begin(); mit!= pioneer_matches.end(); ++mit){ - std::cerr<<"_"<first<<" "<second<size()) { -// assert(m_block_size > 0 and m_block_size <= 8192 and m_block_size%64 == 0); - m_blocks = (m_size+m_block_size-1)/m_block_size; - util::init_support(m_rank, m_bp); - util::init_support(m_select, m_bp); - calculate_pioneers_bitmap(); - } - - //! 
Copy constructor - bp_support_j(const bp_support_j& bp_support) { - copy(bp_support); - } - - //! Assignment operator - bp_support_j& operator=(const bp_support_j& bp_support) { - if (this != &bp_support) { - copy(bp_support); - } - return *this; - } - - void set_vector(const bit_vector* bp) { - m_bp = bp; - m_rank.set_vector(bp); - m_select.set_vector(bp); - } - - /*! Calculates the excess value at index i. - * \param i The index of which the excess value should be calculated. - */ - size_type excess(size_type i)const { - return (m_rank(i+1)<<1)-i-1; - } - - /*! Returns the number of opening parentheses up to and including index i. - * \pre{ \f$ 0\leq i < size() \f$ } - */ - size_type rank(size_type i)const { - return m_rank(i+1); - } - - /*! Returns the index of the i-th opening parenthesis. - * \param i Number of the parenthesis to select. - * \pre{ \f$1\leq i < rank(size())\f$ } - * \post{ \f$ 0\leq select(i) < size() \f$ } - */ - size_type select(size_type i)const { - return m_select(i); - } - - /*! Calculate the index of the matching closing parenthesis to the parenthesis at index i. - * \param i Index of an parenthesis. 0 <= i < size(). - * \return * i, if the parenthesis at index i is closing, - * * the position j of the matching closing parenthesis, if a matching parenthesis exists, - * * size() if no matching closing parenthesis exists. 
- */ - size_type find_close(size_type i)const { -#ifdef SDSL_DEBUG_BP - if (i >= m_size) { - throw std::out_of_range("OUT_OF_RANGE: bp_support_j::find_close"); - } -#endif - if (!(*m_bp)[i]) {// if there is a closing parenthesis at index i return i - return i; - } - if (m_pioneer_bitmap[i]) { - return m_pioneer[m_rank_pioneer_bitmap.rank(i)]; - } else { - size_type ip1 = i+1; // move one position to the left - uint64_t byte_prefix_sums_x_2; - const uint64_t* data = m_bp->data() + (ip1>>6); - uint8_t offset = ip1&0x3F; - uint64_t w = (~(*data))>>offset; // add $offset$ opening parenthesis at the beginning of the word - - uint8_t pos = bit_magic::first_excess_position(w, 1, byte_prefix_sums_x_2); - if (pos != 64) {// found position of matching parenthesis - return ip1+pos; - } else { // the excess value at the end of w is lower or equal zero - assert((i%64) != 0); - offset = (offset + 63)&0x3F; - // search the pioneer bitmap for the closesd preceding pioneer - pos = bit_magic::l1BP(*(m_pioneer_bitmap.data() + ((i-1)>>6)) & *(m_bp->data() + ((i-1)>>6)) & bit_magic::Li1Mask[offset]); - size_type pioneer_index = ((i-1)&0xFFFFFFFFFFFFFFC0ULL)+pos; - assert(m_pioneer_bitmap[pioneer_index] == 1); - size_type match_index = m_pioneer[m_rank_pioneer_bitmap.rank(pioneer_index)]; - assert((match_index&0xFFFFFFFFFFFFFFC0ULL) > i); - uint8_t excess_difference = excess((match_index&0xFFFFFFFFFFFFFFC0ULL)-1)-(excess(i)-1); - assert(excess_difference <=63); - return (match_index&0xFFFFFFFFFFFFFFC0ULL) - + bit_magic::first_excess_position(~*(m_bp->data()+(match_index>>6)) , excess_difference, byte_prefix_sums_x_2); - } - } - } - - //! Calculate the matching opening parenthesis to the closing parenthesis at position i - /*! \param i Index of a closing parenthesis. - * \return * i, if the parenthesis at index i is closing, - * * the position j of the matching opening parenthesis, if a matching parenthesis exists, - * * size() if no matching closing parenthesis exists. 
- */ - size_type find_open(size_type i)const { -#ifdef SDSL_DEBUG_BP - if (i >= m_size) { - throw std::out_of_range("OUT_OF_RANGE: bp_support_j::find_open"); - } -#endif - if ((*m_bp)[i]) {// if there is a opening parenthesis at index i return i - return i; - } - if (m_pioneer_bitmap[i]) { - return m_pioneer[m_rank_pioneer_bitmap.rank(i)]; - } else { - size_type im1 = i-1; // move one position to the right - const uint64_t* data = m_bp->data() + (im1>>6); - uint8_t close_parenthesis_index = (i&0x3F) + ((i==0)<<6); - uint8_t pos = bit_magic::find_open(*data, close_parenthesis_index); - if (pos!=64) { // found position of the matching parenthesis - return (im1&0xFFFFFFFFFFFFFFC0ULL)+pos; - } else { - assert((i%64)!=63); - // search the pioneer bitmap for the closest succeeding pioneer - pos = bit_magic::r1BP(*(m_pioneer_bitmap.data() + ((i+1)>>6)) & ~*(m_bp->data() + ((i+1)>>6)) & bit_magic::Li0Mask[i&0x3F]); - size_type pioneer_index = ((i+1)&0xFFFFFFFFFFFFFFC0ULL)+pos; - assert(m_pioneer_bitmap[pioneer_index] == 1); - size_type match_index = m_pioneer[m_rank_pioneer_bitmap.rank(pioneer_index)]; - assert(match_index < i); - int8_t excess_difference = excess(i); - if (match_index >= 64) { - excess_difference -= excess((match_index&0xFFFFFFFFFFFFFFC0ULL)-1); - } - assert(excess_difference >=-64 and excess_difference <= 64); - uint64_t dummy; - if (excess_difference >= 0) { - return (match_index&0xFFFFFFFFFFFFFFC0ULL) - + bit_magic::last_excess_position(*(m_bp->data()+(match_index>>6)), excess_difference, dummy) + 1; - } else { - return (match_index&0xFFFFFFFFFFFFFFC0ULL) - + bit_magic::last_excess_position(~*(m_bp->data()+(match_index>>6)), -excess_difference, dummy) + 1; - } - } - } - } - - //! Calculate the index of the opening parenthesis corresponding to the closest matching parenthesis pair enclosing i. - /*! \param i Index of an opening parenthesis. 
- * \return The index of the opening parenthesis corresponding to the closest matching parenthesis pair enclosing i, - * or size() if no such pair exists. - */ - size_type enclose(size_type i)const { -#ifdef SDSL_DEBUG_BP - if (i >= m_size) { - throw std::out_of_range("OUT_OF_RANGE: bp_support_j::enclose."); - } -#endif - if (!(*m_bp)[i]) { // if there is closing parenthesis at position i - throw std::logic_error("LOGIC_ERROR: bp_support_j::enclose. A opening parenthesis is expected as argument."); - } - if (m_enclose_pioneer_bitmap[i]) { - return m_enclose_pioneer[m_rank_enclose_pioneer_bitmap.rank(i)]; - } else { - if (i==0) - return size(); - size_type im1 = i-1; // move one position to the right - const uint64_t* data = m_bp->data() + (im1>>6); - uint8_t open_parenthesis_index = (i&0x3F) + ((i==0)<<6); - uint8_t pos = bit_magic::find_open(*data, open_parenthesis_index); - if (pos!=64) { - return (im1&0xFFFFFFFFFFFFFFC0ULL)+pos; - } else { - assert((i%64)!= 63); - // search the pioneer bitmap for the closest succeeding pioneer - uint64_t w = *(m_enclose_pioneer_bitmap.data() + ((i+1)>>6)) & bit_magic::Li0Mask[i&0x3F]; - if (w) { - pos = bit_magic::r1BP(*(m_enclose_pioneer_bitmap.data() + ((i+1)>>6)) & bit_magic::Li0Mask[i&0x3F]); - size_type pioneer_index = ((i+1)&0xFFFFFFFFFFFFFFC0ULL)+pos; - assert(m_enclose_pioneer_bitmap[pioneer_index] == 1); - size_type match_index = m_enclose_pioneer[m_rank_enclose_pioneer_bitmap.rank(pioneer_index)]; - assert(match_index < i); - int8_t excess_difference = excess(i)-2; - if (match_index >= 64) { - excess_difference -= excess((match_index&0xFFFFFFFFFFFFFFC0ULL)-1); - } - assert(excess_difference >=-64 and excess_difference <= 64); - uint64_t dummy; - if (excess_difference >= 0) { - pos = bit_magic::last_excess_position(*(m_bp->data()+(match_index>>6)), excess_difference, dummy); - if (pos==64) - return match_index&0xFFFFFFFFFFFFFFC0ULL; - else - return (match_index&0xFFFFFFFFFFFFFFC0ULL) + pos + 1; -// return 
(match_index&0xFFFFFFFFFFFFFFC0ULL) -// + bit_magic::last_excess_position( *(m_bp.data()+(match_index>>6)), excess_difference, dummy) + 1; - } else { - pos = bit_magic::last_excess_position(~*(m_bp->data()+(match_index>>6)), -excess_difference, dummy); - if (pos==64) - return match_index&0xFFFFFFFFFFFFFFC0ULL; - else - return (match_index&0xFFFFFFFFFFFFFFC0ULL) + pos + 1; -// return (match_index&0xFFFFFFFFFFFFFFC0ULL) -// + bit_magic::last_excess_position( ~*(m_bp.data()+(match_index>>6)), -excess_difference, dummy) + 1; - } - } else { // there exists no pioneer => there exists no enclosing parentheses pair - return m_size; - } - } - } - } - - //! The range restricted enclose operation. - /*! \param i Index of an opening parenthesis. - \param j Index of an opening parenthesis/ \f$ i i and j < m_size); - size_type mi = find_close(i); // matching parenthesis to i - assert(mi > i and mi < j); - assert(find_close(j) > j); - size_type k = enclose(j); - if (k == m_size or k < i) // there exists no opening parenthesis at position mi mi); - return kk; - } - - size_type rr_enclose_naive(size_type i, size_type j)const { - return rr_enclose(i, j); - } - - //! The double enclose operation - /*! \param i Index of an opening parenthesis. - * \param j Index of an opening parenthesis \f$ ifindclose(j) \f$. - * If such a k does not exists, double_enclose(i,j) returns size(). - */ - size_type double_enclose(size_type i, size_type j)const { - assert(j > i); - assert((*m_bp)[i]==1 and (*m_bp)[j]==1); - size_type k = rr_enclose(i, j); - if (k == size()) - return enclose(j); - else - return enclose(k); - } - - //! Return the number of zeros which procede position i in the balanced parentheses sequence. - /*! \param i Index of an parenthesis. 
- */ - size_type preceding_closing_parentheses(size_type i)const { - assert(i < m_size); - if (!i) return 0; - size_type ones = m_rank(i); - if (ones) { // ones > 0 - assert(m_select(ones) < i); - return i - m_select(ones) - 1; - } else { - return i; - } - } - - /*! The size of the supported balanced parentheses sequence. - * \return the size of the supported balanced parentheses sequence. - */ - size_type size() const { - return m_size; - } - - //! Serializes the bp_support_j to a stream. - /*! - * \param out The outstream to which the data structure is written. - * \return The number of bytes written to out. - */ - size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - size_type written_bytes = 0; - written_bytes += m_rank.serialize(out, child, "bp_rank"); - written_bytes += m_select.serialize(out, child, "bp_select"); - - written_bytes += m_pioneer_bitmap.serialize(out, child, "pioneer_bitmap"); - written_bytes += m_rank_pioneer_bitmap.serialize(out, child, "pioneer_bitmap_rank"); - written_bytes += m_pioneer.serialize(out, child, "pioneer"); - - written_bytes += m_enclose_pioneer_bitmap.serialize(out, child, "enclose"); - written_bytes += m_rank_enclose_pioneer_bitmap.serialize(out, child, "enclose_rank"); - written_bytes += m_enclose_pioneer.serialize(out, child, "enclose_pioneer"); - - structure_tree::add_size(child, written_bytes); - return written_bytes; - } - - //! Load the bp_support_j for a bit_vector v. - /*! - * \param in The instream from which the data strucutre is read. - * \param bp Bit vector representing a balanced parentheses sequence that is supported by this data structure. 
- */ - void load(std::istream& in, const bit_vector* bp) { - m_bp = bp; - if (m_bp == NULL) - return; - m_size = m_bp->size(); - m_block_size = 64; - m_blocks = (m_size+m_block_size-1)/m_block_size; - - m_rank.load(in, m_bp); - m_select.load(in, m_bp); - - m_pioneer_bitmap.load(in); - m_rank_pioneer_bitmap.load(in, &m_pioneer_bitmap); - m_pioneer.load(in); - - m_enclose_pioneer_bitmap.load(in); - m_rank_enclose_pioneer_bitmap.load(in, &m_enclose_pioneer_bitmap); - m_enclose_pioneer.load(in); - } -}; -}// end namespace sdsl - - - - -#endif diff --git a/include/sdsl/experimental/gap_vector.hpp b/include/sdsl/experimental/gap_vector.hpp deleted file mode 100644 index a1673fc4c..000000000 --- a/include/sdsl/experimental/gap_vector.hpp +++ /dev/null @@ -1,248 +0,0 @@ -/* sdsl - succinct data structures library - Copyright (C) 2012 Simon Gog - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see http://www.gnu.org/licenses/ . -*/ -/*! \file gap_vector.hpp - \brief gap_vector.hpp contains the sdsl::gap_vector class, and - classes which support rank and select for gap_vector. - \author Simon Gog -*/ -#ifndef INCLUDED_SDSL_GAP_VECTOR -#define INCLUDED_SDSL_GAP_VECTOR - -#include "int_vector.hpp" -#include "util.hpp" - -//! 
Namespace for the succinct data structure library -namespace sdsl -{ - -template// forward declaration needed for friend declaration -class gap_rank_support; // in gap_vector - -template// forward declaration needed for friend declaration -class gap_select_support; // in gap_vector - -//! A bit vector which compresses very sparse populated bit vectors by representing either 1's or 0's by gap encoding -template -class gap_vector -{ - public: - typedef bit_vector::size_type size_type; - typedef size_type value_type; - - friend class gap_rank_support; - friend class gap_select_support; - - typedef gap_rank_support rank_1_type; - typedef gap_select_support select_1_type; - private: - size_type m_size; - int_vector<> m_position; - - public: - gap_vector():m_size(0) {} - - gap_vector(const bit_vector& bv) { - m_size = bv.size(); - if (m_size == 0) - return; - size_type ones = util::get_one_bits(bv); - m_position = int_vector<>(ones, 0, bit_magic::l1BP(m_size)+1); - const uint64_t* bvp = bv.data(); - for (size_type i=0, one_cnt=0; i < (bv.size()+63)/64; ++i, ++bvp) { - if (*bvp) { // if there is a one in the word - for (size_type j=0; j<64 and 64*i+j < bv.size(); ++j) // check each bit of the word - if (bv[64*i+j]) { - m_position[one_cnt++] = 64*i+j; - } - } - } - } - - //! Swap method - void swap(gap_vector& v) { - if (this != &v) { - std::swap(m_size, v.m_size); - m_position.swap(v.m_position); - } - } - - //! Accessing the i-th element of the original bit_vector - /*! \param i An index i with \f$ 0 \leq i < size() \f$. 
- \return The i-th bit of the original bit_vector - \par Time complexity - \f$ \Order{\log m} \f$, where m equals the number of zeros - */ - value_type operator[](size_type i)const { - // binary search the entries in m_position - size_type lb=0, rb=m_position.size(), mid, pos; // start interval [lb,rb) - while (rb > lb) { - mid = (lb+rb)/2; // then mid>=lb mid pos) { - lb = mid+1; - } else if (i < pos) { - rb = mid; - } else { // i == pos - return 1; - } - } - return 0; - } - - //! Returns the size of the original bit vector. - size_type size()const { - return m_size; - } - - //! Serializes the data structure into the given ostream - size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { - size_type written_bytes = 0; - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - written_bytes += util::write_member(m_size, out, child, "size"); - written_bytes += m_position.serialize(out, child, "positions"); - structure_tree::add_size(child, written_bytes); - return written_bytes; - } - - //! Loads the data structure from the given istream. 
- void load(std::istream& in) { - util::read_member(m_size, in); - m_position.load(in); - } -}; - -template -class gap_rank_support -{ - public: - typedef bit_vector::size_type size_type; - typedef gap_vector bit_vector_type; - private: - const bit_vector_type* m_v; - - public: - - gap_rank_support(const bit_vector_type* v=NULL) { - set_vector(v); - } - - size_type rank(size_type i)const { - // binary search the entries in m_position - size_type lb=0, rb=m_v->m_position.size(), mid, pos=0; // start interval [lb,rb) - while (rb > lb) { - mid = (lb+rb)/2; // then mid>=lb midm_position[mid]; - if (i <= pos) { - rb = mid; - } else { - lb = mid+1; - } - } // m_position[rb] >= i - return rb; - } - - const size_type operator()(size_type i)const { - return rank(i); - } - - const size_type size()const { - return m_v->size(); - } - - void set_vector(const bit_vector_type* v=NULL) { - m_v = v; - } - - gap_rank_support& operator=(const gap_rank_support& rs) { - if (this != &rs) { - set_vector(rs.m_v); - } - return *this; - } - - void swap(gap_rank_support& rs) { } - - void load(std::istream& in, const bit_vector_type* v=NULL) { - set_vector(v); - } - - size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - size_type written_bytes = 0; - structure_tree::add_size(child, written_bytes); - return written_bytes; - } -}; - - - - -template -class gap_select_support -{ - public: - typedef bit_vector::size_type size_type; - typedef gap_vector bit_vector_type; - private: - const bit_vector_type* m_v; - - public: - gap_select_support(const bit_vector_type* v=NULL) { - set_vector(v); - } - - //! Returns the position of the i-th occurrence in the bit vector. 
- size_type select(size_type i)const { - return m_v->m_position[i-1]; - } - - const size_type operator()(size_type i)const { - return select(i); - } - - const size_type size()const { - return m_v->size(); - } - - void set_vector(const bit_vector_type* v=NULL) { - m_v = v; - } - - gap_select_support& operator=(const gap_select_support& rs) { - if (this != &rs) { - set_vector(rs.m_v); - } - return *this; - } - - void swap(gap_select_support& rs) { } - - void load(std::istream& in, const bit_vector_type* v=NULL) { - set_vector(v); - } - - size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - size_type written_bytes = 0; - structure_tree::add_size(child, written_bytes); - return written_bytes; - } -}; - -} - -#endif diff --git a/include/sdsl/experimental/rank_support_jmc.hpp b/include/sdsl/experimental/rank_support_jmc.hpp deleted file mode 100644 index cb95e3439..000000000 --- a/include/sdsl/experimental/rank_support_jmc.hpp +++ /dev/null @@ -1,179 +0,0 @@ -/* sdsl - succinct data structures library - Copyright (C) 2008 Simon Gog - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see http://www.gnu.org/licenses/ . -*/ -/*! \file rank_support_jmc.hpp - \brief rank_support_jmc.hpp contains classes that support a sdsl::bit_vector with constant time rank information. 
- \author Simon Gog -*/ -#ifndef INCLUDED_SDSL_RANK_SUPPORT_JMC -#define INCLUDED_SDSL_RANK_SUPPORT_JMC - -#include "rank_support.hpp" -#include "int_vector.hpp" - -//! Namespace for the succinct data structure library. -namespace sdsl -{ - -//! A class supporting rank queries in constant time. The implementation is a lightweight version of the data structure proposed by Jacobson (1989), Munro (1996), and Clark (1996). -/*! - * - TODO: Space complexity - * @ingroup rank_support_group - */ -class rank_support_jmc : public rank_support -{ - public: - typedef bit_vector bit_vector_type; - private: - size_type m_logn; - int_vector<0> m_superblockrank; - int_vector<0> m_blockrank; - public: - explicit rank_support_jmc(const int_vector<1>* v=NULL); - rank_support_jmc(const rank_support_jmc& rs); - inline const size_type rank(size_type idx) const; - inline const size_type operator()(size_type idx)const; - size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const; - void load(std::istream& in, const int_vector<1>* v=NULL); - void set_vector(const int_vector<1>* v); - //! Assign Operator - /*! Required for the Assignable Concept of the STL. - */ - rank_support_jmc& operator=(const rank_support_jmc& rs); - //! swap Operator - /*! Swap two rank_support_jmc in constant time. - Required for the Container Concept of the STL. 
- */ - void swap(rank_support_jmc& rs); -}; - -inline rank_support_jmc::rank_support_jmc(const rank_support_jmc& rs) : rank_support() -{ - set_vector(rs.m_v); - m_superblockrank = rs.m_superblockrank; - m_blockrank = rs.m_blockrank; -} - -inline rank_support_jmc& rank_support_jmc::operator=(const rank_support_jmc& rs) -{ - if (this != &rs) { - set_vector(rs.m_v); - m_superblockrank = rs.m_superblockrank; - m_blockrank = rs.m_blockrank; - } - return *this; -} - -inline void rank_support_jmc::swap(rank_support_jmc& rs) -{ - if (this != &rs) { // if rs and _this_ are not the same object - std::swap(m_logn, rs.m_logn); - m_superblockrank.swap(rs.m_superblockrank); - m_blockrank.swap(rs.m_blockrank); - } -} - -inline rank_support_jmc::rank_support_jmc(const int_vector<1>* v) -{ - m_logn = 0; - set_vector(v); - if (m_v == NULL) return; - if (m_v->empty()) { - m_blockrank.width(1); m_superblockrank.width(1); - m_blockrank.resize(1); m_superblockrank.resize(1); - m_blockrank[0] = 0; m_superblockrank[0] = 0; - return; - } - m_blockrank.width(12); - m_blockrank.resize((m_v->capacity()>>6) + (0==(m_v->size()&0x3F))); // n/64 + 2*loglog 64 - m_superblockrank.width(m_logn); - m_superblockrank.resize((m_blockrank.size()+63)>>6); - - m_blockrank[0]=0; - m_superblockrank[0]=0; - size_type cnt = 0, blockcnt = 0, wcnt = 0; - const uint64_t* data = m_v->data(); - size_type i; - for (i = 1; i < (m_v->capacity()>>6) ; ++i) { - wcnt = bit_magic::b1Cnt(*data); - ++data; - blockcnt += wcnt; - cnt += wcnt; - if ((i & 0x3F) == 0) { - m_superblockrank[i>>6] = cnt; - blockcnt = 0; - } - m_blockrank[i] = blockcnt; - } - if (0 == (m_v->size()&0x3F)) { - wcnt = bit_magic::b1Cnt(*data); - blockcnt += wcnt; - cnt += wcnt; - if ((i & 0x3F) == 0) { - m_superblockrank[i>>6] = cnt; - blockcnt = 0; - } - m_blockrank[i] = blockcnt; - } -} - -inline const rank_support_jmc::size_type rank_support_jmc::rank(size_type idx)const -{ - assert(m_v != NULL); - assert(idx <= m_v->size()); - if ((idx & 0x3F) ==0) 
- return m_blockrank[idx>>6] - + m_superblockrank[idx>>12]; - return - bit_magic::b1Cnt((*(m_v->data()+(idx>>6))&bit_magic::Li1Mask[idx & 0x3F])) - + m_blockrank[idx>>6] - + m_superblockrank[idx>>12]; -} - -inline const rank_support_jmc::size_type rank_support_jmc::operator()(size_type idx)const -{ - return rank(idx); -} - -inline rank_support_jmc::size_type rank_support_jmc::serialize(std::ostream& out, structure_tree_node* v, std::string name)const -{ - size_type written_bytes = 0; - structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); - written_bytes += m_blockrank.serialize(out, child, "blockrank"); - written_bytes += m_superblockrank.serialize(out, child, "superblockrank"); - structure_tree::add_size(child, written_bytes); - return written_bytes; -} - -inline void rank_support_jmc::load(std::istream& in, const int_vector<1>* v) -{ - set_vector(v); - assert(m_v != NULL); // supported bit vector should be known - m_blockrank.load(in); - m_superblockrank.load(in); -} - -inline void rank_support_jmc::set_vector(const int_vector<1>* v) -{ - if (v != NULL) { - m_v = v; - m_logn = bit_magic::l1BP(m_v->capacity())+1; - } -} - -}// end namespace sds - -#endif // end file diff --git a/include/sdsl/lcp_bitcompressed.hpp b/include/sdsl/lcp_bitcompressed.hpp index 7e4cb18b2..4c3d7294a 100644 --- a/include/sdsl/lcp_bitcompressed.hpp +++ b/include/sdsl/lcp_bitcompressed.hpp @@ -23,7 +23,6 @@ #include "lcp.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" namespace sdsl diff --git a/include/sdsl/lcp_byte.hpp b/include/sdsl/lcp_byte.hpp index 67370bbed..1eebf1989 100644 --- a/include/sdsl/lcp_byte.hpp +++ b/include/sdsl/lcp_byte.hpp @@ -23,7 +23,6 @@ #include "lcp.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include #include // for lower_bound diff --git a/include/sdsl/lcp_dac.hpp b/include/sdsl/lcp_dac.hpp index cb8ca34e9..4c9c7ce2e 100644 --- 
a/include/sdsl/lcp_dac.hpp +++ b/include/sdsl/lcp_dac.hpp @@ -23,7 +23,6 @@ #include "lcp.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "util.hpp" #include "rank_support_v5.hpp" diff --git a/include/sdsl/lcp_support_sada.hpp b/include/sdsl/lcp_support_sada.hpp index 63d91cc57..5d9262343 100644 --- a/include/sdsl/lcp_support_sada.hpp +++ b/include/sdsl/lcp_support_sada.hpp @@ -23,7 +23,6 @@ #include "lcp.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "csa_sada.hpp" // for default template initialization #include "select_support.hpp" // for default template initialization diff --git a/include/sdsl/lcp_vlc.hpp b/include/sdsl/lcp_vlc.hpp index 10a131bbe..71164610f 100644 --- a/include/sdsl/lcp_vlc.hpp +++ b/include/sdsl/lcp_vlc.hpp @@ -24,7 +24,6 @@ #include "lcp.hpp" #include "vlc_vector.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include #include diff --git a/include/sdsl/lcp_wt.hpp b/include/sdsl/lcp_wt.hpp index bdf914b35..19b4e0c37 100644 --- a/include/sdsl/lcp_wt.hpp +++ b/include/sdsl/lcp_wt.hpp @@ -24,7 +24,6 @@ #include "lcp.hpp" #include "wt_huff.hpp" #include "int_vector.hpp" -#include "algorithms.hpp" #include "iterators.hpp" #include "select_support_bs.hpp" #include "util.hpp" diff --git a/include/sdsl/suffix_array_helper.hpp b/include/sdsl/suffix_array_helper.hpp index 7b7be701c..425261379 100644 --- a/include/sdsl/suffix_array_helper.hpp +++ b/include/sdsl/suffix_array_helper.hpp @@ -571,7 +571,27 @@ class text_of_csa } }; +template +void set_isa_samples(int_vector_buffer& sa_buf, typename Csa::isa_sample_type& isa_sample) +{ + typedef typename Csa::size_type size_type; + size_type n = sa_buf.size(); + + isa_sample.width(bits::hi(n)+1); + if (n >= 1) { // so n+Csa::isa_sample_dens >= 2 + isa_sample.resize((n-1+Csa::isa_sample_dens-1)/Csa::isa_sample_dens + 1); + } + util::set_to_value(isa_sample, 0); + for (size_type i=0; i < 
n; ++i) { + size_type sa = sa_buf[i]; + if ((sa % Csa::isa_sample_dens) == 0) { + isa_sample[sa/Csa::isa_sample_dens] = i; + } else if (sa+1 == n) { + isa_sample[(sa+Csa::isa_sample_dens-1)/Csa::isa_sample_dens] = i; + } + } +} } diff --git a/include/sdsl/suffix_tree_helper.hpp b/include/sdsl/suffix_tree_helper.hpp index 9b54e28c6..112e96876 100644 --- a/include/sdsl/suffix_tree_helper.hpp +++ b/include/sdsl/suffix_tree_helper.hpp @@ -59,6 +59,276 @@ class cst_node_child_proxy iterator_type end() const { return iterator_type(m_cst,m_cst.root()); } }; +//! Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009). +/*! \param vec Random access container for which the Super-Cartesian tree representation should be calculated. + * The value_type of vec should be an unsigned integer type. + * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. + * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. + * \par Time complexity + * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() + * \par Space complexity + * \f$ \Order{n \cdot \log n } \f$ bits. 
+ */ +template +void construct_supercartesian_tree_bp(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true) +{ + typedef typename RandomAccessContainer::size_type size_type; + bp.resize(2*vec.size()); // resize bit vector for balanaced parantheses to 2 n bits + util::set_to_value(bp, 0); + std::stack vec_stack; + + size_type k=0; + for (size_type i=0; i < vec.size(); ++i) { + typename RandomAccessContainer::value_type l = vec[i]; + if (minimum) { + while (vec_stack.size() > 0 and l < vec_stack.top()) { + vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis + } + + } else { + while (vec_stack.size() > 0 and l > vec_stack.top()) { + vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis + } + } + vec_stack.push(l); + bp[k++] = 1; // writing an opening parenthesis + } + while (vec_stack.size() > 0) { + vec_stack.pop(); + bp[k++] = 0; // writing a closing parenthesis + } + assert(k == 2*vec.size()); +} + +//! Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009). +/*! \param vec Random access container for which the Super-Cartesian tree representation should be calculated. + * The value_type of vec should be an unsigned integer type. + * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. + * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. + * \par Time complexity + * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() + * \par Space complexity + * \f$\Order{n}\f$ bits, by the stack_support described in the paper "Optimal Succinctness For Range Minimum Queries" of Johannes Fischer. 
+ */ +// TODO: sorted_multi_stack_support einbauen, RandomAccessContainer durch int_vector_buffer ersetzen +template +void construct_supercartesian_tree_bp_succinct(const RandomAccessContainer& vec, bit_vector& bp, const bool minimum=true) +{ + typedef typename RandomAccessContainer::size_type size_type; + bp.resize(2*vec.size()); // resize bit vector for balanced parentheses to 2 n bits + if (vec.size() > 0) { + util::set_to_value(bp, 0); + sorted_stack_support vec_stack(vec.size()); // <- ist das ein Problem fuer int_vector_buffer + + size_type k=0; + if (minimum) { + bp[k++] = 1; + for (size_type i=1; i < vec.size(); ++i) { + if (vec[i] < vec[i-1]) { + ++k; + while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) { + vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zero + } + } else { + vec_stack.push(i-1); // "lazy stack" trick: speed-up ca. 25% + } + bp[k++] = 1; // writing an opening parenthesis + } + /* + vec_stack.push(0); + bp[k++] = 1; + for(size_type i=1,j, start_run=1; i < vec.size(); ++i){ + if( vec[i] < vec[i-1] ){ + j = i; + while( --j >= start_run and vec[i] < vec[j]) ++k; + while(start_run <= j){ // auf den stack pushen + vec_stack.push(start_run++); + } + while( vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()] ){ + vec_stack.pop(); ++k; + } + start_run = i; + } + bp[k++] = 1; + } + */ + } else { + // hier noch ohne "lazy stack" trick + for (size_type i=0; i < vec.size(); ++i) { + while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()]) { + vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis + } + vec_stack.push(i); + bp[k++] = 1; // writing an opening parenthesis + } + } +#ifdef SDSL_DEBUG + // not necessary as bp is already initialized to zero + while (!vec_stack.empty()) { + vec_stack.pop(); + bp[k++] = 0; // writing a closing parenthesis + } + assert(k == 2*vec.size()); +#endif + } +} + +//! 
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009). +/*! \param lcp_buf int_vector_buffer of the LCP Array for which the Super-Cartesian tree representation should be calculated. + * The value_type of vec should be an unsigned integer type. + * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. + * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. + * \par Time complexity + * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() + * \par Space complexity + * \f$\Order{2n}\f$ bits, by the multi_stack_support + */ +template +void construct_supercartesian_tree_bp_succinct(int_vector_buffer& lcp_buf, bit_vector& bp, const bool minimum=true) +{ + typedef int_vector_size_type size_type; + size_type n = lcp_buf.size(); + bp.resize(2*n); // resize bit vector for balanced parentheses to 2 n bits + if (n == 0) // if n == 0 we are done + return; + util::set_to_value(bp, 0); + sorted_multi_stack_support vec_stack(n); + + size_type k=0; + if (minimum) { + bp[k++] = 1; + size_type last = lcp_buf[0]; + for (size_type i=1, x; i < n; ++i) { + x = lcp_buf[i]; + if (x < last) { + ++k; // writing a closing parenthesis for last + while (!vec_stack.empty() and x < vec_stack.top()) { + vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zeros + } + } else { + vec_stack.push(last); // "lazy stack" trick: Beschleunigung: ca 25 % + } + bp[k++] = 1; // writing an opening parenthesis + last = x; + } + } else { + // hier noch ohne "lazy stack" trick + for (size_type i=0, x; i < n; ++i) { + x = lcp_buf[i]; + while (!vec_stack.empty() and x > vec_stack.top()) { + vec_stack.pop(); ++k; // writing a closing parenthesis, bp is already initialized to zeros + } + vec_stack.push(x); + bp[k++] = 1; // writing an opening parenthesis + } + } +} + +//! 
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE 2009) and the first_child bit_vector +/*! \param lcp_buf int_vector_buffer for the lcp array for which the Super-Cartesian tree representation should be calculated. + * The value_type of vec should be an unsigned integer type. + * \param bp Reference to the balanced parentheses sequence which represents the Super-Cartesian tree. + * \param bp_fc Reference to the first child bit_vector of bp. + * \param minimum Specifies if the higher levels contains minima or maxima. Default is maxima. + * \par Time complexity + * \f$ \Order{2n} \f$, where \f$ n=\f$vec.size() + * \par Space complexity + * \f$\Order{2n}\f$ bits, by the multi_stack_support + */ +template +int_vector_size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer& lcp_buf, bit_vector& bp, bit_vector& bp_fc, const bool minimum=true) +{ + typedef int_vector_size_type size_type; + size_type n = lcp_buf.size(); + bp.resize(2*n); // resize bit vector for balanaced parantheses to 2 n bits + bp_fc.resize(n); + if (n == 0) // if n == 0 we are done + return 0; + size_type fc_cnt=0; // first child counter + util::set_to_value(bp, 0); + util::set_to_value(bp_fc, 0); + sorted_multi_stack_support vec_stack(n); + + size_type k=0; + size_type k_fc=0; // first child index + if (minimum) { + // hier noch ohne "lazy stack" trick + for (size_type i=0, x; i < n; ++i) { + x = lcp_buf[i]; + while (!vec_stack.empty() and x < vec_stack.top()) { + if (vec_stack.pop()) { + bp_fc[k_fc] = 1; + ++fc_cnt; + } + ++k; // writing a closing parenthesis, bp is already initialized to zeros + ++k_fc; // write a bit in first_child + } + vec_stack.push(x); + bp[k++] = 1; // writing an opening parenthesis + } + + } else { + // hier noch ohne "lazy stack" trick + for (size_type i=0, x; i < n; ++i) { + x = lcp_buf[i]; + while (!vec_stack.empty() and x > vec_stack.top()) { + if (vec_stack.pop()) { + bp_fc[k_fc] = 1; + 
++fc_cnt; + } + ++k; // writing a closing parenthesis, bp is already initialized to zeros + ++k_fc; // write a bit in first_child + } + vec_stack.push(x); + bp[k++] = 1; // writing an opening parenthesis + } + } + while (!vec_stack.empty()) { + if (vec_stack.pop()) { + bp_fc[k_fc] = 1; + ++fc_cnt; + } + // writing a closing parenthesis in bp, not necessary as bp is initalized with zeros + ++k; + ++k_fc; + } +// assert( k == 2*vec.size() ); + return fc_cnt; +} + + +template +void construct_supercartesian_tree_bp_succinct2(const RandomAccessContainer& vec, bit_vector& bp, + SDSL_UNUSED const bool minimum=true) +{ + typedef typename RandomAccessContainer::size_type size_type; + bp.resize(2*vec.size()); // resize bit vector for balanced parentheses to 2 n bits + util::set_to_value(bp, 0); + sorted_stack_support vec_stack(vec.size()); // <- ist das ein Problem fuer int_vector_buffer + + size_type k=0; +// uint64_t wbuf=0; + for (size_type i=0/*, cnt64=0*/; i < vec.size(); ++i) { + while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) { + vec_stack.pop(); ++k; /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis + } + vec_stack.push(i); + bp[k++] = 1; // writing an opening parenthesis + while (i+1 < vec.size() and vec[i+1] >= vec[i]) { + vec_stack.push(++i); + bp[k++]; + } + } +#ifdef SDSL_DEBUG +// not neccessary as bp is already initialized to zero + while (vec_stack.size() > 0) { + vec_stack.pop(); + bp[k++] = 0; // writing a closing parenthesis + } + assert(k == 2*vec.size()); +#endif +} + } #endif diff --git a/include/sdsl/test_index_performance.hpp b/include/sdsl/test_index_performance.hpp index bde3bc804..670de72a8 100644 --- a/include/sdsl/test_index_performance.hpp +++ b/include/sdsl/test_index_performance.hpp @@ -23,7 +23,6 @@ #include "int_vector.hpp" // for bit_vector and int_vector #include "util.hpp" // for -#include "algorithms.hpp" // for backward_search #include // for rand #include // for swap #include // for 
std::vector @@ -422,7 +421,7 @@ void test_cst_matching_statistics(const t_cst& cst, unsigned char* S2, uint64_t while (p2+1 > 0) { uint64_t lb, rb; // perform backward search on interval \f$ [i,j] \f$ - uint64_t size = algorithm::backward_search(cst.csa, i, j, S2[p2], lb, rb); + uint64_t size = backward_search(cst.csa, i, j, S2[p2], lb, rb); if (size > 0) { q = q + 1; i = lb; j = rb; diff --git a/lib/algorithms.cpp b/lib/algorithms.cpp deleted file mode 100644 index b57f80478..000000000 --- a/lib/algorithms.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* sdsl - succinct data structures library - Copyright (C) 2008 Simon Gog - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see http://www.gnu.org/licenses/ . 
-*/ -#include "sdsl/algorithms.hpp" -#include -#include -#include -#include - -typedef std::map tMSS; - -namespace sdsl -{ - -double algorithm::H_0(const unsigned char* c) -{ - size_t n = strlen((const char*)c); - if (n==0) - return 0; - size_t cnt[256] = {}; - double res = 0; - for (size_t i=0; i0) { - res += (double)cnt[i]/(double)n * log2((double)n/(double)cnt[i]); -// std::cerr<<"cnt["< #include diff --git a/include/sdsl/lcp_support_tree2.hpp b/include/sdsl/lcp_support_tree2.hpp index 4d80c600d..a6c3d1355 100644 --- a/include/sdsl/lcp_support_tree2.hpp +++ b/include/sdsl/lcp_support_tree2.hpp @@ -3,9 +3,9 @@ #include "lcp.hpp" #include "util.hpp" -#include "algorithms_for_compressed_suffix_trees.hpp" #include "rank_support_v.hpp" #include "wt_huff.hpp" +#include "sorted_multi_stack_support.hpp" #include #include diff --git a/include/sdsl/suffix_array_helper.hpp b/include/sdsl/suffix_array_helper.hpp index 425261379..1cf8635ad 100644 --- a/include/sdsl/suffix_array_helper.hpp +++ b/include/sdsl/suffix_array_helper.hpp @@ -24,7 +24,6 @@ #include #include #include -#include "algorithms_for_compressed_suffix_arrays.hpp" #include "iterators.hpp" namespace sdsl diff --git a/include/sdsl/suffix_tree_helper.hpp b/include/sdsl/suffix_tree_helper.hpp index 112e96876..f13683eb0 100644 --- a/include/sdsl/suffix_tree_helper.hpp +++ b/include/sdsl/suffix_tree_helper.hpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include "sorted_multi_stack_support.hpp" +#include "sorted_stack_support.hpp" #include "iterators.hpp" namespace sdsl From e954e1e8c51bd839cd7f2deb2c4e7767725fe5bb Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Tue, 27 Aug 2013 18:20:57 +1000 Subject: [PATCH 16/29] Removed old include --- include/sdsl/rmq_succinct_sct.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/sdsl/rmq_succinct_sct.hpp b/include/sdsl/rmq_succinct_sct.hpp index c2c0128d3..e18efadde 100644 --- a/include/sdsl/rmq_succinct_sct.hpp +++ b/include/sdsl/rmq_succinct_sct.hpp @@ 
-23,7 +23,6 @@ #include "rmq_support.hpp" #include "int_vector.hpp" -#include "algorithms_for_compressed_suffix_trees.hpp" #include "bp_support_sada.hpp" #include "util.hpp" From d78acaefff64cd838806e1fe7ca5c8cf001fb4e7 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Tue, 27 Aug 2013 18:29:13 +1000 Subject: [PATCH 17/29] Fixed another old include --- include/sdsl/bp_support_algorithm.hpp | 2 +- include/sdsl/rmq_succinct_sada.hpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/sdsl/bp_support_algorithm.hpp b/include/sdsl/bp_support_algorithm.hpp index 7a8269b27..0a725765f 100644 --- a/include/sdsl/bp_support_algorithm.hpp +++ b/include/sdsl/bp_support_algorithm.hpp @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ . */ -/*! \file algorithms_for_balanced_parentheses.hpp +/*! \file bp_support_algorithm.hpp \brief bp_support_algorithm.hpp contains algorithms for balanced parentheses sequences. 
\author Simon Gog */ diff --git a/include/sdsl/rmq_succinct_sada.hpp b/include/sdsl/rmq_succinct_sada.hpp index 837df1343..d4435e2da 100644 --- a/include/sdsl/rmq_succinct_sada.hpp +++ b/include/sdsl/rmq_succinct_sada.hpp @@ -23,7 +23,6 @@ #include "rmq_support.hpp" #include "int_vector.hpp" -#include "algorithms_for_compressed_suffix_trees.hpp" #include "bp_support_sada.hpp" #include "rank_support.hpp" #include "select_support.hpp" From 9a7534e86366cc0d0d6fffe4cd88aeb188e628fd Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Tue, 27 Aug 2013 18:37:32 +1000 Subject: [PATCH 18/29] Removed old namespace qualifier --- include/sdsl/rmq_succinct_sct.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/sdsl/rmq_succinct_sct.hpp b/include/sdsl/rmq_succinct_sct.hpp index e18efadde..5c35826f0 100644 --- a/include/sdsl/rmq_succinct_sct.hpp +++ b/include/sdsl/rmq_succinct_sct.hpp @@ -81,10 +81,10 @@ class rmq_succinct_sct if (v != nullptr) { #ifdef RMQ_SCT_BUILD_BP_NOT_SUCCINCT // this method takes \f$n\log n\f$ bits extra space in the worst case - algorithm::construct_supercartesian_tree_bp(*v, m_sct_bp, t_min); + construct_supercartesian_tree_bp(*v, m_sct_bp, t_min); #else // this method takes only \f$n\f$ bits extra space in all cases - algorithm::construct_supercartesian_tree_bp_succinct(*v, m_sct_bp, t_min); + construct_supercartesian_tree_bp_succinct(*v, m_sct_bp, t_min); // TODO: constructor which uses int_vector_buffer #endif m_sct_bp_support = bp_support_type(&m_sct_bp); From 0fbcccbdf76414c644a3f8f61ad15703d60b36f5 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Tue, 27 Aug 2013 18:52:38 +1000 Subject: [PATCH 19/29] Fixed includes --- include/sdsl/rmq_succinct_sada.hpp | 1 + include/sdsl/rmq_succinct_sct.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/include/sdsl/rmq_succinct_sada.hpp b/include/sdsl/rmq_succinct_sada.hpp index d4435e2da..d2f63df4b 100644 --- a/include/sdsl/rmq_succinct_sada.hpp +++ 
b/include/sdsl/rmq_succinct_sada.hpp @@ -26,6 +26,7 @@ #include "bp_support_sada.hpp" #include "rank_support.hpp" #include "select_support.hpp" +#include "suffix_tree_helper.hpp" #include "util.hpp" #include // for pair #include diff --git a/include/sdsl/rmq_succinct_sct.hpp b/include/sdsl/rmq_succinct_sct.hpp index 5c35826f0..14405e5d7 100644 --- a/include/sdsl/rmq_succinct_sct.hpp +++ b/include/sdsl/rmq_succinct_sct.hpp @@ -24,6 +24,7 @@ #include "rmq_support.hpp" #include "int_vector.hpp" #include "bp_support_sada.hpp" +#include "suffix_tree_helper.hpp" #include "util.hpp" //! Namespace for the succinct data structure library. From ba6569e102255a2a21f14c81f353be468f72ebde Mon Sep 17 00:00:00 2001 From: Matthias Petri Date: Wed, 28 Aug 2013 11:00:29 +1000 Subject: [PATCH 20/29] timing functionality now c++11 chrono based time in the library is now measured using the c++11 chrono library which is more flexible than what was previously used (stop_watch) --- .../rrr_vector/src/rrr_time_and_space.cpp | 28 +++--- examples/hugepages.cpp | 12 ++- include/sdsl/memory_management.hpp | 28 +++--- include/sdsl/util.hpp | 63 ------------- lib/memory_management.cpp | 14 +-- lib/util.cpp | 93 ------------------- 6 files changed, 45 insertions(+), 193 deletions(-) diff --git a/benchmark/rrr_vector/src/rrr_time_and_space.cpp b/benchmark/rrr_vector/src/rrr_time_and_space.cpp index 56d27df32..a6d943186 100644 --- a/benchmark/rrr_vector/src/rrr_time_and_space.cpp +++ b/benchmark/rrr_vector/src/rrr_time_and_space.cpp @@ -11,6 +11,9 @@ using namespace sdsl; #define 31 BLOCK_SIZE #endif +using namespace std::chrono; +using timer = std::chrono::high_resolution_clock; + int main(int argc, char* argv[]) { if (argc < 3) { @@ -27,14 +30,13 @@ int main(int argc, char* argv[]) if (load_from_file(bv, argv[1])) { cout << "# plain_size = " << size_in_bytes(bv) << endl; uint16_t k = atoi(argv[2]); - util::stop_watch sw; - sw.start(); + auto start = timer::now(); rrr_vec_type rrr_vector(bv, k); 
util::clear(bv); rrr_select_type rrr_sel(&rrr_vector); rrr_rank_type rrr_rank(&rrr_vector); - sw.stop(); - cout << "# construct_time = " << sw.real_time() << endl; + auto stop = timer::now(); + cout << "# construct_time = " << duration_cast(stop-start).count() << endl; rrr_vec_type::size_type args = rrr_rank(rrr_vector.size()); cout << "# rrr_vector.size() = " << rrr_vector.size() << endl; cout << "# args = " << args << endl; @@ -47,20 +49,20 @@ int main(int argc, char* argv[]) const uint64_t reps = 10000000; uint64_t mask = 0; int_vector<64> rands = get_rnd_positions(20, mask, rrr_vector.size(), 17); - sw.start(); + start = timer::now(); test_random_access(rrr_vector, rands, mask, reps); - sw.stop(); - cout << "# access_time = " << (sw.real_time()/reps)*1000 << endl; + stop = timer::now(); + cout << "# access_time = " << duration_cast(stop-start).count()/(double)reps << endl; rands = get_rnd_positions(20, mask, rrr_vector.size()+1, 17); - sw.start(); + start = timer::now(); test_inv_random_access(rrr_rank, rands, mask, reps); - sw.stop(); - cout << "# rank_time = " << (sw.real_time()/reps)*1000 << endl; + stop = timer::now(); + cout << "# rank_time = " << duration_cast(stop-start).count()/(double)reps << endl; rands = get_rnd_positions(20, mask, args, 17); for (uint64_t i=0; i(log_time).count() + << m_pre_max_mem << ";" << "" << std::endl; if (msg.size() > 0) { // output if msg is set - - (*m_out) << log_time << ";" << m_sw.abs_user_time() << ";" - << m_sw.abs_sys_time() << ";" << m_total_memory << ";" + (*m_out) << duration_cast(log_time).count() << ";" + << m_total_memory << ";" << msg << std::endl; } m_pre_max_mem = m_total_memory; // reset memory - m_pre_rtime = log_time; + m_pre_rtime = cur; } else { m_pre_max_mem = std::max(m_pre_max_mem, m_total_memory); } diff --git a/include/sdsl/util.hpp b/include/sdsl/util.hpp index bf8e2222f..9deb8e99c 100644 --- a/include/sdsl/util.hpp +++ b/include/sdsl/util.hpp @@ -307,69 +307,6 @@ void init_support(S& s, const X* 
x) s.set_vector(x); // set the support object's pointer to x } -//! Get the current data and time as formated string. -std::string time_string(); - -//! A helper class to measure the time consumption of program pieces. -/*! stop_watch is a stopwatch based on the commands getrusage and - * gettimeofday. Where getrusage is used to determine the user and system time - * and gettimeofday to determine the elapsed real time. - */ -class stop_watch -{ - private: - rusage m_ruse1, m_ruse2; - timeval m_timeOfDay1, m_timeOfDay2; - static timeval m_first_t; - static rusage m_first_r; - public: - - //! Default constructor - stop_watch(); - - //! Start the stopwatch. - /*! \sa stop - */ - void start(); - - //! Stop the stopwatch. - /*! \sa start - */ - void stop(); - - //! Get the elapsed user time in milliseconds between start and stop. - /*! \sa start, stop, real_time, sys_time - */ - double user_time(); - - //! Get the elapsed system time in milliseconds between start and stop. - /*! \sa start, stop, real_time, user_time - */ - double sys_time(); - - //! Get the elapsed real time in milliseconds between start and stop. - /*! \sa start, stop, sys_time, user_time - */ - double real_time(); - - //! Get the elapsed user time in milliseconds since the first construction of a stop_watch in the current process. - /*! \sa user_time - */ - uint64_t abs_user_time(); - - //! Get the elapsed system time in milliseconds since the first construction of a stop_watch in the current process. - /*! \sa sys_time - */ - uint64_t abs_sys_time(); - - //! Get the elapsed real time in milliseconds since the first construction of a stop_watch in the current process. - /*! 
\sa real_time - */ - uint64_t abs_real_time(); - - uint64_t abs_page_faults(); -}; - class spin_lock { private: diff --git a/lib/memory_management.cpp b/lib/memory_management.cpp index 3cd9e7b7f..cb4c8c40a 100644 --- a/lib/memory_management.cpp +++ b/lib/memory_management.cpp @@ -2,6 +2,7 @@ #include // for malloc and free #include +#include #ifdef MAP_HUGETLB #define HUGE_LEN 1073741824 @@ -11,13 +12,14 @@ static int nifty_counter = 0; +using timer = std::chrono::high_resolution_clock; + std::map sdsl::mm::m_items; uint64_t sdsl::mm::m_total_memory; uint64_t* sdsl::mm::m_data; std::ostream* sdsl::mm::m_out; -sdsl::util::stop_watch sdsl::mm::m_sw; -uint64_t sdsl::mm::m_granularity; -uint64_t sdsl::mm::m_pre_rtime; +std::chrono::microseconds sdsl::mm::m_granularity; +timer::time_point sdsl::mm::m_pre_rtime; uint64_t sdsl::mm::m_pre_max_mem; sdsl::util::spin_lock sdsl::mm::m_spinlock; @@ -26,14 +28,14 @@ sdsl::mm_initializer::mm_initializer() { if (0 == nifty_counter++) { mm::m_total_memory = 0; - mm::m_granularity = 0; - mm::m_pre_rtime = 0; + mm::m_granularity = std::chrono::microseconds(500); mm::m_pre_max_mem = 0; // initialize static members object here // mm::m_items.clear(); mm::m_items = mm::tMVecItem(); mm::m_data = nullptr; mm::m_out = nullptr; + mm::m_pre_rtime = timer::now(); } } sdsl::mm_initializer::~mm_initializer() @@ -96,7 +98,7 @@ void mm::log_stream(std::ostream* out) m_out = out; } -void mm::log_granularity(uint64_t granularity) +void mm::log_granularity(std::chrono::microseconds granularity) { std::lock_guard lock(m_spinlock); m_granularity = granularity; diff --git a/lib/util.cpp b/lib/util.cpp index 3d5347eac..a291b7810 100644 --- a/lib/util.cpp +++ b/lib/util.cpp @@ -30,10 +30,6 @@ namespace util { uint64_t _id_helper::id = 0; -timeval stop_watch::m_first_t = {0,0}; -rusage stop_watch::m_first_r = {{0,0},{0,0}}; - - std::string basename(std::string file) { @@ -143,95 +139,6 @@ off_t file_size(const std::string& file) } } 
-stop_watch::stop_watch() : m_ruse1(), m_ruse2(), m_timeOfDay1(), m_timeOfDay2() -{ - timeval t; - t.tv_sec = 0; t.tv_usec = 0; - m_ruse1.ru_utime = t; m_ruse1.ru_stime = t; // init m_ruse1 - m_ruse2.ru_utime = t; m_ruse2.ru_stime = t; // init m_ruse2 - m_timeOfDay1 = t; m_timeOfDay2 = t; - if (m_first_t.tv_sec == 0) { - gettimeofday(&m_first_t, 0); - } - if (m_first_r.ru_utime.tv_sec == 0 and m_first_r.ru_utime.tv_usec ==0) { - getrusage(RUSAGE_SELF, &m_first_r); - } -} - -void stop_watch::start() -{ - gettimeofday(&m_timeOfDay1, 0); - getrusage(RUSAGE_SELF, &m_ruse1); -} - -void stop_watch::stop() -{ - getrusage(RUSAGE_SELF, &m_ruse2); - gettimeofday(&m_timeOfDay2, 0); -} - -double stop_watch::user_time() -{ - timeval t1, t2; - t1 = m_ruse1.ru_utime; - t2 = m_ruse2.ru_utime; - return ((double)(t2.tv_sec*1000000 + t2.tv_usec - (t1.tv_sec*1000000 + t1.tv_usec)))/1000.0; -} - -double stop_watch::sys_time() -{ - timeval t1, t2; - t1 = m_ruse1.ru_stime; - t2 = m_ruse2.ru_stime; - return ((double)(t2.tv_sec*1000000 + t2.tv_usec - (t1.tv_sec*1000000 + t1.tv_usec)))/1000.0; -} - -double stop_watch::real_time() -{ - double result = ((double)((m_timeOfDay2.tv_sec*1000000 + m_timeOfDay2.tv_usec)-(m_timeOfDay1.tv_sec*1000000 + m_timeOfDay1.tv_usec)))/1000.0; - return result; -} - -uint64_t stop_watch::abs_real_time() -{ - uint64_t result = (((m_timeOfDay2.tv_sec*1000000 + m_timeOfDay2.tv_usec - (m_first_t.tv_sec*1000000 + m_first_t.tv_usec))))/1000; - return result; -} - -uint64_t stop_watch::abs_user_time() -{ - timeval t1, t2; - t1 = m_first_r.ru_utime; - t2 = m_ruse2.ru_utime; - return (t2.tv_sec*1000000 + t2.tv_usec - (t1.tv_sec*1000000 + t1.tv_usec))/1000; -} - - -uint64_t stop_watch::abs_sys_time() -{ - timeval t1, t2; - t1 = m_first_r.ru_stime; - t2 = m_ruse2.ru_stime; - return (t2.tv_sec*1000000 + t2.tv_usec - (t1.tv_sec*1000000 + t1.tv_usec))/1000; -} - -uint64_t stop_watch::abs_page_faults() -{ - return m_ruse2.ru_majflt - m_first_r.ru_majflt; // does not work on 
my platform -} - -std::string time_string() -{ - time_t rawtime; - struct tm* timeinfo; - char buffer[1024]; - time(&rawtime); - timeinfo = localtime(&rawtime); - - strftime(buffer, 1024, "%Y-%m-%d-%H%M%S", timeinfo); - return buffer; -} - }// end namespace util }// end namespace sdsl From 2c95ff19cde72d60f297ab4eaae99ca1f8fd20b5 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Wed, 28 Aug 2013 11:43:27 +1000 Subject: [PATCH 21/29] Added "uninstall" script Here is the cmake documentation: http://www.cmake.org/Wiki/CMake_FAQ#Can_I_do_.22make_uninstall.22_with_CMake.3F --- CMakeLists.txt | 8 ++++ .../cmake_uninstall.cmake.in | 0 external/libdivsufsort-2.0.1/CMakeLists.txt | 8 ---- uninstall.sh | 37 +++++++++++++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) rename {external/libdivsufsort-2.0.1/CMakeModules => CMakeModules}/cmake_uninstall.cmake.in (100%) create mode 100755 uninstall.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f50f723e..a4705277f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,3 +41,11 @@ add_subdirectory(lib) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/Make.helper.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/Make.helper" @ONLY) + +## Add 'uninstall' target ## +CONFIGURE_FILE( + "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake" + IMMEDIATE @ONLY) +ADD_CUSTOM_TARGET(uninstall + "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake") diff --git a/external/libdivsufsort-2.0.1/CMakeModules/cmake_uninstall.cmake.in b/CMakeModules/cmake_uninstall.cmake.in similarity index 100% rename from external/libdivsufsort-2.0.1/CMakeModules/cmake_uninstall.cmake.in rename to CMakeModules/cmake_uninstall.cmake.in diff --git a/external/libdivsufsort-2.0.1/CMakeLists.txt b/external/libdivsufsort-2.0.1/CMakeLists.txt index 3afe0da83..b77a578c5 100644 --- a/external/libdivsufsort-2.0.1/CMakeLists.txt +++ 
b/external/libdivsufsort-2.0.1/CMakeLists.txt @@ -91,11 +91,3 @@ add_subdirectory(lib) if(BUILD_EXAMPLES) add_subdirectory(examples) endif(BUILD_EXAMPLES) - -## Add 'uninstall' target ## -CONFIGURE_FILE( - "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake" - IMMEDIATE @ONLY) -ADD_CUSTOM_TARGET(uninstall - "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake") diff --git a/uninstall.sh b/uninstall.sh new file mode 100755 index 000000000..8dfa21b30 --- /dev/null +++ b/uninstall.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# This script removes all installed sdsl files +# on a LINUX or Mac OS X system + +CUR_DIR=`pwd` +SDSL_INSTALL_PREFIX=${HOME} +if [ $# -ge 1 ]; then + SDSL_INSTALL_PREFIX=${1} +fi + +echo "Library files will be removed from" +echo "'${SDSL_INSTALL_PREFIX}/lib' and" +echo "'${SDSL_INSTALL_PREFIX}/include'" + + +cd "${CUR_DIR}" +OLD_DIR="$( cd "$( dirname "$0" )" && pwd )" # gets the directory where the script is located in +cd "${OLD_DIR}" +OLD_DIR=`pwd` + +cd build # change into the build directory +if [ $? != 0 ]; then + exit 1 +fi + +make uninstall + +if [ $? != 0 ]; then + exit 1 +fi + +./clean.sh # clean-up build directory +if [ $? != 0 ]; then + exit 1 +fi + +echo "Installed sdsl files were removed." From b4f8c06c834686414ec4d52530721d5c2c4f4459 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Wed, 28 Aug 2013 12:14:05 +1000 Subject: [PATCH 22/29] Don't install gtest. Gtest is only needed for the tests, so we don't install the library but refer to the local build directory. 
--- external/gtest-1.6.0/CMakeLists.txt | 4 ++-- external/gtest-1.6.0/include/gtest/CMakeLists.txt | 2 +- external/gtest-1.6.0/include/gtest/internal/CMakeLists.txt | 2 +- test/Makefile | 6 +++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/external/gtest-1.6.0/CMakeLists.txt b/external/gtest-1.6.0/CMakeLists.txt index 192d00a54..2159f2c5a 100644 --- a/external/gtest-1.6.0/CMakeLists.txt +++ b/external/gtest-1.6.0/CMakeLists.txt @@ -73,8 +73,8 @@ target_link_libraries(gtest_main gtest) add_subdirectory(include) -install(TARGETS gtest LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) -install(TARGETS gtest_main LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) +#install(TARGETS gtest LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) +#install(TARGETS gtest_main LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) ######################################################################## # diff --git a/external/gtest-1.6.0/include/gtest/CMakeLists.txt b/external/gtest-1.6.0/include/gtest/CMakeLists.txt index f7e403b6f..57af9f668 100644 --- a/external/gtest-1.6.0/include/gtest/CMakeLists.txt +++ b/external/gtest-1.6.0/include/gtest/CMakeLists.txt @@ -2,7 +2,7 @@ file(GLOB gtestFiles RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/*.h") foreach(gtestFile ${gtestFiles}) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/${gtestFile}" "${CMAKE_CURRENT_BINARY_DIR}/${gtestFile}" COPYONLY ) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${gtestFile}" DESTINATION include/gtest) +# install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${gtestFile}" DESTINATION include/gtest) endforeach(gtestFile) add_subdirectory(internal) diff --git a/external/gtest-1.6.0/include/gtest/internal/CMakeLists.txt b/external/gtest-1.6.0/include/gtest/internal/CMakeLists.txt index acfbf4860..95d885d2e 100644 --- a/external/gtest-1.6.0/include/gtest/internal/CMakeLists.txt +++ b/external/gtest-1.6.0/include/gtest/internal/CMakeLists.txt @@ -2,6 +2,6 @@ file(GLOB gtestFiles RELATIVE 
${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/*.h") foreach(gtestFile ${gtestFiles}) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/${gtestFile}" "${CMAKE_CURRENT_BINARY_DIR}/${gtestFile}" COPYONLY ) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${gtestFile}" DESTINATION include/gtest/internal) +# install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${gtestFile}" DESTINATION include/gtest/internal) endforeach(gtestFile) diff --git a/test/Makefile b/test/Makefile index bd0358a6d..612554569 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,5 +1,9 @@ include ../Make.helper -CXX_FLAGS=$(MY_CXX_FLAGS) -Wall -Werror -Wunused-parameter -g -O3 -I$(INC_DIR) -L$(LIB_DIR) +CXX_FLAGS=$(MY_CXX_FLAGS) -Wall -Werror -Wunused-parameter -g -O3 \ + -I$(INC_DIR) \ + -I../build/external/gtest-1.6.0/include \ + -L$(LIB_DIR) \ + -L../build/external/gtest-1.6.0 LIB_SDSL=$(LIB_DIR)/libsdsl.a CCLIB=-lsdsl -ldivsufsort -ldivsufsort64 -lgtest TMP_DIR=tmp From f237841b80e0769c053e17ddcc34062a386047b4 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Wed, 28 Aug 2013 12:55:01 +1000 Subject: [PATCH 23/29] Removed unused lgtest parameter. 
--- examples/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/Makefile b/examples/Makefile index 5d24f6709..226898711 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,6 +1,6 @@ include ../Make.helper CXX_FLAGS=$(MY_CXX_FLAGS) $(MY_CXX_OPT_FLAGS) -I$(INC_DIR) -L$(LIB_DIR) -CCLIB=-lsdsl -ldivsufsort -ldivsufsort64 -lgtest +CCLIB=-lsdsl -ldivsufsort -ldivsufsort64 SOURCES=$(wildcard *.cpp) EXECS=$(SOURCES:.cpp=.x) From e3b0df6bd37a435e59feeb044a50e189b7a104cc Mon Sep 17 00:00:00 2001 From: Matthias Petri Date: Wed, 28 Aug 2013 14:19:46 +1000 Subject: [PATCH 24/29] fixed tutorials to include the new time format --- tutorial/mm-log.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorial/mm-log.cpp b/tutorial/mm-log.cpp index 0b46f6e39..8019d509d 100644 --- a/tutorial/mm-log.cpp +++ b/tutorial/mm-log.cpp @@ -1,5 +1,6 @@ #include #include +#include using namespace sdsl; using namespace std; @@ -7,7 +8,7 @@ using namespace std; int main(int argc, char* argv[]) { // set granularity of logging to 20 milliseconds - mm::log_granularity(20); + mm::log_granularity(std::chrono::milliseconds(20)); // connect cout to the logging stream mm::log_stream(&cout); // generate CST From 912af6bdc600e44033bffc8dd88c39fb4bf2a701 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Wed, 28 Aug 2013 15:05:14 +1000 Subject: [PATCH 25/29] Addressed `unused variable` warning. 
--- benchmark/indexing_count/src/info.cpp | 1 - benchmark/indexing_count/src/run_queries_sdsl.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmark/indexing_count/src/info.cpp b/benchmark/indexing_count/src/info.cpp index b2c46ce97..824c7351b 100644 --- a/benchmark/indexing_count/src/info.cpp +++ b/benchmark/indexing_count/src/info.cpp @@ -11,7 +11,6 @@ using namespace std; int main(int argc, char* argv[]) { - char* filename; if (argc < 2) { cout << "./" << argv[0] << " index_file " << endl; return 1; diff --git a/benchmark/indexing_count/src/run_queries_sdsl.cpp b/benchmark/indexing_count/src/run_queries_sdsl.cpp index 70d0b970e..d3f881542 100644 --- a/benchmark/indexing_count/src/run_queries_sdsl.cpp +++ b/benchmark/indexing_count/src/run_queries_sdsl.cpp @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) querytype = *argv[2]; CSA_TYPE csa; - fprintf(stderr, "# File = %s\n",(string(argv[1]) + "." + string(SUF)).c_str()); + fprintf(stderr, "# File = %s\n",(string(filename) + "." + string(SUF)).c_str()); fprintf(stderr, "# program = %s\n",string(SUF).c_str()); Load_time = getTime(); load_from_file(csa, (string(argv[1]) + "." 
+ string(SUF)).c_str()); From 540a0c29f16f1980fce894277c0e6209b138fd1a Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Wed, 28 Aug 2013 15:20:41 +1000 Subject: [PATCH 26/29] The rrr_vector benchmark is executed in [5..255] --- include/sdsl/rrr_vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sdsl/rrr_vector.hpp b/include/sdsl/rrr_vector.hpp index 2005554f5..09980a314 100644 --- a/include/sdsl/rrr_vector.hpp +++ b/include/sdsl/rrr_vector.hpp @@ -71,7 +71,7 @@ template > class rrr_vector { private: - static_assert(t_bs >= 15 and t_bs <= 256 , "rrr_vector: block size t_bs must be 15 <= t_bs <= 256."); + static_assert(t_bs >= 3 and t_bs <= 256 , "rrr_vector: block size t_bs must be 3 <= t_bs <= 256."); public: typedef bit_vector::size_type size_type; typedef bit_vector::value_type value_type; From cb0ab7fab8695fcf79e39f1998cbd7013f8f5f76 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Thu, 29 Aug 2013 09:29:11 +1000 Subject: [PATCH 27/29] Clean up code. get_rnd_positions -> util::rnd_positions --- benchmark/indexing_count/Makefile | 2 +- benchmark/indexing_extract/Makefile | 2 +- benchmark/indexing_extract/src/info.cpp | 3 - .../indexing_extract/src/run_queries_sdsl.cpp | 110 +--- benchmark/indexing_locate/Makefile | 2 +- benchmark/indexing_locate/src/info.cpp | 1 - .../indexing_locate/src/run_queries_sdsl.cpp | 107 +--- benchmark/rrr_vector/Makefile | 3 +- .../rrr_vector/src/rrr_time_and_space.cpp | 6 +- include/sdsl/test_index_performance.hpp | 537 ------------------ include/sdsl/util.hpp | 14 + lib/test_index_performance.cpp | 17 - test/CstByteTest.cpp | 2 +- test/CstIntTest.cpp | 2 +- 14 files changed, 28 insertions(+), 780 deletions(-) delete mode 100644 include/sdsl/test_index_performance.hpp delete mode 100644 lib/test_index_performance.cpp diff --git a/benchmark/indexing_count/Makefile b/benchmark/indexing_count/Makefile index fd17be500..0b7229c2b 100644 --- a/benchmark/indexing_count/Makefile +++ 
b/benchmark/indexing_count/Makefile @@ -27,7 +27,7 @@ TIME_FILES = $(foreach IDX_ID,$(IDX_IDS),\ $(foreach COMPILE_ID,$(COMPILE_IDS),results/$(TC_ID).$(IDX_ID).$(COMPILE_ID)))) COMP_FILES = $(addsuffix .z.info,$(TC_PATHS)) -all: $(BUILD_EXECS) $(QUERY_EXECS) pattern +all: $(BUILD_EXECS) $(QUERY_EXECS) $(INFO_EXECS) info: $(INFO_EXECS) $(INFO_FILES) cd ../../examples; make json2html.x diff --git a/benchmark/indexing_extract/Makefile b/benchmark/indexing_extract/Makefile index d325fecb1..c69c8adf9 100644 --- a/benchmark/indexing_extract/Makefile +++ b/benchmark/indexing_extract/Makefile @@ -31,7 +31,7 @@ TIME_FILES = $(foreach IDX_ID,$(IDX_IDS),\ $(foreach SAMPLE_ID,$(SAMPLE_IDS),results/$(TC_ID).$(IDX_ID).$(SAMPLE_ID)))) COMP_FILES = $(addsuffix .z.info,$(TC_PATHS)) -all: $(BUILD_EXECS) $(QUERY_EXECS) intervals +all: $(BUILD_EXECS) $(QUERY_EXECS) $(INFO_EXECS) info: $(INFO_EXECS) $(INFO_FILES) diff --git a/benchmark/indexing_extract/src/info.cpp b/benchmark/indexing_extract/src/info.cpp index b2c46ce97..f956f1c89 100644 --- a/benchmark/indexing_extract/src/info.cpp +++ b/benchmark/indexing_extract/src/info.cpp @@ -4,14 +4,11 @@ #include #include -#include - using namespace sdsl; using namespace std; int main(int argc, char* argv[]) { - char* filename; if (argc < 2) { cout << "./" << argv[0] << " index_file " << endl; return 1; diff --git a/benchmark/indexing_extract/src/run_queries_sdsl.cpp b/benchmark/indexing_extract/src/run_queries_sdsl.cpp index 7a142ce36..5513f7499 100644 --- a/benchmark/indexing_extract/src/run_queries_sdsl.cpp +++ b/benchmark/indexing_extract/src/run_queries_sdsl.cpp @@ -20,9 +20,6 @@ #define DISPLAY ('D') #define VERBOSE ('V') -/* macro to detect and to notify errors */ -#define IFERROR(error) {{if (error) { fprintf(stderr, "%s\n", error_index(error)); exit(1); }}} - using namespace sdsl; using namespace std; @@ -36,7 +33,6 @@ void pfile_info(ulong* length, ulong* numpatt); double getTime(void); void usage(char* progname); -static void* 
Index; /* opaque data type */ static int Verbose = 0; static ulong Index_size, Text_length; static double Load_time; @@ -46,7 +42,6 @@ static double Load_time; */ int main(int argc, char* argv[]) { - int error = 0; char* filename; char querytype; @@ -59,20 +54,15 @@ int main(int argc, char* argv[]) querytype = *argv[2]; CSA_TYPE csa; - fprintf(stderr, "Load from file %s\n",(string(argv[1]) + "." + string(SDSL_XSTR(SUF))).c_str()); + fprintf(stderr, "Load from file %s\n",(string(filename) + "." + string(SDSL_XSTR(SUF))).c_str()); Load_time = getTime(); load_from_file(csa, (string(argv[1]) + "." + string(SDSL_XSTR(SUF))).c_str()); - IFERROR(error); Load_time = getTime() - Load_time; fprintf(stderr, "# Load_index_time_in_sec = %.2f\n", Load_time); std::cerr << "# text_size = " << csa.size()-1 << std::endl; Index_size = size_in_bytes(csa); - IFERROR(error); Text_length = csa.size()-1; // -1 since we added a sentinel character -// error = get_length(Index, &Text_length); - IFERROR(error); - /* Index_size /=1024; */ fprintf(stderr, "# Index_size_in_bytes = %lu\n", Index_size); #ifdef USE_HP bool mapped = mm::map_hp(); @@ -105,20 +95,7 @@ int main(int argc, char* argv[]) do_extract(csa); break; - /* case DISPLAY: - if (argc < 4) { - usage(argv[0]); - exit (1); - } - if (argc > 4) - if (*argv[4] == VERBOSE){ - Verbose = 1; - fprintf(stdout,"%c", DISPLAY); - - } - do_display((ulong) atol(argv[3])); - break; - */ default: + default: fprintf(stderr, "Unknow option: main ru\n"); exit(1); } @@ -127,10 +104,6 @@ int main(int argc, char* argv[]) mm::unmap_hp(); } #endif - -// error = free_index(Index); - IFERROR(error); - return 0; } @@ -138,7 +111,6 @@ int main(int argc, char* argv[]) void do_count(const CSA_TYPE& csa) { - int error = 0; ulong numocc, length, tot_numocc = 0, numpatt, res_patt; double time, tot_time = 0; uchar* pattern; @@ -163,8 +135,6 @@ do_count(const CSA_TYPE& csa) /* Count */ time = getTime(); numocc = count(csa, pattern, pattern+length); -// error = count 
(Index, pattern, length, &numocc); - IFERROR(error); if (Verbose) { fwrite(&length, sizeof(length), 1, stdout); @@ -194,7 +164,6 @@ do_count(const CSA_TYPE& csa) void do_locate(const CSA_TYPE& csa) { - int error = 0; ulong numocc, length; //, *occ, int_vector<32> occ; ulong tot_numocc = 0, numpatt = 0, processed_pat = 0; @@ -219,7 +188,6 @@ do_locate(const CSA_TYPE& csa) // Locate time = getTime(); numocc = locate(csa, pattern, pattern+length, occ); - IFERROR(error); tot_time += (getTime() - time); ++processed_pat; @@ -242,80 +210,6 @@ do_locate(const CSA_TYPE& csa) free(pattern); } - -/* -void do_display(ulong numc) { - - int error = 0; - ulong numocc, length, i, *snippet_len, tot_numcharext = 0, numpatt; - double time, tot_time = 0; - uchar *pattern, *snippet_text; - - pfile_info (&length, &numpatt); - - pattern = (uchar *) malloc (sizeof (uchar) * (length)); - if (pattern == NULL) - { - fprintf (stderr, "Error: cannot allocate\n"); - exit (1); - } - - fprintf(stderr, "Snippet length %lu\n", numc); - - while (numpatt) - { - - if (fread (pattern, sizeof (*pattern), length, stdin) != length) - { - fprintf (stderr, "Error: cannot read patterns file\n"); - perror ("run_queries"); - exit (1); - } - - // Display - time = getTime (); - error = display (Index, pattern, length, numc, &numocc, - &snippet_text, &snippet_len); - IFERROR (error); - tot_time += (getTime () - time); - - if (Verbose) { - ulong j, len = length + 2*numc; - char blank = '\0'; - fwrite(&length, sizeof(length), 1, stdout); - fwrite(pattern, sizeof(*pattern), length, stdout); - fwrite(&numocc, sizeof(numocc), 1, stdout); - fwrite(&len, sizeof(len), 1, stdout); - - for (i = 0; i < numocc; i++){ - fwrite(snippet_text+len*i,sizeof(uchar),snippet_len[i],stdout); - for(j=snippet_len[i];j 4) - if (*argv[4] == VERBOSE){ - Verbose = 1; - fprintf(stdout,"%c", DISPLAY); - - } - do_display((ulong) atol(argv[3])); - break; - */ default: + default: fprintf(stderr, "Unknow option: main ru\n"); exit(1); } @@ -128,9 
+106,6 @@ int main(int argc, char* argv[]) } #endif -// error = free_index(Index); - IFERROR(error); - return 0; } @@ -138,7 +113,6 @@ int main(int argc, char* argv[]) void do_count(const CSA_TYPE& csa) { - int error = 0; ulong numocc, length, tot_numocc = 0, numpatt, res_patt; double time, tot_time = 0; uchar* pattern; @@ -163,7 +137,6 @@ do_count(const CSA_TYPE& csa) /* Count */ time = getTime(); numocc = count(csa, pattern, pattern+length); - IFERROR(error); if (Verbose) { fwrite(&length, sizeof(length), 1, stdout); @@ -193,7 +166,6 @@ do_count(const CSA_TYPE& csa) void do_locate(const CSA_TYPE& csa) { - int error = 0; ulong numocc, length; //, *occ, int_vector<32> occ; ulong tot_numocc = 0, numpatt = 0, processed_pat = 0; @@ -218,7 +190,6 @@ do_locate(const CSA_TYPE& csa) // Locate time = getTime(); numocc = locate(csa, pattern, pattern+length, occ); - IFERROR(error); tot_time += (getTime() - time); ++processed_pat; @@ -242,79 +213,6 @@ do_locate(const CSA_TYPE& csa) } -/* -void do_display(ulong numc) { - - int error = 0; - ulong numocc, length, i, *snippet_len, tot_numcharext = 0, numpatt; - double time, tot_time = 0; - uchar *pattern, *snippet_text; - - pfile_info (&length, &numpatt); - - pattern = (uchar *) malloc (sizeof (uchar) * (length)); - if (pattern == NULL) - { - fprintf (stderr, "Error: cannot allocate\n"); - exit (1); - } - - fprintf(stderr, "Snippet length %lu\n", numc); - - while (numpatt) - { - - if (fread (pattern, sizeof (*pattern), length, stdin) != length) - { - fprintf (stderr, "Error: cannot read patterns file\n"); - perror ("run_queries"); - exit (1); - } - - // Display - time = getTime (); - error = display (Index, pattern, length, numc, &numocc, - &snippet_text, &snippet_len); - IFERROR (error); - tot_time += (getTime () - time); - - if (Verbose) { - ulong j, len = length + 2*numc; - char blank = '\0'; - fwrite(&length, sizeof(length), 1, stdout); - fwrite(pattern, sizeof(*pattern), length, stdout); - fwrite(&numocc, sizeof(numocc), 1, 
stdout); - fwrite(&len, sizeof(len), 1, stdout); - - for (i = 0; i < numocc; i++){ - fwrite(snippet_text+len*i,sizeof(uchar),snippet_len[i],stdout); - for(j=snippet_len[i];j $(RES_FILE) diff --git a/benchmark/rrr_vector/src/rrr_time_and_space.cpp b/benchmark/rrr_vector/src/rrr_time_and_space.cpp index a6d943186..28fddfb9c 100644 --- a/benchmark/rrr_vector/src/rrr_time_and_space.cpp +++ b/benchmark/rrr_vector/src/rrr_time_and_space.cpp @@ -48,17 +48,17 @@ int main(int argc, char* argv[]) cout << "# btnr_size = " << size_in_bytes(rrr_vector.btnr) << endl; const uint64_t reps = 10000000; uint64_t mask = 0; - int_vector<64> rands = get_rnd_positions(20, mask, rrr_vector.size(), 17); + int_vector<64> rands = util::rnd_positions>(20, mask, rrr_vector.size(), 17); start = timer::now(); test_random_access(rrr_vector, rands, mask, reps); stop = timer::now(); cout << "# access_time = " << duration_cast(stop-start).count()/(double)reps << endl; - rands = get_rnd_positions(20, mask, rrr_vector.size()+1, 17); + rands = util::rnd_positions>(20, mask, rrr_vector.size()+1, 17); start = timer::now(); test_inv_random_access(rrr_rank, rands, mask, reps); stop = timer::now(); cout << "# rank_time = " << duration_cast(stop-start).count()/(double)reps << endl; - rands = get_rnd_positions(20, mask, args, 17); + rands = util::rnd_positions>(20, mask, args, 17); for (uint64_t i=0; i // for rand -#include // for swap -#include // for std::vector -#include - -namespace sdsl -{ - -// TODO: combine all random_access_tests and random_inverse_access tests -// to two generic test cases -// split pre computation and measurement - -//! Create 2^{log_s} random integers mod m with seed x -/* - */ -int_vector<64> get_rnd_positions(uint8_t log_s, uint64_t& mask, uint64_t m=0, uint64_t x=17); - -//! Performs random accesses on a vector and returns the sum of the accessed elements -/*! \param v The container. - * \param rands Vector of locations which should be accessed. Length is a power of 2. 
- * Can be generated by method: get_rnd_positions(log s, mask, v.size()) - * \param mask Mask which is used to perform the modulo s operation. See `rands`. - * \param times Number of iterations. If times > rands.size() array rands will be - * run through several times. - */ -template -uint64_t test_random_access(const t_vec& v, const int_vector<64>& rands, uint64_t mask, uint64_t times=100000000) -{ - uint64_t cnt=0; - for (uint64_t i=0; i -uint64_t test_inv_random_access(const t_vec& v, const int_vector<64>& rands, uint64_t mask, uint64_t times=100000000) -{ - uint64_t cnt=0; - for (uint64_t i=0; i -uint64_t test_int_vector_random_write(t_vec& v, const int_vector<64>& rands, uint64_t mask, uint64_t times=100000000) -{ - uint64_t cnt=0; - for (uint64_t i=0; i -uint64_t test_int_vector_sequential_write(t_vec& v, uint64_t times=100000000) -{ - const uint64_t mask = (1ULL << bits::hi(v.size()))-1; - uint64_t cnt=0; - for (uint64_t i=0; i -uint64_t test_cst_dfs_iterator(t_cst& cst, uint64_t times=100000) -{ - if (times > cst.nodes()) - times = cst.nodes(); - typename t_cst::const_iterator it = cst.begin(); - const typename t_cst::const_iterator end = cst.begin(); - for (uint64_t i=0; i -uint64_t test_cst_dfs_iterator_and_depth(t_cst& cst, uint64_t times=1000000) -{ - uint64_t cnt=0; - typename t_cst::const_iterator it = cst.begin(); - const typename t_cst::const_iterator end = cst.end(); - for (uint64_t i=0; i -uint64_t test_cst_dfs_iterator_and_id(t_cst& cst, uint64_t times=1000000) -{ - uint64_t cnt=0; - typename t_cst::const_iterator it = cst.begin(); - const typename t_cst::const_iterator end = cst.end(); - for (uint64_t i=0; i -void generate_nodes_from_random_leaves(const t_cst& cst, uint64_t times, std::vector& nodes, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - std::mt19937_64 rng(x); - uint64_t n = cst.csa.size(); - // generate nodes - for (uint64_t i=0; i -void test_cst_child_operation(const t_cst& cst, uint64_t times=5000, uint64_t x=17) 
-{ - typedef typename t_cst::node_type node_type; - - std::vector nodes; - generate_nodes_from_random_leaves(cst, times, nodes, x); - // choose some chars for the text - unsigned char* letters = new unsigned char[nodes.size()+1]; - for (uint64_t i=0; i -void test_cst_parent_operation(const t_cst& cst, uint64_t times=100000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - - std::mt19937_64 rng(x); - uint64_t n = cst.csa.size(); - // take \f$ time \f$ random leaves - std::vector rand_leaf(times); - for (uint64_t i=0; i -void test_cst_1th_child_operation(const t_cst& cst, uint64_t times=1000000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - - std::vector nodes; - generate_nodes_from_random_leaves(cst, times, nodes, x); - - node_type c; // for 1th_child node - uint64_t cnt=0; - util::write_R_output("cst","1th_child","begin",nodes.size(),cnt); - for (uint64_t i=0; i -void test_cst_sibling_operation(const t_cst& cst, uint64_t times=100000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - - std::vector nodes; - generate_nodes_from_random_leaves(cst, times, nodes, x); - for (uint64_t i=0; i -void test_cst_id_operation(const t_cst& cst, uint64_t times=100000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - std::vector nodes; - generate_nodes_from_random_leaves(cst, times, nodes, x); - - uint64_t cnt = 0; - util::write_R_output("cst","id","begin",nodes.size(),cnt); - for (uint64_t i=0; i < nodes.size(); ++i) { - cnt += cst.id(nodes[i]); - } - util::write_R_output("cst","id","end",nodes.size(),cnt); -} - -//! 
Test depth operations for leaves and inner nodes -template -void test_cst_depth_operation(const t_cst& cst, uint64_t times=100000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - std::vector nodes; - generate_nodes_from_random_leaves(cst, times, nodes, x); - - uint64_t cnt = 0; - util::write_R_output("cst","depth","begin",nodes.size(),cnt); - for (uint64_t i=0; i < nodes.size(); ++i) { - cnt += cst.depth(nodes[i]); - } - util::write_R_output("cst","depth","end",nodes.size(),cnt); -} - - -//! Test depth operations for inner nodes -template -void test_cst_depth_operation_for_inner_nodes(const t_cst& cst, uint64_t times=100000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - std::vector nodes; - { - std::vector nodes2; - generate_nodes_from_random_leaves(cst, times, nodes2, x); - for (uint64_t i=0; i -void test_cst_lca_operation(const t_cst& cst, uint64_t times=1000000, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - // generate \f$2^{19}\f$ random pairs of leafs - uint64_t n = cst.csa.size(); - uint64_t mask = (1<<20)-1; - std::vector nodes(1<<20); - std::mt19937_64 rng(x); - for (uint64_t i=0; i < nodes.size(); ++i) { - nodes[i] = cst.select_leaf(rng()%n + 1); - } - - uint64_t cnt=0; - util::write_R_output("cst","lca","begin",times,cnt); - for (uint64_t i=0; i -void test_cst_sl_operation(const t_cst& cst, uint64_t times=500, uint64_t x=17) -{ - typedef typename t_cst::node_type node_type; - uint64_t n = cst.csa.size(); - if (times > n) - times = n; - - std::vector nodes(times); - std::mt19937_64 rng(x); - // take \f$ times \f$ random leaves and calculate each parent - for (uint64_t i=0; i -void test_cst_matching_statistics(const t_cst& cst, unsigned char* S2, uint64_t n2) -{ - typedef typename t_cst::node_type node_type; - - uint64_t cnt = 0; - uint64_t q = 0; // current match length - uint64_t p2 = n2-1; // position in S2 - uint64_t i = 0, j = cst.csa.size()-1; // \f$ \epsilon \f$ matches all suffixes of S1 - 
while (p2+1 > 0) { - uint64_t lb, rb; - // perform backward search on interval \f$ [i,j] \f$ - uint64_t size = backward_search(cst.csa, i, j, S2[p2], lb, rb); - if (size > 0) { - q = q + 1; - i = lb; j = rb; - p2 = p2 - 1; - } else if (i==0 and j == cst.csa.size()) { - p2 = p2 -1; - } else { - // map interval to a node of the cst and calculate parent - node_type p = cst.parent(cst.node(i, j)); - q = cst.depth(p); // update match length - i = cst.lb(p); // update left bound - j = cst.rb(p); // update right bound - } - cnt += q; - } -} - -// test the speed of find_close at random opening parentheses -template -void test_bps_find_close_and_enclose(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17) -{ - - uint64_t mask; -// uint64_t n = bps.size(); - int_vector<64> rands = get_rnd_positions(20, mask, bps.size()); - for (uint64_t i=0; i -void test_bps_find_open(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17) -{ - uint64_t mask; - uint64_t n = bps.size(); - int_vector<64> rands = get_rnd_positions(20, mask, n); - for (uint64_t i=0; i -void test_bps_double_enclose(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17) -{ - uint64_t mask; - uint64_t n = bps.size(); - int_vector<64> rands = get_rnd_positions(20, mask, bps.size()); - for (uint64_t i=0; i rands.size()) - pos = 0; - rands[2*i] = pos; - } - { - uint64_t pos = (rands[2*i]+1)%n; - while (!b[pos] and pos != rands[2*i]) // go forward until we get the next opening one - pos = (pos+1) % n; - rands[2*i+1] = pos; - } - } - uint64_t cnt = 0; - util::write_R_output("bps","double_enclose","begin",times, cnt); - for (uint64_t i=0; i +t_int_vec rnd_positions(uint8_t log_s, uint64_t& mask, uint64_t mod=0, uint64_t seed=17) +{ + mask = (1< 0) { + util::mod(rands, mod); + } + return rands; +} } // end namespace util diff --git a/lib/test_index_performance.cpp b/lib/test_index_performance.cpp deleted file mode 100644 index ba7fb1fe9..000000000 --- 
a/lib/test_index_performance.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "sdsl/test_index_performance.hpp" - -namespace sdsl -{ - -int_vector<64> get_rnd_positions(uint8_t log_s, uint64_t& mask, uint64_t mod, uint64_t seed) -{ - mask = (1< rands(1<0) { - util::mod(rands, mod); - } - return rands; -} - -}// end namespace diff --git a/test/CstByteTest.cpp b/test/CstByteTest.cpp index b7631542a..b93ea0f1f 100644 --- a/test/CstByteTest.cpp +++ b/test/CstByteTest.cpp @@ -183,7 +183,7 @@ TYPED_TEST(CstByteTest, LcaMethod) uint8_t log_m = 14; // create m/2 pairs of positions in [0..cst.csa.size()-1] typedef typename TypeParam::node_type node_type; - int_vector<64> rnd_pos = get_rnd_positions(log_m, mask, cst.csa.size()); + int_vector<64> rnd_pos = util::rnd_positions>(log_m, mask, cst.csa.size()); // test for random sampled nodes for (size_type i=0; i < rnd_pos.size()/2; ++i) { // get two children diff --git a/test/CstIntTest.cpp b/test/CstIntTest.cpp index ee85e9c7f..42d0b6afc 100644 --- a/test/CstIntTest.cpp +++ b/test/CstIntTest.cpp @@ -199,7 +199,7 @@ TYPED_TEST(CstIntTest, LcaMethod) uint8_t log_m = 14; // create m/2 pairs of positions in [0..cst.csa.size()-1] typedef typename TypeParam::node_type node_type; - int_vector<64> rnd_pos = get_rnd_positions(log_m, mask, cst.csa.size()); + int_vector<64> rnd_pos = util::rnd_positions>(log_m, mask, cst.csa.size()); // test for random sampled nodes for (size_type i=0; i < rnd_pos.size()/2; ++i) { // get two children From 1b201a7adde5997acfb6f5c2d7b6979d25876c4c Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Thu, 29 Aug 2013 09:56:03 +1000 Subject: [PATCH 28/29] Added benchmarks to build-test. 
--- benchmark/indexing_count/Makefile | 6 +++++- benchmark/indexing_extract/Makefile | 6 +++++- benchmark/indexing_locate/Makefile | 6 +++++- benchmark/rrr_vector/Makefile | 4 ++++ test/Makefile | 8 ++++++++ 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/benchmark/indexing_count/Makefile b/benchmark/indexing_count/Makefile index 0b7229c2b..734105835 100644 --- a/benchmark/indexing_count/Makefile +++ b/benchmark/indexing_count/Makefile @@ -116,9 +116,13 @@ $(BIN_DIR)/info_%: $(SRC_DIR)/info.cpp index.config include ../Make.download +clean-build: + @echo "Remove executables" + @rm -f $(QUERY_EXECS) $(BUILD_EXECS) $(INFO_EXECS) + clean: @echo "Remove executables and indexes" - @rm -f $(QUERY_EXECS) $(LOCATE_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \ + @rm -f $(QUERY_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \ $(INFO_FILES) $(INDEXES) $(BIN_DIR)/genpatterns cleanresults: diff --git a/benchmark/indexing_extract/Makefile b/benchmark/indexing_extract/Makefile index c69c8adf9..e732f103a 100644 --- a/benchmark/indexing_extract/Makefile +++ b/benchmark/indexing_extract/Makefile @@ -139,9 +139,13 @@ $(BIN_DIR)/info_%: $(SRC_DIR)/info.cpp index.config include ../Make.download +clean-build: + @echo "Remove executables" + @rm -f $(QUERY_EXECS) $(BUILD_EXECS) $(INFO_EXECS) + clean: @echo "Remove executables" - @rm -f $(QUERY_EXECS) $(LOCATE_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \ + @rm -f $(QUERY_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \ $(BIN_DIR)/genintervals cleanresults: diff --git a/benchmark/indexing_locate/Makefile b/benchmark/indexing_locate/Makefile index 61ed16873..2a311c5d3 100644 --- a/benchmark/indexing_locate/Makefile +++ b/benchmark/indexing_locate/Makefile @@ -139,9 +139,13 @@ $(BIN_DIR)/info_%: $(SRC_DIR)/info.cpp index.config include ../Make.download +clean-build: + @echo "Remove executables" + @rm -f $(QUERY_EXECS) $(BUILD_EXECS) $(INFO_EXECS) + clean: @echo "Remove executables" - @rm -f $(QUERY_EXECS) $(LOCATE_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \ + @rm -f 
$(QUERY_EXECS) $(BUILD_EXECS) $(INFO_EXECS) \ $(BIN_DIR)/pattern_random cleanresults: diff --git a/benchmark/rrr_vector/Makefile b/benchmark/rrr_vector/Makefile index 236e155c3..40a47a31a 100644 --- a/benchmark/rrr_vector/Makefile +++ b/benchmark/rrr_vector/Makefile @@ -67,6 +67,10 @@ results/%: include ../Make.download +clean-build: + @echo "Remove executables" + rm -f $(RRR_EXECS) + clean: rm -f $(RRR_EXECS) bin/generate_rnd_bitvector diff --git a/test/Makefile b/test/Makefile index 612554569..8f45b2bb2 100644 --- a/test/Makefile +++ b/test/Makefile @@ -75,10 +75,18 @@ test: bits-test \ build-test: $(EXECS) cd ../tutorial; make build-test cd ../examples; make build-test + cd ../benchmark/indexing_count; make + cd ../benchmark/indexing_locate; make + cd ../benchmark/indexing_extract; make + cd ../benchmark/rrr_vector; make build-test-clean: clean cd ../tutorial; make clean cd ../examples; make clean + cd ../benchmark/indexing_count; make clean-build + cd ../benchmark/indexing_locate; make clean-build + cd ../benchmark/indexing_extract; make clean-build + cd ../benchmark/rrr_vector; make clean-build generators: BitVectorGenerator.x IntVectorGenerator.x From 59a441d453eac9ccdb46c4b2e0ca20b23eccb028 Mon Sep 17 00:00:00 2001 From: Simon Gog Date: Thu, 29 Aug 2013 10:02:52 +1000 Subject: [PATCH 29/29] Removed old include. 
--- benchmark/rrr_vector/src/rrr_time_and_space.cpp | 1 - test/CstByteTest.cpp | 1 - test/CstIntTest.cpp | 1 - 3 files changed, 3 deletions(-) diff --git a/benchmark/rrr_vector/src/rrr_time_and_space.cpp b/benchmark/rrr_vector/src/rrr_time_and_space.cpp index 28fddfb9c..47bd38cfb 100644 --- a/benchmark/rrr_vector/src/rrr_time_and_space.cpp +++ b/benchmark/rrr_vector/src/rrr_time_and_space.cpp @@ -2,7 +2,6 @@ #include #include #include -#include using namespace std; using namespace sdsl; diff --git a/test/CstByteTest.cpp b/test/CstByteTest.cpp index b93ea0f1f..c61f0e448 100644 --- a/test/CstByteTest.cpp +++ b/test/CstByteTest.cpp @@ -1,6 +1,5 @@ #include "CstHelper.hpp" #include "sdsl/suffix_trees.hpp" -#include "sdsl/test_index_performance.hpp" #include "gtest/gtest.h" #include #include diff --git a/test/CstIntTest.cpp b/test/CstIntTest.cpp index 42d0b6afc..b4ccabe19 100644 --- a/test/CstIntTest.cpp +++ b/test/CstIntTest.cpp @@ -1,7 +1,6 @@ #include "sdsl/suffix_trees.hpp" #include "sdsl/lcp.hpp" #include "CstHelper.hpp" -#include "sdsl/test_index_performance.hpp" #include "gtest/gtest.h" #include #include // for rand()