diff --git a/include/color_classes/differential.hpp b/include/color_classes/differential.hpp index a4b005b..d51f26f 100644 --- a/include/color_classes/differential.hpp +++ b/include/color_classes/differential.hpp @@ -301,12 +301,10 @@ struct differential { num_metadata += it.position() - prev_position; prev_position = it.position(); - uint64_t curr_differential_list_size = 0; for (uint64_t i = 0; i < size; i++) { util::read_delta(it); uint64_t delta_size = it.position() - prev_position; num_differential_lists += delta_size; - curr_differential_list_size += delta_size; prev_position = it.position(); } @@ -341,6 +339,47 @@ struct differential { std::cout << std::endl; } + void dump(std::ofstream& os) const { + /* header info */ + const uint64_t num_differential_color_sets = num_partitions(); + const uint64_t num_color_lists = num_color_sets(); + os << "num_differential_color_sets " << num_differential_color_sets << '\n'; + os << "num_color_sets " << num_color_lists << '\n'; + + uint64_t last_representative = m_representative_offsets.access(num_partitions()); + + /* dump meta-color lists */ + uint64_t old_representative_begin = num_partitions(); + for (uint64_t representative_id = 0, color_set_id = 0; color_set_id != num_color_lists; ++color_set_id) { + + const uint64_t representative_begin = m_representative_offsets.access(m_clusters.rank(color_set_id)); + if (representative_begin != old_representative_begin){ + const std::vector representative_set = read_representative_set(representative_begin); + const uint64_t list_size = representative_set.size(); + os << "representative_color_list_" << representative_id++ << ' ' << list_size << ' '; + + for (uint64_t i = 0; i != list_size; ++i) { + os << representative_set[i]; + if (i != list_size - 1) os << ' '; + } + os << '\n'; + + old_representative_begin = representative_begin; + } + + const uint64_t differential_begin = m_list_offsets.access(color_set_id) + last_representative; + const std::vector differential_set = read_differential_set(differential_begin); + const uint64_t list_size = differential_set.size(); + os << "differential_color_list_" << color_set_id << ' ' << list_size << ' '; + + for (uint64_t i = 0; i != list_size; ++i) { + os << differential_set[i]; + if (i != list_size - 1) os << ' '; + } + os << '\n'; + } + } + template void visit(Visitor& visitor) { visitor.visit(m_num_docs); @@ -357,6 +396,35 @@ struct differential { std::vector m_colors; ranked_bit_vector m_clusters; + + std::vector read_representative_set(uint64_t begin) const { + auto it = bit_vector_iterator(m_colors.data(), m_colors.size(), begin); + uint64_t size = util::read_delta(it); + + if (size == 0) return {}; + + std::vector set(size); + set[0] = util::read_delta(it); + for(uint64_t i = 1; i != size; ++i){ + set[i] = set[i-1] + util::read_delta(it) + 1; + } + return set; + } + + std::vector read_differential_set(uint64_t begin) const { + auto it = bit_vector_iterator(m_colors.data(), m_colors.size(), begin); + uint64_t size = util::read_delta(it); + util::read_delta(it); // skip color_set size + + if (size == 0) return {}; + + std::vector set(size); + set[0] = util::read_delta(it); + for(uint64_t i = 1; i != size; ++i){ + set[i] = set[i-1] + util::read_delta(it) + 1; + } + return set; + } }; } // namespace fulgor diff --git a/tools/util.cpp b/tools/util.cpp index 29b7521..506f3ba 100644 --- a/tools/util.cpp +++ b/tools/util.cpp @@ -85,6 +85,12 @@ int dump_colors(int argc, char** argv) { if (sshash::util::ends_with(index_filename, constants::meta_colored_fulgor_filename_extension)) { dump_colors(index_filename, output_filename); + } else if (sshash::util::ends_with(index_filename, + constants::meta_diff_colored_fulgor_filename_extension)) { + dump_colors(index_filename, output_filename); + } else if (sshash::util::ends_with(index_filename, + constants::diff_colored_fulgor_filename_extension)) { + dump_colors(index_filename, output_filename); } else if (sshash::util::ends_with(index_filename, constants::fulgor_filename_extension)) { dump_colors(index_filename, output_filename); } else {