Skip to content

Commit

Permalink
fix read seedmers iorder
Browse files Browse the repository at this point in the history
  • Loading branch information
AlanZhangUCSC committed Nov 7, 2024
1 parent 1066e14 commit 9827b11
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 19 deletions.
7 changes: 6 additions & 1 deletion src/mgsr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,11 @@ namespace mgsr {
}
}

int32_t extend(int64_t& curEnd, mgsr::Read& curRead, int rev, std::map<int32_t, mgsr::positionInfo>& positionMap, std::unordered_map<size_t, std::set<std::map<int32_t, positionInfo>::iterator, IteratorComparator>>& hashToPositionsMap, int32_t qidx, std::map<int32_t, mgsr::positionInfo>::const_iterator refPositionIt, int32_t c) {
int32_t extend(
int64_t& curEnd, mgsr::Read& curRead, int rev, std::map<int32_t, mgsr::positionInfo>& positionMap,
std::unordered_map<size_t, std::set<std::map<int32_t, positionInfo>::iterator, IteratorComparator>>& hashToPositionsMap,
int32_t qidx, std::map<int32_t, mgsr::positionInfo>::const_iterator refPositionIt, int32_t c
) {
if (qidx == curRead.seedmersList.size() - 1) return c;
const auto& [nhash, nqbeg, nqend, nqrev, nqidx] = curRead.seedmersList[qidx+1];

Expand Down Expand Up @@ -516,6 +520,7 @@ namespace mgsr {
const auto& qend1 = last1.endPos;
const auto& qbeg2 = first2.begPos;
const auto& qend2 = last2.endPos;

auto rbeg1 = degapGlobal(rglobalbeg1, degapCoordIndex);
auto rend1 = degapGlobal(rglobalend1, degapCoordIndex);
auto rbeg2 = degapGlobal(rglobalbeg2, degapCoordIndex);
Expand Down
59 changes: 41 additions & 18 deletions src/pmi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1318,7 +1318,8 @@ void buildOrPlace(Step method, mutableTreeData& data, std::vector<std::pair<std:
std::vector<std::pair<bool, std::pair<int64_t, int64_t>>> gapRunBacktracks;
std::vector<std::pair<bool, std::pair<int64_t, int64_t>>> gapRunBlocksBacktracks;
std::vector<std::pair<bool, int64_t>> inverseBlockIdsBacktrack;
std::map<int64_t, int64_t> coordIndex;
std::map<int64_t, int64_t> degapCoordIndex;
std::map<int64_t, int64_t> regapCoordIndex;
std::vector<tupleRange> recompRanges;
blockExists_t oldBlockExists = data.blockExists;
blockStrand_t oldBlockStrand = data.blockStrand;
Expand Down Expand Up @@ -1392,7 +1393,7 @@ void buildOrPlace(Step method, mutableTreeData& data, std::vector<std::pair<std:
invertGapMap(gapMap, blockRanges[blockId], gapRunBlocksBacktracks, gapMapUpdates);
}

// makeCoordIndex(coordIndex, gapMap, blockRanges);
// makeCoordIndex(degapCoordIndex, regapCoordIndex, gapMap, blockRanges);

for (auto it = gapRunBlocksBacktracks.rbegin(); it != gapRunBlocksBacktracks.rend(); ++it) {
const auto& [del, range] = *it;
Expand Down Expand Up @@ -1672,7 +1673,7 @@ void buildOrPlace(Step method, mutableTreeData& data, std::vector<std::pair<std:
}
}

// makeCoordIndex(coordIndex, gapMap, blockRanges);
// makeCoordIndex(degapCoordIndex, regapCoordIndex, gapMap, blockRanges);

auto currBasePositions = perNodeSeedMutations_Index[dfsIndex].getBasePositions();
auto currPerPosMasks = perNodeSeedMutations_Index[dfsIndex].getPerPosMasks();
Expand Down Expand Up @@ -1799,15 +1800,15 @@ void buildOrPlace(Step method, mutableTreeData& data, std::vector<std::pair<std:
std::cout << node->identifier << " place syncmers: ";
for (int i = 0; i < onSeedsHash.size(); i++) {
if (onSeedsHash[i].has_value()) {
std::cout << mgsr::degapGlobal(i, coordIndex) << ":" << onSeedsHash[i].value().hash << "|" << onSeedsHash[i].value().isReverse << " ";
std::cout << mgsr::degapGlobal(i, degapCoordIndex) << ":" << onSeedsHash[i].value().hash << "|" << onSeedsHash[i].value().isReverse << " ";
}
}
std::cout << std::endl;
} else {
std::cout << node->identifier << " build syncmers: ";
for (int i = 0; i < onSeedsHash.size(); i++) {
if (onSeedsHash[i].has_value()) {
std::cout << mgsr::degapGlobal(i, coordIndex) << ":" << onSeedsHash[i].value().hash << "|" << onSeedsHash[i].value().isReverse << " ";
std::cout << mgsr::degapGlobal(i, degapCoordIndex) << ":" << onSeedsHash[i].value().hash << "|" << onSeedsHash[i].value().isReverse << " ";
}
}
std::cout << std::endl;
Expand All @@ -1823,15 +1824,15 @@ void buildOrPlace(Step method, mutableTreeData& data, std::vector<std::pair<std:
// }
// std::cout << std::endl;


// std::cout << node->identifier << " true syncmers: ";
// auto seq = seed_annotated_tree::getStringAtNode(node, T, false);
// auto syncmers = extractSyncmers(seq, seedK, seedS, seedT, open);
// for (const auto &[kmer, hash, isReverse, startPos] : syncmers) {
// // std::cout << startPos << ":" << hash << " ";
// std::cout << startPos << ":" << hash << "|" << isReverse << " ";
// }
// std::cout << std::endl;
if (method == Step::BUILD) {
std::cout << node->identifier << " true syncmers: ";
auto seq = seed_annotated_tree::getStringAtNode(node, T, false);
auto syncmers = extractSyncmers(seq, seedK, seedS, seedT, open);
for (const auto &[kmer, hash, isReverse, startPos] : syncmers) {
std::cout << startPos << ":" << hash << "|" << isReverse << " ";
}
std::cout << std::endl;
}
}


Expand Down Expand Up @@ -2713,6 +2714,7 @@ void place_per_read_DFS(

if (debug) {
// print out seeds at node
std::cout << std::endl;
if (method == Step::PLACE) {
std::cout << node->identifier << " place seedmers: ";
for (const auto& seedmer : positionMap) {
Expand All @@ -2725,6 +2727,23 @@ void place_per_read_DFS(
}
}

for (const auto& [hash, positions] : hashToPositionsMap) {
if (positions.size() == 0) {
std::cout << "Error: hashToPositionsMap contains empty positions" << std::endl;
exit(1);
}
for (const auto& position : positions) {
if (position->second.fhash != position->second.rhash) {
if (std::min(position->second.fhash, position->second.rhash) != hash) {
std::cout << "Error: min(fhash, rhash) != hash" << std::endl;
exit(1);
}
} else {
std::cout << "Error: fhash == rhash" << std::endl;
exit(1);
}
}
}
std::cout << std::endl;


Expand All @@ -2735,6 +2754,7 @@ void place_per_read_DFS(
std::cout << startPos << "-" << endPos << ":" << hash << "|" << isReverse << " ";
}
std::cout << std::endl;

}


Expand Down Expand Up @@ -2956,7 +2976,6 @@ void place_per_read_DFS(
readDuplicatesBackTrack.emplace_back(std::make_pair(readIndex, std::move(curReadDuplicatesBackTrack)));
}
}

int64_t pseudoScore = getPseudoScore(curRead, seedmersIndex, degapCoordIndex, regapCoordIndex, maximumGap, minimumCount, minimumScore, rescueDuplicates, rescueDuplicatesThreshold, dfsIndex);
double pseudoProb = pow(errorRate, curRead.seedmersList.size() - pseudoScore) * pow(1 - errorRate, pseudoScore);
allScores[node->identifier][readIndex] = {pseudoScore, pseudoProb};
Expand Down Expand Up @@ -3183,11 +3202,13 @@ void seedmersFromFastq(
reverseRolledHash = rol(reverseRolledHash, k) ^ std::get<0>(syncmers[l-i-1]);
}

int32_t iorder = 0;
if (forwardRolledHash != reverseRolledHash) {
size_t minHash = std::min(forwardRolledHash, reverseRolledHash);
curRead.uniqueSeedmers.emplace(minHash, std::vector<int32_t>{0});
curRead.seedmersList.emplace_back(mgsr::readSeedmer{
minHash, std::get<3>(syncmers[0]), std::get<3>(syncmers[l-1])+k-1, reverseRolledHash < forwardRolledHash, 0});
minHash, std::get<3>(syncmers[0]), std::get<3>(syncmers[l-1])+k-1, reverseRolledHash < forwardRolledHash, iorder});
++iorder;
}

// rest of kminmer
Expand All @@ -3207,11 +3228,13 @@ void seedmersFromFastq(
if (uniqueSeedmersIt == curRead.uniqueSeedmers.end()) {
curRead.uniqueSeedmers.emplace(minHash, std::vector<int32_t>{i});
curRead.seedmersList.emplace_back(mgsr::readSeedmer{
minHash, std::get<3>(syncmers[i]), std::get<3>(syncmers[i+l-1])+k-1, reverseRolledHash < forwardRolledHash, i});
minHash, std::get<3>(syncmers[i]), std::get<3>(syncmers[i+l-1])+k-1, reverseRolledHash < forwardRolledHash, iorder});
++iorder;
} else {
uniqueSeedmersIt->second.push_back(i);
curRead.seedmersList.emplace_back(mgsr::readSeedmer{
uniqueSeedmersIt->first, std::get<3>(syncmers[i]), std::get<3>(syncmers[i+l-1])+k-1, reverseRolledHash < forwardRolledHash, i});
uniqueSeedmersIt->first, std::get<3>(syncmers[i]), std::get<3>(syncmers[i+l-1])+k-1, reverseRolledHash < forwardRolledHash, iorder});
++iorder;
}
}
}
Expand Down

0 comments on commit 9827b11

Please sign in to comment.