-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Ksenia Krasheninnikova
committed
Sep 20, 2018
1 parent
276eb8c
commit 9b639fd
Showing
10 changed files
with
842 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
rootPath = ../ | ||
include ../include.mk | ||
|
||
libHeaders = $(wildcard inc/*) | ||
libSourcesAll = $(wildcard impl/*.cpp) | ||
libSources=$(subst impl/main.cpp,,${libSourcesAll}) | ||
|
||
cpp = h5c++ ${h5prefix} | ||
|
||
all : ${binPath}/hal2synteny | ||
|
||
${binPath}/hal2synteny : impl/main.cpp ${libPath}/halLib.a ${libPath}/halLiftover.a ${basicLibsDependencies} | ||
${cpp} ${cppflags} -std=c++11 -I${sonLibPath} -I${rootPath}/liftover/inc -I${rootPath}/api/inc -Iinc -Iimpl impl/main.cpp ${libPath}/halLib.a ${libPath}/halLiftover.a ${sonLibPath}/sonLib.a -o ${binPath}/hal2synteny | ||
|
||
clean: | ||
rm -f ${binPath}/hal2synteny |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
/* | ||
* To change this license header, choose License Headers in Project Properties. | ||
* To change this template file, choose Tools | Templates | ||
* and open the template in the editor. | ||
*/ | ||
|
||
|
||
#include "hal2psl.h" | ||
#include "psl.h" | ||
|
||
|
||
void hal::Hal2Psl::storePslResults(std::vector<PslBlock>& pslBlocks){ | ||
|
||
BedList::iterator i = _outBedLines.begin(); | ||
for (; i != _outBedLines.end(); ++i) | ||
{ | ||
if (_addExtraColumns == false) | ||
{ | ||
i->_extra.clear(); | ||
} | ||
makeUpPsl(i->_psl, i->_blocks, i->_strand, i->_start, i->_chrName, | ||
pslBlocks); | ||
} | ||
|
||
} | ||
std::vector<PslBlock> hal::Hal2Psl::convert2psl(hal::AlignmentConstPtr alignment, | ||
const hal::Genome* srcGenome, | ||
const hal::Genome* tgtGenome, | ||
const std::string srcChrom){ | ||
|
||
std::vector<PslBlock> pslBlocks; | ||
if (srcGenome->getNumSequences() > 0){ | ||
_srcGenome = srcGenome; | ||
_tgtGenome = tgtGenome; | ||
_coalescenceLimit = NULL; | ||
_traverseDupes = true; | ||
_addExtraColumns = false; | ||
_missedSet.clear(); | ||
_tgtSet.clear(); | ||
_tgtSet.insert(tgtGenome); | ||
hal::SequenceIteratorConstPtr seqIt = srcGenome->getSequenceIterator(); | ||
hal::SequenceIteratorConstPtr seqEnd = srcGenome->getSequenceEndIterator(); | ||
for (; !seqIt->equals(seqEnd); seqIt->toNext()) { | ||
_outBedLines.clear(); | ||
_srcSequence = seqIt->getSequence(); | ||
auto chrName = _srcSequence->getName(); | ||
if (srcChrom != chrName) {continue;} | ||
//else {std::cout << chrName << std::endl;} | ||
_bedLine._start = 0; | ||
_bedLine._end = _srcSequence->getSequenceLength(); | ||
_mappedBlocks.clear(); | ||
_outPSL = true; | ||
visitBegin(); | ||
liftInterval(_mappedBlocks); | ||
if (_mappedBlocks.size()) { | ||
assignBlocksToIntervals(); | ||
} | ||
//_outBedLines.sort(BedLineSrcLess()); | ||
storePslResults(pslBlocks); | ||
cleanResults(); | ||
} | ||
} | ||
return pslBlocks; | ||
} | ||
|
||
|
||
void hal::Hal2Psl::makeUpPsl(const std::vector<hal::PSLInfo>& vpsl, | ||
const std::vector<hal::BedBlock>& blocks, | ||
const char strand, | ||
const hal_index_t start, | ||
const std::string chrName, | ||
std::vector<PslBlock>& pslBlocks){ | ||
assert(vpsl.size() == 1); | ||
const hal::PSLInfo& psl = vpsl[0]; | ||
|
||
for (size_t i = 0; i < blocks.size(); ++i) { | ||
PslBlock b = PslBlock(); | ||
b.size = blocks[i]._length; | ||
b.qName = psl._qSeqName; | ||
b.qSize = psl._qSeqSize; | ||
|
||
b.qStart = psl._qBlockStarts[i] - psl._qChromOffset; | ||
b.qEnd = b.qStart + b.size; | ||
if (psl._qStrand == '-'){ | ||
auto posStart = b.qStart; | ||
b.qStart = psl._qSeqSize - posStart - b.size; | ||
b.qEnd = psl._qSeqSize - posStart; | ||
} | ||
|
||
b.tStart = blocks[i]._start + start; | ||
b.tEnd = b.tStart + b.size; | ||
if (strand == '-') | ||
{ | ||
auto posStart = b.tStart; | ||
b.tStart = psl._tSeqSize - posStart - b.size; | ||
b.tEnd = psl._tSeqSize - posStart; | ||
} | ||
std::stringstream ss; | ||
ss << psl._qStrand << strand; | ||
ss >> b.strand; | ||
//b.strand = psl._qStrand+'/'+strand; | ||
b.tSize = psl._tSeqSize; | ||
b.qSize = psl._qSeqSize; | ||
b.tName = chrName; | ||
pslBlocks.push_back(b); | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/* | ||
* To change this license header, choose License Headers in Project Properties. | ||
* To change this template file, choose Tools | Templates | ||
* and open the template in the editor. | ||
*/ | ||
|
||
#include "hal.h" | ||
#include "halCLParserInstance.h" | ||
|
||
#include "psl_io.cpp" | ||
#include "psl_merger.cpp" | ||
#include "hal2psl.cpp" | ||
|
||
|
||
static hal::CLParserPtr initParser() { | ||
auto optionsParser = hal::hdf5CLParserInstance(); | ||
optionsParser->addArgument("halFile", "input psl file from liftover"); | ||
optionsParser->addOption("queryGenome", "source genome", "\"\""); | ||
optionsParser->addOption("targetGenome", "reference genome name", "\"\""); | ||
optionsParser->addArgument("outPslPath", "output psl file ffor synteny blocks"); | ||
optionsParser->addOption("minBlockSize", | ||
"lower bound on synteny block length", | ||
5000); | ||
optionsParser->addOption("maxAnchorDistance", | ||
"upper bound on distance for syntenic psl blocks", | ||
5000); | ||
optionsParser->addOption("queryChromosome", | ||
"chromosome to infer synteny", | ||
"\"\""); | ||
return optionsParser; | ||
|
||
} | ||
|
||
const hal::Genome* openGenomeOrThrow(const hal::AlignmentConstPtr& alignment, | ||
const std::string& genomeName) { | ||
const hal::Genome* genome = alignment->openGenome(genomeName); | ||
if (genome == NULL){ | ||
throw hal_exception(std::string("Reference genome, ") + genomeName + | ||
", not found in alignment"); | ||
} | ||
return genome; | ||
} | ||
|
||
hal::AlignmentConstPtr openAlignmentOrThrow(const std::string& halFile, | ||
const hal::CLParserPtr& optionsParser){ | ||
|
||
auto alignment = hal::openHalAlignmentReadOnly(halFile, optionsParser); | ||
if (alignment->getNumGenomes() == 0){ | ||
throw hal_exception("hal alignment is empty"); | ||
} | ||
return alignment; | ||
} | ||
|
||
void validateInputOrThrow(const std::string& queryGenomeName, | ||
const std::string& targetGenomeName, | ||
const std::string& queryChromosome) { | ||
if (queryGenomeName == "\"\"" || targetGenomeName == "\"\"" || queryChromosome == "\"\""){ | ||
throw hal_exception("--queryGenome and --targetGenome and --queryChromosome must be" | ||
"specified"); | ||
} | ||
|
||
if (queryGenomeName == targetGenomeName){ | ||
throw hal_exception("--queryGenome and --targetGenome must be" | ||
"different"); | ||
} | ||
} | ||
|
||
|
||
|
||
int main(int argc, char *argv[]) { | ||
hal::CLParserPtr optionsParser = initParser(); | ||
std::string halFile; | ||
std::string outPslPath; | ||
std::string queryGenomeName; | ||
std::string targetGenomeName; | ||
std::string queryChromosome; | ||
hal_size_t minBlockSize; | ||
hal_size_t maxAnchorDistance; | ||
try { | ||
optionsParser->parseOptions(argc, argv); | ||
halFile = optionsParser->getArgument<std::string>("halFile"); | ||
queryGenomeName = optionsParser->getOption<std::string>("queryGenome"); | ||
targetGenomeName = optionsParser->getOption<std::string>("targetGenome"); | ||
outPslPath = optionsParser->getArgument<std::string>("outPslPath"); | ||
minBlockSize = optionsParser->getOption<hal_size_t>("minBlockSize"); | ||
maxAnchorDistance = optionsParser->getOption<hal_size_t>("maxAnchorDistance"); | ||
queryChromosome = optionsParser->getOption<std::string>("queryChromosome"); | ||
optionsParser->setDescription("convert psl alignments into synteny blocks"); | ||
} | ||
catch(std::exception& e) { | ||
std::cerr << e.what() << std::endl; | ||
optionsParser->printUsage(std::cerr); | ||
exit(1); | ||
} | ||
|
||
validateInputOrThrow(queryGenomeName,targetGenomeName,queryChromosome); | ||
try { | ||
auto alignment = openAlignmentOrThrow(halFile, optionsParser); | ||
auto targetGenome = openGenomeOrThrow(alignment, targetGenomeName); | ||
auto queryGenome = openGenomeOrThrow(alignment, queryGenomeName); | ||
|
||
|
||
auto hal2psl = hal::Hal2Psl(); | ||
//std::cout << "reading hal " << std::endl; | ||
auto blocks = hal2psl.convert2psl(alignment, queryGenome, | ||
targetGenome, queryChromosome); | ||
|
||
|
||
//std::cout << "merging " << blocks.size() << " blocks"<< std::endl; | ||
auto merged_blocks = dag_merge(blocks, | ||
minBlockSize, maxAnchorDistance); | ||
//std::cout << "writing psl " << std::endl; | ||
psl_io::write_psl(merged_blocks, outPslPath); | ||
} | ||
catch(std::exception& e) | ||
{ | ||
std::cerr << "Exception caught: " << e.what() << std::endl; | ||
return 1; | ||
} | ||
return 0; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/* | ||
* To change this license header, choose License Headers in Project Properties. | ||
* To change this template file, choose Tools | Templates | ||
* and open the template in the editor. | ||
*/ | ||
#include "psl_io.h" | ||
|
||
namespace psl_io { | ||
std::vector<std::string> split(const std::string &s, char delim) { | ||
std::stringstream ss(s); | ||
std::string item; | ||
std::vector<std::string> elems; | ||
while (std::getline(ss, item, delim)) { | ||
elems.push_back(std::move(item)); | ||
} | ||
return elems; | ||
} | ||
|
||
|
||
std::vector<PslBlock> get_blocks_set(const std::string psl) { | ||
std::ifstream input(psl); | ||
std::vector<PslBlock> blocks; | ||
for (std::string line; std::getline(input, line);) { | ||
auto cur_b = Psl(line).get_blocks(); | ||
blocks.insert(blocks.end(), cur_b.begin(), cur_b.end()); | ||
} | ||
return blocks; | ||
} | ||
|
||
std::vector<int> get_qInserts(const std::vector<PslBlock>& blocks) { | ||
std::vector<int> result; | ||
for (int i=0; i < int(blocks.size())-1; ++i) { | ||
auto dif = blocks[i+1].qStart - blocks[i].qEnd; | ||
if (dif > 0) { | ||
result.push_back(dif); | ||
} | ||
} | ||
return result; | ||
} | ||
|
||
std::vector<int> get_tInserts(const std::vector<PslBlock>& blocks) { | ||
std::vector<int> result; | ||
for (int i=0; i < int(blocks.size())-1; ++i) { | ||
auto dif = blocks[i+1].tStart - blocks[i].tEnd; | ||
if (dif > 0) { | ||
result.push_back(dif); | ||
//std::cout << blocks[i+1].tStart << " " << blocks[i].tEnd << " " << dif << std::endl; | ||
} | ||
} | ||
return result; | ||
} | ||
|
||
Psl construct_psl(std::vector<PslBlock> blocks) { | ||
auto psl = Psl(); | ||
psl.match = std::accumulate(blocks.begin(), blocks.end(), 0, | ||
[] (int total, PslBlock item) | ||
{ return total + item.qEnd - item.qStart; }); | ||
psl.misMatch = 0; | ||
psl.repMatch = 0; | ||
psl.nCount = 0; | ||
auto qInserts = get_qInserts(blocks); | ||
psl.qNumInsert = qInserts.size(); | ||
psl.qBaseInsert = std::accumulate(qInserts.begin(), qInserts.end(), 0); | ||
auto tInserts = get_tInserts(blocks); | ||
psl.tNumInsert = tInserts.size(); | ||
psl.tBaseInsert = std::accumulate(tInserts.begin(), tInserts.end(), 0); | ||
psl.qName = blocks[0].qName; | ||
psl.qSize = blocks[0].qSize; | ||
psl.qStart = blocks[0].qStart; | ||
psl.qEnd = blocks.back().qEnd; | ||
psl.tName = blocks[0].tName; | ||
psl.tSize = blocks[0].tSize; | ||
psl.strand = blocks[0].strand; | ||
psl.blockCount = blocks.size(); | ||
psl.blocks = blocks; | ||
//if (psl.strand == "++") { | ||
psl.tStart = blocks.front().tStart; | ||
psl.tEnd = blocks.back().tEnd; | ||
/*} | ||
else if (psl.strand == "+-") { | ||
psl.tEnd = psl.tSize - blocks[0].tStart; | ||
psl.tStart = psl.tSize - blocks.back().tEnd; | ||
//??for (auto b: psl.blocks) { | ||
// b.tStart = psl.tSize - b.tStart; | ||
//} | ||
//std::reverse(psl.blocks.begin(),psl.blocks.end()); | ||
}*/ | ||
return psl; | ||
} | ||
|
||
void write_psl(const std::vector<std::vector<PslBlock> >& merged_blocks, | ||
const std::string& outFilePath) { | ||
std::ofstream ofs; | ||
ofs.open (outFilePath, std::ofstream::out); | ||
for (auto path : merged_blocks){ | ||
auto psl = construct_psl(path); | ||
ofs << psl << std::endl; | ||
} | ||
ofs.close(); | ||
} | ||
} |
Oops, something went wrong.