Skip to content

Commit

Permalink
added warning about merging with something above cutoff to cluster. w…
Browse files Browse the repository at this point in the history
…orking on chimeras
  • Loading branch information
westcott committed Feb 23, 2010
1 parent 81276c2 commit 9c23307
Show file tree
Hide file tree
Showing 23 changed files with 265 additions and 102 deletions.
3 changes: 2 additions & 1 deletion bellerophon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Bellerophon::Bellerophon(string name, string o) {
}

//***************************************************************************************************************
void Bellerophon::print(ostream& out) {
void Bellerophon::print(ostream& out, ostream& outAcc) {
try {
int above1 = 0;
out << "Name\tScore\tLeft\tRight\t" << endl;
Expand All @@ -38,6 +38,7 @@ void Bellerophon::print(ostream& out) {
//calc # of seqs with preference above 1.0
if (pref[i].score[0] > 1.0) {
above1++;
outAcc << pref[i].name << endl;
mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
}
Expand Down
2 changes: 1 addition & 1 deletion bellerophon.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Bellerophon : public Chimera {
~Bellerophon() {};

int getChimeras();
void print(ostream&);
void print(ostream&, ostream&);

void setCons(string){};
void setQuantiles(string) {};
Expand Down
3 changes: 2 additions & 1 deletion ccode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void Ccode::printHeader(ostream& out) {
out << "For full window mapping info refer to " << mapInfo << endl << endl;
}
//***************************************************************************************************************
void Ccode::print(ostream& out) {
void Ccode::print(ostream& out, ostream& outAcc) {
try {

mothurOutEndLine();
Expand Down Expand Up @@ -112,6 +112,7 @@ void Ccode::print(ostream& out) {

if (results) {
mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
outAcc << querySeq->getName() << endl;
}

//free memory
Expand Down
2 changes: 1 addition & 1 deletion ccode.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Ccode : public Chimera {
~Ccode();

int getChimeras(Sequence* query);
void print(ostream&);
void print(ostream&, ostream&);
void printHeader(ostream&);
private:

Expand Down
10 changes: 4 additions & 6 deletions chimera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ string Chimera::createFilter(vector<Sequence*> seqs, float t) {
vector<int> t; t.resize(seqs[0]->getAligned().length(), 0);
vector<int> g; g.resize(seqs[0]->getAligned().length(), 0);
vector<int> c; c.resize(seqs[0]->getAligned().length(), 0);

filterString = (string(seqs[0]->getAligned().length(), '1'));

//for each sequence
for (int i = 0; i < seqs.size(); i++) {

string seqAligned = seqs[i]->getAligned();

for (int j = 0; j < seqAligned.length(); j++) {
//if this spot is a gap
if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; }
Expand All @@ -47,10 +47,8 @@ string Chimera::createFilter(vector<Sequence*> seqs, float t) {
if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; }

else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) { filterString[i] = '0'; numColRemoved++; }
//cout << "a = " << a[i] << " t = " << t[i] << " g = " << g[i] << " c = " << c[i] << endl;
cout << "a = " << a[i] << " t = " << t[i] << " g = " << g[i] << " c = " << c[i] << endl;
}

//cout << "filter = " << filterString << endl;

mothurOut("Filter removed " + toString(numColRemoved) + " columns."); mothurOutEndLine();
return filterString;
Expand Down Expand Up @@ -95,7 +93,7 @@ vector<Sequence*> Chimera::readSeqs(string file) {
openInputFile(file, in);
vector<Sequence*> container;
int count = 0;
int length = 0;
length = 0;
unaligned = false;

//read in seqs and store in vector
Expand Down
5 changes: 3 additions & 2 deletions chimera.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class Chimera {
virtual void setTemplateSeqs(vector<Sequence*> t) { templateSeqs = t; }
virtual bool getUnaligned() { return unaligned; }
virtual void setTemplateFile(string t) { templateFileName = t; }
virtual int getLength() { return length; }

virtual void setCons(string){};
virtual void setQuantiles(string){};
Expand All @@ -127,14 +128,14 @@ class Chimera {
virtual void printHeader(ostream&){};
virtual int getChimeras(Sequence*){ return 0; }
virtual int getChimeras(){ return 0; }
virtual void print(ostream&){};
virtual void print(ostream&, ostream&){};


protected:

vector<Sequence*> templateSeqs;
bool filter, correction, svg, unaligned;
int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters;
int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, length;
float divR;
string seqMask, quanfile, filterString, name, outputDir, templateFileName;
Sequence* getSequence(string); //find sequence from name
Expand Down
2 changes: 1 addition & 1 deletion chimeracheckrdp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ChimeraCheckRDP::~ChimeraCheckRDP() {
}
}
//***************************************************************************************************************
void ChimeraCheckRDP::print(ostream& out) {
void ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
try {

mothurOut("Processing: " + querySeq->getName()); mothurOutEndLine();
Expand Down
2 changes: 1 addition & 1 deletion chimeracheckrdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ChimeraCheckRDP : public Chimera {
~ChimeraCheckRDP();

int getChimeras(Sequence*);
void print(ostream&);
void print(ostream&, ostream&);
void doPrep();

private:
Expand Down
71 changes: 47 additions & 24 deletions chimeraseqscommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,15 +311,18 @@ int ChimeraSeqsCommand::execute(){
chimera->setIters(iters);
chimera->setTemplateFile(templatefile);


string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
string accnosFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".accnos";


vector<Sequence*> templateSeqs;
if ((method != "bellerophon") && (method != "chimeracheck")) {
templateSeqs = chimera->readSeqs(templatefile);
if (chimera->getUnaligned()) {
mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine();
mothurOut("Your template sequences are different lengths, please correct."); mothurOutEndLine();
//free memory
for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
delete chimera;
return 0;
}

Expand All @@ -329,26 +332,31 @@ int ChimeraSeqsCommand::execute(){
}else if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it
chimera->getChimeras();

string outputFName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
ofstream out;
openOutputFile(outputFName, out);
openOutputFile(outputFileName, out);

chimera->print(out);
ofstream out2;
openOutputFile(accnosFileName, out2);

chimera->print(out, out2);
out.close();
out2.close();

return 0;
}

//some methods need to do prep work before processing the chimeras
chimera->doPrep();

templateSeqsLength = chimera->getLength();

ofstream outHeader;
string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader";
openOutputFile(tempHeader, outHeader);

chimera->printHeader(outHeader);
outHeader.close();

string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";

//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
Expand All @@ -360,7 +368,7 @@ int ChimeraSeqsCommand::execute(){

lines.push_back(new linePair(0, numSeqs));

driver(lines[0], outputFileName, fastafile);
driver(lines[0], outputFileName, fastafile, accnosFileName);

}else{
vector<int> positions;
Expand Down Expand Up @@ -391,14 +399,19 @@ int ChimeraSeqsCommand::execute(){
}


createProcesses(outputFileName, fastafile);
createProcesses(outputFileName, fastafile, accnosFileName);

rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());

rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());

//append alignment and report files
for(int i=1;i<processors;i++){
appendOutputFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());

appendOutputFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());

}
}

Expand All @@ -409,21 +422,23 @@ int ChimeraSeqsCommand::execute(){
inFASTA.close();
lines.push_back(new linePair(0, numSeqs));

driver(lines[0], outputFileName, fastafile);
driver(lines[0], outputFileName, fastafile, accnosFileName);
#endif

//mothurOut("Output File Names: ");
//if ((filter) && (method == "bellerophon")) { mothurOut(
//if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
// else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }

appendOutputFiles(tempHeader, outputFileName);

remove(outputFileName.c_str());
rename(tempHeader.c_str(), outputFileName.c_str());

for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }

for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
delete chimera;

if (method == "chimeracheck") { mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); }
if (method == "chimeracheck") { remove(accnosFileName.c_str()); mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine(); }

mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); mothurOutEndLine();

Expand All @@ -436,11 +451,14 @@ int ChimeraSeqsCommand::execute(){
}
}//**********************************************************************************************************************

int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename){
int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename, string accnos){
try {
ofstream out;
openOutputFile(outputFName, out);

ofstream out2;
openOutputFile(accnos, out2);

ifstream inFASTA;
openInputFile(filename, inFASTA);

Expand All @@ -451,12 +469,16 @@ int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filena
Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);

if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file

//find chimeras
chimera->getChimeras(candidateSeq);

if ((candidateSeq->getAligned().length() != templateSeqsLength) && (method != "chimeracheck")) { //chimeracheck does not require seqs to be aligned
mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); mothurOutEndLine();
}else{
//find chimeras
chimera->getChimeras(candidateSeq);

//print results
chimera->print(out);
//print results
chimera->print(out, out2);
}
}
delete candidateSeq;

Expand All @@ -467,6 +489,7 @@ int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filena
if((line->numSeqs) % 100 != 0){ mothurOut("Processing sequence: " + toString(line->numSeqs)); mothurOutEndLine(); }

out.close();
out2.close();
inFASTA.close();

return 1;
Expand All @@ -479,7 +502,7 @@ int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filena

/**************************************************************************************************/

void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename) {
void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename, string accnos) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
Expand All @@ -493,7 +516,7 @@ void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename)
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
Expand All @@ -520,7 +543,7 @@ void ChimeraSeqsCommand::appendOutputFiles(string temp, string filename) {
ifstream input;

openOutputFileAppend(temp, output);
openInputFile(filename, input);
openInputFile(filename, input, "noerror");

while(char c = input.get()){
if(input.eof()) { break; }
Expand Down
6 changes: 3 additions & 3 deletions chimeraseqscommand.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ class ChimeraSeqsCommand : public Command {
vector<int> processIDS; //processid
vector<linePair*> lines;

int driver(linePair*, string, string);
void createProcesses(string, string);
int driver(linePair*, string, string, string);
void createProcesses(string, string, string);
void appendOutputFiles(string, string);

bool abort;
string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir, search;
bool filter, correction, svg, printAll, realign;
int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs;
int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
Chimera* chimera;

Expand Down
3 changes: 2 additions & 1 deletion chimeraslayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ void ChimeraSlayer::printHeader(ostream& out) {
out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
}
//***************************************************************************************************************
void ChimeraSlayer::print(ostream& out) {
void ChimeraSlayer::print(ostream& out, ostream& outAcc) {
try {
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
Expand All @@ -130,6 +130,7 @@ void ChimeraSlayer::print(ostream& out) {
if (chimeraFlag == "yes") {
if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
outAcc << querySeq->getName() << endl;
}
}

Expand Down
2 changes: 1 addition & 1 deletion chimeraslayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ChimeraSlayer : public Chimera {
~ChimeraSlayer();

int getChimeras(Sequence*);
void print(ostream&);
void print(ostream&, ostream&);
void printHeader(ostream&);
void doPrep();

Expand Down
Loading

0 comments on commit 9c23307

Please sign in to comment.