Skip to content

Commit

Permalink
Merge branch 'diffbot-testing' into testing
Browse files Browse the repository at this point in the history
  • Loading branch information
gigablast committed Mar 21, 2016
2 parents 8922b8e + 136b884 commit 48398d0
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 7 deletions.
63 changes: 61 additions & 2 deletions Rdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,23 @@ bool Rdb::dumpTree ( int32_t niceness ) {
// bail if already dumping
//if ( m_dump.isDumping() ) return true;
if ( m_inDumpLoop ) return true;

// don't allow spiderdb and titledb to dump at same time
// it seems to cause corruption in rdbmem for some reason
// if ( m_rdbId == RDB_SPIDERDB && g_titledb.m_rdb.m_inDumpLoop )
// return true;
// if ( m_rdbId == RDB_TITLEDB && g_spiderdb.m_rdb.m_inDumpLoop )
// return true;
// ok, seems to happen if we are dumping any two rdbs at the same
// time. we end up missing tree nodes or something.
// for ( int32_t i = RDB_START ; i < RDB_PLACEDB ; i++ ) {
// Rdb *rdb = getRdbFromId ( i );
// if ( ! rdb )
// continue;
// if ( rdb->m_inDumpLoop )
// return true;
// }

// . if tree is saving do not dump it, that removes things from tree
// . i think this caused a problem messing up RdbMem before when
// both happened at once
Expand Down Expand Up @@ -1475,13 +1492,21 @@ bool Rdb::dumpCollLoop ( ) {
"memory.",base->m_files[m_fn]->getFilename());
base->buryFiles ( m_fn , m_fn+1 );
}
// if it was because a collection got deleted, keep going
if ( g_errno == ENOCOLLREC ) {
log("rdb: ignoring deleted collection and "
"continuing dump");
g_errno = 0;
goto keepGoing;
}
// game over, man
doneDumping();
// update this so we don't try too much and flood the log
// with error messages
s_lastTryTime = getTime();
return true;
}
keepGoing:
// advance for next round
m_dumpCollnum++;

Expand All @@ -1495,7 +1520,9 @@ bool Rdb::dumpCollLoop ( ) {
// skip if empty
if ( ! cr ) continue;
// skip if no recs in tree
if ( cr->m_treeCount == 0 ) continue;
// this is maybe causing us not to dump out all recs
// so comment this out
//if ( cr->m_treeCount == 0 ) continue;
// ok, it's good to dump
break;
}
Expand Down Expand Up @@ -2344,6 +2371,19 @@ bool Rdb::hasRoom ( RdbList *list , int32_t niceness ) {
m_lastReclaim = reclaimed;
}

// if we have data-less records, we do not use RdbMem, so
// return true at this point since there are enough tree nodes
//if ( dataSpace <= 0 ) return true;

// if rdbmem is already 90 percent full, just say no so when we
// dump to disk we have some room to add records that come in
// during the dump, and we have some room for RdbMem::freeDumpedMem()
// to fix things and realloc/move them within the rdb mem
// if ( m_mem.is90PercentFull () &&
// ! m_inDumpLoop &&
// m_rdbId != RDB_DOLEDB )
// return false;

// does m_mem have room for "dataSpace"?
if ( (int64_t)m_mem.getAvailMem() < dataSpace ) return false;
// otherwise, we do have room
Expand Down Expand Up @@ -3521,6 +3561,8 @@ int32_t Rdb::reclaimMemFromDeletedTreeNodes( int32_t niceness ) {
// start scanning the mem pool
char *p = m_mem.m_mem;
char *pend = m_mem.m_ptr1;

char *memEnd = m_mem.m_mem + m_mem.m_memSize;

char *dst = p;

Expand Down Expand Up @@ -3604,7 +3646,22 @@ int32_t Rdb::reclaimMemFromDeletedTreeNodes( int32_t niceness ) {
skipped++;
continue;
}
//
// corrupted? or breach of mem buf?
if ( sreq->isCorrupt() || dst + recSize > memEnd ) {
log("rdb: not readding corrupted doledb1 in scan. "
"deleting from tree.");
// a dup? sanity check
int32_t *nodePtr = (int32_t *)ht.getValue (&oldOffset);
if ( ! nodePtr ) {
log("rdb: strange. not in tree anymore.");
skipped++;
continue;
}
// delete node from doledb tree
m_tree.deleteNode3(*nodePtr,true);//true=freedata
skipped++;
continue;
}
//// re -add with the proper value now
//
// otherwise, copy it over if still in tree
Expand Down Expand Up @@ -3637,6 +3694,8 @@ int32_t Rdb::reclaimMemFromDeletedTreeNodes( int32_t niceness ) {
int32_t reclaimed = inUseOld - inUseNew;

if ( reclaimed < 0 ) { char *xx=NULL;*xx=0; }
if ( inUseNew < 0 ) { char *xx=NULL;*xx=0; }
if ( inUseNew > m_mem.m_memSize ) { char *xx=NULL;*xx=0; }

//if ( reclaimed == 0 && marked ) { char *xx=NULL;*xx=0;}

Expand Down
2 changes: 2 additions & 0 deletions RdbDump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -954,10 +954,12 @@ bool RdbDump::doneReadingForVerify ( ) {
bool s;
if(m_tree) {
s = m_tree->deleteList(m_collnum,m_list,true/*do balancing?*/);
log("dump: tree now has %i nodes",(int)m_tree->m_numUsedNodes);
}
else if(m_buckets) {
s = m_buckets->deleteList(m_collnum, m_list);
}

// problem?
if ( ! s && ! m_tried ) {
m_tried = true;
Expand Down
4 changes: 3 additions & 1 deletion RdbMem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,10 @@ void RdbMem::freeDumpedMem( RdbTree *tree ) {
// so allocData should return m_ptr2 guys
char *newData = (char *)allocData(NULL,size,0);
if ( ! newData ) {
int32_t cn = 0;
if ( tree->m_collnums ) cn = tree->m_collnums[i];
log("rdbmem: failed to alloc %i "
"bytes node %i",(int)size,(int)i);
"bytes node %i (cn=%i)",(int)size,(int)i,(int)cn);
continue;
}
// debug test
Expand Down
4 changes: 2 additions & 2 deletions RdbTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3209,8 +3209,8 @@ void RdbTree::cleanTree ( ) { // char **bases ) {
m_collnums[i] < max &&
g_collectiondb.m_recs[m_collnums[i]] ) continue;
// if it is negative, remove it, that is weird corruption
if ( m_collnums[i] < 0 )
deleteNode3 ( i , true );
// if ( m_collnums[i] < 0 )
// deleteNode3 ( i , true );
// remove it otherwise
// don't actually remove it!!!! in case collection gets
// moved accidentally.
Expand Down
6 changes: 4 additions & 2 deletions Spider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4482,10 +4482,12 @@ bool SpiderColl::scanListForWinners ( ) {
m_cr->m_spiderCorruptCount++;
continue;
}
if ( sreq->m_dataSize > (int32_t)sizeof(SpiderRequest) ) {
if ( sreq->m_dataSize > (int32_t)sizeof(SpiderRequest) ||
sreq->m_dataSize < 0 ) {
if ( m_cr->m_spiderCorruptCount == 0 )
log("spider: got corrupt 11 spiderRequest in "
"scan because rectoobig u=%s (cn=%"INT32")"
"scan because size=%i u=%s (cn=%"INT32")"
,(int)sreq->m_dataSize
,sreq->m_url,(int32_t)m_collnum);
m_cr->m_spiderCorruptCount++;
continue;
Expand Down
3 changes: 3 additions & 0 deletions Url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ void Url::set ( char *t , int32_t tlen , bool addWWW , bool stripSessionId ,
int32_t anchorLen = 0;
for ( int32_t i = 0 ; i < tlen ; i++ ) {
if ( t[i] != '#' ) continue;
// ignore anchor if a ! follows it. 'google hash bang hack'
// which breaks the web and is now deprecated, but, there it is
if ( i+1<tlen && t[i+1] == '!' ) continue;
anchorPos = i;
anchorLen = tlen - i;
if ( stripPound )
Expand Down

0 comments on commit 48398d0

Please sign in to comment.