From 931a1c4bc6c9a63b4f1e26f85aa3704d295d9e9d Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 17 Nov 2014 18:13:36 -0800 Subject: [PATCH] good checkpoint. quite a few fixes. --- Accessdb.cpp | 6 +- Address.cpp | 22 +++--- AutoBan.cpp | 4 +- Bits.cpp | 2 +- CatRec.h | 2 +- Catdb.cpp | 2 +- Clusterdb.cpp | 2 +- Clusterdb.h | 2 +- Collectiondb.cpp | 10 +-- Collectiondb.h | 4 +- Conf.cpp | 2 +- Dates.cpp | 38 +++++----- Diff.cpp | 18 ++--- Diff.h | 2 +- DiskPageCache.cpp | 6 +- Dns.cpp | 2 +- Entities.cpp | 2 +- Errno.cpp | 2 +- Errno.h | 2 +- Facebook.cpp | 6 +- File.cpp | 2 +- File.h | 2 +- HashTableX.cpp | 2 +- Hostdb.h | 2 +- HttpRequest.cpp | 2 +- HttpServer.cpp | 6 +- IndexTable.cpp | 4 +- IndexTable2.cpp | 12 +-- Language.cpp | 8 +- LanguageIdentifier.cpp | 2 +- Linkdb.cpp | 142 ++++++++++++++++++++++++++---------- Linkdb.h | 54 +++++++++++--- Log.cpp | 2 +- Loop.cpp | 8 +- Makefile | 8 +- Matches.cpp | 6 +- Mem.cpp | 2 +- MemPoolTree.cpp | 4 +- Msg0.cpp | 8 +- Msg13.cpp | 2 +- Msg17.cpp | 2 +- Msg2.cpp | 2 +- Msg20.cpp | 2 +- Msg20.h | 2 +- Msg24.cpp | 38 +++++----- Msg3.cpp | 2 +- Msg39.cpp | 6 +- Msg3a.cpp | 2 +- Msg4.cpp | 4 +- Msg40.cpp | 50 +++++++------ Msg40.h | 2 +- Msg5.cpp | 14 ++-- Msg51.cpp | 2 +- Msge0.cpp | 8 +- Msge1.cpp | 2 +- Multicast.cpp | 16 ++-- PageBasic.cpp | 11 ++- PageCrawlBot.cpp | 4 +- PageEvents.cpp | 10 +-- PageGet.cpp | 2 +- PageOverview.cpp | 12 +-- PageParser.cpp | 4 +- PageResults.cpp | 22 +++--- PageStatsdb.cpp | 2 +- PageSubmit.cpp | 2 +- PageTurk.cpp | 2 +- Pages.cpp | 2 +- Parms.cpp | 32 ++++---- PingServer.cpp | 2 +- Posdb.cpp | 12 +-- Proxy.cpp | 6 +- QAClient.cpp | 2 +- Query.cpp | 8 +- Query.h | 2 +- Rdb.cpp | 6 +- RdbBase.cpp | 12 +-- RdbBuckets.cpp | 2 +- RdbCache.cpp | 32 +++++--- RdbDump.cpp | 6 +- RdbList.cpp | 4 +- RdbMap.cpp | 6 +- RdbMem.cpp | 2 +- RdbMerge.cpp | 2 +- RdbMerge.h | 2 +- RdbTree.cpp | 12 +-- Repair.cpp | 4 +- Sections.cpp | 42 +++++------ Sections.h | 2 +- Speller.cpp | 6 +- Spider.cpp | 32 ++++---- 
SpiderProxy.cpp | 4 +- Statsdb.cpp | 5 +- StopWords.cpp | 2 +- Summary.cpp | 8 +- Syncdb.cpp | 2 +- Tagdb.cpp | 36 ++++----- TcpServer.cpp | 8 +- Thesaurus.cpp | 4 +- Threads.cpp | 6 +- Timedb.cpp | 2 +- Title.cpp | 8 +- TopTree.cpp | 4 +- TopTree.h | 2 +- UdpServer.cpp | 6 +- UdpServer.h | 4 +- UdpSlot.h | 2 +- Url.cpp | 2 +- Users.cpp | 4 +- Vector.cpp | 2 +- Weights.cpp | 6 +- Wiki.cpp | 2 +- XmlDoc.cpp | 162 +++++++++++++++++++++-------------------- XmlNode.cpp | 2 +- animate.cpp | 2 +- blaster2.cpp | 2 +- dmozparse.cpp | 2 +- fastIndexTable.cpp | 2 +- fctypes.cpp | 4 +- iconv.h | 2 +- iostream.h | 2 +- linkspam.cpp | 2 +- main.cpp | 10 +-- streambuf.h | 2 +- 123 files changed, 654 insertions(+), 536 deletions(-) diff --git a/Accessdb.cpp b/Accessdb.cpp index bbdb193c2..8117da4f4 100644 --- a/Accessdb.cpp +++ b/Accessdb.cpp @@ -740,15 +740,15 @@ void handleRequestaa ( UdpSlot *slot , int32_t niceness ) { timestamp -= 7*86400*1000; } - int64_t int32_tTime = 86400LL*365LL*1000LL*10; // 10 years in millisecs + int64_t longTime = 86400LL*365LL*1000LL*10; // 10 years in millisecs // use the 2nd type of key... those have widgetid first and then // the timestamp key128_t startKey = g_accessdb.makeKey2 ( timestamp ,wgid ); - key128_t endKey = g_accessdb.makeKey2 ( timestamp + int32_tTime, wgid); + key128_t endKey = g_accessdb.makeKey2 ( timestamp + longTime, wgid); // widget id of 0 means ANY widget if ( wgid == 0 ) { startKey = g_accessdb.makeKey1(timestamp , 0 ); - endKey = g_accessdb.makeKey1(timestamp + int32_tTime, 0 ); + endKey = g_accessdb.makeKey1(timestamp + longTime, 0 ); } // lookup accessdb records from that time going forward if ( ! st->m_msg5.getList ( RDB_ACCESSDB , diff --git a/Address.cpp b/Address.cpp index e905ed804..bc9b98774 100644 --- a/Address.cpp +++ b/Address.cpp @@ -3074,7 +3074,7 @@ bool Addresses::updateAddresses ( ) { if ( d1 < -.01 ) continue; if ( d2 < -.01 ) continue; // . ok, they are the same i guess - // . 
prefer the one with the int32_test digits as the orig + // . prefer the one with the longest digits as the orig // and the other as the alias if ( ad->m_street->m_strlen > aj->m_street->m_strlen){ //aj->m_street->m_alias = ad; @@ -4079,7 +4079,7 @@ bool setHashes ( Place *p , Words *ww , int32_t niceness ) { // not a po box i guess if ( k == -1 ) return true; - // xor it in aint32_t with h_po + // xor it in along with h_po p->m_hash = h_po ^ wids[k]; return true; @@ -5679,12 +5679,12 @@ bool Addresses::set2 ( ) { Place *pp = NULL; if ( ! pp ) pp = pc; if ( ! pp ) pp = ps; - // . if tied prefer int32_ter. if length tied prefer state + // . if tied prefer longer. if length tied prefer state // . "California" is both a state and a city if ( pc && ps ) { - // kill state if city int32_ter + // kill state if city longer if ( pc->m_alnumB > ps->m_alnumB ) ps = NULL; - // or kill city is state is int32_ter + // or kill city is state is longer else if ( pc->m_alnumB < ps->m_alnumB ) pc = NULL; } @@ -7179,7 +7179,7 @@ bool Addresses::set2 ( ) { // plays somewhat nicely with menu cruft because // we have to verify the place names with another // website to really make the place name stick, - // so let's no int32_ter use SEC_DUP or'ed in with + // so let's no longer use SEC_DUP or'ed in with // the badFlags. mdw. if ( sp && (sp[i]->m_flags & badFlags ) ) // |SEC_DUP) continue; @@ -10116,7 +10116,7 @@ int64_t *getSynonymWord ( int64_t *h, int64_t *prevId, bool isStreet ) { int64_t *p = (int64_t *)s_syn.getValue64 ( *h ); - // check city aliases table. we no int32_ter store city aliases + // check city aliases table. we no longer store city aliases // in the synonym list // . no! might have "SF Smith" not "Santa Fe Smith" //if ( ! 
p ) { @@ -12147,7 +12147,7 @@ bool Address::addToTagRec ( TagRec *gr , int32_t ip , int32_t timestamp , char *tagName ) { // - // we are no int32_ter storing contact info addresses + // we are no longer storing contact info addresses // //return true; @@ -12968,7 +12968,7 @@ void Address::printEssentials ( SafeBuf *pbuf , bool forEvents , pbuf->pushChar(';'); //if ( m_ctry->m_str ) // pbuf->javascriptEncode(m_ctry->m_str,m_ctry->m_strlen); - // now we include lat and int32_t, but only if we got both valid + // now we include lat and long, but only if we got both valid if ( m_longitude != NO_LONGITUDE && m_latitude != NO_LONGITUDE ) { pbuf->pushChar(';'); @@ -15839,7 +15839,7 @@ bool Addresses::hashForPlacedb ( int64_t docId , // no intersections if ( a->m_street->m_flags2 & PLF2_INTERSECTION ) good = false; // . skip if not good - // . we no int32_ter add non-inlined addresses cuz those are + // . we no longer add non-inlined addresses cuz those are // not as accurate. many pages have the street address // too far from the city and state, and we use one from the // tag and it ain't right. @@ -18934,7 +18934,7 @@ bool getLatLonFromUserInput ( float *radius, cityA = city2A; cityB = city2B; } - // if both existed, prefer the int32_ter. if tied. prefer + // if both existed, prefer the longer. if tied. 
prefer // the local one even if its population might be smaller if ( crd && crd2 && city2B > cityB ) { crd = crd2; diff --git a/AutoBan.cpp b/AutoBan.cpp index cbd9b990a..1724823c2 100644 --- a/AutoBan.cpp +++ b/AutoBan.cpp @@ -675,7 +675,7 @@ bool AutoBan::hasPerm(int32_t ip, char c = banRegex[i]; // NULL terminate banRegex[i] = '\0'; - // search for substr (must be int32_ter than 2 chars + // search for substr (must be longer than 2 chars if ( i - start > 2){ if (strnstr2(reqStr, reqLen, &banRegex[start])) gotMatch = true; @@ -1076,7 +1076,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) { // ones to the conf parm; if (banIps) { //ack, the browser puts in crlf when this comes back, so - //we will have a int32_ter string here than the one we sent + //we will have a longer string here than the one we sent //out. trim back all extrainious whitespace before we do //bounds checking. trimWhite(banIps); diff --git a/Bits.cpp b/Bits.cpp index e56b5353d..34195b29e 100644 --- a/Bits.cpp +++ b/Bits.cpp @@ -181,7 +181,7 @@ bool Bits::set ( Words *words , char titleRecVersion , int32_t niceness , // - // pick the int32_test line in a hard section which ends in + // pick the longest line in a hard section which ends in // a period and contains a br tag. then any line that // is 80%+ of that line's number of chars is also a line // where the br should not terminate it as a sentence. 
diff --git a/CatRec.h b/CatRec.h index e2fc99ec2..d6b7405d8 100644 --- a/CatRec.h +++ b/CatRec.h @@ -91,7 +91,7 @@ class CatRec { char *getData ( ) { return m_data; }; int32_t getDataSize ( ) { return m_dataSize; }; - // aint32_t with coll/collLen identifies a unique xml file + // along with coll/collLen identifies a unique xml file //int32_t getFilenum ( ) { return m_filenum; }; //int32_t getRuleset ( ) { return m_filenum; }; diff --git a/Catdb.cpp b/Catdb.cpp index bb8930f5b..81ec74124 100644 --- a/Catdb.cpp +++ b/Catdb.cpp @@ -53,7 +53,7 @@ bool Catdb::init ( ) { return log("db: Catdb init failed."); // . initialize our own internal rdb - // . i no int32_ter use cache so changes to tagdb are instant + // . i no longer use cache so changes to tagdb are instant // . we still use page cache however, which is good enough! //if ( this == &g_catdb ) if ( ! m_rdb.init ( g_hostdb.m_dir , diff --git a/Clusterdb.cpp b/Clusterdb.cpp index 5e62ef517..2c08e7685 100644 --- a/Clusterdb.cpp +++ b/Clusterdb.cpp @@ -246,7 +246,7 @@ static void clusterRmVfd ( DiskPageCache *pc, // reset rdb void Clusterdb::reset() { m_rdb.reset(); } -// . this no int32_ter maintains an rdb of cluster recs +// . this no longer maintains an rdb of cluster recs // . Msg22 now just uses the cache to hold cluster recs that it computes // from titlteRecs // . clusterRecs are now just TitleRec keys... diff --git a/Clusterdb.h b/Clusterdb.h index e6afafb1e..c2f7d994c 100644 --- a/Clusterdb.h +++ b/Clusterdb.h @@ -1,6 +1,6 @@ // Copyright Matt Wells, Jul 2002 -// . a clusterRec now no int32_ter exists, per se +// . a clusterRec now no longer exists, per se // . it is the same thing as the key of the titleRec in titledb // . titleRecs now contain the site and content hashes in the low bits // of their key. diff --git a/Collectiondb.cpp b/Collectiondb.cpp index ca687b1f8..20dede9a7 100644 --- a/Collectiondb.cpp +++ b/Collectiondb.cpp @@ -324,7 +324,7 @@ bool Collectiondb::addNewColl ( char *coll , // . 
scan for holes // . i is also known as the collection id //int32_t i = (int32_t)newCollnum; - // no int32_ter fill empty slots because if they do a reset then + // no longer fill empty slots because if they do a reset then // a new rec right away it will be filled with msg4 recs not // destined for it. Later we will have to recycle some how!! //else for ( i = 0 ; i < m_numRecs ; i++ ) if ( ! m_recs[i] ) break; @@ -348,7 +348,7 @@ bool Collectiondb::addNewColl ( char *coll , // return log("admin: Limit of %"INT64" collection reached. " // "Collection not created.",maxColls); //} - // if empty... bail, no int32_ter accepted, use "main" + // if empty... bail, no longer accepted, use "main" if ( ! coll || !coll[0] ) { g_errno = EBADENGINEER; return log("admin: Trying to create a new collection " @@ -359,7 +359,7 @@ bool Collectiondb::addNewColl ( char *coll , if ( gbstrlen(coll) > MAX_COLL_LEN ) { g_errno = ENOBUFS; return log("admin: Trying to create a new collection " - "whose name \"%s\" of %i chars is int32_ter than the " + "whose name \"%s\" of %i chars is longer than the " "max of %"INT32" chars.",coll,gbstrlen(coll), (int32_t)MAX_COLL_LEN); } @@ -1668,7 +1668,7 @@ CollectionRec::CollectionRec() { //m_numAdminIps = 0; memset ( m_bases , 0 , sizeof(RdbBase *)*RDB_END ); // how many keys in the tree of each rdb? we now store this stuff - // here and not in RdbTree.cpp because we no int32_ter have a maximum + // here and not in RdbTree.cpp because we no longer have a maximum // # of collection recs... MAX_COLLS. each is a 32-bit "int32_t" so // it is 4 * RDB_END... memset ( m_numNegKeysInTree , 0 , 4*RDB_END ); @@ -3145,7 +3145,7 @@ bool CollectionRec::hasPermission ( char *p, int32_t plen , int32_t ip ) { return true; // scan the passwords - // MDW: no int32_ter, this is too vulnerable!!! + // MDW: no longer, this is too vulnerable!!! 
/* for ( int32_t i = 0 ; i < m_numAdminPwds ; i++ ) { int32_t len = gbstrlen ( m_adminPwds[i] ); diff --git a/Collectiondb.h b/Collectiondb.h index 42c8b90aa..9cd42687f 100644 --- a/Collectiondb.h +++ b/Collectiondb.h @@ -428,7 +428,7 @@ class CollectionRec { //char m_newSpideringEnabled ; char m_spideringEnabled ; float m_newSpiderWeight ; - // m_inDeleteMode is no int32_ter used, just a place holder now + // m_inDeleteMode is no longer used, just a place holder now //char m_inDeleteMode ; //char m_restrictTitledbForQuery ; // obsoleted //char m_recycleVotes ; @@ -1058,7 +1058,7 @@ class CollectionRec { //char m_orig [ MAX_PARMS ]; - // we no int32_ter truncate termlists on disk, so this is obsolete + // we no longer truncate termlists on disk, so this is obsolete //int32_t m_indexdbTruncationLimit; // collection name in the other/external cluster from which we diff --git a/Conf.cpp b/Conf.cpp index b3c0cd325..8b19d7835 100644 --- a/Conf.cpp +++ b/Conf.cpp @@ -570,7 +570,7 @@ char *Conf::getDefaultColl ( char *hostname, int32_t hostnameLen ) { if ( cmpLen == 0 || cmpLen > hostnameLen ) continue; // . check the hostname for a match - // this will allow hostname to be int32_ter to allow for + // this will allow hostname to be longer to allow for // a possible port at the end if ( strncmp ( hostname, cmpHostname, diff --git a/Dates.cpp b/Dates.cpp index 5da061961..948e4df6c 100644 --- a/Dates.cpp +++ b/Dates.cpp @@ -3,7 +3,7 @@ // stjohnscollege.edu // - lost event because we changed the implied sections algo and no -// int32_ter adds the address and store hours as a single implied section +// longer adds the address and store hours as a single implied section // - probably should write this one off @@ -169,7 +169,7 @@ // - how did we get "Sunday [[]] 4pm - 6pm" ??? // - brbrtagdelim (double br) should be enough to keep the right dow mapping // to the right tod. 
-// - bad titles because we think the strong tag portion is part of a int32_ter +// - bad titles because we think the strong tag portion is part of a longer // sentence. so do not make sentence go across the strong or bold tag // or italic or underline tag UNLESS the next word is lower case, etc. // so treat these non-breaking tags as we treat the other breaking tags. @@ -2421,7 +2421,7 @@ bool Dates::setPart1 ( //char *u , // sanity. parseDates() should have set this when XmlDoc // called it explicitly before calling setPart1(). - // well now it no int32_ter needs to call it explicitly since + // well now it no longer needs to call it explicitly since // xmldoc calls getAddresses() before setting the implied // sections. and getAddresses() calls getSimpleDates() which calls // this function, setPart1() which will call parseDates() below. @@ -2604,9 +2604,9 @@ bool Dates::setPart1 ( //char *u , if ( h_open == 0 ) { char *xx=NULL;*xx=0; } // - // now since we no int32_ter set Date::m_section and m_hardSection + // now since we no longer set Date::m_section and m_hardSection // in addDate() and addPtr() we have to make up for it here. we are - // no int32_ter allowed to use the Sections class in Dates::parseDates() + // no longer allowed to use the Sections class in Dates::parseDates() // because Sections::set() calls parseDates() because it uses the dates // to set implied sections that consist of a dom/dow header and tod // subjects. i did hack Date::addPtr() to inherit the m_hardSection, @@ -4299,7 +4299,7 @@ bool Dates::parseDates ( Words *w , dateflags_t defFlags , Bits *bits , // set SF_PLURAL in the supplemental flags if ( to_lower_a(wptrs[i][wlens[i]-1])=='s' && - // no int32_ter count "Friday's" as Fridays though + // no longer count "Friday's" as Fridays though // to fix albertcadabra.com wptrs[i][wlens[i]-2] != '\'' && // fix "tues". 
wednes thurs @@ -6852,7 +6852,7 @@ bool Dates::parseDates ( Words *w , dateflags_t defFlags , Bits *bits , continue; } - // stupid facebook json format no int32_ter has plain + // stupid facebook json format no longer has plain // unix timestamps, it has trumba style timestamps. // by definition official times are non-fuzzy if ( di->m_flags & DF_OFFICIAL ) continue; @@ -7239,7 +7239,7 @@ bool Dates::parseDates ( Words *w , dateflags_t defFlags , Bits *bits , // actually any single date should not be excluded! bool forsure = false; // . skip if single isolated DOW - // . fixes "Sun City Plumbing" since "Sun" is no int32_ter + // . fixes "Sun City Plumbing" since "Sun" is no longer // considered DF_FUZZY because it has DF_LEFT_BOOKEND set. // and we did that to fix "Sunday Services" for abqcsl.org if ( di->m_type == DT_DOW ) { @@ -9005,7 +9005,7 @@ bool Dates::setPart2 ( Addresses *aa , int32_t minPubDate , int32_t maxPubDate , for ( ; pp ; ) { // ; pp = pp->m_parent ) { // breathe QUICKPOLL ( m_niceness ); - // we no int32_ter need this now because if we are + // we no longer need this now because if we are // comaptible with a date we have to telescope to it, // we can't just skip it. the "s3" algo should fix // the rest of thewoodencow.com in isCompatible() @@ -12104,7 +12104,7 @@ int32_t Dates::isCompatible ( Date *di , // . crap this also breaks signmeup.com // . this breaks thewoodencow.com because the store hours section // contains a list of monthdays below it which is included in acc1 - // . it no int32_ter seems to be needed by peachpundit.com but taking + // . it no longer seems to be needed by peachpundit.com but taking // it out caused a few anomalies. 
not sure if good or bad really, // so i left this in and fixed thewoodencow.com if ( (acc1 & DT_DAYNUM) && (acc2 & DT_DAYNUM) && @@ -12114,7 +12114,7 @@ int32_t Dates::isCompatible ( Date *di , // a daynum to a range "Dec x - Jan y", so allow range headers // to be compatible !(acc1 & (DT_RANGE|DT_RANGE_MONTHDAY|DT_RANGE_DAYNUM)) && - // aint32_t the same lines allow lists + // along the same lines allow lists // breaks www.missioncvb.org which has // "10:15 pm on both Friday and Saturday night" telescoping to // "Friday, May 9 and Saturday, May 10, 2008" @@ -12293,7 +12293,7 @@ int32_t Dates::isCompatible2 ( Section *s1 , Section *s2 , bool useXors ) { int64_t *ph = (int64_t *)pt->getValue ( &si ); if ( ph && ( phFinal1 ^ *ph ) ) phFinal1 ^= *ph; } - // get any email hash as we go aint32_t + // get any email hash as we go along if ( et ) { int64_t *eh = (int64_t *)et->getValue ( &si ); if ( eh && ( ehFinal1 ^ *eh ) ) ehFinal1 ^= *eh; @@ -12361,7 +12361,7 @@ int32_t Dates::isCompatible2 ( Section *s1 , Section *s2 , bool useXors ) { // . i guess we are compatible then... //if ( ! 
last1 ) return 1; - // get any phone number hash as we go aint32_t + // get any phone number hash as we go along if ( last1 && useXors ) { phFinal1 = last1->m_phoneXor; ehFinal1 = last1->m_emailXor; @@ -12383,12 +12383,12 @@ int32_t Dates::isCompatible2 ( Section *s1 , Section *s2 , bool useXors ) { // if si contains s1 then stop if ( si->contains ( s1 ) ) break; /* - // get any phone number hash as we go aint32_t + // get any phone number hash as we go along if ( pt ) { int64_t *ph = (int64_t *)pt->getValue ( &si ); if ( ph && ( phFinal2 ^ *ph ) ) phFinal2 ^= *ph; } - // get any email hash as we go aint32_t + // get any email hash as we go along if ( et ) { int64_t *eh = (int64_t *)et->getValue ( &si ); if ( eh && ( ehFinal2 ^ *eh ) ) ehFinal2 ^= *eh; @@ -12446,7 +12446,7 @@ int32_t Dates::isCompatible2 ( Section *s1 , Section *s2 , bool useXors ) { last2 = si; } - // get any phone number hash as we go aint32_t + // get any phone number hash as we go along if ( last2 && useXors ) { phFinal2 = last2->m_phoneXor; ehFinal2 = last2->m_emailXor; @@ -12723,7 +12723,7 @@ HashTableX *Dates::getSubfieldTable ( ) { //} } - // no int32_ter use bitnum, use a list of 32-bit hashes for the fields + // no longer use bitnum, use a list of 32-bit hashes for the fields // we contain. really just using sth (section to hash) table would // be nice. or better yet just make a buffer and store a ptr into // the section class that points into this buffer into a list of @@ -13544,7 +13544,7 @@ bool Dates::addRanges ( Words *words , bool allowOpenEndedRanges ) { if ( *p == ',' ) continue; /* // utf8 hyphen from unm.edu url - // no int32_ter needed since XmlDoc.cpp now + // no longer needed since XmlDoc.cpp now // converts all utf8 hyphens into ascii if ( p[0] == -30 && p[1] == -128 && @@ -26338,7 +26338,7 @@ int32_t Dates::calculateYearBasedOnDOW ( int32_t minYear, int32_t maxYear, Date // . 2. find the smallest monthdayyear interval that contains all dow points // . 3. 
set the recurring dowbits. i.e. if the dow is monday and every 2nd // monday is empty then zero out that dow bit. 1st/2nd/4rd/4th/5th/last -// . 4. find int32_test time_t interval that covers the necessary recurring dow +// . 4. find longest time_t interval that covers the necessary recurring dow // days without exception. store them all in an array. record a // min and max interval for each one. i.e. the min's endpoints are // the necessary dows. the max's endpoints are past those usually up diff --git a/Diff.cpp b/Diff.cpp index 7811578f2..2aeb22514 100644 --- a/Diff.cpp +++ b/Diff.cpp @@ -134,7 +134,7 @@ void printXmlDiff(Xml *xml1, Xml *xml2, DiffOpt *argOpt){ int32_t seq2 [4096]; int32_t seqLen[4096]; SafeBuf buf; - //int32_t numSeq = int32_testCommonSubsequence(seq1, seq2, seqLen,4096, + //int32_t numSeq = longestCommonSubsequence(seq1, seq2, seqLen,4096, // &xml1-> &xml2->; //printf("lcs length: %"INT32"\n", numSeq); @@ -648,7 +648,7 @@ int32_t lcsXml(int32_t *lcsBuf1, // xml1 indexes of nodes in lcs if (n2<0) n2 = xml2->getNumNodes(); - return int32_testCommonSubseq(lcsBuf1, lcsBuf2, lcsLenBuf,lcsBufLen, + return longestCommonSubseq(lcsBuf1, lcsBuf2, lcsLenBuf,lcsBufLen, (char*)nodes1, (char*)nodes2, opt, start1, start2, n1, n2,rlevel); @@ -678,7 +678,7 @@ int64_testCommonSubseq(int32_t *outbuf1, // out1 indexes of nodes in lcs if (opt->m_debug){ for (int32_t i=0;i b) u = b; else u = a-1; @@ -1089,7 +1089,7 @@ int32_t editPath(char *seq1, char *seq2, int32_t b = v1[IDX(VSIZE, k+1)]; int32_t x; - // select best path aint32_t diagonal k + // select best path along diagonal k if (k == -d || k != d && a < b) x = b; else x = a+1; @@ -1142,7 +1142,7 @@ int32_t editPath(char *seq1, char *seq2, int32_t b = v2[IDX(VSIZE, k+1)]; int32_t u; - // select best path aint32_t diagonal k + // select best path along diagonal k if (k == -d || k != d && a > b) u = b; else u = a-1; diff --git a/Diff.h b/Diff.h index a227fb142..62a04c6eb 100644 --- a/Diff.h +++ b/Diff.h @@ -32,7 
+32,7 @@ class Diff{ void diffXmlFiles(char *file1, char *file2, DiffOpt *opt=NULL); void printXmlDiff(Xml *xml1, Xml *xml2, DiffOpt *opt=NULL); -// int32_test common subsequence of 2 xml objects +// longest common subsequence of 2 xml objects int32_t lcsXml(int32_t *lcsBuf1, // xml1 indexes of nodes in lcs int32_t *lcsBuf2, // xml2 indexes of nodes in lcs int32_t *lcsLenBuf, // number of consecutive nodes in each lcsBuf diff --git a/DiskPageCache.cpp b/DiskPageCache.cpp index f6c919d85..241a5e310 100644 --- a/DiskPageCache.cpp +++ b/DiskPageCache.cpp @@ -639,9 +639,9 @@ void DiskPageCache::addPage(int32_t vfd,int32_t pageNum,char *page,int32_t size, poff = m_tailOff; //char *p = getMemPtrFromOff ( poff ); excisePage ( poff ); - // . the file no int32_ter owns him + // . the file no longer owns him // . this is a int32_t ptr to &m_bufOffs[vfd][pageNum] - // . if that vfd no int32_ter exists it should have added all its + // . if that vfd no longer exists it should have added all its // pages to m_avail list //int32_t tmp = -1; int32_t *memOffPtr = NULL; @@ -1290,7 +1290,7 @@ bool DiskPageCache::needsMerge( ){ // 'ipcs -m' will show shared mem in linux void freeAllSharedMem ( int32_t max ) { - // free shared mem whose pid no int32_ter exists + // free shared mem whose pid no longer exists //struct shmid_ds buf; //shmctl ( 0 , SHM_STAT , &buf ); //int shmctl(int shmid, int cmd, struct shmid_ds *buf); diff --git a/Dns.cpp b/Dns.cpp index 617393c56..b6a910afa 100644 --- a/Dns.cpp +++ b/Dns.cpp @@ -1372,7 +1372,7 @@ void returnIp ( DnsState *ds , int32_t ip ) { // like www.castleburyinn.com then we don't wait for a 30 second // timeout 100 times in a row. bool cache = false; - // no int32_ter cache these! i think the spider should evenly sample + // no longer cache these! i think the spider should evenly sample // every other IP address before returning to the timed out IP address... // ideally. 
plus i added the google public dns 8.8.8.8 as a secondary // dns ip to fallback to in the case of timeouts i guess... so make diff --git a/Entities.cpp b/Entities.cpp index f59420234..c55dab63b 100644 --- a/Entities.cpp +++ b/Entities.cpp @@ -29,7 +29,7 @@ int32_t getEntity_a ( char *s , int32_t maxLen , uint32_t *c ) { // s[len]='\0'; // fprintf(stderr,"got entity %s \n",s); // s[len]=d; - // we don't have entities int32_ter than "¤" + // we don't have entities longer than "¤" if ( len > 10 ) return 0; // all entites are 3 or more chars (>) if ( len < 3 ) return 0; diff --git a/Errno.cpp b/Errno.cpp index f1e89eedf..1b63383bc 100644 --- a/Errno.cpp +++ b/Errno.cpp @@ -116,7 +116,7 @@ case ENOCOLLREC : return "No collection record"; case ESHUTTINGDOWN : return "Shutting down the server"; case EHOSTDEAD : return "Host is marked as dead"; case EBADFILE : return "File is bad"; -case ETOOEARLY : return "Need to wait int32_ter"; +case ETOOEARLY : return "Need to wait longer"; case EFILECLOSED : return "Read on closed file";//close on our thread case ELISTTOOBIG : return "List is too big"; case ECANCELLED : return "Transaction was cancelled"; diff --git a/Errno.h b/Errno.h index ca9aef7d5..d067dbe11 100644 --- a/Errno.h +++ b/Errno.h @@ -120,7 +120,7 @@ enum { ESHUTTINGDOWN , //shutting down the server EHOSTDEAD , // host is dead EBADFILE , //file is bad - ETOOEARLY , //need to wait int32_ter + ETOOEARLY , //need to wait longer EFILECLOSED , //read on closed file? ELISTTOOBIG , //Rdb::addList() calls this ECANCELLED , //transaction was cancelled diff --git a/Facebook.cpp b/Facebook.cpp index 10f835bdd..be3d826b0 100644 --- a/Facebook.cpp +++ b/Facebook.cpp @@ -1493,7 +1493,7 @@ static void doneProcessingWrapper ( void *state ) { int32_t err = g_msgfb.m_errno; // or inherit this. we might have forgotten to set m_errno if ( ! 
err && g_errno ) err = g_errno; - // no int32_ter in progress + // no longer in progress g_msgfb.m_inProgress = false; g_msgfb.reset(); // note it @@ -2819,7 +2819,7 @@ void Msgfb::queueLoop ( ) { int32_t err = m_errno; // or inherit this. we might have forgotten to set m_errno if ( ! err && g_errno ) err = g_errno; - // no int32_ter in progress + // no longer in progress m_inProgress = false; // this will purge fullreply reset(); @@ -2848,7 +2848,7 @@ void Msgfb::queueLoop ( ) { error: log("facebook: queue fbid %"INT64" had error: %s", m_fbId,mstrerror(g_errno)); - // no int32_ter in progress + // no longer in progress m_inProgress = false; // this will purge fullreply reset(); diff --git a/File.cpp b/File.cpp index 1e2ddd0f6..2b19fb7a6 100644 --- a/File.cpp +++ b/File.cpp @@ -162,7 +162,7 @@ bool File::open ( int flags , int permissions ) { // remember OUR virtual file descriptor for successive calls to // read/write/... m_vfd = i; - // we are not open at this point, but no int32_ter available at least + // we are not open at this point, but no longer available at least s_fds [ m_vfd ] = -1; // open for real, return true on success if ( getfd () >= 0 ) return true; diff --git a/File.h b/File.h index 68d7e7516..b86c794f5 100644 --- a/File.h +++ b/File.h @@ -47,7 +47,7 @@ class File { public: - // aint32_t the same lines as getCloseCount_r() + // along the same lines as getCloseCount_r() //void incCloseCount_r ( ) ; File ( ); diff --git a/HashTableX.cpp b/HashTableX.cpp index afd04d9fe..3d8282c47 100644 --- a/HashTableX.cpp +++ b/HashTableX.cpp @@ -263,7 +263,7 @@ bool HashTableX::addKey ( void *key , void *val , int32_t *slot ) { if ( val ) setValue ( n , val ); // caller sometimes wants this if ( slot ) *slot = n; - // no int32_ter empty + // no longer empty m_flags[n] = 0x01; return true; } diff --git a/Hostdb.h b/Hostdb.h index 530a7fdf4..5a441f173 100644 --- a/Hostdb.h +++ b/Hostdb.h @@ -219,7 +219,7 @@ class Host { // . 
its redundant twins are always in different sets //int32_t m_group; // was host in gk0 cluster and retired because its twin got - // ssds, so it was no int32_ter really needed. + // ssds, so it was no longer really needed. bool m_retired; // used for logging when a host goes dead for the first time bool m_wasAlive; diff --git a/HttpRequest.cpp b/HttpRequest.cpp index 5f304005e..2812006be 100644 --- a/HttpRequest.cpp +++ b/HttpRequest.cpp @@ -482,7 +482,7 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) { // take this out until it stops losing descriptors and works //m_requestType = RT_CONNECT; //cmdLen = 7; - // we no int32_ter insert section info. emmanuel gets section + // we no longer insert section info. emmanuel gets section // info when injecting a doc now i think in PageInject.cpp. // we do not proxy https requests because we can't // decrypt the page contents to cache them or to insert diff --git a/HttpServer.cpp b/HttpServer.cpp index e37a7d7fa..8718c70bd 100644 --- a/HttpServer.cpp +++ b/HttpServer.cpp @@ -90,7 +90,7 @@ bool HttpServer::init ( int16_t port, &g_conf.m_httpsMaxSockets, true ) ) { // this was required for communicating with an email alert - // web server, but no int32_ter do i use them + // web server, but no longer do i use them //return false; // don't break, just log and don't do SSL log ( "https: SSL Server Failed To Init, Continuing..." ); @@ -1477,7 +1477,7 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) { // UNLESS s->m_waitingonHandler was true, which should not be the // case, as it is only set to true in TcpServer::readSocketWrapper() // which should never be called by TcpServer::sendMsg() above. - // so let cleanUp know it is no int32_ter valid + // so let cleanUp know it is no longer valid if ( ! 
f->isOpen() ) f->open( O_RDONLY ); int fd = f->getfd(); cleanUp ( f , NULL/*TcpSocket */ ); @@ -2824,7 +2824,7 @@ TcpSocket *HttpServer::unzipReply(TcpSocket* s) { // have to modify the encoding and content length as we go. // Basically we are unzipping the http reply into a new buffer here, // so we need to rewrite the Content-Length: and the - // Content-Encoding: http mime field values so they are no int32_ter + // Content-Encoding: http mime field values so they are no longer // "gzip" and use the uncompressed content-length. char *ptr1 = NULL; char *ptr2 = NULL; diff --git a/IndexTable.cpp b/IndexTable.cpp index 7e4dec7ee..4cde4f328 100644 --- a/IndexTable.cpp +++ b/IndexTable.cpp @@ -1600,7 +1600,7 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] , explicitBits[nn] |= ebits; // . else if we are using hardCounts for *many* query terms... // . terms not explicitly required will still have ebits > 0 - // in order to support boolean expressions aint32_t side of + // in order to support boolean expressions along side of // hard required terms // . non-required phrases can all share the same ebit when we // have a lot of query terms, so they will not be @@ -2751,7 +2751,7 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] , m_finalNumTopDocIds = nf; // . often the exact count is bigger than the docids we got - // . adjust it down so it's no int32_ter bigger in case we had dup docids + // . adjust it down so it's no longer bigger in case we had dup docids m_finalNumExactExplicitMatches=m_numExactExplicitMatches[m_numTiers-1]; // store implict matches too so we can put the gray bar separating diff --git a/IndexTable2.cpp b/IndexTable2.cpp index a7d9d2c56..2d9c493d8 100644 --- a/IndexTable2.cpp +++ b/IndexTable2.cpp @@ -1073,7 +1073,7 @@ bool IndexTable2::alloc ( ) { // . alloc space for top tree // . returns false and sets g_errno on error // . increase docs to get if doing site clustering - // . 
this is no int32_ter done in Msg40.cpp for FULL SPLITS, we do it here + // . this is no longer done in Msg40.cpp for FULL SPLITS, we do it here // . the top tree is only needed if m_recopute is true now!! //if ( ! allocTopTree() ) return false; @@ -1200,7 +1200,7 @@ bool IndexTable2::alloc ( ) { // . but now we do need to store a lot of docids (NORAT_TMP_SLOTS) // in order to do our phrase and synonym affinity calculations // . one we determine those affinities we can then determine scores - // for docids as we get them, and we no int32_ter need to store all + // for docids as we get them, and we no longer need to store all // of the result docids into m_tmpDocIdPtrs2[], HOWEVER, we do need // to store the top-scoring docids (usually 100 or so) in the // m_topDocIds[] array, which is easy and efficient. @@ -1731,7 +1731,7 @@ void IndexTable2::addLists_r ( int32_t *totalListSizes , float sortByDateWeight // . follow up calls // . intersect one block at a time into the "active intersection" // . the "active intersection" is stored in the m_tmpDocIdPtrs2[] - // array and is hashed into the hash table aint32_t with the lists + // array and is hashed into the hash table along with the lists // in block #i. // . this is the rat=1 algo that is the reason why it is much faster // than rat=0, because the intersection is ever-shrinking requiring @@ -2748,7 +2748,7 @@ void IndexTable2::addLists2_r ( int32_t numListsToDo , explicitBits[nn] |= ebits; // . else if we are using hardCounts for *many* query terms... // . terms not explicitly required will still have ebits > 0 - // in order to support boolean expressions aint32_t side of + // in order to support boolean expressions along side of // hard required terms // . non-required phrases can all share the same ebit when we // have a lot of query terms, so they will not be @@ -3929,7 +3929,7 @@ bool IndexTable2::eventHashLoop ( int32_t *listIndexes , explicitBits[nn] |= ebits; // . 
else if we are using hardCounts for *many* query terms... // . terms not explicitly required will still have ebits > 0 - // in order to support boolean expressions aint32_t side of + // in order to support boolean expressions along side of // hard required terms // . non-required phrases can all share the same ebit when we // have a lot of query terms, so they will not be @@ -5328,7 +5328,7 @@ int32_t IndexTable2::fillTopDocIds ( //char **topp , goto loop1; } // is it in progress (and showing in progress is disabled), - // over or no int32_ter in timedb? + // over or no longer in timedb? if ( score == 0 ) { i++; goto loop1; diff --git a/Language.cpp b/Language.cpp index 1544461c7..0a01d2766 100644 --- a/Language.cpp +++ b/Language.cpp @@ -2662,9 +2662,9 @@ bool Language::generateDicts ( int32_t numWordsToDump , char *coll ) { for ( int32_t i = 0 ; i < NUM_CHARS ; i++ ) { char tmp[1024]; // . sort should treat all lower chars as upper - // . sort in reverse order so int32_ter fragments are on top + // . sort in reverse order so longer fragments are on top // of their int16_ter sub fragments so if they have the - // same score in the end, we'll keep the int32_ter fragment + // same score in the end, we'll keep the longer fragment sprintf(tmp,"sort -f -r %stmp/%s/%s.words.%"INT32" > " "%stmp/%s/%s.words.%"INT32".sorted", g_hostdb.m_dir, getLanguageAbbr(m_lang), @@ -2975,7 +2975,7 @@ tr.set ( rec , recSize , false ) ; // owndata? // advance it tmpp += n; - // no int32_ter convert to utf8, cuz title rec + // no longer convert to utf8, cuz title rec // is now already in utf8 by default!! //tmpp += latin1ToUtf8( tmpp, // tmpend - tmpp, @@ -3755,7 +3755,7 @@ bool Language::makeQueryFiles ( ) { } bool inQuotes = qw->m_inQuotes; char fieldCode = qw->m_fieldCode; - // . get int32_test continual fragment that + // . get longest continual fragment that // . starts with word #i. 
get the following // words that can be in a fragment // that starts with word #i start of the frag diff --git a/LanguageIdentifier.cpp b/LanguageIdentifier.cpp index 2ac07f776..70e8514ea 100644 --- a/LanguageIdentifier.cpp +++ b/LanguageIdentifier.cpp @@ -892,7 +892,7 @@ uint8_t LanguageIdentifier::guessLanguageFreqCount(Xml *xml, if(max == 0) maxidx = 0; #if 0 - // English, British, and Australian are no int32_ter separate + // English, British, and Australian are no longer separate // If it's a toss up between any version of English, go with it. if((max == langEnglish || max == langAustralia || max == langBritish) && (oldmax == langEnglish || oldmax == langAustralia || oldmax == langBritish)) diff --git a/Linkdb.cpp b/Linkdb.cpp index 26f4be09a..ff3dcf5fe 100644 --- a/Linkdb.cpp +++ b/Linkdb.cpp @@ -1531,7 +1531,7 @@ bool Msg25::sendRequests ( ) { //if ( ! m_docIdTable.addKey ( &dh ) ) // return true; - // if it is no int32_ter there, just ignore + // if it is no longer there, just ignore if ( lostDate ) { m_lostLinks++; continue; @@ -1962,7 +1962,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) { // we often restrict link: termlist lookup to indexdb root // file, so we end up including terms from deleted docs... // this we get a lot of ENOTFOUND errors. - // MDW: we no int32_ter do this restriction... + // MDW: we no longer do this restriction... log(LOG_DEBUG, "build: Got error getting link text from one document: " "%s. Will have to restart later. docid=%"INT64".", @@ -2001,7 +2001,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) { // is the inlinker banned? if ( r && good && r->m_isBanned ) { - // it is no int32_ter good + // it is no longer good good = false; // inc the general count, too m_spamLinks++; @@ -2051,7 +2051,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) { // banned by way of ad id? 
if (r && good&& r->m_adIdHash&&m_adBanTable.getSlot(&r->m_adIdHash)>0){ - // it is no int32_ter good + // it is no longer good good = false; // inc the general count, too m_spamLinks++; @@ -2088,7 +2088,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) { if ( r && good && ! internal && r->m_isLinkSpam && // we can no allow link spam iff it is below the max! ++m_spamCount >= m_maxSpam ) { - // it is no int32_ter good + // it is no longer good good = false; // inc the general count, too m_spamLinks++; @@ -4001,7 +4001,7 @@ LinkInfo *makeLinkInfo ( char *coll , // note it if recycled if ( k.m_recycled ) logf(LOG_DEBUG,"build: recycling Inlink %s for linkee " - "%"INT64"", k.ptr_urlBuf,linkeeDocId); + "%"INT64"", k.getUrl(),linkeeDocId); // advance p += wrote; } @@ -4047,12 +4047,13 @@ Inlink *LinkInfo::getNextInlink ( Inlink *k ) { p->m_numStrings = 4; p->m_firstStrPtrOffset = 64; } + // MDW: now we just use offsets for 64bit conversion so no ptrs... // if latest, return that - if ( p->m_numStrings == p->getBaseNumStrings() && - p->m_firstStrPtrOffset == (char *)&p->ptr_urlBuf - (char *)p ) { - p->updateStringPtrs(NULL); - return p; - } + //if ( p->m_numStrings == p->getBaseNumStrings() && + // p->m_firstStrPtrOffset == (char *)&p->off_urlBuf - (char *)p ) { + // p->updateStringPtrs(NULL); + // return p; + //} // otherwise, set s_inlink to it s_inlink.set2 ( (Inlink *)p ); // preserve p though for next call @@ -4106,7 +4107,7 @@ bool Inlink::setXmlFromRSS ( Xml *xml , int32_t niceness ) { // compute the length (excludes the \0's) int32_t len = size_rssItem - 1; // return false and set g_errno if this fails - return xml->set ( ptr_rssItem , + return xml->set ( getRSSItem(),//ptr_rssItem , len , false , // own data? 0 , // allocSize @@ -4338,7 +4339,7 @@ void Inlink::set ( Msg20Reply *r ) { // . 
our current version has 5 strings m_numStrings = getBaseNumStrings(); // and our current string offset - m_firstStrPtrOffset = (char *)getFirstStrPtr() - (char *)this; + m_firstStrPtrOffset = (char *)getFirstOffPtr() - (char *)this; // set ourselves now m_ip = r->m_ip; @@ -4379,6 +4380,47 @@ void Inlink::set ( Msg20Reply *r ) { m_hopcount = r->m_hopcount; //m_linkTextScoreWeight = r->m_linkTextScoreWeight; + // MDW: use a new way. construct m_buf. 64-bit stuff. + int32_t poff = 0; + char *p = m_buf; + + off_urlBuf = poff; + memcpy ( p , r->ptr_ubuf , r->size_ubuf ); + poff += r->size_ubuf; + p += r->size_ubuf; + + off_linkText = poff; + memcpy ( p , r->ptr_linkText , r->size_linkText ); + poff += r->size_linkText; + p += r->size_linkText; + + off_surroundingText = poff; + memcpy ( p , r->ptr_surroundingText , r->size_surroundingText ); + poff += r->size_surroundingText; + p += r->size_surroundingText; + + off_rssItem = poff; + memcpy ( p , r->ptr_rssItem , r->size_rssItem ); + poff += r->size_rssItem; + p += r->size_rssItem; + + off_categories = poff; + memcpy ( p , r->ptr_categories , r->size_categories ); + poff += r->size_categories; + p += r->size_categories; + + off_gigabitQuery = poff; + memcpy ( p , r->ptr_gigabitQuery , r->size_gigabitQuery ); + poff += r->size_gigabitQuery; + p += r->size_gigabitQuery; + + off_templateVector = poff; + memcpy ( p , r->ptr_templateVector , r->size_templateVector ); + poff += r->size_templateVector; + p += r->size_templateVector; + + /* + MDW: take this out for 64 bit offset-only conversion ptr_urlBuf = r->ptr_ubuf; ptr_linkText = r->ptr_linkText; ptr_surroundingText = r->ptr_surroundingText; @@ -4386,6 +4428,7 @@ void Inlink::set ( Msg20Reply *r ) { ptr_categories = r->ptr_categories; ptr_gigabitQuery = r->ptr_gigabitQuery; ptr_templateVector = r->ptr_templateVector; + */ size_urlBuf = r->size_ubuf; size_linkText = r->size_linkText; @@ -4397,7 +4440,7 @@ void Inlink::set ( Msg20Reply *r ) { } // Msg25 calls this to make a 
"fake" msg20 reply for recycling Inlinks -// that are no int32_ter there... preserves rssInfo, etc. +// that are no longer there... preserves rssInfo, etc. void Inlink::setMsg20Reply ( Msg20Reply *r ) { r->m_ip = m_ip; @@ -4432,13 +4475,13 @@ void Inlink::setMsg20Reply ( Msg20Reply *r ) { r->m_hopcount = m_hopcount; //r->m_linkTextScoreWeight = m_linkTextScoreWeight; - r->ptr_ubuf = ptr_urlBuf; - r->ptr_linkText = ptr_linkText; - r->ptr_surroundingText = ptr_surroundingText; - r->ptr_rssItem = ptr_rssItem; - r->ptr_categories = ptr_categories; - r->ptr_gigabitQuery = ptr_gigabitQuery; - r->ptr_templateVector = ptr_templateVector; + r->ptr_ubuf = getUrl();//ptr_urlBuf; + r->ptr_linkText = getLinkText();//ptr_linkText; + r->ptr_surroundingText = getSurroundingText();//ptr_surroundingText; + r->ptr_rssItem = getRSSItem();//ptr_rssItem; + r->ptr_categories = getCategories();//ptr_categories; + r->ptr_gigabitQuery = getGigabitQuery();//ptr_gigabitQuery; + r->ptr_templateVector = getTemplateVector();//ptr_templateVector; r->size_ubuf = size_urlBuf; r->size_linkText = size_linkText; @@ -4450,6 +4493,9 @@ void Inlink::setMsg20Reply ( Msg20Reply *r ) { } // convert offsets back into ptrs +// MDW: no, now they are always offsets since we are 64bits +// this was kinda like Inlink::deserialize() +/* int32_t Inlink::updateStringPtrs ( char *buf ) { // point to our string buffer char *p = buf; @@ -4475,6 +4521,7 @@ int32_t Inlink::updateStringPtrs ( char *buf ) { // return how many bytes we processed return getBaseSize() + (p - getStringBuf()); } +*/ void Inlink::reset ( ) { // clear ourselves out @@ -4487,6 +4534,18 @@ void Inlink::set2 ( Inlink *old ) { // clear ouselves reset(); // copy what is legit to us + int fullSize = sizeof(Inlink); + // add in the sizes of all strings + int32_t *sizePtr = getFirstSizeParm(); // &size_qbuf; + int32_t *sizeEnd = getLastSizeParm (); // &size_displayMetas; + for ( ; sizePtr <= sizeEnd ; sizePtr++ ) + fullSize += *sizePtr; + // return how 
many bytes we processed + memcpy ( (char *)this , (char *)old , fullSize ); + return; + + // this old way is pre-64bit + /* memcpy ( (char *)this , (char *)old , old->m_firstStrPtrOffset ); // set our offset to the string ptrs m_firstStrPtrOffset = (char *)&ptr_urlBuf - (char *)this; @@ -4512,19 +4571,20 @@ void Inlink::set2 ( Inlink *old ) { // we can't do this sanity check because we cast "old" as an Inlink // whereas before it was an older version of "Inlink" //if ( old->size_urlBuf != size_urlBuf ) { char *xx=NULL;*xx=0; } + */ } int32_t Inlink::getStoredSize ( ) { //int32_t size = (int32_t)sizeof(Msg); //int32_t size = getBaseSize(); int32_t size = m_firstStrPtrOffset; - // add in string AND size ptrs + // add in string offsets AND size ptrs size += 8 * m_numStrings; // add up string buffer sizes //int32_t *sizePtr = getFirstSizeParm(); // &size_qbuf; //int32_t *sizeEnd = getLastSizeParm (); // &size_displayMetas; int32_t *sizePtr = - (int32_t *)((char *)this + m_firstStrPtrOffset + 4*m_numStrings); + (int32_t *)((char *)this + m_firstStrPtrOffset+4*m_numStrings); int32_t *sizeEnd = sizePtr + m_numStrings; for ( ; sizePtr < sizeEnd ; sizePtr++ ) size += *sizePtr; @@ -4550,32 +4610,38 @@ char *Inlink::serialize ( int32_t *retSize , *retSize = need; // copy the easy stuff char *p = buf; + char *pend = buf + need; memcpy ( p , (char *)this , getBaseSize() ); p += getBaseSize(); // then store the strings! int32_t *sizePtr = getFirstSizeParm(); // &size_qbuf; int32_t *sizeEnd = getLastSizeParm (); // &size_displayMetas; - char **strPtr = getFirstStrPtr (); // &ptr_qbuf; + int32_t *offPtr = getFirstOffPtr (); // &ptr_qbuf; for ( ; sizePtr <= sizeEnd ; ) { + if ( p > pend ) { char *xx=NULL;*xx=0; } // if we are NULL, we are a "bookmark", so // we alloc'd space for it, but don't copy into // the space until after this call toe serialize() - if ( ! *strPtr ) goto skip; + // MDW: we can't use NULL now because we are offsets and 0 is + // legit. 
because of the 64bit conversion. + // well if empty, *sizePtr will be 0... so we don't need this. + //if ( *offPtr == -1 ) goto skip; // sanity check -- cannot copy onto ourselves - if ( p > *strPtr && p < *strPtr + *sizePtr ) { + if ( p > m_buf+*offPtr && p < m_buf+*offPtr + *sizePtr ) { char *xx = NULL; *xx = 0; } // copy the string into the buffer - memcpy ( p , *strPtr , *sizePtr ); - skip: + memcpy ( p , m_buf + *offPtr , *sizePtr ); + //skip: // . make it point into the buffer now // . MDW: why? that is causing problems for the re-call in // Msg3a, it calls this twice with the same "m_r" - if ( makePtrsRefNewBuf ) *strPtr = p; + // . MDW: took out for 64bit + //if ( makePtrsRefNewBuf ) *offPtr = (p-buf); // advance our destination ptr p += *sizePtr; // advance both ptrs to next string sizePtr++; - strPtr++; + offPtr++; } return buf; } @@ -4589,15 +4655,15 @@ bool LinkInfo::print ( SafeBuf *sb , char *coll ) { int32_t count = 1; // loop through the link texts for ( Inlink *k = NULL; (k = getNextInlink(k)) ; count++ ) { - char *s = k->ptr_linkText; + char *s = k->getLinkText();//ptr_linkText; int32_t slen = k->size_linkText - 1; - char *d = k->ptr_surroundingText; + char *d = k->getSurroundingText();//ptr_surroundingText; int32_t dlen = k->size_surroundingText - 1; - char *r = k->ptr_rssItem; + char *r = k->getRSSItem();//ptr_rssItem; int32_t rlen = k->size_rssItem - 1; - char *g = k->ptr_gigabitQuery; + char *g = k->getGigabitQuery();//ptr_gigabitQuery; int32_t glen = k->size_gigabitQuery - 1; - char *c = k->ptr_categories; + char *c = k->getCategories();//ptr_categories; int32_t clen = k->size_categories - 1; if ( slen < 0 ) slen = 0; if ( dlen < 0 ) dlen = 0; @@ -4656,7 +4722,7 @@ bool LinkInfo::print ( SafeBuf *sb , char *coll ) { //(int32_t)k->m_sitePop, (int32_t)k->m_siteNumInlinks, //(int32_t)k->m_isAnomaly, - k->ptr_urlBuf, // the linker url + k->getUrl(),//ptr_urlBuf, // the linker url s, // buf, d, // buf2, buf3b, @@ -4996,7 +5062,7 @@ bool Links::set 
( bool useRelNoFollow , static bool s_flag = 1; if ( s_flag ) { s_flag = 0; - log(LOG_INFO, "build: Link len %"INT32" is int32_ter " + log(LOG_INFO, "build: Link len %"INT32" is longer " "than max of %"INT32". Link will not " "be added to spider queue or " "indexed for link: search.", @@ -6197,7 +6263,7 @@ bool Links::hash ( TermTable *table, version );// used for new session id stripping QUICKPOLL(niceness); // . the score depends on some factors: - // . NOTE: these are no int32_ter valid! (see score bitmap above) + // . NOTE: these are no longer valid! (see score bitmap above) // . 4 --> if link has different domain AND has link text // . 3 --> if link has same domain AND has link text // . 2 --> if link has different domain AND no link text diff --git a/Linkdb.h b/Linkdb.h index 10903157f..4295f1b25 100644 --- a/Linkdb.h +++ b/Linkdb.h @@ -833,12 +833,12 @@ class Inlink { // : public Msg { int32_t *getFirstSizeParm () { return &size_urlBuf; }; int32_t *getLastSizeParm () { return &size_rssItem; }; - char **getFirstStrPtr () { return &ptr_urlBuf; }; + int32_t *getFirstOffPtr () { return &off_urlBuf; }; int32_t getBaseSize () { return sizeof(Inlink);}; char *getStringBuf () { return m_buf; }; int32_t getBaseNumStrings() { - return (char **)&size_urlBuf - (char **)&ptr_urlBuf; }; + return (char **)&size_urlBuf - (char **)&off_urlBuf; }; // zero ourselves out void reset() ; @@ -865,7 +865,7 @@ class Inlink { // : public Msg { int32_t userBufSize , bool makePtrsRefNewBuf ) ; - int32_t updateStringPtrs ( char *buf ); + //int32_t updateStringPtrs ( char *buf ); // returns a ptr into a static buffer char *getLinkTextAsUtf8 ( int32_t *len = NULL ) ; @@ -949,6 +949,36 @@ class Inlink { // : public Msg { char m_hopcount ; char m_linkTextScoreWeight ; // 0-100% (was m_inlinkWeight) + char *getUrl ( ) { + if ( size_urlBuf == 0 ) return NULL; + return m_buf + off_urlBuf; + }; + char *getLinkText ( ) { + if ( size_linkText == 0 ) return NULL; + return m_buf + off_linkText; + 
}; + char *getSurroundingText ( ) { + if ( size_surroundingText == 0 ) return NULL; + return m_buf + off_surroundingText; + }; + char *getRSSItem ( ) { + if ( size_rssItem == 0 ) return NULL; + return m_buf + off_rssItem; + }; + char *getCategories ( ) { + if ( size_categories == 0 ) return NULL; + return m_buf + off_categories; + }; + char *getGigabitQuery ( ) { + if ( size_gigabitQuery == 0 ) return NULL; + return m_buf + off_gigabitQuery; + }; + char *getTemplateVector ( ) { + if ( size_templateVector == 0 ) return NULL; + return m_buf + off_templateVector; + }; + + // // add new non-strings right above this line // @@ -957,13 +987,13 @@ class Inlink { // : public Msg { // . no need to store vector for voting deduping in here because // that use MsgE's Msg20Replies directly // . this is just stuff we want in the title rec - char *ptr_urlBuf ; - char *ptr_linkText ; - char *ptr_surroundingText ; // neighborhoods + int32_t off_urlBuf ; + int32_t off_linkText ; + int32_t off_surroundingText ; // neighborhoods // . this is the rss item that links to us // . if calling Msg25::getLinkInfo() with getLinkerTitles set to // true then this is the title! - char *ptr_rssItem ; + int32_t off_rssItem ; // . zakbot and the turk categorize site roots, and kids inherit // the categories from their parent inlinkers // . we can't really use tagdb cuz that operates on subdirectories @@ -971,20 +1001,20 @@ class Inlink { // : public Msg { // stories are not proper subdirectories...) // . so inherit the category from our inlinkers. "sports", "world", ... // . comma-separated (in ascii) - char *ptr_categories ; + int32_t off_categories ; // . augments our own gigabits vector, used for finding related docs - // . used aint32_t with the template vector for deduping pgs at index time + // . used along with the template vector for deduping pgs at index time // . now we used for finding similar docs AND categorizing // . comma-separated // . each gigabit has a count in []'s. 
score in body x1, title x5, // and inlink text x5. i.e. "News[10],blue devils[5],... // . always in UTF-8 - char *ptr_gigabitQuery ; + int32_t off_gigabitQuery ; // . the html tag vector. // . used for deduping voters (anti-spam tech) - // . used aint32_t with the gigabit vector for deduping pgs at index time + // . used along with the gigabit vector for deduping pgs at index time // . now we used for finding similar docs and for categorizing (spam) - char *ptr_templateVector ; + int32_t off_templateVector ; // // add new strings right above this line diff --git a/Log.cpp b/Log.cpp index 6480d5fbd..f83ee153f 100644 --- a/Log.cpp +++ b/Log.cpp @@ -489,7 +489,7 @@ bool Log::logLater ( int64_t now, int32_t type, char *format, va_list ap ) { return false; } -// once we're no int32_ter in a sig handler this is called by Loop.cpp +// once we're no longer in a sig handler this is called by Loop.cpp // if g_log.needsPrinting() is true void Log::printBuf ( ) { // not in sig handler diff --git a/Loop.cpp b/Loop.cpp index 094298e42..5db8369d6 100644 --- a/Loop.cpp +++ b/Loop.cpp @@ -502,7 +502,7 @@ void Loop::callCallbacks_ass ( bool forReading , int fd , int64_t now , log(LOG_DEBUG,"loop: enter fd callback fd=%"INT32" " "nice=%"INT32"",(int32_t)fd,(int32_t)s->m_niceness); - // sanity check. -1 no int32_ter supported + // sanity check. -1 no longer supported if ( s->m_niceness < 0 ) { char *xx=NULL;*xx=0; } // save it @@ -650,7 +650,7 @@ void sigHandlerQueue_r ( int x , siginfo_t *info , void *v ) { // wtf is this? g_numSigOthers++; - // the stuff below should no int32_ter be used since we + // the stuff below should no longer be used since we // do not use F_SETSIG now return; @@ -1384,7 +1384,7 @@ bool Loop::runLoop ( ) { //logf(LOG_DEBUG,"loop: sigNum=%"INT32" signo=%"INT32" alrm=%"INT32"", // (int32_t)sigNum,info.si_signo,(int32_t)SIGVTALRM); - // no int32_ter in a wait state... + // no longer in a wait state... 
//g_inWaitState = false; @@ -1806,7 +1806,7 @@ void Loop::doPoll ( ) { else v.tv_usec = QUICKPOLL_INTERVAL * 1000; // set descriptors we should watch - // MDW: no int32_ter necessary since we have s_selectMaskRead, etc. + // MDW: no longer necessary since we have s_selectMaskRead, etc. // for ( int32_t i = 0 ; i < MAX_NUM_FDS ; i++ ) { // if ( m_readSlots [i] ) { // FD_SET ( i , &readfds ); diff --git a/Makefile b/Makefile index bd69edb49..153a87758 100644 --- a/Makefile +++ b/Makefile @@ -331,11 +331,11 @@ RdbMap.o: RdbBuckets.o: $(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp -Linkdb.o: - $(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp +#Linkdb.o: +# $(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp -XmlDoc.o: - $(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp +#XmlDoc.o: +# $(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp # final gigabit generation in here: Msg40.o: diff --git a/Matches.cpp b/Matches.cpp index e57783bf2..9f137533b 100644 --- a/Matches.cpp +++ b/Matches.cpp @@ -412,7 +412,7 @@ bool Matches::set ( XmlDoc *xd , mf_t flags = MF_LINK; //if ( k->m_isAnomaly ) flags = MF_ALINK; // add it in - if ( ! addMatches ( k->ptr_linkText , + if ( ! addMatches ( k->getLinkText() , k->size_linkText - 1 , flags , xd->m_docId , @@ -426,7 +426,7 @@ bool Matches::set ( XmlDoc *xd , flags = MF_HOOD; //if ( k->m_isAnomaly ) flags = MF_AHOOD; // add it in - if ( ! addMatches ( k->ptr_surroundingText , + if ( ! addMatches ( k->getSurroundingText() , k->size_surroundingText - 1 , flags , xd->m_docId , @@ -1143,7 +1143,7 @@ bool Matches::addMatches ( Words *words , qw->m_wordSign == '\0' && (nq && nq->m_wordId) && // no field names can follow //(qw->m_isQueryStopWord || qw->m_isStopWord ) ) { - // we no int32_ter consider single alnum chars to be + // we no longer consider single alnum chars to be // query stop words as stated in StopWords.cpp to fix // the query 'j. w. 
eagan' qw->m_isQueryStopWord ) { diff --git a/Mem.cpp b/Mem.cpp index be4fbe8ff..6bd162622 100644 --- a/Mem.cpp +++ b/Mem.cpp @@ -13,7 +13,7 @@ #include "Pages.h" // put me back -//#define EFENCE +#define EFENCE //#define EFENCE_SIZE 50000 // uncomment this for EFENCE to do underflow checks instead of the diff --git a/MemPoolTree.cpp b/MemPoolTree.cpp index d6e476355..07533ff9d 100644 --- a/MemPoolTree.cpp +++ b/MemPoolTree.cpp @@ -103,7 +103,7 @@ MemNode *MemPoolTree::getNextNode ( MemNode *i ) { if ( p->m_left == i ) return p; // otherwise keep getting the parent until it has a bigger key // or until we're the LEFT kid of the parent. that's better - // cuz comparing keys takes int32_ter. loop is 6 cycles per iteration. + // cuz comparing keys takes longer. loop is 6 cycles per iteration. while ( p && p->m_key < i->m_key ) p = p->m_parent; // p will be NULL if none are left return p; @@ -126,7 +126,7 @@ MemNode *MemPoolTree::getPrevNode ( MemNode *i ) { if ( p->m_right == i ) return p; // keep getting the parent until it has a bigger key // or until we're the RIGHT kid of the parent. that's better - // cuz comparing keys takes int32_ter. loop is 6 cycles per iteration. + // cuz comparing keys takes longer. loop is 6 cycles per iteration. while ( p && p->m_key > i->m_key ) p = p->m_parent; // p will be NULL if none are left return p; diff --git a/Msg0.cpp b/Msg0.cpp index 0bdc05814..660eb0b8e 100644 --- a/Msg0.cpp +++ b/Msg0.cpp @@ -72,7 +72,7 @@ void Msg0::reset ( ) { mfree(m_mcasts,sizeof(Multicast),"msg0mcast"); m_mcasts = NULL; } - // no int32_ter do this because we call reset after the msg5 completes + // no longer do this because we call reset after the msg5 completes // and it was destroying our handylist... so just call freelist // in the destructor now //m_handyList.freeList(); @@ -158,11 +158,11 @@ bool Msg0::getList ( int64_t hostId , // host to ask (-1 if none) // . reset hostid if it is dead // . 
this is causing UOR queries to take forever when we have a dead if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1; - // no int32_ter accept negative minrecsize + // no longer accept negative minrecsize if ( minRecSizes < 0 ) { g_errno = EBADENGINEER; log(LOG_LOGIC, - "net: msg0: Negative minRecSizes no int32_ter supported."); + "net: msg0: Negative minRecSizes no longer supported."); char *xx=NULL;*xx=0; return true; } @@ -1176,7 +1176,7 @@ void handleRequest0 ( UdpSlot *slot , int32_t netnice ) { log(LOG_LOGIC,"net: msg0: check but don't add... rdbid=%"INT32".", (int32_t)rdbId); // . if this request came over on the high priority udp server - // make sure the priority gets passed aint32_t + // make sure the priority gets passed along // . return if this blocks // . we'll call sendReply later if ( ! st0->m_msg5.getList ( rdbId , diff --git a/Msg13.cpp b/Msg13.cpp index bf279a185..8ff5424ea 100644 --- a/Msg13.cpp +++ b/Msg13.cpp @@ -1908,7 +1908,7 @@ void gotHttpReply2 ( void *state , // send reply us->sendReply_ass ( copy,replySize,copy,copyAllocSize, slot ); // now final udp slot will free the reply, so tcp server - // no int32_ter has to. set this tcp buf to null then. + // no longer has to. set this tcp buf to null then. if ( ts && ts->m_readBuf == reply && count == 0 ) ts->m_readBuf = NULL; } diff --git a/Msg17.cpp b/Msg17.cpp index 4740f5369..241c5b319 100644 --- a/Msg17.cpp +++ b/Msg17.cpp @@ -31,7 +31,7 @@ int32_t *g_genericCacheMaxAge[MAX_GENERIC_CACHES] = { //&s_oneMonth // seoresultscache //&g_conf.m_siteLinkInfoMaxCacheAge , // Msg50.cpp now has a dynamic max cache age which is higher - // for higher qualities, since those take int32_ter to recompute and + // for higher qualities, since those take longer to recompute and // are usually much more stable... 
//&s_noMax // &g_conf.m_siteQualityMaxCacheAge }; diff --git a/Msg2.cpp b/Msg2.cpp index 04ac26bd8..05dfe015e 100644 --- a/Msg2.cpp +++ b/Msg2.cpp @@ -314,7 +314,7 @@ bool Msg2::getLists ( ) { // do not allow it to be re-used since now posdb // calls Msg2::getListGroup() //m_slotNum [i] = i; - // that is no int32_ter the case!! we do a merge now... i + // that is no longer the case!! we do a merge now... i // think we decided it was easier to deal with shit n posdb.cpp // but i don't know how much this matters really //m_avail [i] = false; diff --git a/Msg20.cpp b/Msg20.cpp index d0d5c3a83..6fda9e3f6 100644 --- a/Msg20.cpp +++ b/Msg20.cpp @@ -274,7 +274,7 @@ void gotReplyWrapper20b ( void *state , UdpSlot *slot ) { void Msg20::gotReply ( UdpSlot *slot ) { // we got the reply m_gotReply = true; - // no int32_ter in progress, we got a reply + // no longer in progress, we got a reply m_inProgress = false; // sanity check if ( m_r ) { char *xx = NULL; *xx = 0; } diff --git a/Msg20.h b/Msg20.h index 6e22677f2..862fc8176 100644 --- a/Msg20.h +++ b/Msg20.h @@ -502,7 +502,7 @@ class Msg20Reply { // . if Msg20Request::m_forTurk is true then the ptr_turkForm will // be a little input form that lists every line in the title and - // description of the event aint32_t with controls that allow the turk + // description of the event along with controls that allow the turk // to turn descriptions on/off and pick different titles. // . when they submit their changes then it should basically add // the turk tag hashes of each line to tagdb, but only if changed diff --git a/Msg24.cpp b/Msg24.cpp index 078e21e49..50aaa27fa 100644 --- a/Msg24.cpp +++ b/Msg24.cpp @@ -1344,10 +1344,10 @@ bool getTopics ( State24 *st , // scan down to this score, but not below //int32_t minScore = (scores[i] * 75) / 100 ; int32_t minScore = scores[i] - 25; - // if we get replaced by a int32_ter guy, remember him + // if we get replaced by a longer guy, remember him int32_t replacerj = -1; - // . 
a int32_ter term than encapsulates us can eliminate us - // . or, if we're the int32_ter, we eliminate the int16_ter + // . a longer term than encapsulates us can eliminate us + // . or, if we're the longer, we eliminate the int16_ter for ( int32_t j = i + 1 ; j < np ; j++ ) { // skip if nuked already if ( lens[j] == 0 ) continue; @@ -1356,7 +1356,7 @@ bool getTopics ( State24 *st , char c2 = ptrs2[j][lens2[j]]; ptrs2[i][lens2[i]] = '\0'; ptrs2[j][lens2[j]] = '\0'; - // if we are the int16_ter, and int32_ter contains us + // if we are the int16_ter, and longer contains us // then it nukes us... unless his score is too low if ( lens2[i] < lens2[j] ) { // if int16_ter is contained @@ -1378,7 +1378,7 @@ bool getTopics ( State24 *st , // un-null term both ptrs2[i][lens2[i]] = c1; ptrs2[j][lens2[j]] = c2; - // even if he's int32_ter, if his score is too + // even if he's longer, if his score is too // low then he cannot nuke us if ( scores[j] < minScore ) continue; // if we were NOT contained by someone below... @@ -1392,7 +1392,7 @@ bool getTopics ( State24 *st , // see if we can nuke other guys at least continue; } - // . otherwise, we are the int32_ter + // . otherwise, we are the longer // . we can nuke any int16_ter below us, all scores char *s; if (isunis[i] == 0 && isunis[j] == 0) @@ -1426,7 +1426,7 @@ bool getTopics ( State24 *st , //rflag = 1; } - // if we got replaced by a int32_ter guy, he replaces us + // if we got replaced by a longer guy, he replaces us // and takes our score if ( replacerj >= 0 ) { ptrs [i] = ptrs [replacerj]; @@ -1451,11 +1451,11 @@ bool getTopics ( State24 *st , // (do not count common words) // . #3 or when all non-query, non-common terms match... pick the - // int32_ter and remove the common words, but keep query words. + // longer and remove the common words, but keep query words. 
// again2: //char rflag = 0; - // if two terms are close in score, and one is a int32_ter version + // if two terms are close in score, and one is a longer version // of the other, choose it and remove the int16_ter for ( int32_t i = 0 ; i < np - 1 ; i++ ) { // skip if nuked already @@ -1463,10 +1463,10 @@ bool getTopics ( State24 *st , // scan down to this score, but not below //int32_t minScore = (scores[i] * 75) / 100 ; int32_t minScore = scores[i] - 15; - // if we get replaced by a int32_ter guy, remember him + // if we get replaced by a longer guy, remember him int32_t replacerj = -1; - // . a int32_ter term than encapsulates us can eliminate us - // . or, if we're the int32_ter, we eliminate the int16_ter + // . a longer term than encapsulates us can eliminate us + // . or, if we're the longer, we eliminate the int16_ter for ( int32_t j = i + 1 ; j < np ; j++ ) { // skip if nuked already if ( lens[j] == 0 ) continue; @@ -1475,7 +1475,7 @@ bool getTopics ( State24 *st , char c2 = ptrs[j][lens[j]]; ptrs[i][lens[i]] = '\0'; ptrs[j][lens[j]] = '\0'; - // if we are the int16_ter, and int32_ter contains us + // if we are the int16_ter, and longer contains us // then it nukes us... unless his score is too low if ( lens[i] < lens[j] ) { // if int16_ter is contained @@ -1497,7 +1497,7 @@ bool getTopics ( State24 *st , // un-null term both ptrs[i][lens[i]] = c1; ptrs[j][lens[j]] = c2; - // even if he's int32_ter, if his score is too + // even if he's longer, if his score is too // low then he cannot nuke us if ( scores[j] < minScore ) continue; // if we were NOT contained by someone below... @@ -1512,7 +1512,7 @@ bool getTopics ( State24 *st , // see if we can nuke other guys at least continue; } - // . otherwise, we are the int32_ter + // . otherwise, we are the longer // . 
we can nuke any int16_ter below us, all scores char *s; if (isunis[i] == 0 && isunis[j] == 0) @@ -1549,7 +1549,7 @@ bool getTopics ( State24 *st , //rflag = 1; } - // if we got replaced by a int32_ter guy, he replaces us + // if we got replaced by a longer guy, he replaces us // and takes our score if ( replacerj >= 0 ) { ptrs [i] = ptrs [replacerj]; @@ -1624,8 +1624,8 @@ bool getTopics ( State24 *st , } } QUICKPOLL(niceness); - // now after int32_ter topics replaced the int16_ter topics which they - // contained, remove the int32_ter topics if they have too many words + // now after longer topics replaced the int16_ter topics which they + // contained, remove the longer topics if they have too many words // remove common phrases for ( int32_t i = 0 ; i < np ; i++ ) { // skip if nuked already @@ -2494,7 +2494,7 @@ void hashExcerpt ( Query *q , uint64_t *qids , int32_t *qpops, int32_t nqi, // skip if too far away from all query terms if ( score <= 0 ) continue; - // no int32_ter count closeness to query terms for score, + // no longer count closeness to query terms for score, // just use # times topic is in doc(s) and popularity //score = 1000; diff --git a/Msg3.cpp b/Msg3.cpp index e3df2211a..d8530c6e5 100644 --- a/Msg3.cpp +++ b/Msg3.cpp @@ -145,7 +145,7 @@ bool Msg3::readList ( char rdbId , // . store them all // . what if we merged one of these files (or are merging)??? // . then sync class should not discard syncpoints until no - // int32_ter syncing and we'll know about it + // longer syncing and we'll know about it // . this should compensate for merges by including any files // that are merging a file in m_fileNums m_numFileNums = g_sync.getFileNums ( m_rdbId , diff --git a/Msg39.cpp b/Msg39.cpp index f216e2094..72981f539 100644 --- a/Msg39.cpp +++ b/Msg39.cpp @@ -90,7 +90,7 @@ void sendReply ( UdpSlot *slot , Msg39 *msg39 , char *reply , int32_t replyLen , // sanity if ( hadError && ! g_errno ) { char *xx=NULL;*xx=0; } - // no int32_ter in use. 
msg39 will be NULL if ENOMEM or something + // no longer in use. msg39 will be NULL if ENOMEM or something if ( msg39 ) msg39->m_inUse = false; // . if we enter from a local call and not from handling a udp slot @@ -1100,7 +1100,7 @@ bool Msg39::addedLists ( ) { // . save some memory,free m_topDocIdPtrs2,m_topScores2,m_topExplicits2 // . the m_topTree should have been filled from the call to - // IndexTable2::fillTopDocIds() and it no int32_ter has ptrs to the + // IndexTable2::fillTopDocIds() and it no longer has ptrs to the // docIds, but has the docIds themselves //m_posdbTable.freeMem(); @@ -1293,7 +1293,7 @@ bool Msg39::gotClusterRecs ( ) { void Msg39::estimateHitsAndSendReply ( ) { - // no int32_ter in use + // no longer in use m_inUse = false; // now this for the query loop on the QueryLogEntries. diff --git a/Msg3a.cpp b/Msg3a.cpp index 581e4163d..8378faf36 100644 --- a/Msg3a.cpp +++ b/Msg3a.cpp @@ -433,7 +433,7 @@ bool Msg3a::gotCacheReply ( ) { ///////////////////////////// // . set timeout based on docids requested! - // . the more docs requested the int32_ter it will take to get + // . the more docs requested the longer it will take to get int32_t timeout = (50 * m_docsToGet) / 1000; // at least 20 seconds if ( timeout < 20 ) timeout = 20; diff --git a/Msg4.cpp b/Msg4.cpp index fe402b763..056f490ea 100644 --- a/Msg4.cpp +++ b/Msg4.cpp @@ -509,7 +509,7 @@ bool Msg4::addMetaList ( char *metaList , char rdbId , // Rebalance.cpp needs to add negative keys to // remove foreign records from where they no - // int32_ter beint32_t because of a new hosts.conf file. + // longer belong because of a new hosts.conf file. // This will be -1 if not be overridden. int32_t shardOverride ) { @@ -1083,7 +1083,7 @@ void storeLineWaiters ( ) { // empty? make tail NULL too then if ( ! s_msg4Head ) s_msg4Tail = NULL; // . if his callback was NULL, then was loaded in loadAddsInProgress() - // . we no int32_ter do that so callback should never be null now + // . 
we no longer do that so callback should never be null now if ( ! msg4->m_callback ) { char *xx=NULL;*xx=0; } // log this now i guess. seems to happen a lot if not using threads if ( g_conf.m_useThreads ) diff --git a/Msg40.cpp b/Msg40.cpp index a9fa75f78..4cfb91d9c 100644 --- a/Msg40.cpp +++ b/Msg40.cpp @@ -278,7 +278,7 @@ bool Msg40::getResults ( SearchInput *si , m_docsToGetVisible = get; // if site clustering is on, get more than we should in anticipation // that some docIds will be clustered. - // MDW: we no int32_ter do this here for full splits because Msg39 does + // MDW: we no longer do this here for full splits because Msg39 does // clustering on its end now! //if ( m_si->m_doSiteClustering ) get = (get*150LL)/100LL; //if ( m_si->m_doSiteClustering && ! g_conf.m_fullSplit ) @@ -349,7 +349,7 @@ bool Msg40::getResults ( SearchInput *si , char *request = m_si->serializeForMsg40 ( &requestSize ); if ( ! request ) return true; // . set timeout based on docids requested! - // . the more docs requested the int32_ter it will take to get + // . the more docs requested the longer it will take to get // . use 50ms per docid requested int32_t timeout = (50 * m_docsToGet) / 1000; // always wait at least 20 seconds @@ -1823,11 +1823,11 @@ bool Msg40::gotSummary ( ) { if ( ! xx ) break; if ( ! xx->m_r ) break; } - // if not all have come back yet, wait int32_ter... + // if not all have come back yet, wait longer... if ( k < m_needFirstReplies ) break; // now make the csv header and print it printCSVHeaderRow ( sb ); - // and no int32_ter need to do this logic + // and no longer need to do this logic m_needFirstReplies = 0; } @@ -2595,7 +2595,7 @@ bool Msg40::gotSummary ( ) { m_docsToGet = get; // let's do it all from the top! 
return getDocIds ( true ) ; - // NOTE: we no int32_ter do msg3a re-calls for simplicity + // NOTE: we no longer do msg3a re-calls for simplicity // so all re-calling is done from right here only // MDW: hack it in msg3a too //m_msg3a.m_docsToGet = get; @@ -2864,7 +2864,7 @@ bool Msg40::gotSummary ( ) { else if ( v++ < m_si->m_firstResultNum ) skip = true; // . if skipping a valid msg20, give it a chance to destruct - // . no int32_ter do this because CR_SUMMARY_MERGED needs to keep + // . no longer do this because CR_SUMMARY_MERGED needs to keep // the msg20 reply around so PageResults.cpp can merge the // event descriptions //if ( skip && m_msg20[i] ) m_msg20[i]->destructor(); @@ -2992,8 +2992,8 @@ void Msg40::uncluster ( int32_t m ) { // UNHIDE IT m_msg3a.m_clusterLevels[k] = CR_OK; // we must UN-dedup anything after us because now that we are - // no int32_ter clustered, we could dedup a result below us, - // which deduped another result, which is now no int32_ter deduped + // no longer clustered, we could dedup a result below us, + // which deduped another result, which is now no longer deduped // because its deduped was this unclustered results dup! ;) for ( int32_t i = k+1 ; i < m_msg3a.m_numDocIds ; i++ ) { // get current cluster level @@ -3585,10 +3585,10 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { if ( gi->m_termLen == 0 ) continue; // scan down to this score, but not below //int32_t minScore = scores[i] - 25; - // if we get replaced by a int32_ter guy, remember him + // if we get replaced by a longer guy, remember him //int32_t replacerj = -1; - // . a int32_ter term than encapsulates us can eliminate us - // . or, if we're the int32_ter, we eliminate the int16_ter + // . a longer term than encapsulates us can eliminate us + // . 
or, if we're the longer, we eliminate the int16_ter for ( int32_t j = i + 1 ; j < numPtrs ; j++ ) { // get it Gigabit *gj = ptrs[j]; @@ -3600,11 +3600,11 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { // if page count not the same let it coexist if ( gi->m_numPages != gj->m_numPages ) continue; - // if we are the int16_ter, nuke the int32_ter guy + // if we are the int16_ter, nuke the longer guy // that contains us because we have a higher score // since ptrs are sorted by score then length. if ( gi->m_termLen < gj->m_termLen ) { - // just null term the int32_ter + // just null term the longer char c1 = gi->m_term[gi->m_termLen]; gi->m_term[gi->m_termLen] = '\0'; char c2 = gj->m_term[gj->m_termLen]; @@ -3612,10 +3612,10 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { // if int16_ter is contained char *s; s = gb_strcasestr (gj->m_term, gi->m_term); - // un-null term int32_ter + // un-null term longer gi->m_term[gi->m_termLen] = c1; gj->m_term[gj->m_termLen] = c2; - // even if he's int32_ter, if his score is too + // even if he's longer, if his score is too // low then he cannot nuke us // MDW: try doing page count! //if ( scores[j] < minScore ) continue; @@ -3654,12 +3654,12 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { } else { - // just null term the int32_ter + // just null term the longer char c1 = gi->m_term[gi->m_termLen]; gi->m_term[gi->m_termLen] = '\0'; char c2 = gj->m_term[gj->m_termLen]; gj->m_term[gj->m_termLen] = '\0'; - // . otherwise, we are the int32_ter + // . otherwise, we are the longer // . 
we can nuke any int16_ter below us, all // scores char *s; @@ -3695,7 +3695,7 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { } /* - // if we got replaced by a int32_ter guy, he replaces us + // if we got replaced by a longer guy, he replaces us // and takes our score if ( replacerj >= 0 ) { // gigabit #i is now gigabit #j @@ -3710,7 +3710,7 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { msg.safePrintf("\"[%.0f]",gj->m_gbscore); logf(LOG_DEBUG,msg.getBufStart()); - // make us int32_ter then! + // make us longer then! gi->m_termLen = gj->m_termLen; // and nuke him gj->m_termLen = 0; @@ -3745,8 +3745,8 @@ bool Msg40::computeGigabits( TopicGroup *tg ) { gi->m_termLen = 0; } - // now after int32_ter topics replaced the int16_ter topics which they - // contained, remove the int32_ter topics if they have too many words + // now after longer topics replaced the int16_ter topics which they + // contained, remove the longer topics if they have too many words // remove common phrases for ( int32_t i = 0 ; i < numPtrs ; i++ ) { // get it @@ -4527,7 +4527,7 @@ void hashExcerpt ( Query *q , // terms... wi->m_proxScore = proxScore; - // no int32_ter count closeness to query terms for score, + // no longer count closeness to query terms for score, // just use # times topic is in doc(s) and popularity //score = 1000; @@ -5249,7 +5249,8 @@ bool Msg40::computeFastFacts ( ) { // HashTableX gbitTable; char gbuf[30000]; - if ( ! gbitTable.set(8,4,1024,gbuf,30000,false,0,"gbtbl") ) + if ( ! gbitTable.set(8,sizeof(Gigabit *),1024,gbuf,30000, + false,0,"gbtbl") ) return false; int32_t numGigabits = m_gigabitBuf.length()/sizeof(Gigabit); Gigabit *gigabits = (Gigabit *)m_gigabitBuf.getBufStart(); @@ -5279,7 +5280,8 @@ bool Msg40::computeFastFacts ( ) { Query *q = &m_si->m_q; HashTableX queryTable; char qbuf[10000]; - if ( ! queryTable.set(8,4,512,qbuf,10000,false,0,"qrttbl") ) + if ( ! 
queryTable.set(8,sizeof(QueryTerm *),512,qbuf, + 10000,false,0,"qrttbl") ) return false; for ( int32_t i = 0 ; i < q->m_numTerms ; i++ ) { // int16_tcut diff --git a/Msg40.h b/Msg40.h index fe2d2458c..ea42c9fdf 100644 --- a/Msg40.h +++ b/Msg40.h @@ -102,7 +102,7 @@ class Msg40 { // . set m_numDocIds to number of docIds in m_docIds // . a useCache of -1 means default, 1 means use the cache,0 means dont // . "displayMetas" is a space separated list of meta tag names - // that you want the content for aint32_t with the summary + // that you want the content for along with the summary bool getResults ( class SearchInput *si , bool forward , void *state , diff --git a/Msg5.cpp b/Msg5.cpp index 7ab178270..e4fbef068 100644 --- a/Msg5.cpp +++ b/Msg5.cpp @@ -154,7 +154,7 @@ bool Msg5::getList ( char rdbId , g_errno = ENOCOLLREC; return true; } - // sanity check. we no int32_ter have record caches! + // sanity check. we no longer have record caches! // now we do again for posdb gbdocid:xxx| restricted queries //if ( addToCache || maxCacheAge ) {char *xx=NULL;*xx=0; } // assume no error @@ -180,7 +180,7 @@ bool Msg5::getList ( char rdbId , //if ( startKey > endKey ) return true; if ( KEYCMP((char *)startKey,(char *)endKey,m_ks)>0 ) return true; // log("Msg5::readList: startKey > endKey warning"); - // we no int32_ter allow negative minRecSizes + // we no longer allow negative minRecSizes if ( minRecSizes < 0 ) { log(LOG_LOGIC,"net: msg5: MinRecSizes < 0, using 1."); minRecSizes = 1; @@ -978,7 +978,7 @@ void gotListWrapper ( void *state ) { if ( THIS->m_calledCallback ) { char *xx=NULL;*xx=0; } // set it now THIS->m_calledCallback = 1; - // we are no int32_ter waiting for the list + // we are no longer waiting for the list THIS->m_waitingForList = false; // when completely done call the callback THIS->m_callback ( THIS->m_state , THIS->m_list , THIS ); @@ -995,7 +995,7 @@ static void *mergeListsWrapper_r ( void *state , ThreadEntry *t ) ; // . 
sets g_errno on error bool Msg5::gotList ( ) { - // we are no int32_ter waiting for the list + // we are no longer waiting for the list //m_waitingForList = false; // debug msg @@ -1265,7 +1265,7 @@ bool Msg5::gotList2 ( ) { // m_diskList.getListSize()); // ensure both lists are legit // there may be negative keys in the tree - // diskList may now also have negative recs since Msg3 no int32_ter + // diskList may now also have negative recs since Msg3 no longer // removes them for fears of delayed positive keys not finding their // negative key because it was merged out by RdbMerge for ( int32_t i = 0 ; i < m_numListPtrs ; i++ ) @@ -1522,7 +1522,7 @@ void threadDoneWrapper ( void *state , ThreadEntry *t ) { if ( THIS->needsRecall() && ! THIS->readList() ) return; // sanity check if ( THIS->m_calledCallback ) { char *xx=NULL;*xx=0; } - // we are no int32_ter waiting for the list + // we are no longer waiting for the list THIS->m_waitingForList = false; // set it now THIS->m_calledCallback = 3; @@ -2113,7 +2113,7 @@ void gotRemoteListWrapper( void *state ) { // , RdbList *list ) { if ( ! 
THIS->gotRemoteList() ) return; // sanity check if ( THIS->m_calledCallback ) { char *xx=NULL;*xx=0; } - // we are no int32_ter waiting for the list + // we are no longer waiting for the list THIS->m_waitingForList = false; // set it now THIS->m_calledCallback = 4; diff --git a/Msg51.cpp b/Msg51.cpp index c08787f58..bb1dcfef3 100644 --- a/Msg51.cpp +++ b/Msg51.cpp @@ -201,7 +201,7 @@ bool Msg51::sendRequests ( int32_t k ) { // sanity check if ( crecSize != sizeof(key_t) ) { char *xx = NULL; *xx = 0; } m_clusterRecs[m_nexti] = *(key_t *)crecPtr; - // it is no int32_ter CR_UNINIT, we got the rec now + // it is no longer CR_UNINIT, we got the rec now m_clusterLevels[m_nexti] = CR_GOT_REC; // debug msg //logf(LOG_DEBUG,"query: msg51 getRec k.n0=%"UINT64" rec.n0=%"UINT64"", diff --git a/Msge0.cpp b/Msge0.cpp index 4addf90e4..c908d07cb 100644 --- a/Msge0.cpp +++ b/Msge0.cpp @@ -74,8 +74,8 @@ bool Msge0::getTagRecs ( char **urlPtrs , // . include an extra 4 bytes for each one to hold possible errno int32_t need = 4 + // error - 4 + // tag ptr - 4 ; // slab ptr + sizeof(TagRec *) + // tag ptr + sizeof(char *) ; // slab ptr // one per url need *= numUrls; // allocate the buffer to hold all the info we gather @@ -87,8 +87,8 @@ bool Msge0::getTagRecs ( char **urlPtrs , // set the ptrs! char *p = m_buf; m_tagRecErrors = (int32_t *)p ; p += numUrls * 4; - m_tagRecPtrs = (TagRec **)p ; p += numUrls * 4; - m_slab = (char **)p ; p += numUrls * 4; + m_tagRecPtrs = (TagRec **)p ; p += numUrls * sizeof(TagRec *); + m_slab = (char **)p ; p += numUrls * sizeof(char *); // initialize m_numRequests = 0; m_numReplies = 0; diff --git a/Msge1.cpp b/Msge1.cpp index c98ecd092..880e58aeb 100644 --- a/Msge1.cpp +++ b/Msge1.cpp @@ -199,7 +199,7 @@ bool Msge1::launchRequests ( int32_t starti ) { if ( host && is_digit(host[0]) ) ip = atoip ( host , hlen ); // if legit this is non-zero if ( ip ) { - // what is this? i no int32_ter have this bug really - i fixed + // what is this? 
i no longer have this bug really - i fixed // it - but it did core here probably from a bad dns reply! // so take this out... //if ( ip == 3 ) { char *xx=NULL;*xx=0; } diff --git a/Multicast.cpp b/Multicast.cpp index cb6409c5c..bf7eebd7a 100644 --- a/Multicast.cpp +++ b/Multicast.cpp @@ -277,7 +277,7 @@ void Multicast::sendToGroup ( ) { // . timeout is just the time remaining for the whole groupcast // int32_t timeout = m_startTime + m_totalTimeout - getTime(); // . since we now must get non-error replies from ALL hosts - // in the group we no int32_ter have a "totalTimeout" per se + // in the group we no longer have a "totalTimeout" per se // reset the g_errno for host #i m_errnos [i] = 0; // if niceness is 0, use the higher priority udpServer @@ -975,7 +975,7 @@ bool Multicast::sendToHost ( int32_t i ) { log("net: Had error sending msgtype 0x%hhx to host " "#%"INT32": %s. Not retrying.", m_msgType,h->m_hostId,mstrerror(g_errno)); - // i've seen ENOUDPSLOTS available msg here aint32_t with oom + // i've seen ENOUDPSLOTS available msg here along with oom // condition... //char *xx=NULL;*xx=0; return false; @@ -1162,7 +1162,7 @@ void sleepWrapper1 ( int bogusfd , void *state ) { break; // these tagdb lookups are usually lickety split, should all be in mem case 0x08: if ( elapsed < 10 ) return; break; - // this no int32_ter exists! it uses msg0 + // this no longer exists! 
it uses msg0 //case 0x8a: if ( elapsed < 200 ) return; break; case 0x8b: if ( elapsed < 10 ) return; break; // don't relaunch anything else unless over 8 secs @@ -1315,7 +1315,7 @@ void Multicast::gotReply1 ( UdpSlot *slot ) { // set m_errnos[i], if any if ( g_errno ) m_errnos[i] = g_errno; - // mark it as no int32_ter in progress + // mark it as no longer in progress m_inProgress[i] = 0; // if he was marked as dead on the secondary cluster, mark him as up @@ -1416,7 +1416,7 @@ void Multicast::gotReply1 ( UdpSlot *slot ) { // or a notfound on the external/secondary cluster if ( g_errno == ENOTFOUND && m_hostdb == &g_hostdb2 ) sendToTwin = false; - // no int32_ter do this for titledb, too common since msg4 + // no longer do this for titledb, too common since msg4 // cached stuff can make us slightly out of sync //if ( g_errno == ENOTFOUND ) // sendToTwin = false; @@ -1583,7 +1583,7 @@ void Multicast::destroySlotsInProgress ( UdpSlot *slot ) { // . if we re-route then we span new msg34 requests, and if we get // back an original reply we need to take out those msg34 requests // because if they get a reply they may try to access a Multicast - // class that no int32_ter exists + // class that no longer exists //if ( m_doDiskLoadBalancing ) m_msg34.destroySlotsInProgress ( ); // do a loop over all hosts in the group for (int32_t i = 0 ; i < m_numHosts ; i++ ) { @@ -1610,7 +1610,7 @@ void Multicast::destroySlotsInProgress ( UdpSlot *slot ) { //int64_t lastSendTime = m_slots[i]->m_lastSendTime; //int64_t now = gettimeofdayInMilliseconds() ; //int64_t tripTime = now - lastSendTime; - // . we no int32_ter stamp hosts here, leave that up to + // . 
we no longer stamp hosts here, leave that up to // Hostdb::pingHost() // tripTime is always in milliseconds //m_hostdb->stampHost ( hostId , tripTime , true/*timedOut?*/); @@ -1625,7 +1625,7 @@ void Multicast::destroySlotsInProgress ( UdpSlot *slot ) { m_slots[i]->m_readBuf = NULL; // destroy this slot that's in progress us->destroySlot ( m_slots[i] ); - // do not re-destroy. consider no int32_ter in progress. + // do not re-destroy. consider no longer in progress. m_inProgress[i] = 0; } } diff --git a/PageBasic.cpp b/PageBasic.cpp index cddce2015..6c9b02682 100644 --- a/PageBasic.cpp +++ b/PageBasic.cpp @@ -121,7 +121,13 @@ bool updateSiteListBuf ( collnum_t collnum , char *op = cr->m_siteListBuf.getBufStart(); // scan and hash each line in it - for ( ; *op ; op++ ) { + for ( ; ; ) { + // done? + if ( ! *op ) break; + // skip spaces + if ( is_wspace_a(*op) ) op++; + // done? + if ( ! *op ) break; // get end char *s = op; // skip to end of line marker @@ -1560,7 +1566,8 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) { char tmp3[64]; struct tm *timeStruct; - timeStruct = localtime((time_t *)&cr->m_diffbotCrawlStartTime); + time_t tt = (time_t)cr->m_diffbotCrawlStartTime; + timeStruct = localtime(&tt); // Jan 01 1970 at 10:30:00 strftime ( tmp3,64 , "%b %d %Y at %H:%M:%S",timeStruct); sb.safePrintf("Collection Created" diff --git a/PageCrawlBot.cpp b/PageCrawlBot.cpp index 03a560da3..4db93f110 100644 --- a/PageCrawlBot.cpp +++ b/PageCrawlBot.cpp @@ -1810,7 +1810,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) { if ( msg ) { // delColl && cast ) { // this was deleted... 
so is invalid now name = NULL; - // no int32_ter a delete function, we need to set "name" below + // no longer a delete function, we need to set "name" below //delColl = false;//NULL; // john wants just a brief success reply SafeBuf tmp; @@ -2172,7 +2172,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) { spotsCopy[spotsLen] = '\0'; fprintf(f, "%s", spotsCopy); fclose(f); - mfree(spotsCopy, spotsLen+1, "no int32_ter need copy"); + mfree(spotsCopy, spotsLen+1, "no longer need copy"); } } diff --git a/PageEvents.cpp b/PageEvents.cpp index c5ac9dca2..7dcbfaffd 100644 --- a/PageEvents.cpp +++ b/PageEvents.cpp @@ -976,7 +976,7 @@ bool getResults ( void *state ) { // over the indexing process // . this will copy our passed "query" and "coll" to it's own buffer // . we print out matching docIds to int32_t if m_isDebug is true - // . no int32_ter forward this, since proxy will take care of evenly + // . no longer forward this, since proxy will take care of evenly // distributing its msg 0xfd "forward" requests now st->m_gotResults=st->m_msg40.getResults(si,false,st,gotResultsWrapper); // save error @@ -12144,7 +12144,7 @@ bool sendPageBack ( TcpSocket *s , // log that //if ( cookiePtr ) log("gb: set-cookie=%s",cookiePtr ); - // no int32_ter set cookie here, let the browser set its own cookies + // no longer set cookie here, let the browser set its own cookies // using client-side javascript now. see the above for that. // every button/control/etc should set a cookie to its value when it // is set. fo categories we might want to delete the cookie if it @@ -12223,7 +12223,7 @@ bool gotResults ( void *state ) { CollectionRec *cr = g_collectiondb.getRec ( coll , collLen ); if ( ! cr || cr != si->m_cr ) { log("query: Query failed. 
" - "Collection no int32_ter exists or was deleted and " + "Collection no longer exists or was deleted and " "recreated."); g_errno = ENOCOLLREC; // should always return true @@ -13725,7 +13725,7 @@ bool printEventTitle ( SafeBuf &sb , Msg20Reply *mr , State7 *st ) { if ( ! first && hadPunct && ! sb.pushChar(' ') ) goto failed2; - // no int32_ter first + // no longer first first = false; // get length str = ss->m_buf; @@ -13849,7 +13849,7 @@ bool printEventSummary ( SafeBuf &sb , Msg20Reply *mr , int32_t width , if ( lastWordPos >= 0 ) sb.pushChar(' '); - // no int32_ter first + // no longer first first = false; // save this lastWordPos = ss->m_alnumPosB; diff --git a/PageGet.cpp b/PageGet.cpp index 987e1da91..d09337970 100644 --- a/PageGet.cpp +++ b/PageGet.cpp @@ -554,7 +554,7 @@ bool processLoop ( void *state ) { uint16_t port = h->m_httpPort; //} //sprintf ( x , "http://%s:%"INT32"/get?q=" , iptoa ( ip ) , port ); - // . we no int32_ter put the port in here + // . we no longer put the port in here // . but still need http:// since we use if (port == 80) sprintf(x,"http://%s/get?q=",iptoa(ip)); else sprintf(x,"http://%s:%hu/get?q=",iptoa(ip),port); diff --git a/PageOverview.cpp b/PageOverview.cpp index fa6704347..bf1aa3c24 100644 --- a/PageOverview.cpp +++ b/PageOverview.cpp @@ -256,7 +256,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) { "

\n" "For performance reasons, most large search engines nowadays only return a rough estimate of the number of search results, but you may desire to get a better approximation or even an exact count. Gigablast allows you to do this, but it may be at the expense of query resonse time." "

\n" -"By using the t0 variable you can tell Gigablast to use a minimum number of docids from each termlist. Typically, t0 defaults to something of around 10,000 docids. Often more docids than that are used, but this is just the minimum. So if Gigablast is forced to use more docids it will take int32_ter to compute the search results on average, but it will give you a more precise hit count. By setting t0 to the truncation limit or higher you will max out the hit count precision." +"By using the t0 variable you can tell Gigablast to use a minimum number of docids from each termlist. Typically, t0 defaults to something of around 10,000 docids. Often more docids than that are used, but this is just the minimum. So if Gigablast is forced to use more docids it will take longer to compute the search results on average, but it will give you a more precise hit count. By setting t0 to the truncation limit or higher you will max out the hit count precision." "

\n" "Example: http://www.gigablast.com/search?q=test&t0=5000000\n" "" @@ -432,7 +432,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) { " # Description of this page as listed in the directory.\n" " <dmozDesc><![CDATA[A Dentist's Home Page]]></dmozDesc>\n" " </dmoz>\n" -" # If the directory is being given aint32_t with the results, this is the number of\n" +" # If the directory is being given along with the results, this is the number of\n" " # stars given to this page based on its quality.\n" " <stars>3</stars>\n" " # Each result may have a sequence of <display> tags if the feed input\n" @@ -725,7 +725,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) { (int32_t)MAX_SPIDERS); break; //case EURLTOOLONG : - //p += sprintf(p,"i - Injected URL was int32_ter than " + //p += sprintf(p,"i - Injected URL was longer than " // "%"INT32" characters.",(int32_t)MAX_URL_LEN); //break; case EBADREPLY: @@ -748,7 +748,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) { "reached its limit."); break; case EBUFTOOSMALL: - p += sprintf(p,"i - Injected URL was int32_ter than " + p += sprintf(p,"i - Injected URL was longer than " "%"INT32" characters. Or the injected " "document was too big to fit in memory, " "so consider increasing " @@ -1188,7 +1188,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) { "the last time each of these databases dumped to disk by looking at the " "timestamp on the corresponding files in the appropriate collection " "subdirectories contained in the working directory. If tfndb was " -"dumped to disk the int32_test time ago, then use its timestamp " +"dumped to disk the longest time ago, then use its timestamp " "to indicate when the last url was successfully added or injected. 
You might " "want to subtract thirty minutes from that timestamp to make sure because it " "is really the time that that file started being dumped to disk that " @@ -3065,7 +3065,7 @@ z 122 7a { 123 7b\ "# URL Filters pages.\n" "<description>This is the default ruleset used for most urls.</>\n" "\n" -"# If a ruleset is no int32_ter actively used, it is not deleted, but retired.\n" +"# If a ruleset is no longer actively used, it is not deleted, but retired.\n" "# Retired rulesets are not displayed to spam assassins on the Sitedb tool \n" "# and URL Filters pages.\n" "<retired>no</>\n" diff --git a/PageParser.cpp b/PageParser.cpp index 5cbb62cea..d5a02e792 100644 --- a/PageParser.cpp +++ b/PageParser.cpp @@ -159,7 +159,7 @@ bool sendPageParser2 ( TcpSocket *s , st->m_recycle = r->getLong("recycle",0); st->m_recycle2 = r->getLong("recycleimp",0); st->m_render = r->getLong("render" ,0); - // for quality computation... takes way int32_ter cuz we have to + // for quality computation... takes way longer cuz we have to // lookup the IP address of every outlink, so we can get its root // quality using Msg25 which needs to filter out voters from that IP // range. @@ -724,7 +724,7 @@ bool sendPageAnalyze ( TcpSocket *s , HttpRequest *r ) { st->m_recycle2 = r->getLong("recycleimp",0); st->m_render = r->getLong("render" ,0); st->m_recompute = r->getLong("recompute" ,0); - // for quality computation... takes way int32_ter cuz we have to + // for quality computation... takes way longer cuz we have to // lookup the IP address of every outlink, so we can get its root // quality using Msg25 which needs to filter out voters from that IP // range. diff --git a/PageResults.cpp b/PageResults.cpp index 0753cfc39..2781f2820 100644 --- a/PageResults.cpp +++ b/PageResults.cpp @@ -670,7 +670,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) { // over the indexing process // . this will copy our passed "query" and "coll" to it's own buffer // . 
we print out matching docIds to int32_t if m_isDebug is true - // . no int32_ter forward this, since proxy will take care of evenly + // . no longer forward this, since proxy will take care of evenly // distributing its msg 0xfd "forward" requests now st->m_gotResults=st->m_msg40.getResults(si,false,st,gotResultsWrapper); // save error @@ -3371,11 +3371,11 @@ bool printInlinkText ( SafeBuf *sb , Msg20Reply *mr , SearchInput *si , //int32_t absSum = 0; for ( int32_t i = 0 ; i < numLinks ; i++ ) { k = ptrs[i]; - if ( ! k->ptr_linkText ) continue; + if ( ! k->getLinkText() ) continue; if ( ! si->m_doQueryHighlighting && si->m_format == FORMAT_HTML ) continue; - char *str = k-> ptr_linkText; + char *str = k->getLinkText();//ptr_linkText; int32_t strLen = k->size_linkText; //char tt[1024*3]; //char *ttend = tt + 1024*3; @@ -3416,7 +3416,7 @@ bool printInlinkText ( SafeBuf *sb , Msg20Reply *mr , SearchInput *si , "url=\"", k->m_docId ); // encode it for xml - sb->htmlEncode ( k->ptr_urlBuf, + sb->htmlEncode ( k->getUrl(),//ptr_urlBuf, k->size_urlBuf - 1 , false ); sb->safePrintf("\" " //"hostId=\"%"UINT32"\" " @@ -3472,12 +3472,12 @@ bool printInlinkText ( SafeBuf *sb , Msg20Reply *mr , SearchInput *si , //"page=7&" //"c=%s&" //"d=%"INT64"\">" - //k->ptr_urlBuf); + //k->getUrl()); ,si->m_cr->m_coll ,k->m_docId); if ( ! 
sb->safeMemcpy(&hb) ) return false; int32_t hostLen = 0; - char *host = getHostFast(k->ptr_urlBuf,&hostLen,NULL); + char *host = getHostFast(k->getUrl(),&hostLen,NULL); sb->safePrintf(""); if ( host ) sb->safeMemcpy(host,hostLen); sb->safePrintf("%"INT32"",(int32_t)k->m_siteRank); @@ -4391,7 +4391,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) { //char *send; // do the normal summary str = mr->ptr_displaySum; - // sometimes the summary is int32_ter than requested because for + // sometimes the summary is longer than requested because for // summary deduping purposes (see "pss" parm in Parms.cpp) we do not // get it as int16_t as request. so use mr->m_sumPrintSize here // not mr->size_sum @@ -5518,7 +5518,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) { si->m_sameLangWeight);//SAMELANGMULT); // the actual min then sb->safePrintf(" * %.03f",minScore); - // no int32_ter list all the scores + // no longer list all the scores //sb->safeMemcpy ( &ft ); sb->safePrintf(//")" "]]>" @@ -5885,7 +5885,7 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps , LinkInfo *info = (LinkInfo *)mr->ptr_linkInfo;//inlinks; Inlink *k = info->getNextInlink(NULL); for (;k&&hg1==HASHGROUP_INLINKTEXT ; k=info->getNextInlink(k)){ - if ( ! k->ptr_linkText ) continue; + if ( ! k->getLinkText() ) continue; if ( k->m_wordPosStart > wp1 ) continue; if ( k->m_wordPosStart + 50 < wp1 ) continue; // got it. we HACKED this to put the id @@ -5897,7 +5897,7 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps , k = info->getNextInlink(NULL); for (;k&&hg2==HASHGROUP_INLINKTEXT ; k=info->getNextInlink(k)){ - if ( ! k->ptr_linkText ) continue; + if ( ! k->getLinkText() ) continue; if ( k->m_wordPosStart > wp2 ) continue; if ( k->m_wordPosStart + 50 < wp2 ) continue; // got it. 
we HACKED this to put the id @@ -6528,7 +6528,7 @@ bool printSingleScore ( SafeBuf *sb , Inlink *k = info->getNextInlink(NULL); for ( ; k && ss->m_hashGroup==HASHGROUP_INLINKTEXT ; k=info->getNextInlink(k)){ - if ( ! k->ptr_linkText ) continue; + if ( ! k->getLinkText() ) continue; if ( k->m_wordPosStart > ss->m_wordPos ) continue; if ( k->m_wordPosStart + 50 < ss->m_wordPos ) continue; // got it. we HACKED this to put the id diff --git a/PageStatsdb.cpp b/PageStatsdb.cpp index 22d4e1401..36d8aca14 100644 --- a/PageStatsdb.cpp +++ b/PageStatsdb.cpp @@ -122,7 +122,7 @@ bool sendPageGraph ( TcpSocket *s, HttpRequest *r ) { } // - // this is no int32_ter a gif, but an html graph in g_statsdb.m_sb + // this is no longer a gif, but an html graph in g_statsdb.m_sb // if ( ! g_statsdb.makeGIF ( st->m_endDateR , st->m_startDateR , diff --git a/PageSubmit.cpp b/PageSubmit.cpp index 41da8163b..dd53881b7 100644 --- a/PageSubmit.cpp +++ b/PageSubmit.cpp @@ -4,7 +4,7 @@ // instead of inserting default values into input tags in processLoop24() // generate an HTTP reply and send it back. we have to use //
to support the uploading of images -// so that each input tag will be it's own Content-Type:... aint32_t with +// so that each input tag will be it's own Content-Type:... along with // a boundary delimeter. make a fake form in the tag in here // with that enctype and you can see the multipart formatting. // diff --git a/PageTurk.cpp b/PageTurk.cpp index a6a6a6246..c31b9597d 100644 --- a/PageTurk.cpp +++ b/PageTurk.cpp @@ -2426,7 +2426,7 @@ bool gotResults ( State61 *st ) { if ( st->m_i >= numResults && st->m_isTurkSpecialQuery ) { // tell them if ( ! sb->safePrintf("Sorry, the event that you evaluated " - "no int32_ter exists in the index. Most " + "no longer exists in the index. Most " "likely the web page was updated and " "the event was dropped by the " "webmaster." diff --git a/Pages.cpp b/Pages.cpp index 598a5f7c4..3bac2ced2 100644 --- a/Pages.cpp +++ b/Pages.cpp @@ -615,7 +615,7 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , int32_t page ) { //////////////////// //////////////////// - // no int32_ter, we let anyone snoop around to check out the gui + // no longer, we let anyone snoop around to check out the gui //char guest = r->getLong("guest",0); //if ( ! publicPage && ! isRootAdmin && ! guest ) diff --git a/Parms.cpp b/Parms.cpp index a7b45f9df..b521d7ab1 100644 --- a/Parms.cpp +++ b/Parms.cpp @@ -3214,7 +3214,7 @@ void Parms::setParm ( char *THIS , Parm *m , int32_t mm , int32_t j , char *s , *(int32_t *)(THIS + m->m_plen) = len ; goto changed; } changed: - // tell gigablast the value is EXPLICITLY given -- no int32_ter based + // tell gigablast the value is EXPLICITLY given -- no longer based // on default.conf //if ( m->m_obj == OBJ_COLL ) ((CollectionRec *)THIS)->m_orig[mm] = 2; @@ -6947,7 +6947,7 @@ void Parms::init ( ) { m->m_title = "language detection samplesize"; m->m_desc = "Language detection size. Higher values" - " mean more accuracy, but int32_ter processing time." + " mean more accuracy, but longer processing time." 
" Suggested values are 300-1000"; m->m_cgi = "lmsamples"; m->m_off = (char *)&cr.m_languageSamples - x; @@ -6959,7 +6959,7 @@ void Parms::init ( ) { m->m_title = "language detection spider samplesize"; m->m_desc = "Language detection page sample size. " - "Higher values mean more accuracy, but int32_ter " + "Higher values mean more accuracy, but longer " "spider time." " Suggested values are 3000-10000"; m->m_cgi = "lpsamples"; @@ -9429,7 +9429,7 @@ void Parms::init ( ) { m->m_desc = "Recompute the quality of the root urls of each " "search result in order to compute the quality of that " "search result, since it depends on its root quality. This " - "can take a lot int32_ter when enabled."; + "can take a lot longer when enabled."; m->m_off = (char *)&si.m_artr - y; m->m_type = TYPE_LONG; m->m_def = "0"; @@ -12150,7 +12150,7 @@ void Parms::init ( ) { m->m_cgi = "mct"; m->m_off = (char *)&g_conf.m_maxCpuThreads - g; m->m_type = TYPE_LONG; - // make it 3 for new gb in case one query takes way int32_ter + // make it 3 for new gb in case one query takes way longer // than the others m->m_def = "6"; // "2"; m->m_units = "threads"; @@ -12443,7 +12443,7 @@ void Parms::init ( ) { m->m_title = "merge buf size"; m->m_desc = "Read and write this many bytes at a time when merging " "files. Smaller values are kinder to query performance, " - " but the merge takes int32_ter. Use at least 1000000 for " + " but the merge takes longer. Use at least 1000000 for " "fast merging."; m->m_cgi = "mbs"; m->m_off = (char *)&g_conf.m_mergeBufSize - g; @@ -12533,7 +12533,7 @@ void Parms::init ( ) { m->m_title = "sequential profiling."; m->m_desc = "Produce a LOG_TIMING log message for each " - "callback called, aint32_t with the time it took. " + "callback called, along with the time it took. 
" "Profiler must be enabled."; m->m_cgi = "ensp"; m->m_off = (char *)&g_conf.m_sequentialProfiling - g; @@ -13249,7 +13249,7 @@ void Parms::init ( ) { m++; m->m_title = "max respider wait (days)"; - m->m_desc = "Do not wait int32_ter than this before attempting to " + m->m_desc = "Do not wait longer than this before attempting to " "respider."; m->m_cgi = "xr"; m->m_xml = "spiderPriorityMaxRespiderWait"; @@ -15639,7 +15639,7 @@ void Parms::init ( ) { */ m->m_title = "display indexed date"; - m->m_desc = "Display the indexed date aint32_t with results."; + m->m_desc = "Display the indexed date along with results."; m->m_cgi = "didt"; m->m_off = (char *)&cr.m_displayIndexedDate - x; m->m_type = TYPE_BOOL; @@ -15650,7 +15650,7 @@ void Parms::init ( ) { m++; m->m_title = "display last modified date"; - m->m_desc = "Display the last modified date aint32_t with results."; + m->m_desc = "Display the last modified date along with results."; m->m_cgi = "dlmdt"; m->m_off = (char *)&cr.m_displayLastModDate - x; m->m_type = TYPE_BOOL; @@ -15662,7 +15662,7 @@ void Parms::init ( ) { m++; m->m_title = "display published date"; - m->m_desc = "Display the published date aint32_t with results."; + m->m_desc = "Display the published date along with results."; m->m_cgi = "dipt"; m->m_off = (char *)&cr.m_displayPublishDate - x; m->m_type = TYPE_BOOL; @@ -16557,7 +16557,7 @@ void Parms::init ( ) { m->m_title = "delete 404s"; m->m_desc = "Should pages be removed from the index if they are no " - "int32_ter accessible on the web?"; + "longer accessible on the web?"; m->m_cgi = "dnf"; m->m_off = (char *)&cr.m_delete404s - x; m->m_type = TYPE_BOOL; @@ -17504,7 +17504,7 @@ void Parms::init ( ) { m->m_desc = "This is in pixels and limits the size of the thumbnail. 
" "Gigablast tries to make at least the width or the height " "equal to this maximum, but, unless the thumbnail is sqaure, " - "one side will be int32_ter than the other."; + "one side will be longer than the other."; m->m_cgi = "mtwh"; m->m_off = (char *)&cr.m_thumbnailMaxWidthHeight - x; m->m_type = TYPE_LONG; @@ -18778,7 +18778,7 @@ void Parms::init ( ) { m++; m->m_title = "log query time threshold"; - m->m_desc = "If query took this many millliseconds or int32_ter, then log the " + m->m_desc = "If query took this many millliseconds or longer, then log the " "query and the time it took to process."; m->m_cgi = "lqtt"; m->m_off = (char *)&g_conf.m_logQueryTimeThreshold- g; @@ -20133,7 +20133,7 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList, if ( cr ) parmCollnum = cr->m_collnum; // turn the collnum into an ascii string for providing as args - // when &reset=1 &restart=1 &delete=1 is given aint32_t with a + // when &reset=1 &restart=1 &delete=1 is given along with a // &c= or a &name=/&token= pair. char oldCollName[MAX_COLL_LEN+1]; oldCollName[0] = '\0'; @@ -21657,7 +21657,7 @@ bool Parms::updateParm ( char *rec , WaitEntry *we ) { } //return true; // sanity - // we no int32_ter include the \0 in the dataSize...so a dataSize + // we no longer include the \0 in the dataSize...so a dataSize // of 0 means empty string... //if ( data[dataSize-1] != '\0' ) { char *xx=NULL;*xx=0; } } diff --git a/PingServer.cpp b/PingServer.cpp index 08d4109e7..31d7411b1 100644 --- a/PingServer.cpp +++ b/PingServer.cpp @@ -552,7 +552,7 @@ void gotReplyWrapperP ( void *state , UdpSlot *slot ) { int32_t hid = h->m_hostId; // if host 0 special case. //if ( hid == 0 && g_sendingToHost0 ) { - // // no int32_ter sending to him + // // no longer sending to him // g_sendingToHost0 = false; // // if he sent a reply, don't bother him so much any more // if ( ! 
g_errno ) g_host0Replied = true; diff --git a/Posdb.cpp b/Posdb.cpp index 07d1ebf95..c32c735de 100644 --- a/Posdb.cpp +++ b/Posdb.cpp @@ -3149,7 +3149,7 @@ void PosdbTable::getTermPairScoreForNonBody ( int32_t i, int32_t j, if ( p1 <= p2 ) { // . skip the pair if they are in different hashgroups - // . we no int32_ter allow either to be in the body in this + // . we no longer allow either to be in the body in this // algo because we handle those cases in the sliding window // algo! if ( ! s_isCompatible[hg1][hg2] ) goto skip1; @@ -3251,7 +3251,7 @@ void PosdbTable::getTermPairScoreForNonBody ( int32_t i, int32_t j, } else { // . skip the pair if they are in different hashgroups - // . we no int32_ter allow either to be in the body in this + // . we no longer allow either to be in the body in this // algo because we handle those cases in the sliding window // algo! if ( ! s_isCompatible[hg1][hg2] ) goto skip2; @@ -4987,7 +4987,7 @@ void PosdbTable::addDocIdVotes ( QueryTermInfo *qti , int32_t listGroupNum ) { if ( isRangeTerm ) { // a new docid i guess inRange = false; - // no int32_ter in range + // no longer in range if ( isInRange2(cursor[mini],cursorEnd[mini],qt)) inRange = true; } @@ -5223,7 +5223,7 @@ void PosdbTable::intersectLists10_r ( ) { for ( ; p < pend ; ) { // . first key is the full size // . uses the w,G,s,v and F bits to hold this - // . this is no int32_ter necessarily sitehash, but + // . this is no longer necessarily sitehash, but // can be any val, like now FacetStats is using // it for the innerHtml sentence content hash32 int32_t sh32 = g_posdb.getFacetVal32 ( p ); @@ -7037,7 +7037,7 @@ void PosdbTable::intersectLists10_r ( ) { // . first key is the full size // . uses the w,G,s,v and F bits to hold this - // . this is no int32_ter necessarily sitehash,but + // . 
this is no longer necessarily sitehash,but // can be any val, like now SectionStats is // using it for the innerHtml sentence // content hash32 @@ -7173,7 +7173,7 @@ void PosdbTable::intersectLists10_r ( ) { // if we have not supplanted anyone yet, be on our way for ( ; sx < sxEnd ; sx += sizeof(DocIdScore) ) { si = (DocIdScore *)sx; - // if top tree no int32_ter has this docid, we must + // if top tree no longer has this docid, we must // remove its associated scoring info so we do not // breach our scoring info bufs if ( ! m_topTree->hasDocId( si->m_docId ) ) break; diff --git a/Proxy.cpp b/Proxy.cpp index 59e25cfd8..a813962b6 100644 --- a/Proxy.cpp +++ b/Proxy.cpp @@ -1027,7 +1027,7 @@ bool Proxy::handleRequest (TcpSocket *s){ // get host #0 h = g_hostdb.getHost ( 0 ); /* - no int32_ter - flurbit root page is the search page... + no longer - flurbit root page is the search page... else if ( n == PAGE_ADDURL || pathLen == 1 || ( pathLen == 11 && strncmp ( path , "/index.html" ,11 ) == 0 ) ){ int32_t numTries = 0; @@ -2165,7 +2165,7 @@ UserInfo *Proxy::getUserInfoForFeedAccess ( HttpRequest *hr ) { g_errno = 0; //char *user = hr->getString("user",NULL); - // we also store the username aint32_t with session id + // we also store the username along with session id //if ( ! user ) user = r->getStringFromCookie("user",NULL); int32_t userId32 = hr->getLong("userid",0); @@ -4200,7 +4200,7 @@ bool Proxy::printEditForm ( StateUser *su ) { "color=red>%s",su->m_fcError); else sb.safePrintf("
" - "A secret code used aint32_t with your userid " + "A secret code used along with your userid " "to access the XML feeds." ""); diff --git a/QAClient.cpp b/QAClient.cpp index adec0ced7..aeae6eec6 100644 --- a/QAClient.cpp +++ b/QAClient.cpp @@ -638,7 +638,7 @@ void QADiffTest::xmlDiff() { int32_t seqLen[4096]; - int32_t lcsLen = int32_testCommonSubsequence(seq1, seq2, seqLen,4096, + int32_t lcsLen = longestCommonSubsequence(seq1, seq2, seqLen,4096, &xml1, &xml2); printf("lcs length: %"INT32"\n", lcsLen); lcsLen = lcsXml(seq1, seq2, seqLen,4096, diff --git a/Query.cpp b/Query.cpp index 5a5cd49a7..0b6abdff8 100644 --- a/Query.cpp +++ b/Query.cpp @@ -2101,7 +2101,7 @@ bool Query::setQWords ( char boolFlag , fieldCode = 0; fieldLen = 0; field = NULL; - // we no int32_ter have to ignore for link: et al + // we no longer have to ignore for link: et al ignoreTilSpace = false; } // . maintain inQuotes and quoteStart @@ -2155,7 +2155,7 @@ bool Query::setQWords ( char boolFlag , fieldCode = 0; fieldLen = 0; field = NULL; - // we no int32_ter have to ignore for link: et al + // we no longer have to ignore for link: et al ignoreTilSpace = false; } // skip if we should @@ -2873,7 +2873,7 @@ bool Query::setQWords ( char boolFlag , // . now since we may have prevented pairing across certain things // we need to set D_CAN_START_PHRASE for stop words whose left - // punct word can no int32_ter be paired across + // punct word can no longer be paired across // . "dancing in the rain" is fun --> will include phrase "is fun". // . title:"is it right"? 
--> will include phrase "is it" for ( int32_t i = 1 ; i < numWords ; i++ ) { @@ -2904,7 +2904,7 @@ bool Query::setQWords ( char boolFlag , // break the "quote", if any qs = -1; continue; } // if he is punctuation and qs is -1, skip him, - // punctuation words can no int32_ter start a quote + // punctuation words can no longer start a quote if ( words.isPunct(j) && qs == -1 ) continue; // uningore him if we should if ( keepAllSingles ) m_qwords[j].m_ignoreWord = 0; diff --git a/Query.h b/Query.h index 682efdb45..2f7ddbbd2 100644 --- a/Query.h +++ b/Query.h @@ -502,7 +502,7 @@ class QueryTerm { char m_fieldCode ; bool isSplit(); // . weights and affinities calculated in IndexTable2 - // . do not store in here, just pass aint32_t as a separate vector + // . do not store in here, just pass along as a separate vector // . analogous to how Phrases is to Words is to Bits, etc. //float m_termWeight; //float m_phraseAffinity; diff --git a/Rdb.cpp b/Rdb.cpp index 9646573a3..f44aca5e2 100644 --- a/Rdb.cpp +++ b/Rdb.cpp @@ -1323,7 +1323,7 @@ bool Rdb::dumpTree ( int32_t niceness ) { if ( bufSize > 400*1024 ) bufSize = 400*1024; if ( bufSize < 200*1024 ) bufSize = 200*1024; */ - // ok, no int32_ter need token to dump!!! + // ok, no longer need token to dump!!! /* @@ -1359,7 +1359,7 @@ void gotTokenForDumpWrapper ( void *state ) { // returns false and sets g_errno on error bool Rdb::gotTokenForDump ( ) { - // no int32_ter waiting for it + // no longer waiting for it m_waitingForTokenForDump = false; */ // debug msg @@ -1681,7 +1681,7 @@ void Rdb::doneDumping ( ) { if ( ! m_dumpErrno ) m_mem.freeDumpedMem(); // . tell RdbDump it is done // . we have to set this here otherwise RdbMem's memory ring buffer - // will think the dumping is no int32_ter going on and use the primary + // will think the dumping is no longer going on and use the primary // memory for allocating new titleRecs and such and that is not good! m_inDumpLoop = false; // . 
on g_errno the dumped file will be removed from "sync" file and diff --git a/RdbBase.cpp b/RdbBase.cpp index f38e97e7e..8bf3a0778 100644 --- a/RdbBase.cpp +++ b/RdbBase.cpp @@ -88,7 +88,7 @@ void RdbBase::reset ( ) { //m_numNetReadAdd = 0 ; //m_numRepliesAdd = 0 ; //m_numNetSentAdd = 0 ; - // we no int32_ter need to be saved + // we no longer need to be saved //m_needsSave = false; //m_inWaiting = false; // we're not in urgent merge mode yet @@ -1232,7 +1232,7 @@ void RdbBase::doneWrapper4 ( ) { } - // . we are no int32_ter unlinking + // . we are no longer unlinking // . this is so Msg3.cpp can avoid reading the [a,b) files m_isUnlinking = false; // file #x is the merge file @@ -1242,7 +1242,7 @@ void RdbBase::doneWrapper4 ( ) { // sanity check if ( m_numFilesToMerge != (b-a) ) { log(LOG_LOGIC,"db: Bury oops."); char *xx = NULL; *xx = 0; } - // we no int32_ter have a merge file + // we no longer have a merge file m_hasMergeFile = false; // now unset m_mergeUrgent if we're close to our limit if ( m_mergeUrgent && m_numFiles - 14 < m_minToMerge ) { @@ -2185,7 +2185,7 @@ void RdbBase::gotTokenForMerge ( ) { m_pc , mint /*maxTargetFileSize*/ , m_ks ) ) return; - // hey, we're no int32_ter merging i guess + // hey, we're no longer merging i guess m_isMerging = false; // decerment this count m_rdb->m_numMergesOut--; @@ -2377,13 +2377,13 @@ int32_t RdbBase::getFileNumFromId2 ( int32_t id2 ) { //log("Rdb:getFileNumFromId2: id2 of %"INT32" is invalid. returning " // "startFileNum of %"INT32".",id2,prev,id2); log("db: titledb*-%"INT32".dat file in collection \"%s\" " - "is referenced but no int32_ter exists. " + "is referenced but no longer exists. " "To fix this do a tight merge on titledb; you may have to delete " "tfndb* and regenerate it using the 'gb gendbs' command after the " "tight merge completes if the document is indeed missing. 
Cause " "may have been an improper shutdown, or not saving tfndb or " "titledb, or a missing document in titledb.",id2,m_coll); - //log("DISK: titledb*-%"INT32".dat file is referenced but no int32_ter exists." + //log("DISK: titledb*-%"INT32".dat file is referenced but no longer exists." // " See section on Database Repair in overview.html to fix it.",id2); return -1; // prev; } diff --git a/RdbBuckets.cpp b/RdbBuckets.cpp index e649eb602..8a32a6286 100644 --- a/RdbBuckets.cpp +++ b/RdbBuckets.cpp @@ -215,7 +215,7 @@ bool RdbBucket::sort() { //turn quickpoll off while we sort, //because we do not know what sort of indeterminate state //we will be in while sorting - // MDW: this no int32_ter disables it since it is based on g_niceness + // MDW: this no longer disables it since it is based on g_niceness // now, but what is the point, does it use static vars or what? //bool canQuickpoll = g_loop.m_canQuickPoll; //g_loop.m_canQuickPoll = false; diff --git a/RdbCache.cpp b/RdbCache.cpp index f8167508f..56ce35bc2 100644 --- a/RdbCache.cpp +++ b/RdbCache.cpp @@ -787,7 +787,7 @@ bool RdbCache::addRecord ( collnum_t collnum , need += 4; // . trailing 0 collnum_t, key and trailing time stamp // . 
this DELIMETER tells us to go to the next buf - //need += sizeof(collnum_t) + sizeof(key_t) + 4 ; + //need += sizeof(collnum_t) + sizeof(key_t) + 4 ; // timestamp need += sizeof(collnum_t) + m_cks + 4 ; // and size, if not fixed or we support lists if ( m_fixedDataSize == -1 || m_supportLists ) need += 4; @@ -890,7 +890,7 @@ bool RdbCache::addRecord ( collnum_t collnum , *(int32_t *)p = timestamp; p += 4; // then dataSize if we need to if ( m_fixedDataSize == -1 || m_supportLists ) { - *(int32_t *)p = recSize1+recSize2; p +=4; } + *(int32_t *)p = recSize1+recSize2; p +=4; } //datasize // sanity : check if the recSizes add up right else if ( m_fixedDataSize != recSize1 + recSize2 ){ char *xx = NULL; *xx = 0; } @@ -1034,7 +1034,7 @@ bool RdbCache::deleteRec ( ) { if ( timestamp == 0 && KEYCMP(k,KEYMIN(),m_cks)==0 ) { // if we wrap around back to first buffer then // change the "wrapped" state to false. that means - // we are no int32_ter directly in front of the write + // we are no longer directly in front of the write // head, but behind him again. if ( ++bufNum >= m_numBufs ) { bufNum = 0; @@ -1206,7 +1206,7 @@ void RdbCache::removeKey ( collnum_t collnum , char *key , char *rec ) { // clear it m_ptrs[n] = NULL; m_numPtrsUsed--; - m_memOccupied -= 4; + m_memOccupied -= sizeof(char *);//4; // advance through list after us now if ( ++n >= m_numPtrsMax ) n = 0; // keep looping until we hit an empty slot @@ -1620,16 +1620,24 @@ bool RdbCache::load ( char *dbname ) { if ( ! 
m_ptrs ) return false; // load 'em all in int32_t total = sizeof(char *) * m_numPtrsMax ; - n = f.read ( m_ptrs , total , off ); off += total; + + SafeBuf fix; + fix.reserve ( total ); + + //n = f.read ( m_ptrs , total , off ); off += total; + n = f.read ( fix.getBufStart() , total , off ); off += total; if ( n != total ) return false; + int32_t *poff = (int32_t *)fix.getBufStart(); + // convert back to absolute - for ( int32_t i = 0 ; i < m_numPtrsMax ; i++ ) { - SPTRTYPE j = (SPTRTYPE) m_ptrs[i]; + for ( int32_t i = 0 ; i < m_numPtrsMax ; i++ , poff++ ) { + //uint32_t j = (SPTRTYPE) m_ptrs[i]; // is it a NULL? - if ( j == -1 ) { m_ptrs[i] = NULL; continue; } + //if ( j == -1 ) { m_ptrs[i] = NULL; continue; } + if ( *poff == -1 ) { m_ptrs[i] = NULL; continue; } // get buffer - int32_t bufNum = j / BUFSIZE; - char *p = m_bufs[bufNum] + j % BUFSIZE ; + int32_t bufNum = (*poff) / BUFSIZE; + char *p = m_bufs[bufNum] + (*poff) % BUFSIZE ; // re-assign m_ptrs[i] = p; // debug msg @@ -1675,14 +1683,14 @@ void RdbCache::removeKeyRange ( collnum_t collnum , int32_t rem = n; m_ptrs[rem] = NULL; m_numPtrsUsed--; - m_memOccupied -= 4; + m_memOccupied -= sizeof(char *); if ( ++rem >= m_numPtrsMax ) rem = 0; // keep looping until we hit an empty slot while ( m_ptrs[rem] ) { char *ptr = m_ptrs[rem]; m_ptrs[rem] = NULL; m_numPtrsUsed--; - m_memOccupied -= 4; + m_memOccupied -= sizeof(char *); char k[MAX_KEY_BYTES]; KEYSET(k,ptr+sizeof(collnum_t),m_cks); addKey ( *(collnum_t *)ptr , diff --git a/RdbDump.cpp b/RdbDump.cpp index 1168c918f..4b8222175 100644 --- a/RdbDump.cpp +++ b/RdbDump.cpp @@ -186,7 +186,7 @@ bool RdbDump::set ( //char *coll , // . start dumping the tree // . return false if it blocked if ( ! 
dumpTree ( false ) ) return false; - // no int32_ter dumping + // no longer dumping doneDumping(); // return true since we didn't block return true; @@ -476,7 +476,7 @@ bool RdbDump::dumpList ( RdbList *list , int32_t niceness , bool recall ) { #endif // before calling RdbMap::addList(), always reset list ptr - // since we no int32_ter call this in RdbMap::addList() so we don't + // since we no longer call this in RdbMap::addList() so we don't // mess up the possible HACK below m_list->resetListPtr(); @@ -859,7 +859,7 @@ bool RdbDump::doneReadingForVerify ( ) { // debug msg //log("RdbDump:: deleting list"); int64_t t1 = gettimeofdayInMilliseconds(); - // convert to number, this is -1 if no int32_ter exists + // convert to number, this is -1 if no longer exists //collnum_t collnum = g_collectiondb.getCollnum ( m_coll ); //if ( collnum < 0 && m_rdb->m_isCollectionLess ) { // collnum = 0; diff --git a/RdbList.cpp b/RdbList.cpp index be28d7e29..ee5c4f2dc 100644 --- a/RdbList.cpp +++ b/RdbList.cpp @@ -17,7 +17,7 @@ ///// // -// we no int32_ter do ALLOW_SCALE! now user can click "rebalance shards" +// we no longer do ALLOW_SCALE! now user can click "rebalance shards" // to scan all rdbs of every coll and move the recs to the appropriate // shard in real time. // @@ -1566,7 +1566,7 @@ bool RdbList::constrain ( char *startKey , // . all provided lists must have their recs in [startKey,endKey] // so you should have called RdbList::constrain() on them // . should only be used by Msg5 to merge diskLists (Msg3) and treeList -// . we no int32_ter do annihilation, instead the newest key, be it negative +// . we no longer do annihilation, instead the newest key, be it negative // or positive, will override all the others // . 
the logic would have been much simpler had we chosen to use distinct // keys for distinct titleRecs, but that would hurt our incremental updates diff --git a/RdbMap.cpp b/RdbMap.cpp index 1f5b5d256..de5a43a55 100644 --- a/RdbMap.cpp +++ b/RdbMap.cpp @@ -104,7 +104,7 @@ bool RdbMap::writeMap ( ) { // on success, we don't need to write it anymore if ( status ) m_needToWrite = false; // . close map - // . no int32_ter since we use BigFile + // . no longer since we use BigFile //m_file.close ( ); // return status return status; @@ -204,7 +204,7 @@ bool RdbMap::readMap ( BigFile *dataFile ) { m_file.getFilename(),mstrerror(g_errno)); bool status = readMap2 ( ); // . close map - // . no int32_ter since we use BigFile + // . no longer since we use BigFile // . no, we have to close since we will hog all the fds // . we cannot call BigFile::close() because then RdbMap::unlink() will // not work because BigFile::m_maxParts gets set to 0, and that is @@ -1300,7 +1300,7 @@ bool RdbMap::chopHead ( int32_t fileHeadSize ) { int32_t segNum = (fileHeadSize / m_pageSize) / PAGES_PER_SEGMENT; // . must match exactly // . not any more i guess, we can still have a segment that - // corresponds in part to a PART file no int32_ter with us + // corresponds in part to a PART file no longer with us //if ( fileHeadSize * m_pageSize * PAGES_PER_SEGMENT != segNum ) //return log("RdbMap::chopHead: file head isn't multiple"); // return true if nothing to delete diff --git a/RdbMem.cpp b/RdbMem.cpp index ef443603b..2c925b661 100644 --- a/RdbMem.cpp +++ b/RdbMem.cpp @@ -170,6 +170,6 @@ void RdbMem::freeDumpedMem() { // reset secondary (old primary mem was dumped out to disk) if ( m_ptr2 > m_ptr1 ) m_ptr2 = m_mem + m_memSize; else m_ptr2 = m_mem; - // no int32_ter 90% full + // no longer 90% full m_is90PercentFull = false; } diff --git a/RdbMerge.cpp b/RdbMerge.cpp index f78ea9e4d..c67910078 100644 --- a/RdbMerge.cpp +++ b/RdbMerge.cpp @@ -638,7 +638,7 @@ void RdbMerge::doneMerging ( ) { // . 
do not call this if "list" is empty // . remove records whose keys don't beint32_t // . when we split the db cuz we scaled to more groups this will rid us -// of data we no int32_ter control +// of data we no longer control // . a split is done by turning on the next bit in m_groupMask starting // at the highest bit going down // . this spiderdb thang is a HACK diff --git a/RdbMerge.h b/RdbMerge.h index bf6ebc535..97ed5ab29 100644 --- a/RdbMerge.h +++ b/RdbMerge.h @@ -108,7 +108,7 @@ class RdbMerge { // private: // . used when growing the database - // . removes keys that would no int32_ter be stored by us + // . removes keys that would no longer be stored by us //void filterList ( RdbList *list ) ; // . we get the units from the master and the mergees from the units diff --git a/RdbTree.cpp b/RdbTree.cpp index 3c30908fd..2aebfbe0d 100644 --- a/RdbTree.cpp +++ b/RdbTree.cpp @@ -249,7 +249,7 @@ int32_t RdbTree::clear ( ) { for ( int32_t i = 0 ; i < m_minUnusedNode ; i++ ) { // skip node if parents is -2 (unoccupied) if ( m_parents[i] == -2 ) continue; - // we no int32_ter count the overhead of this node as occupied + // we no longer count the overhead of this node as occupied m_memOccupied -= m_overhead; // make the ith node available for occupation m_parents[i] = -2; @@ -419,7 +419,7 @@ int32_t RdbTree::getNextNode ( int32_t i ) { if ( m_left[p] == i ) return p; // otherwise keep getting the parent until it has a bigger key // or until we're the LEFT kid of the parent. that's better - // cuz comparing keys takes int32_ter. loop is 6 cycles per iteration. + // cuz comparing keys takes longer. loop is 6 cycles per iteration. while ( p >= 0 && (m_collnums[p] < m_collnums[i] || ( m_collnums[p] == m_collnums[i] && KEYCMP(m_keys,p,m_keys,i,m_ks) < 0 )) ) @@ -446,7 +446,7 @@ int32_t RdbTree::getPrevNode ( int32_t i ) { if ( m_right[p] == i ) return p; // keep getting the parent until it has a bigger key // or until we're the RIGHT kid of the parent. 
that's better - // cuz comparing keys takes int32_ter. loop is 6 cycles per iteration. + // cuz comparing keys takes longer. loop is 6 cycles per iteration. while ( p >= 0 && (m_collnums[p] > m_collnums[i] || ( m_collnums[p] == m_collnums[i] && KEYCMP(m_keys,p,m_keys,i,m_ks) > 0 )) ) @@ -1701,7 +1701,7 @@ bool RdbTree::getList ( collnum_t collnum , if ( ! list->growList ( growth ) ) return log("db: Failed to grow list to %"INT32" bytes for storing " "records from tree: %s.",growth,mstrerror(g_errno)); - // similar to above algorithm but we have data aint32_t with the keys + // similar to above algorithm but we have data along with the keys int32_t dataSize; // if a niceness 0 msg4 tries to add to the tree, return ETRYAGAIN @@ -1714,7 +1714,7 @@ bool RdbTree::getList ( collnum_t collnum , // to 0. and it deleted a record from the tree that we had just read // from the tree and added to the list. so then when RdbDump.cpp // called deleteList() after dumping that list to disk, one of the - // recs was no int32_ter in the tree! that then caused a core. now we + // recs was no longer in the tree! that then caused a core. now we // don't core, but i think i fixed it here. m_gettingList++; @@ -2772,7 +2772,7 @@ bool RdbTree::fastLoad ( BigFile *f , RdbMem *stack ) { if ( ! checkTree( false ) ) return fixTree ( ); } */ - // no int32_ter needs save + // no longer needs save m_needsSave = false; //printTree(); return true; diff --git a/Repair.cpp b/Repair.cpp index d3c331d2b..70231d1dd 100644 --- a/Repair.cpp +++ b/Repair.cpp @@ -1969,7 +1969,7 @@ bool Repair::injectTitleRec ( ) { } mnew ( xd , sizeof(XmlDoc),"xmldocpr"); - // clear out first since set2 no int32_ter does + // clear out first since set2 no longer does //xd->reset(); if ( ! 
xd->set2 ( titleRec , -1 , m_coll , NULL , MAX_NICENESS ) ) { m_recsetErrors++; @@ -2655,7 +2655,7 @@ bool saveAllRdbs ( void *state , void (* callback)(void *state) ) { // set it s_savingAll = true; // TODO: why is this called like 100x per second when a merge is - // going on? why don't we sleep int32_ter in between? + // going on? why don't we sleep longer in between? //bool close ( void *state , // void (* callback)(void *state ) , // bool urgent , diff --git a/Sections.cpp b/Sections.cpp index 7a6064cde..d81ef7d6c 100644 --- a/Sections.cpp +++ b/Sections.cpp @@ -104,7 +104,7 @@ class Tagx { }; // i lowered from 1000 to 300 so that we more sensitive to malformed pages -// because typically they seem to take int32_ter to parse. i also added some +// because typically they seem to take longer to parse. i also added some // new logic for dealing with table tr and td back tags that allow us to // pop off the other contained tags right away rather than delaying it until // we are done because that will often breach this stack. @@ -379,7 +379,7 @@ bool Sections::set ( Words *w , // for debug g_sections = this; - // Sections are no int32_ter 1-1 with words, just with front tags + // Sections are no longer 1-1 with words, just with front tags for ( int32_t i = 0 ; i < nw ; i++ ) { // breathe QUICKPOLL ( m_niceness ); @@ -713,7 +713,7 @@ bool Sections::set ( Words *w , /* // if our parent got closed before "sn" closed because // of an out-of-order back tag issue, then if it - // no int32_ter contains sn->m_a, then reparent "sn" + // no longer contains sn->m_a, then reparent "sn" // to its grandparent. Section *ps = sn->m_parent; for ( ; ps && ps->m_b >= 0 && ps->m_b <= sn->m_a ; @@ -738,7 +738,7 @@ bool Sections::set ( Words *w , // back tag to another front tag on the stack Section *ps = sn->m_parent; for ( ; ps != rootSection ; ps = ps->m_parent ) { - // skip if parent no int32_ter contains us! + // skip if parent no longer contains us! 
if ( ps->m_b <= sn->m_a ) continue; // skip if this parent is still open if ( ps->m_b <= 0 ) continue; @@ -1199,7 +1199,7 @@ bool Sections::set ( Words *w , // are open ended if ( si->m_b != -1 && si->m_b <= end ) continue; // this might constrain someone's parent such that - // that someone no int32_ter can use that parent!! + // that someone no longer can use that parent!! si->m_b = end; // . get our tag type // . use int32_t instead of nodeid_t so we can re-set this @@ -3130,7 +3130,7 @@ void initGenericTable ( int32_t niceness ) { // and should be excluded from titles. // . we set the GENERIC bit for each word or phrase in the sentence // that matches one in this list of generic words/phrases - // . then we ignore the int32_test generic phrase that is two words or + // . then we ignore the longest generic phrase that is two words or // more, within the sentence, for the purposes of forming titles // . so if we had "buy tickets for Spiderman" we would ignore // "buy tickets for" and the title would just be Spiderman @@ -4063,7 +4063,7 @@ bool Sections::setSentFlagsPart1 ( ) { // title prevention for americantowns.com (m_wlens[i] >= 3 || firstWord) ) lowerCount++; - // no int32_ter first word in sentence + // no longer first word in sentence firstWord = false; } // does it end in period? slight penalty for that since @@ -7122,8 +7122,8 @@ int32_t hasTitleWords ( sentflags_t sflags , "|bowling", "*$bowl", // orange bowl, super bowl "|singing", // Children's+Choirs+Singing - "|sing aint32_t", // Messiah+Sing-Aint32_t - "|singaint32_t", + "|sing along", // Messiah+Sing-Aint32_t + "|singalong", "^sing", // community sing "$singers", // Lakeside+Singers+at+NCC "|soapmaking", // Girls+Spa+Day:+Lip+balm,Perfume&Soapmaking: @@ -8396,7 +8396,7 @@ int32_t Sections::addImpliedSections3 ( ) { // to group tods with doms/dows. 
// this was hurting blackbirdbuvette.com which needed the // hr partition to split up some sections so it got - // "Geeks Who Drink" as the title because it would no int32_ter + // "Geeks Who Drink" as the title because it would no longer // have the multevent penalty! //if ( count1 + count2 == 2 * bothCount ) continue; @@ -8850,7 +8850,7 @@ int32_t Sections::getDelimScore ( Section *bro , int32_t inserts = 0; int32_t skips = 0; - // no int32_ter allow dups, keep a count of each hash now + // no longer allow dups, keep a count of each hash now char vhtbuf[92000]; HashTableX vht; vht.set ( 4, 4 ,256,vhtbuf,92000,false,m_niceness,"vhttab"); @@ -8917,7 +8917,7 @@ int32_t Sections::getDelimScore ( Section *bro , // the secCount at 0, and normally it starts at 1 (see below) // but if we have like an hr tag delimeter then we only // need two sections above it. this fixed cabq.gov so it - // got the first implied section and no int32_ter missed it. + // got the first implied section and no longer missed it. //if ( firstTime ) need = 2; // update this for insertSubSection() @@ -10116,7 +10116,7 @@ bool Sections::addSentenceSections ( ) { static int64_t h_on; static int64_t h_under; static int64_t h_with; - static int64_t h_aint32_t; + static int64_t h_along; static int64_t h_from; static int64_t h_by; static int64_t h_of; @@ -10157,7 +10157,7 @@ bool Sections::addSentenceSections ( ) { h_on = hash64n("on"); h_under = hash64n("under"); h_with = hash64n("with"); - h_aint32_t = hash64n("aint32_t"); + h_along = hash64n("along"); h_from = hash64n("from"); h_by = hash64n("by"); h_of = hash64n("of"); @@ -10387,8 +10387,8 @@ bool Sections::addSentenceSections ( ) { // m_wids[aw] == h_midnight ) if ( tid == TAG_P && isLower && - // Oscar G

aint32_t with xxxx - m_wids[aw] != h_aint32_t && + // Oscar G

along with xxxx + m_wids[aw] != h_along && m_wids[aw] != h_with ) isLower = false; @@ -12276,7 +12276,7 @@ bool SectionVotingTable::addVote3 ( int32_t turkTagHash , } /* -// . no int32_ter use single bit flags, sec_t +// . no longer use single bit flags, sec_t // . just use enumerated section types now // . each section type has a score and number sampled to get that score // . returns -1 if no data @@ -13029,7 +13029,7 @@ bool Sections::print ( SafeBuf *sbuf , // first few words of section int32_t a = sn->m_a; int32_t b = sn->m_b; - // -1 means an unclosed tag!! should no int32_ter be the case + // -1 means an unclosed tag!! should no longer be the case if ( b == -1 ) { char *xx=NULL;*xx=0; }//b=m_words->m_numWords; sbuf->safePrintf(""); @@ -15209,7 +15209,7 @@ bool Sections::swoggleTable ( int32_t dn , Section *ts ) { // . just the voting info for passing into diffbot in json -// . aint32_t w/ the title/summary/etc. we can return this json blob for each search result +// . along w/ the title/summary/etc. we can return this json blob for each search result bool Sections::printVotingInfoInJSON ( SafeBuf *sb ) { // save ptrs @@ -15393,7 +15393,7 @@ bool Sections::print2 ( SafeBuf *sbuf , // first few words of section int32_t a = sn->m_a; int32_t b = sn->m_b; - // -1 means an unclosed tag!! should no int32_ter be the case + // -1 means an unclosed tag!! should no longer be the case if ( b == -1 ) { char *xx=NULL;*xx=0; }//b=m_words->m_numWords; sbuf->safePrintf(""); @@ -16404,7 +16404,7 @@ bool Sections::setRegistrationBits ( ) { //if ( sk->m_tagId == TAG_TITLE ) break; // . stop if this section is in a list of other - // . we should hash each sections tag hash aint32_t with + // . we should hash each sections tag hash along with // their parent section ptr for this //if ( sk->m_container ) break; diff --git a/Sections.h b/Sections.h index 11681c3a5..50ae57e5f 100644 --- a/Sections.h +++ b/Sections.h @@ -481,7 +481,7 @@ class Section { // div and span tags, etc. 
to make them unique uint32_t m_baseHash; - // just hash the "class=" value aint32_t with the tagid + // just hash the "class=" value along with the tagid uint32_t m_turkBaseHash; // kinda like m_baseHash but for xml tags and only hashes the diff --git a/Speller.cpp b/Speller.cpp index 635cbc502..236c7772c 100644 --- a/Speller.cpp +++ b/Speller.cpp @@ -320,7 +320,7 @@ bool Speller::getRecommendation ( Query *q, bool inQuotes = qw->m_inQuotes; char fieldCode = qw->m_fieldCode; - // . get int32_test continual fragment that starts with word #i + // . get longest continual fragment that starts with word #i // . get the following words that can be in a fragment // that starts with word #i // . start of the frag @@ -903,7 +903,7 @@ void Speller::gotFrags( void *state ){ } bool inQuotes = qw->m_inQuotes; char fieldCode = qw->m_fieldCode; - // . get int32_test continual fragment that starts with word #i + // . get longest continual fragment that starts with word #i // . get the following words that can be in a fragment // that starts with word #i // . start of the frag @@ -1489,7 +1489,7 @@ int32_t Speller::getPhrasePopularity ( char *str, uint64_t h, // is showing up as porn because it has 'anal' in the hostname. So try to // find a combination of words such that they are NOT porn. // try this only after isAdult() succeeds. -// Always tries to find int32_ter words first. so 'montanalinux' is split as +// Always tries to find longer words first. so 'montanalinux' is split as // 'montana' and 'linux' and not as 'mont', 'analinux' // if it finds a seq of words leading upto a porn word, then it returns true // eg. 
shall split montanalinux into 'mont', 'anal', and return true without diff --git a/Spider.cpp b/Spider.cpp index 37c4edfa4..2ad5d60c8 100644 --- a/Spider.cpp +++ b/Spider.cpp @@ -2670,7 +2670,7 @@ int32_t SpiderColl::getNextIpFromWaitingTree ( ) { // ok, we got one firstIp = (k->n0) & 0xffffffff; - // sometimes we take over for a dead host, but if he's no int32_ter + // sometimes we take over for a dead host, but if he's no longer // dead then we can remove his keys. but first make sure we have had // at least one ping from him so we do not remove at startup. // if it is in doledb or in the middle of being added to doledb @@ -2970,7 +2970,7 @@ void SpiderColl::populateWaitingTreeFromSpiderdb ( bool reentry ) { m_numBytesScanned = 0; // reset for next scan m_nextKey2.setMin(); - // no int32_ter need rebuild + // no longer need rebuild m_waitingTreeNeedsRebuild = false; } @@ -3227,7 +3227,7 @@ static void doledWrapper ( void *state ) { "somehow reduce doleiptable score now...", mstrerror(g_errno)); - // no int32_ter populating doledb. we also set to false in + // no longer populating doledb. we also set to false in // gotSpiderListWrapper //THIS->m_isPopulating = false; @@ -3626,7 +3626,7 @@ bool SpiderColl::readListFromSpiderdb ( ) { if ( g_conf.m_logDebugSpider ) log("spider: back from msg5 spiderdb read of %"INT32" bytes", m_list.m_listSize); - // no int32_ter getting list + // no longer getting list m_gettingList1 = false; // got it without blocking. maybe all in tree or in cache @@ -4856,7 +4856,7 @@ bool SpiderColl::addWinnersIntoDoledb ( ) { if ( ! addToDoleTable ( sreq3 ) ) return true; // this logic is now in addToDoleTable() - // . if it was empty it is no int32_ter + // . if it was empty it is no longer // . 
we have this flag here to avoid scanning empty doledb // priorities because it saves us a msg5 call to doledb in // the scanning loop @@ -4866,7 +4866,7 @@ bool SpiderColl::addWinnersIntoDoledb ( ) { //m_isDoledbEmpty [ bp ] = 0; } - // and the whole thing is no int32_ter empty + // and the whole thing is no longer empty //m_allDoledbPrioritiesEmpty = 0;//false; //m_lastEmptyCheck = 0; @@ -6532,7 +6532,7 @@ bool SpiderLoop::gotDoledbList2 ( ) { // // sanity check. verify the spiderrequest also exists in our - // spidercache. we no int32_ter store doled out spider requests in our + // spidercache. we no longer store doled out spider requests in our // cache!! they are separate now. // //if ( g_conf.m_logDebugSpider ) { @@ -6846,7 +6846,7 @@ bool SpiderLoop::spiderUrl9 ( SpiderRequest *sreq , // if we already have the lock then forget it. this can happen // if spidering was turned off then back on. - // MDW: TODO: we can't do this anymore since we no int32_ter have + // MDW: TODO: we can't do this anymore since we no longer have // the lockTable check above because we do not control our own // lock now necessarily. it often is in another group's lockTable. //if ( g_spiderLoop.m_lockTable.isInTable(&lockKey) ) { @@ -6979,7 +6979,7 @@ bool SpiderLoop::spiderUrl2 ( ) { coll , pbuf , MAX_NICENESS ) ) { - // i guess m_coll is no int32_ter valid? + // i guess m_coll is no longer valid? mdelete ( m_docs[i] , sizeof(XmlDoc) , "Doc" ); delete (m_docs[i]); m_docs[i] = NULL; @@ -7146,7 +7146,7 @@ bool SpiderLoop::indexedDoc ( XmlDoc *xd ) { // . but only the first time we spider it... /* if ( ! strcmp(xd->m_coll,"qatest123") && ! 
respider && - // no int32_ter need this when qa testing spider, not parser + // no longer need this when qa testing spider, not parser g_conf.m_testParserEnabled ) { // save the buffers //saveTestBuf(); @@ -7648,7 +7648,7 @@ bool Msg12::gotLockReply ( UdpSlot *slot ) { // no need to remove them if none were granted because another // host in our group might have it 100% locked. if ( m_grants == 0 ) { - // no int32_ter in locks operation mode + // no longer in locks operation mode m_gettingLocks = false; // ok, they are all back, resume loop //if ( ! m_callback ) g_spiderLoop.spiderUrl2 ( ); @@ -10446,7 +10446,7 @@ int32_t getUrlFilterNum2 ( SpiderRequest *sreq , //if ( (bool)sreq->m_urlIsDocId==val ) continue; if ( (bool)sreq->m_isPageReindex==val ) continue; // skip - p += 10; + p += 12; // skip to next constraint p = strstr(p, "&&"); // all done? @@ -10462,7 +10462,7 @@ int32_t getUrlFilterNum2 ( SpiderRequest *sreq , //if ( (bool)sreq->m_urlIsDocId==val ) continue; if ( (bool)sreq->m_isPageReindex==val ) continue; // skip - p += 10; + p += 9; // skip to next constraint p = strstr(p, "&&"); // all done? @@ -11875,7 +11875,7 @@ bool SpiderColl::printStats ( SafeBuf &sb ) { // sandwiched together just right because we only compare to the previous // SpiderRequest we added when looking for dups. just need to hash the // relevant input bits and use that for deduping. -// . TODO: we can store ufn/priority/spiderTime in the SpiderRequest aint32_t +// . TODO: we can store ufn/priority/spiderTime in the SpiderRequest along // with the date now, so if url filters do not change then // gotSpiderdbList() can assume those to be valid and save time. BUT it does // have siteNumInlinks... @@ -12421,7 +12421,7 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) { if ( ! stats->m_hasUrlsReadyToSpider ) continue; // inc the count otherwise gi->m_hasUrlsReadyToSpider++; - // . no int32_ter initializing? + // . no longer initializing? // . 
sometimes other shards get the spider // requests and not us!!! if ( cr->m_spiderStatus == SP_INITIALIZING ) @@ -12462,7 +12462,7 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) { //bool has = cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider; if ( hadUrlsReady && - // and it no int32_ter does now... + // and it no longer does now... ! cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider ) { log("spider: all %"INT32" hosts report %s (%"INT32") has no " "more urls ready to spider", diff --git a/SpiderProxy.cpp b/SpiderProxy.cpp index 25591f4d0..f62070807 100644 --- a/SpiderProxy.cpp +++ b/SpiderProxy.cpp @@ -660,7 +660,7 @@ void handleRequest54 ( UdpSlot *udpSlot , int32_t niceness ) { // this table maps a url's current IP to a possibly MULTIPLE slots // which tell us what proxy is downloading a page from that IP. // so we can try to find a proxy that is not download a url from - // this IP currently, or hasn't been for the int32_test time... + // this IP currently, or hasn't been for the longest time... int32_t hslot = s_loadTable.getSlot ( &urlIp ); // scan all proxies that have this urlip outstanding for ( int32_t i = hslot ; i >= 0 ; i = s_loadTable.getNextSlot(i,&urlIp)){ @@ -826,7 +826,7 @@ void handleRequest54 ( UdpSlot *udpSlot , int32_t niceness ) { sp->m_lastTimeUsedForThisIp >= oldest ) continue; - // pick the spider proxy used int32_test ago + // pick the spider proxy used longest ago oldest = sp->m_lastTimeUsedForThisIp; minCount = sp->m_countForThisIp; // got a new winner diff --git a/Statsdb.cpp b/Statsdb.cpp index 9178c674a..85baef752 100644 --- a/Statsdb.cpp +++ b/Statsdb.cpp @@ -151,8 +151,9 @@ bool Statsdb::init ( ) { m_niceness = 0; // init the label table - static char s_buf[576]; - if ( ! m_labelTable.set(4,4,64,s_buf,576,false,0,"statcolors") ) + static char s_buf[832]; + if ( ! 
m_labelTable.set(4,sizeof(Label *),64, + s_buf,832,false,0,"statcolors") ) return false; // stock the table int32_t n = (int32_t)sizeof(s_labels)/ sizeof(Label); diff --git a/StopWords.cpp b/StopWords.cpp index 0e1c60b13..9aab55a3a 100644 --- a/StopWords.cpp +++ b/StopWords.cpp @@ -1848,7 +1848,7 @@ static char *s_commonWords[] = { "ever", "every", "become", - "aint32_t", + "along", "tion", // broken words "ture", // broken words diff --git a/Summary.cpp b/Summary.cpp index bd2e39d1d..918c7e97e 100644 --- a/Summary.cpp +++ b/Summary.cpp @@ -993,7 +993,7 @@ bool Summary::getDefaultSummary ( Xml *xml, int scoreMult = 1; char *pend = m_summary + maxSummaryLen - 2; int32_t start = -1, numConsecutive = 0; - int32_t bestStart = -1, bestEnd = -1, int32_testConsecutive = 0; + int32_t bestStart = -1, bestEnd = -1, longestConsecutive = 0; int32_t lastAlnum = -1; // google seems to index SEC_MARQUEE, so i took that out of here int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE; @@ -1009,7 +1009,7 @@ bool Summary::getDefaultSummary ( Xml *xml, if (start > 0 && bestStart == start && ( words->m_words[i] - words->m_words[start] ) >= ( maxSummaryLen - 8 )){ - int32_testConsecutive = numConsecutive; + longestConsecutive = numConsecutive; bestStart = start; bestEnd = lastAlnum;//i-1; break; @@ -1056,8 +1056,8 @@ bool Summary::getDefaultSummary ( Xml *xml, else if ( ! wids[i] ) continue; // end of consecutive words - if ( numConsecutive > int32_testConsecutive ) { - int32_testConsecutive = numConsecutive; + if ( numConsecutive > longestConsecutive ) { + longestConsecutive = numConsecutive; bestStart = start; bestEnd = i-1; } diff --git a/Syncdb.cpp b/Syncdb.cpp index f8d73fd68..0ec5ad0e9 100644 --- a/Syncdb.cpp +++ b/Syncdb.cpp @@ -393,7 +393,7 @@ bool Syncdb::gotList ( ) { // . add it using msg4.cpp::addMetaList() // . sets g_errno and returns false on error if ( ! addMetaList ( rec ) ) return false; - // we no int32_ter have to add it! + // we no longer have to add it! 
m_qt.deleteNode ( 0 , (char *)&k , true ); // point to next m_ia++; diff --git a/Tagdb.cpp b/Tagdb.cpp index a5087b372..d50301eda 100644 --- a/Tagdb.cpp +++ b/Tagdb.cpp @@ -1107,7 +1107,7 @@ bool TagRec::setFromHttpRequest ( HttpRequest *r, TcpSocket *s ) { // the ST_SITE field anyway... if ( ! ufu && ! us ) return true; - // make it null terminated since we no int32_ter do this automatically + // make it null terminated since we no longer do this automatically fou.pushChar('\0'); // normalize it @@ -1628,7 +1628,7 @@ static TagDesc s_tagDesc[] = { {"sitenuminlinksuniquecblock" ,0x00,0}, {"sitenuminlinkstotal" ,0x00,0}, - // keep these although no int32_ter used + // keep these although no longer used {"sitepop" ,0x00,0}, {"sitenuminlinksfresh" ,0x00,0}, @@ -1754,11 +1754,11 @@ int32_t getTagTypeFromStr( char *tagname , int32_t tagnameLen ) { char *getTagStrFromType ( int32_t tagType ) { // make sure table is valid if ( ! s_initialized ) g_tagdb.setHashTable(); - TagDesc *td = (TagDesc *)s_ht.getValue ( &tagType ); + TagDesc **ptd = (TagDesc **)s_ht.getValue ( &tagType ); // sanity check - if ( ! td ) { char *xx=NULL;*xx=0; } + if ( ! ptd ) { char *xx=NULL;*xx=0; } // return it - return td->m_name; + return (*ptd)->m_name; } // a global class extern'd in .h file @@ -1782,7 +1782,7 @@ bool Tagdb::setHashTable ( ) { s_initialized = true; // the hashtable of TagDescriptors //if ( ! s_ht.set ( 1024 ) ) - if ( ! s_ht.set ( 4,sizeof(char *),1024,NULL,0,false,0,"tgdbtb" ) ) + if ( ! 
s_ht.set ( 4,sizeof(TagDesc *),1024,NULL,0,false,0,"tgdbtb" ) ) return log("tagdb: Tagdb hash init failed."); // stock it int32_t n = (int32_t)sizeof(s_tagDesc)/(int32_t)sizeof(TagDesc); @@ -1793,14 +1793,14 @@ bool Tagdb::setHashTable ( ) { // use the same algo that Words.cpp computeWordIds does int32_t h = hash64Lower_a ( s , slen ); // call it a bad name if already in there - TagDesc *etd = (TagDesc *)s_ht.getValue ( &h ); - if ( etd ) + TagDesc **petd = (TagDesc **)s_ht.getValue ( &h ); + if ( petd ) return log("tagdb: Tag %s collides with old tag %s", - td->m_name,etd->m_name); + td->m_name,(*petd)->m_name); // set the type td->m_type = h; // add it - s_ht.addKey ( &h , (TagDesc **)&td ); + s_ht.addKey ( &h , &td ); } return true; } @@ -1839,7 +1839,7 @@ bool Tagdb::init ( ) { // return log("tagdb: lock table init failed."); // . initialize our own internal rdb - // . i no int32_ter use cache so changes to tagdb are instant + // . i no longer use cache so changes to tagdb are instant // . we still use page cache however, which is good enough! return m_rdb.init ( g_hostdb.m_dir , "tagdb" , @@ -1867,7 +1867,7 @@ bool Tagdb::init2 ( int32_t treeMem ) { // . NOTE: 32 bytes of the 82 are overhead int32_t maxTreeNodes = treeMem / 82; // . initialize our own internal rdb - // . i no int32_ter use cache so changes to tagdb are instant + // . i no longer use cache so changes to tagdb are instant // . we still use page cache however, which is good enough! return m_rdb.init ( g_hostdb.m_dir , "tagdbRebuild" , @@ -2503,7 +2503,7 @@ int32_t Tagdb::getMatchPoints ( Url *recUrl , Url *url ) { if ( strncmp ( upath , rpath , rplen ) != 0 ) return 0; // . now we got a solid match // . add 1 pt for each char in recUrl's path - // . so the int32_ter recUrl's path the better the match (more specific) + // . so the longer recUrl's path the better the match (more specific) // . 
this allows us to override TagRecs for deeper sub urls pts += rplen; // add in host size of the matching recUrl @@ -2972,7 +2972,7 @@ void Msg8a::gotAllReplies ( ) { cx.set ( 4,0,64,cbuf,2048,false,m_niceness,"tagtypetab"); // . loop over all tags in all lists in order by key // . each list should be from a different suburl? - // . the first list should be the narrowest/int32_test? + // . the first list should be the narrowest/longest? for ( ; tag ; tag = m_tagRec->getNextTag ( tag ) ) { // breathe QUICKPOLL(m_niceness); @@ -4049,7 +4049,7 @@ class State12 { //bool m_isRootAdmin; //bool m_isAssassin; // . Commented by Gourav - // . Reason:user perm no int32_ter used + // . Reason:user perm no longer used //char m_userType; HttpRequest m_r; //char *m_username; @@ -4111,7 +4111,7 @@ bool sendPageTagdb ( TcpSocket *s , HttpRequest *req ) { //st->m_isRootAdmin = isAdmin; //st->m_isAssassin = isAssassin; // . Commented by Gourav - // . Reason:user perm no int32_ter used + // . Reason:user perm no longer used //st->m_userType = g_pages.getUserType ( s , req ); // assume we've nothing to add st->m_adding = false; @@ -4405,7 +4405,7 @@ bool sendReply ( void *state ) { false , sizeof(key128_t) ); - // no int32_ter adding + // no longer adding st->m_adding = false; // . just use TagRec::m_msg1 now @@ -4888,7 +4888,7 @@ bool isTagTypeIndexable ( int32_t tt ) { /* bool isTagTypeString ( int32_t tt ) { // look up in hash table - TagDesc *td = (TagDesc *)s_ht.getValue ( tt ); + TagDesc *td = (TagDesc **)s_ht.getValue ( tt ); // if none, that is crazy if ( ! 
td ) { char *xx=NULL;*xx=0; } // return diff --git a/TcpServer.cpp b/TcpServer.cpp index 0dfb936f5..958b2565d 100644 --- a/TcpServer.cpp +++ b/TcpServer.cpp @@ -1038,7 +1038,7 @@ TcpSocket *TcpServer::wrapSocket ( int sd , int32_t niceness , bool isIncoming ) hadError: log("tcp: Had error preparing socket: %s.",mstrerror(g_errno)); m_tcpSockets [ sd ] = NULL; - // clear it, this means no int32_ter in use + // clear it, this means no longer in use s->m_startTime = 0LL; //mfree ( s , sizeof(TcpSocket) ,"TcpServer" ); // uncount @@ -1130,7 +1130,7 @@ bool TcpServer::closeLeastUsed ( int32_t maxIdleTime ) { // // . "available" means not being used but still connected // // . return false if we could not close any cuz they're all used // bool TcpServer::closeLeastUsed ( ) { -// // . see who hasn't been used in the int32_test time +// // . see who hasn't been used in the longest time // // . only check the available sockets (m_state == ST_AVAILABLE) // int64_t minTime = (int64_t) 0x7fffffffffffffffLL; // int32_t mini = -1; @@ -1454,7 +1454,7 @@ int32_t TcpServer::readSocket ( TcpSocket *s ) { if ( s->m_totalToRead <= 0 || s->m_readOffset < s->m_totalToRead ) goto loop; // return 0; // . if it was a reply, keep looping until we read 0 byte packet - // since we no int32_ter support keep-alive + // since we no longer support keep-alive // . NO! 
i think the linksys befsr81 nat/dsl router is blocking // some FINs so we never get that freakin 0 byte packet, so // let's force the close ourselves @@ -2055,7 +2055,7 @@ void TcpServer::destroySocket ( TcpSocket *s ) { //log("unregistering sd=%"INT32"",sd); // discount if it was an incoming connection if ( s->m_isIncoming ) m_numIncomingUsed--; - // clear it, this means no int32_ter in use + // clear it, this means no longer in use s->m_startTime = 0LL; // count # of destroys in case a function is still referencing diff --git a/Thesaurus.cpp b/Thesaurus.cpp index 53fb78854..3d0579d20 100644 --- a/Thesaurus.cpp +++ b/Thesaurus.cpp @@ -1750,7 +1750,7 @@ static void gotAffinityDoc(void *state, TcpSocket *socket) { Xml xml; group->m_recv++; aff->m_recv++; - // the stuff below might no int32_ter be valid (synonyms specifically) + // the stuff below might no longer be valid (synonyms specifically) if (!aff->m_thes->m_rebuilding) { // do cleanup buildAffinityGroup(group); @@ -2589,7 +2589,7 @@ bool Thesaurus::load() { if (unknown) log(LOG_INIT, "build: %"INT32" synonyms with missing/" "invalid type", unknown); - // this no int32_ter resets m_synonymTable, why did we + // this no longer resets m_synonymTable, why did we // want to do that anyway??? MDW reset(); diff --git a/Threads.cpp b/Threads.cpp index d96f42c41..5ca9e6689 100644 --- a/Threads.cpp +++ b/Threads.cpp @@ -1133,7 +1133,7 @@ bool ThreadQueue::timedCleanUp ( int32_t maxNiceness ) { } } - //since we need finer grained control in loop, we no int32_ter collect + //since we need finer grained control in loop, we no longer collect //the callbacks, sort, then call them. we now call them right away //that way we can break out if we start taking too int32_t and //give control back to udpserver. 
@@ -1513,7 +1513,7 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , int32_t maxNiceness ) { log(LOG_DEBUG, "threads: took %"INT64" ms to callback %"INT32" " "callbacks, nice: %"INT32"", took2, numCallbacks, maxNiceness); - //since we need finer grained control in loop, we no int32_ter collect + //since we need finer grained control in loop, we no longer collect //the callbacks, sort, then call them. we now call them right away //that way we can break out if we start taking too int32_t and //give control back to udpserver. @@ -1889,7 +1889,7 @@ bool ThreadQueue::launchThread2 ( ThreadEntry *te ) { // return false; // . if the thread to launch has niceness > lowest launched then bail // . i.e. don't launch a low-priority thread if we have highs running - // . we no int32_ter let a niceness of 1 prevent a niceness of 2 from + // . we no longer let a niceness of 1 prevent a niceness of 2 from // launching, this way we can launch merge threads at a niceness // of 1 w/o hurting the spidering too much, but still giving the // merge some preferential treatment over the disk so we don't diff --git a/Timedb.cpp b/Timedb.cpp index 1712b0e02..089459487 100644 --- a/Timedb.cpp +++ b/Timedb.cpp @@ -627,7 +627,7 @@ bool addTimedbKey ( key128_t *kp , uint32_t nowGlobal , HashTableX *ht ) { // . ok, nuke it i guess that was it // . PROBLEM: revdb negative keys are added after the latest // timedb keys for a doc, so if the best time was deleted - // because event changed times, then it will no int32_ter have + // because event changed times, then it will no longer have // any time in this table! ht->removeKey(&key64); return true; diff --git a/Title.cpp b/Title.cpp index 30ebf1bbc..b11555a21 100644 --- a/Title.cpp +++ b/Title.cpp @@ -335,7 +335,7 @@ bool Title::setTitle4 ( XmlDoc *xd , if ( k->size_rssItem > 10 && ++rcount >= 20 ) continue; // set Url Url u; - u.set ( k->ptr_urlBuf , k->size_urlBuf ); + u.set ( k->getUrl() , k->size_urlBuf ); // is it the same host as us? 
bool sh = true; // the title url @@ -353,15 +353,15 @@ bool Title::setTitle4 ( XmlDoc *xd , // set the words to it //if ( ! k->setXmlFromLinkText ( &tx[ti] ) ) // return false; - char *p = k-> ptr_linkText; + char *p = k->getLinkText(); int32_t plen = k->size_linkText - 1; if ( ! verifyUtf8 ( p , plen ) ) { log("title: set4 bad link text from url=%s", - k->ptr_urlBuf); + k->getUrl()); continue; } // now the words. - if ( ! tw[ti].set ( k->ptr_linkText , + if ( ! tw[ti].set ( k->getLinkText() , k->size_linkText-1, // len TITLEREC_CURRENT_VERSION , true , // computeIds diff --git a/TopTree.cpp b/TopTree.cpp index 2627a2114..9ad751f07 100644 --- a/TopTree.cpp +++ b/TopTree.cpp @@ -688,7 +688,7 @@ int32_t TopTree::getPrev ( int32_t i ) { if ( i == m_lowNode ) return -1; // keep getting the parent until it has a bigger key // or until we're the RIGHT kid of the parent. that's better - // cuz comparing keys takes int32_ter. loop is 6 cycles per iteration. + // cuz comparing keys takes longer. loop is 6 cycles per iteration. //while ( p >= 0 && m_keys(p) > m_keys(i) ) p = PARENT(p); while ( p >= 0 && LEFT(p) == i ) { i = p; p = PARENT(p); } // p will be -1 if none are left @@ -717,7 +717,7 @@ int32_t TopTree::getNext ( int32_t i ) { //if ( i == m_highNode ) return -1; // otherwise keep getting the parent until it has a bigger key // or until we're the LEFT kid of the parent. that's better - // cuz comparing keys takes int32_ter. loop is 6 cycles per iteration. + // cuz comparing keys takes longer. loop is 6 cycles per iteration. 
//while ( p >= 0 && m_keys[p] < m_keys[i] ) p = m_parents[p]; while ( p >= 0 && RIGHT(p) == i ) { i = p; p = PARENT(p); } // p will be -1 if none are left diff --git a/TopTree.h b/TopTree.h index 8d2e616d9..cfc875fd3 100644 --- a/TopTree.h +++ b/TopTree.h @@ -26,7 +26,7 @@ class TopNode { char m_clusterLevel; key_t m_clusterRec; - // no int32_ter needed, Msg3a does not need, it has already + // no longer needed, Msg3a does not need, it has already //unsigned char m_tier ; float m_score ; int64_t m_docId; diff --git a/UdpServer.cpp b/UdpServer.cpp index 9a170b042..c82924068 100644 --- a/UdpServer.cpp +++ b/UdpServer.cpp @@ -1116,7 +1116,7 @@ void UdpServer::process_ass ( int64_t now , int32_t maxNiceness) { if(elapsed < 10) { // we did not call any, so resort to nice callbacks makeCallbacks_ass ( /*niceness level*/ 1 ) ; - // no int32_ter need to be called + // no longer need to be called // if we did anything loop back up // . but only if we haven't been looping forever, // . if so we need to relinquish control to loop. @@ -1819,7 +1819,7 @@ void UdpServer::resume ( ) { // debug msg if ( g_conf.m_logDebugUdp ) log(LOG_DEBUG,"udp: RESUMING UDPSERVER."); - // we are no int32_ter suspened + // we are no longer suspened m_isSuspended = false; // get time now int64_t now = gettimeofdayInMillisecondsLocal(); @@ -3171,7 +3171,7 @@ UdpSlot *UdpServer::getEmptyUdpSlot_ass ( key_t k ) { //m_head2 = slot; // put the used slot at the tail so older slots are at the head and // makeCallbacks() can take care of the callbacks that have been - // waiting the int32_test first... + // waiting the longest first... if ( m_tail2 ) { slot->m_next2 = NULL; slot->m_prev2 = m_tail2; diff --git a/UdpServer.h b/UdpServer.h index 7a4179c0d..83dd1fe05 100644 --- a/UdpServer.h +++ b/UdpServer.h @@ -109,7 +109,7 @@ class UdpServer { // on the remote machine // . backoff is how int32_t to wait for an ACK in ms before we resend // . we double backoff each time we wait w/o getting any ACK - // . 
don't wait int32_ter than maxWait for a resend + // . don't wait longer than maxWait for a resend // . if we try to resend a request dgram MORE than "maxResends" times, // we do not resend it and we returns with g_errno set to ENOACK, // indicating we have not gotten ANY ack for a dgram. if a host dies @@ -143,7 +143,7 @@ class UdpServer { // . the "msg" will be freed unless slot->m_sendBufAlloc is set to NULL // . backoff is how int32_t to wait for an ACK in ms before we resend // . we double backoff each time we wait w/o getting any ACK - // . don't wait int32_ter than maxWait for a resend + // . don't wait longer than maxWait for a resend void sendReply_ass (char *msg , int32_t msgSize , char *alloc , diff --git a/UdpSlot.h b/UdpSlot.h index b3b4c1534..4643d9970 100644 --- a/UdpSlot.h +++ b/UdpSlot.h @@ -404,7 +404,7 @@ class UdpSlot { // now caller can decide initial backoff, doubles each time no ack rcvd int16_t m_backoff; - // don't wait int32_ter than this, however + // don't wait longer than this, however int16_t m_maxWait; // save cpu by not having to call memset() on m_sentBits et al diff --git a/Url.cpp b/Url.cpp index 19e4c2045..32a416b4b 100644 --- a/Url.cpp +++ b/Url.cpp @@ -466,7 +466,7 @@ void Url::set ( char *t , int32_t tlen , bool addWWW , bool stripSessionId , // NULL terminate for strchr() m_host [ m_hlen ] = '\0'; // . common mistake: if hostname has no '.' in it append a ".com" - // . now that we use hosts in /etc/hosts we no int32_ter do this + // . now that we use hosts in /etc/hosts we no longer do this //if ( m_hlen > 0 && strchr ( m_host ,'.' 
) == NULL ) { // memcpy ( &m_host[m_hlen] , ".com" , 4 ); // m_hlen += 4; diff --git a/Users.cpp b/Users.cpp index 03c37432a..7904ba5b6 100644 --- a/Users.cpp +++ b/Users.cpp @@ -494,7 +494,7 @@ void Users::setDatum ( char *data, int32_t column, User *user, bool hasStar){ case 6:{ // save the user permission // only one user is allowed - // user permission keyword no int32_ter used + // user permission keyword no longer used /*if ( ! user->m_permissions & 0xff ){ if (strcmp(data,"master")==0) user->m_permissions = USER_MASTER; @@ -724,7 +724,7 @@ bool Users::hasPermission ( HttpRequest *r, int32_t page , TcpSocket *s ) { Host *h = g_hostdb.getHostByIp(s->m_ip); // we often ssh tunnel in through router0 which is also // the proxy, but now the proxy uses msg 0xfd to forward - // http requests, so we no int32_ter have to worry about this + // http requests, so we no longer have to worry about this // being a security hazard //Host *p = g_hostdb.getProxyByIp(s->m_ip); //if ( h && ! p ) return true; diff --git a/Vector.cpp b/Vector.cpp index c4960c25b..2e9764c76 100644 --- a/Vector.cpp +++ b/Vector.cpp @@ -295,7 +295,7 @@ int cmp ( const void *h1 , const void *h2 ) { /* // . TODO: use links->getDomHash(i) not getLinkHash() DOES NOT WORK NO MORE!!! -// . get the 20 int32_test links on this page +// . get the 20 longest links on this page // . do not include links from the same domain // . "links" class must have been set with "setLinkHashes" set to true bool Vector::setLinkHashes ( Links *links , Url *url ) { diff --git a/Weights.cpp b/Weights.cpp index 2c621301e..d878ba0e3 100644 --- a/Weights.cpp +++ b/Weights.cpp @@ -1,5 +1,5 @@ // TODO: pass spam class to weight class and modify weights based on the spam -// then we can just serialize the weight vector in the title rec aint32_t +// then we can just serialize the weight vector in the title rec along // with ptr offsets to the words that we index. 
carver can just scan // through the word ptrs rather than 1 char at a time. summary generator // can just use the weights to score each sample then. @@ -105,7 +105,7 @@ void Weights::reset() { // Demote the weight of the words and phrases in repeated sentence fragments. // Fixes message boards which include the same msg over again in the reply. // The first title and first header tag have amnesty, those often repeat -// anyway. What about int32_t titles? The demotion will be more the int32_ter +// anyway. What about int32_t titles? The demotion will be more the longer // the repeated fragment. TODO: Fragments have to have a minimum length of // 5 words unless they are surrounded by breaking tags. Hey, but we will demote // those words for being in a small section via RULE #6. @@ -919,7 +919,7 @@ bool Weights::set2 ( Words *words , // wid1 is "mexico" // pid2 is "mexico good" // wid2 is "good" -// . we store sliderParm in titleRec so we can update it aint32_t +// . we store sliderParm in titleRec so we can update it along // with title and header weights on the fly from the spider controls void getWordToPhraseRatioWeights ( int64_t pid1 , // pre phrase int64_t wid1 , diff --git a/Wiki.cpp b/Wiki.cpp index 3f998b19c..abbdcd23b 100644 --- a/Wiki.cpp +++ b/Wiki.cpp @@ -417,7 +417,7 @@ bool Wiki::setPhraseAffinityWeights ( Query *q , float *affWeights , if ( qw[i].m_rawWordId == 0LL ) continue; // loop count loopCount++; - // no int32_ter one title? + // no longer one title? if ( loopCount > 1 && oneTitle && i >lasti ) *oneTitle = false; // count it int32_t count = 0; diff --git a/XmlDoc.cpp b/XmlDoc.cpp index 57a601506..b67da7aab 100644 --- a/XmlDoc.cpp +++ b/XmlDoc.cpp @@ -1580,7 +1580,8 @@ bool XmlDoc::set2 ( char *titleRec , char *upend = m_ubuf + m_ubufSize; // how many XmlDoc::ptr_* members do we have? 
set "np" to that - int32_t np = ((char *)&size_firstUrl - (char *)&ptr_firstUrl) / 4; + int32_t np = ((char *)&size_firstUrl - (char *)&ptr_firstUrl) ; + np /= sizeof(char *); // point to the first ptr char **pd = (char **)&ptr_firstUrl; @@ -2659,7 +2660,7 @@ bool XmlDoc::indexDoc2 ( ) { } - // make sure our msg4 is no int32_ter in the linked list! + // make sure our msg4 is no longer in the linked list! if (m_msg4Waiting && isInMsg4LinkedList(&m_msg4)){char *xx=NULL;*xx=0;} if ( m_msg4Waiting && g_conf.m_testSpiderEnabled ) @@ -3233,9 +3234,9 @@ int32_t *XmlDoc::getIndexCode2 ( ) { break; if ( k1->m_siteNumInlinks != k2->m_siteNumInlinks ) goto changed; - s1 = k1->ptr_linkText; + s1 = k1->getLinkText(); len1 = k1->size_linkText - 1; // exclude \0 - s2 = k2->ptr_linkText; + s2 = k2->getLinkText(); len2 = k2->size_linkText - 1; // exclude \0 if ( len1 != len2 ) goto changed; @@ -3790,7 +3791,8 @@ bool XmlDoc::setTitleRecBuf ( SafeBuf *tbuf, int64_t docId, int64_t uh48 ){ // data ptr, consider a NULL to mean empty too! char **pd = (char **)&ptr_firstUrl; // how many XmlDoc::ptr_* members do we have? set "np" to that - int32_t np = ((char *)&size_firstUrl - (char *)&ptr_firstUrl) / 4; + int32_t np = ((char *)&size_firstUrl - (char *)&ptr_firstUrl) ; + np /= sizeof(char *); // count up total we need to alloc int32_t need1 = m_headerSize; // clear these @@ -4076,7 +4078,7 @@ SafeBuf *XmlDoc::getTitleRecBuf ( ) { } if ( ! m_utf8ContentValid ) { char *xx=NULL;*xx=0; } if ( ! m_datesValid ) { char *xx=NULL;*xx=0; } - // why do we need valid sections for a titlerec? we no int32_ter user + // why do we need valid sections for a titlerec? we no longer user // ptr_sectiondbData... //if ( ! m_sectionsValid ) { char *xx=NULL;*xx=0; } //if ( ! m_sectionsReplyValid ) { char *xx=NULL;*xx=0; } @@ -6256,7 +6258,7 @@ Sections *XmlDoc::getImpliedSections ( ) { // . 
This was called for the benefit of Sections::addImpliedSections() // but now getAddresses() which we call below ends up calling // getSimpleDates() which calls m_dates.setPart1() which calls - // m_dates.parseDates() so this is no int32_ter needed i guess. + // m_dates.parseDates() so this is no longer needed i guess. /* if ( ! m_dates.parseDates ( words , DF_FROM_BODY , bits, sections, m_niceness , &m_firstUrl , @@ -6499,7 +6501,7 @@ SectionVotingTable *XmlDoc::getNewSectionVotingTable ( ) { // . only add the date votes, not the taghash/contenthash keys // from the root, since we add those from the root voting table // into m_osvt directly! - // . we no int32_ter have root voting table! + // . we no longer have root voting table! // . this adds keys of the hash of each tag xpath // . and it adds keys of the hash of each tag path PLUS its innerhtml if ( ! ss->addVotes ( &m_nsvt , *tph ) ) return NULL; @@ -7306,7 +7308,7 @@ SectionVotingTable *XmlDoc::getOldSectionVotingTable ( ) { recall = true; } - // no int32_ter bother re-calling, because facebook is way slow... + // no longer bother re-calling, because facebook is way slow... if ( limitSectiondb ) recall = false; // . returns false and sets g_errno on error @@ -7710,17 +7712,17 @@ HashTableX *XmlDoc::getCountTable ( ) { char *p; int32_t plen; // hash link text (was hashPwids()) - p = k-> ptr_linkText; + p = k-> getLinkText(); plen = k->size_linkText - 1; if ( ! verifyUtf8 ( p , plen ) ) { log("xmldoc: bad link text 3 from url=%s for %s", - k->ptr_urlBuf,m_firstUrl.m_url); + k->getUrl(),m_firstUrl.m_url); continue; } if ( ! hashString_ct ( ct , p , plen ) ) return (HashTableX *)NULL; // hash this stuff (was hashPwids()) - p = k-> ptr_surroundingText; + p = k->getSurroundingText(); plen = k->size_surroundingText - 1; if ( ! 
hashString_ct ( ct , p , plen ) ) return (HashTableX *)NULL; @@ -9083,18 +9085,19 @@ char *XmlDoc::getGigabitQuery ( ) { // add gigabits from link info for ( Inlink *k=NULL ; info1 && (k=info1->getNextInlink(k)) ; ) { // sanity check - char *txt = k->ptr_linkText; + char *txt = k->getLinkText(); int32_t tlen = k->size_linkText; if ( tlen > 0 ) tlen--; if ( ! verifyUtf8 ( txt , tlen ) ) { log("xmldoc: bad link text 0 from url=%s for %s", - k->ptr_urlBuf,m_firstUrl.m_url); + k->getUrl(),m_firstUrl.m_url); continue; } // add those in if (!addGigabits(txt, *d, *langId ) ) return NULL; // add in neighborhoods - if(!addGigabits(k->ptr_surroundingText,*d,*langId))return NULL; + if(!addGigabits(k->getSurroundingText(),*d,*langId)) + return NULL; } // add in gigabits for meta keywords @@ -9888,7 +9891,7 @@ Url **XmlDoc::getRedirUrl() { cu->getUrlLen() , true , // addwww? true ); // strip sessid? - // if it no int32_ter has the session id, force redirect it + // if it no longer has the session id, force redirect it if ( ! gb_strcasestr( tt->getUrl(), "sessionid") && ! gb_strcasestr( tt->getUrl(), "oscsid") ) { m_redirUrlValid = true; @@ -10134,7 +10137,7 @@ Url **XmlDoc::getRedirUrl() { // simplifiedRedir = false; // special hack for nytimes.com. do not consider simplified redirs - // because it uses a cookie aint32_t with redirs to get to the final + // because it uses a cookie along with redirs to get to the final // page. char *dom2 = m_firstUrl.getDomain(); int32_t dlen2 = m_firstUrl.getDomainLen(); @@ -10768,7 +10771,7 @@ XmlDoc **XmlDoc::getRootXmlDoc ( int32_t maxCacheAge ) { } /* -// no int32_ter access Revdb to get the old metalist, now re-compute +// no longer access Revdb to get the old metalist, now re-compute RdbList *XmlDoc::getOldMetaList ( ) { // if valid return that if ( m_oldMetaListValid ) return &m_oldMetaList; @@ -11394,7 +11397,7 @@ int32_t getIsContacty ( Url *url , // skip if not local to site //if ( ! 
internal ) continue; // get the text - char *txt = k->ptr_linkText; + char *txt = k->getLinkText(); // get length of link text int32_t tlen = k->size_linkText; if ( tlen > 0 ) tlen--; @@ -11403,7 +11406,7 @@ int32_t getIsContacty ( Url *url , // 2+ bytes and breaching the buffer if ( ! verifyUtf8 ( txt , tlen ) ) { log("xmldoc: bad link text 1 from url=%s for %s", - k->ptr_urlBuf,url->m_url); + k->getUrl(),url->m_url); continue; } // convert into words i guess @@ -12504,7 +12507,7 @@ int32_t *XmlDoc::getSiteNumInlinks ( ) { // . well now we use the "ownershipchanged" tag to indicate that //if (tag && age>14*3600*24) valid=false; // . we also expire it periodically to keep the info uptodate - // . the higher quality the site, the int32_ter the expiration date + // . the higher quality the site, the longer the expiration date int32_t ns = 0; int32_t maxAge = 0; int32_t sni = -1; @@ -13274,7 +13277,7 @@ bool *XmlDoc::getIsAllowed ( ) { //char *xx=NULL;*xx=0; m_isAllowed = true; m_isAllowedValid = true; - // since ENOMIME is no int32_ter causing the indexCode + // since ENOMIME is no longer causing the indexCode // to be set, we are getting a core because crawlDelay // is invalid in getNewSpiderReply() m_crawlDelayValid = true; @@ -13447,7 +13450,7 @@ char *XmlDoc::getIsWWWDup ( ) { m_isWWWDupValid = true; return &m_isWWWDup; } - // get the FIRST URL... (no int32_ter current url after redirects) + // get the FIRST URL... (no longer current url after redirects) Url *u = getFirstUrl(); // CurrentUrl(); // if we are NOT a DOMAIN-ONLY url, then no need to do this dup check if ( u->getDomainLen() != u->getHostLen() ) { @@ -13596,7 +13599,7 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) { //} // . now search for some link info for this url/doc - // . this queries the search engine to get linking docIds aint32_t + // . this queries the search engine to get linking docIds along // with their termIds/scores from anchor text and then compiles // it all into one IndexList // . 
if we have no linkers to this url then we set siteHash, etc. @@ -13778,7 +13781,7 @@ LinkInfo **XmlDoc::getLinkInfo2 ( ) { if ( *od ) oldLinkInfo2 = *(*od)->getLinkInfo2(); // . now search for some link info for this url/doc - // . this queries the search engine to get linking docIds aint32_t + // . this queries the search engine to get linking docIds along // with their termIds/scores from anchor text and then compiles // it all into one IndexList // . if we have no linkers to this url then we set siteHash, etc. @@ -14769,7 +14772,7 @@ SafeBuf *XmlDoc::getDiffbotReply ( ) { // or not enough! (size includes \0) if ( k->size_linkText <= 1 ) continue; // sanity check - char *txt = k->ptr_linkText; + char *txt = k->getLinkText(); int32_t tlen = k->size_linkText; if ( tlen > 0 ) tlen--; // this seems to happen sometimes.. @@ -14777,7 +14780,7 @@ SafeBuf *XmlDoc::getDiffbotReply ( ) { // if anchor text has \0 skip it if ( gbstrlen(txt) != tlen ) continue; // or if surrounding text has \0 skip as well - char *surStr = k->ptr_surroundingText; + char *surStr = k->getSurroundingText(); int32_t surLen = k->size_surroundingText; if ( surLen > 0 ) surLen--; if ( surStr && gbstrlen(surStr) != surLen ) continue; @@ -14793,7 +14796,7 @@ SafeBuf *XmlDoc::getDiffbotReply ( ) { if ( ! headers.safePrintf("X-referring-url: ") ) return NULL; // do not include the terminating \0, so -1 - if ( ! headers.safeMemcpy(k->ptr_urlBuf , k->size_urlBuf-1 )) + if ( ! headers.safeMemcpy(k->getUrl() , k->size_urlBuf-1 )) return NULL; // and link text if ( ! headers.safePrintf("\r\nX-anchor-text: ") ) @@ -15827,7 +15830,7 @@ HttpMime *XmlDoc::getMime () { // . 304 is not modified since // . >= 300 should only happen if redirect chain was too int32_t to follow //int32_t httpStatus = m_mime.getHttpStatus(); - // sanity check, these must be reserved! no int32_ter, we have + // sanity check, these must be reserved! 
no longer, we have // a separate m_httpStatus in the SpiderReply class now //if ( mstrerror(httpStatus) ) { char *xx=NULL;*xx=0; } // sanity check @@ -16624,7 +16627,7 @@ char *XmlDoc::getIsBinary ( ) { // . the fctypes.cpp isBinary array takes into account // that people mix windows 1254 characters into // latin-1. windows 1254 is a superset of latin-1. - // so the more common quotes and dashes are no int32_ter + // so the more common quotes and dashes are no longer // counted as binary characters, but some of the // rarer ones are! however, the "diff" count // contraint helps us make up for that. @@ -17745,7 +17748,7 @@ char **XmlDoc::getUtf8Content ( ) { // know they are xml tags. because stuff like <br> will // become
and will be within its xml tag like // or . - // MDW: 9/28/2014. no int32_ter do this since i added hashXmlFields(). + // MDW: 9/28/2014. no longer do this since i added hashXmlFields(). /* if ( m_contentType == CT_XML ) { // count the xml tags @@ -17819,7 +17822,7 @@ char **XmlDoc::getUtf8Content ( ) { // utf8 chars so that Xml::set(), etc. still work properly and don't // add any more html tags than it should // . this will decode in place - // . MDW: 9/28/2014. no int32_ter do for xml docs since i added + // . MDW: 9/28/2014. no longer do for xml docs since i added // hashXmlFields() int32_t n = m_expandedUtf8ContentSize - 1; if ( m_contentType != CT_XML ) @@ -17837,7 +17840,7 @@ char **XmlDoc::getUtf8Content ( ) { // now rss has crap in it like "&nbsp;" so we have to do another // decoding pass - // . MDW: 9/28/2014. no int32_ter do for xml docs since i added + // . MDW: 9/28/2014. no longer do for xml docs since i added // hashXmlFields() // if ( m_contentType == CT_XML ) // isRSSExt ) // n = htmlDecode(m_expandedUtf8Content,//ptr_utf8Content, @@ -18543,7 +18546,7 @@ TagRec ***XmlDoc::getOutlinkTagRecVector () { // set to those m_fakeTagRec.reset(); // just make a bunch ptr to empty tag rec - int32_t need = links->m_numLinks * 4; + int32_t need = links->m_numLinks * sizeof(TagRec *); if ( ! m_fakeTagRecPtrBuf.reserve ( need ) ) return NULL; // make them all point to the fake empty tag rec TagRec **grv = (TagRec **)m_fakeTagRecPtrBuf.getBufStart(); @@ -19297,7 +19300,7 @@ char *XmlDoc::getIsSpam() { // . "u" must be NORMALIZED. i.e. start with http:// or https:// etc. // . we call this on outlinks as well -// . we no int32_ter look at the old and newip to determine ownership change, +// . we no longer look at the old and newip to determine ownership change, // because that is not reliable enough // . we now maybe rely on a major change to the site root page... 
bool XmlDoc::isSpam ( char *u , @@ -20453,7 +20456,7 @@ bool XmlDoc::verifyMetaList ( char *p , char *pend , bool forDelete ) { // must always be negative if deleteing // spiderdb is exempt because we add a spiderreply that is // positive and a spiderdoc - // no, this is no int32_ter the case because we add spider + // no, this is no longer the case because we add spider // replies to the index when deleting or rejecting a doc. //if ( m_deleteFromIndex && ! del && rdbId != RDB_SPIDERDB) { // char *xx=NULL;*xx=0; } @@ -21258,7 +21261,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) { ksr.m_avoidSpiderLinks = 1; // avoid EDOCUNCHANGED ksr.m_ignoreDocUnchangedError = 1; - // no int32_ter docid based we set it to parentUrl + // no longer docid based we set it to parentUrl ksr.m_urlIsDocId = 0; // but consider it a manual add. this should already be set. ksr.m_isPageReindex = 1; @@ -21983,7 +21986,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) { bool addSectionVotes = false; if ( nd ) addSectionVotes = true; if ( ! m_useSectiondb ) addSectionVotes = false; - // to save disk space no int32_ter add the roots! nto only saves sectiondb + // to save disk space no longer add the roots! nto only saves sectiondb // but also saves space in revdb //if ( nd && *isRoot ) addSectionVotes = true; if ( addSectionVotes ) { @@ -22754,14 +22757,14 @@ char *XmlDoc::getMetaList ( bool forDelete ) { // ADD NOSPLIT INDEXDB/DATEDB TERMS // /* - we added these now in hashAll() to tt1, no int32_ter ns1 since we + we added these now in hashAll() to tt1, no longer ns1 since we have the sharded by termid bit in the actual posdb key now so Rebalance.cpp works setStatus ( "adding posdb shardByTermId terms"); // checkpoint saved = m_p; - // no int32_ter anything special now since the + // no longer anything special now since the // Posdb::isShardedyTermId() bit // is in the key now so Rebalance.cpp can work if ( m_usePosdb && ! 
addTable144 ( &ns1 )) return NULL; @@ -23220,7 +23223,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) { saved = m_p; /* - See comment under DOLEDB above! this approach is no int32_ter used. + See comment under DOLEDB above! this approach is no longer used. // . remove from doledb if we had a valid key // . DO THIS BEFORE adding the SpiderReply since @@ -23281,7 +23284,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) { // m_urlBuf and the associated orderTree,ipTree, etc. and now // since we are un-doling (undoling) it we need to re-add and this // is the easiest way. it really was never removed from spiderdb - // but it will no int32_ter be in the spider's cache since we delete + // but it will no longer be in the spider's cache since we delete // it from there when we add it to doledb. so this is just a quick // way of getting it back into the cache. // now, we add this first since now Rdb.cpp calls evaluateAllReqeusts() @@ -23358,9 +23361,10 @@ char *XmlDoc::getMetaList ( bool forDelete ) { //dt12.set ( 12,1,2048,dbuf12,30000,false,m_niceness); //dt16.set ( 16,1,2048,dbuf16,40000,false,m_niceness); HashTableX dt8; - char dbuf8[30000]; + char dbuf8[34900]; // value is the ptr to the rdbId/key in the oldList - dt8.set ( 8,4,2048,dbuf8,30000,false,m_niceness,"dt8-tab"); + dt8.set ( 8,sizeof(char *),2048,dbuf8,34900, + false,m_niceness,"dt8-tab"); // just for linkdb: //HashTableX dt9; //char dbuf9[30000]; @@ -26893,7 +26897,7 @@ bool XmlDoc::hashLinks ( HashTableX *tt ) { // breathe QUICKPOLL(m_niceness); // . the score depends on some factors: - // . NOTE: these are no int32_ter valid! (see score bitmap above) + // . NOTE: these are no longer valid! (see score bitmap above) // . 4 --> if link has different domain AND has link text // . 3 --> if link has same domain AND has link text // . 
2 --> if link has different domain AND no link text @@ -27671,11 +27675,11 @@ bool XmlDoc::hashIncomingLinkText ( HashTableX *tt , int32_t tlen = k->size_linkText; if ( tlen > 0 ) tlen--; // get the text - char *txt = k->ptr_linkText; + char *txt = k->getLinkText(); // sanity check if ( ! verifyUtf8 ( txt , tlen ) ) { log("xmldoc: bad link text 2 from url=%s for %s", - k->ptr_urlBuf,m_firstUrl.m_url); + k->getUrl(),m_firstUrl.m_url); continue; } // if it is anomalous, set this, we don't @@ -27771,7 +27775,7 @@ bool XmlDoc::hashNeighborhoods ( HashTableX *tt ) { if ( (k->m_ip&0x0000ffff)==(m_ip&0x0000ffff) ) goto loop; // get the left and right texts and hash both - char *s = k->ptr_surroundingText; + char *s = k->getSurroundingText(); if ( ! s || k->size_surroundingText <= 1 ) goto loop; //int32_t inlinks = *getSiteNumInlinks(); @@ -28667,7 +28671,7 @@ Url *XmlDoc::getBaseUrl ( ) { if ( ! xml || xml == (Xml *)-1 ) return (Url *)xml; Url *cu = getCurrentUrl(); if ( ! cu || cu == (void *)-1 ) return (Url *)cu; - // no int32_ter set addWWW to true since tmblr.co has an IP but + // no longer set addWWW to true since tmblr.co has an IP but // www.tmblr.co does not m_baseUrl.set ( cu , false ); // addWWW = true // look for base url @@ -29574,7 +29578,7 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) { //reply->m_proximityScore = s->getProximityScore(); reply-> ptr_displaySum = hsum;//s->getSummary(); reply->size_displaySum = hsumLen+1;//sumSize;//s->getSummaryLen - // this is unhighlighted for deduping, and it might be int32_ter + // this is unhighlighted for deduping, and it might be longer // . seems like we are not using this for deduping but using // the gigabit vector in Msg40.cpp, so take out for now //reply-> ptr_dedupSum = s->m_summary; @@ -30938,7 +30942,7 @@ char *XmlDoc::getHighlightedSummary ( ) { // // This will get samples surrounding all the query terms for purposes // of gigabits generation. 
We don't just generate gigabits from the -// WHOLE document because it takes much int32_ter?? is that still true? +// WHOLE document because it takes much longer?? is that still true? // We assume that the first call to getTopLines() above set // matches/numMatches. We use those arrays to // skip directly to just the query terms in the document and save time. @@ -31959,7 +31963,7 @@ bool XmlDoc::hashSingleTerm ( char *s , // HACK: mangle the key if its a gbsitehash:xxxx term // used for doing "facets" like stuff on section xpaths. // - // no int32_ter do this because we just hash the term + // no longer do this because we just hash the term // gbxpathsitehash1234567 where 1234567 is that hash. // but // @@ -33330,7 +33334,7 @@ char *XmlDoc::getIsErrorPage ( ) { // default LinkInfo *li = info1; - //we have to be more sophisticated with int32_ter pages because they + //we have to be more sophisticated with longer pages because they //are could actually be talking about an error message. //if(xml->getContentLen() > 4096) return false; @@ -33379,7 +33383,7 @@ char *XmlDoc::getIsErrorPage ( ) { //if we can index some link text from the page, then do it //if(nli > 5) return false; //for ( int32_t i = 0 ; i < nli ; i++ ) { - s = k->ptr_linkText; + s = k->getLinkText(); len2 = k->size_linkText - 1; // exclude \0 //if(!s) break; //allow error msg to contain link text or vice versa @@ -35333,13 +35337,13 @@ SafeBuf *XmlDoc::getInlineSectionVotingBuf ( ) { // store mime first then content if ( ! m_utf8ContentValid ) { char *xx=NULL;*xx=0; } - // we no int32_ter use this through a proxy, so take this out + // we no longer use this through a proxy, so take this out //sb->safeMemcpy ( m_httpReply , mime->getMimeLen() ); // but hack the Content-Length: field to something alien // because we markup the html and the lenght will be different... 
//sb->nullTerm(); - // we no int32_ter use this through a proxy so take this out + // we no longer use this through a proxy so take this out //char *cl = strstr(sb->getBufStart(),"\nContent-Length:"); //if ( cl ) cl[1] = 'Z'; @@ -36421,11 +36425,11 @@ char **XmlDoc::getTitleBuf ( ) { int32_t tlen = k->size_linkText; if ( tlen > 0 ) tlen--; // get the text - char *txt = k->ptr_linkText; + char *txt = k->getLinkText(); // skip corrupted if ( ! verifyUtf8 ( txt , tlen ) ) { log("xmldoc: bad link text 4 from url=%s for %s", - k->ptr_urlBuf,m_firstUrl.m_url); + k->getUrl(),m_firstUrl.m_url); continue; } // store these @@ -38098,7 +38102,7 @@ float g_wtab[30][30]; // wid1 is "mexico" // pid2 is "mexico good" // wid2 is "good" -// . we store sliderParm in titleRec so we can update it aint32_t +// . we store sliderParm in titleRec so we can update it along // with title and header weights on the fly from the spider controls void getWordToPhraseRatioWeights ( int64_t pid1 , // pre phrase int64_t wid1 , @@ -39687,21 +39691,21 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) { if ( k->size_linkText <= 1 ) continue; // set Url Url u; - u.set ( k->ptr_urlBuf , k->size_urlBuf ); + u.set ( k->getUrl() , k->size_urlBuf ); // do not allow anomalous link text to match query //if ( k->m_isAnomaly ) continue; - char *p = k-> ptr_linkText; + char *p = k-> getLinkText(); int32_t plen = k->size_linkText - 1; if ( ! verifyUtf8 ( p , plen ) ) { log("title: set4 bad link text from url=%s", - k->ptr_urlBuf); + k->getUrl()); continue; } // debug - //log("seo: counttable for link text '%s'",k->ptr_linkText); + //log("seo: counttable for link text '%s'",k->getLinkText()); // now the words. Words ww2; - if ( ! ww2.set ( k->ptr_linkText , + if ( ! 
ww2.set ( k->getLinkText() , k->size_linkText-1, // len TITLEREC_CURRENT_VERSION , true , // computeIds @@ -39733,19 +39737,19 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) { if ( k->size_linkText <= 1 ) continue; // set Url Url u; - u.set ( k->ptr_urlBuf , k->size_urlBuf ); + u.set ( k->getUrl() , k->size_urlBuf ); // do not allow anomalous link text to match query //if ( k->m_isAnomaly ) continue; - char *p = k-> ptr_linkText; + char *p = k-> getLinkText(); int32_t plen = k->size_linkText - 1; if ( ! verifyUtf8 ( p , plen ) ) { log("title: set4 bad link text from url=%s", - k->ptr_urlBuf); + k->getUrl()); continue; } // now the words. Words ww2; - if ( ! ww2.set ( k->ptr_linkText , + if ( ! ww2.set ( k->getLinkText() , k->size_linkText-1, // len TITLEREC_CURRENT_VERSION , true , // computeIds @@ -40375,7 +40379,7 @@ bool XmlDoc::checkCachedb ( ) { QUICKPOLL(m_niceness); int32_t qkOff = *(int32_t *)p; QueryLink *qr = (QueryRel *)(base+qkOff); - // no, int32_ter, it is more complicated because + // no, longer, it is more complicated because // if m_uniqueRound scoring addition //if ( lastqr && // lastqr->m_totalRelatedQueryImportance < @@ -42423,7 +42427,7 @@ SafeBuf *XmlDoc::getRelatedDocIds ( ) { QUICKPOLL(m_niceness); // limit to top MAX_RELATED_DOCIDS related docids - // will take int32_ter to get titles/urls and related queries the + // will take longer to get titles/urls and related queries the // higher this number is, but we will have more competitor backlinks // and terms etc. int32_t maxLen = sizeof(RelatedDocId) * MAX_RELATED_DOCIDS; @@ -42601,7 +42605,7 @@ SafeBuf *XmlDoc::getRelatedDocIdsScored ( ) { QUICKPOLL(m_niceness); // limit to top MAX_RELATED_DOCIDS related docids - // will take int32_ter to get titles/urls and related queries the + // will take longer to get titles/urls and related queries the // higher this number is, but we will have more competitor // backlinks and terms etc. less space in cachedb too! 
int32_t maxLen = MAX_RELATED_DOCIDS * sizeof(RelatedDocId); @@ -43324,7 +43328,7 @@ void XmlDoc::gotMsg98Reply ( UdpSlot *slot ) { // . THIS OVERWRITES the g_qbuf offset that was in there!!! qk->m_queryStringOffset = stringOff; // to indicate that this QueryLink::m_queryStringOffset is now - // an offset into m_relatedQueryStringBuf and no int32_ter an + // an offset into m_relatedQueryStringBuf and no longer an // offset into g_qbuf of the specific hostid, we set hostid // to -1 qk->m_queryHostId = -1; @@ -43821,7 +43825,7 @@ bool XmlDoc::sendBin ( int32_t i ) { // . THEN it can create a 'gbdocid:xxxx | ' query which // it will send to a host in the network. // . it will try to keep each host in the network answering 5 such queries -// at any one time. bins are no int32_ter used. +// at any one time. bins are no longer used. // . we need to implement heavy termlist caching remotely and locally to // ensure optimal speed // . returns false if blocked, true otherwise @@ -44023,7 +44027,7 @@ bool XmlDoc::scoreDocIdRestrictedQueries ( Msg99Reply **replyPtrs , return false; } } - // i guess no int32_ter out + // i guess no longer out if ( linkPtrs && m_newxd2->m_loaded ) m_newxd2Blocked = false; @@ -44073,7 +44077,7 @@ bool XmlDoc::scoreDocIdRestrictedQueries ( Msg99Reply **replyPtrs , goto sendLoop; } } - // i guess no int32_ter out + // i guess no longer out if ( linkPtrs ) { //log("debug: newxd2 UNblocked in termlistbuf"); m_newxd2Blocked = false; @@ -44381,7 +44385,7 @@ void XmlDoc::gotMsg3fReply ( Bin *bin ) { // Multicast *mcast ) { ,qe->m_topSERPScore // of a docid slice on 1 host ); // - // no int32_ter used queryrel! + // no longer used queryrel! // // if we are scoring QueryLinks then we add a QueryRel //QueryRel qr; @@ -44430,7 +44434,7 @@ void XmlDoc::gotMsg3fReply ( Bin *bin ) { // Multicast *mcast ) { m_masterLoop ( m_masterState ); // if not done, just return... 
otherwise we double enter - // scoreDocIdRestrictedQueries() aint32_t with it's call to + // scoreDocIdRestrictedQueries() along with it's call to // getTermListBuf()... and all hell breaks loose return; } @@ -46553,7 +46557,7 @@ SafeBuf *XmlDoc::getRecommendedLinksBuf ( ) { int64_t docId = g_linkdb.getLinkerDocId_uk ( &key ); //int32_t discovered = g_linkdb.getDiscoveryDate_uk(&key); - // skip if no int32_ter there on page, we keep these + // skip if no longer there on page, we keep these // only to graph lost links over time int32_t lostDate = g_linkdb.getLostDate_uk ( &key ); if ( lostDate ) @@ -46669,7 +46673,7 @@ SafeBuf *XmlDoc::getRecommendedLinksBuf ( ) { // free the mem and the handylist now that we've processed them msg0->reset(); } - // no int32_ter need the msg0s and linkdb lists (Msg0::m_handyLists) + // no longer need the msg0s and linkdb lists (Msg0::m_handyLists) m_tmpMsg0Buf.purge(); diff --git a/XmlNode.cpp b/XmlNode.cpp index dea8a3927..3668eb28e 100644 --- a/XmlNode.cpp +++ b/XmlNode.cpp @@ -157,7 +157,7 @@ NodeType g_nodes[] = { {"STOP" , 0, 1, 1, 0,0, TAG_STOP ,0}, // . i added these tags for faisal, but don't really need them // since our XML tag condition handles this case - // . we can no int32_ter treat as a generic XML tags since faisal wanted + // . we can no longer treat as a generic XML tags since faisal wanted // the strip=2 option {"SPAN" , 1, 0, 1, 2,1, TAG_SPAN ,0}, // not breaking! 
{"LEGEND" , 1, 1, 1, 2,1, TAG_LEGEND ,0}, diff --git a/animate.cpp b/animate.cpp index 3e011d7ca..82b2d03ae 100644 --- a/animate.cpp +++ b/animate.cpp @@ -519,7 +519,7 @@ void main2 ( char *dirname ) { } // if ANY one of our brothers is too close to the current time, - // then wait a little int32_ter + // then wait a little longer if ( abort ) continue; // if we increased our range, keep going diff --git a/blaster2.cpp b/blaster2.cpp index dc93d33d3..e9b718671 100644 --- a/blaster2.cpp +++ b/blaster2.cpp @@ -355,7 +355,7 @@ void startSpidering ( ) { } void gotDocWrapper ( void *state , TcpSocket *s ) { - // no int32_ter launched + // no longer launched s_launched--; char* url = (char*)state; // bail if got cut off diff --git a/dmozparse.cpp b/dmozparse.cpp index b90bfce86..73865be84 100644 --- a/dmozparse.cpp +++ b/dmozparse.cpp @@ -178,7 +178,7 @@ int32_t rdfParse ( char *tagName ) { do { int32_t matchPos = 0; // move to the next tag - // . quotes are no int32_ter escaped out in the newer + // . quotes are no longer escaped out in the newer // dmoz files in oct 2013... so take that out. i do // this < is < though.. perhaps only check for // quotes when in a tag? 
diff --git a/fastIndexTable.cpp b/fastIndexTable.cpp index 694f75921..191e333c4 100644 --- a/fastIndexTable.cpp +++ b/fastIndexTable.cpp @@ -440,7 +440,7 @@ bool IndexTable::setTopDocIds ( int32_t topn , bool forcePhrases ) { // get new lowest parms getLowestTopDocId ( &minBitScore , &minScore , &minDocId ); // timing debug - // this chunk of code takes the int32_test (1-4 seconds), the bubble sort + // this chunk of code takes the longest (1-4 seconds), the bubble sort // is only 15ms log("setTopDocIds: phase 1 took %"INT64" ms", gettimeofdayInMilliseconds() - startTime ); diff --git a/fctypes.cpp b/fctypes.cpp index 969db4e97..b00a034a2 100644 --- a/fctypes.cpp +++ b/fctypes.cpp @@ -771,7 +771,7 @@ const char g_map_is_vowel[] = { 0,0,0,0,0,0,0,0, // 176 0,0,0,0,0,0,0,0, - // we are no int32_ter necessarily latin-1!! + // we are no longer necessarily latin-1!! 0,0,0,0,0,0,0,0, // 192 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208 @@ -1251,7 +1251,7 @@ int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial , // . make < and > special so Xml::set() still works // . and make & special so we do not screw up summaries if ( doSpecial ) { - // no int32_ter use this! + // no longer use this! //char *xx=NULL;*xx=0; if ( c == '<' ) { // using [ and ] looks bad in event titles... diff --git a/iconv.h b/iconv.h index a91b55465..6fa0de8be 100644 --- a/iconv.h +++ b/iconv.h @@ -12,7 +12,7 @@ Library General Public License for more details. You should have received a copy of the GNU Library General Public - License aint32_t with the GNU LIBICONV Library; see the file COPYING.LIB. + License along with the GNU LIBICONV Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ diff --git a/iostream.h b/iostream.h index a8f37724b..85e7a3e2f 100644 --- a/iostream.h +++ b/iostream.h @@ -13,7 +13,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -aint32_t with this library; see the file COPYING. If not, write to the Free +along with this library; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As a special exception, if you link this library with files diff --git a/linkspam.cpp b/linkspam.cpp index 356107907..534fc2b3c 100644 --- a/linkspam.cpp +++ b/linkspam.cpp @@ -1314,7 +1314,7 @@ bool isLinkChain ( Xml *xml , Url *linker , Url *linkee , int32_t linkNode , // if we hit a forward tag and inLink was false... we had // no corresponding back tag, so disconsider any text if ( ! inLink ) rightText = false; - // no int32_ter in an tag + // no longer in an tag inLink = false; // ok, get the url from this anchor tag diff --git a/main.cpp b/main.cpp index 9fc80714d..d6ca9483d 100644 --- a/main.cpp +++ b/main.cpp @@ -752,7 +752,7 @@ int main2 ( int argc , char *argv[] ) { //"gentfndb [hostId]\n\tgenerate missing tfndb. " //"titledb disk dumps and tight merges are no " - //"int32_ter necessary. Also " + //"longer necessary. Also " //"generates tfndb from spiderdb. tfndb-saved.dat " //"and all tfndb* files in the collection subdir " //"must not exist, so move them to a temp dir.\n\n" @@ -5744,7 +5744,7 @@ bool mainShutdown2 ( ) { // . if easydown is true, we broadcast to all others and wait to complete // the necessary transactions in each udpServer bool mainShutdown ( bool urgent ) { - // no int32_ter allow threads to do this + // no longer allow threads to do this if ( g_threads.amThread() ) return true; // hack for now //log("FIX THIS HACK"); @@ -5847,7 +5847,7 @@ bool doneShutdownServer ( ) { // return false if blocked, true otherwise bool closeAll ( void *state , void (* callback)(void *state) ) { // TODO: why is this called like 100x per second when a merge is - // going on? why don't we sleep int32_ter in between? 
+ // going on? why don't we sleep longer in between? g_tagdb.getRdb()->close(state,callback,s_urgent,true); g_catdb.getRdb()->close(state,callback,s_urgent,true); g_indexdb.getRdb()->close(state,callback,s_urgent,true); @@ -11660,7 +11660,7 @@ void dumpSectiondb(char *coll,int32_t startFileNum,int32_t numFiles, if ( k->n1 == lastk.n1 && k->n0 < lastk.n0 ) { char *xx=NULL;*xx=0; } } - // no int32_ter a first key + // no longer a first key firstKey = false; // copy it memcpy ( &lastk , k , sizeof(key128_t) ); @@ -11786,7 +11786,7 @@ void dumpRevdb(char *coll,int32_t startFileNum,int32_t numFiles, bool includeTre if ( k->n1 == lastk.n1 && k->n0 < lastk.n0 ) { char *xx=NULL;*xx=0; } } - // no int32_ter a first key + // no longer a first key firstKey = false; // copy it memcpy ( &lastk , k , sizeof(key_t) ); diff --git a/streambuf.h b/streambuf.h index 35317ed1e..c29178c31 100644 --- a/streambuf.h +++ b/streambuf.h @@ -13,7 +13,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -aint32_t with this library; see the file COPYING. If not, write to the Free +along with this library; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As a special exception, if you link this library with files