From 1f40841432583b718d761ead2e59fa04358947f9 Mon Sep 17 00:00:00 2001 From: Mihai Preda Date: Thu, 25 Oct 2018 11:03:21 +0200 Subject: [PATCH] drop savefile load old version --- Gpu.cpp | 66 ++++++++++++++++++++++++++------------------ Gpu.h | 10 +++---- checkpoint.cpp | 75 +------------------------------------------------- checkpoint.h | 2 -- 4 files changed, 45 insertions(+), 108 deletions(-) diff --git a/Gpu.cpp b/Gpu.cpp index 4d47dabb..ca5b2b18 100644 --- a/Gpu.cpp +++ b/Gpu.cpp @@ -325,8 +325,19 @@ bool Gpu::doCheck(int blockSize) { return equalNotZero(bufCheck, bufAux); } -u32 Gpu::dataLoopAcc(u32 kBegin, u32 kEnd, const unordered_set &kset) { +static u32 countOnBits(const vector &bits) { + u32 n = 0; + for (bool b : bits) { n += b; } + return n; +} + +u32 Gpu::dataLoopAcc(u32 kBegin, u32 kEnd, const vector &kset) { assert(kEnd > kBegin); + vector accs(kset.begin() + kBegin, kset.begin() + kEnd); + dataLoopAcc(accs); + return countOnBits(accs); +} +/* vector accs; u32 nAcc = 0; for (u32 k = kBegin; k < kEnd; ++k) { @@ -338,11 +349,7 @@ u32 Gpu::dataLoopAcc(u32 kBegin, u32 kEnd, const unordered_set &kset) { modSqLoopAcc(bufData, accs); return nAcc; } - -void Gpu::dataLoopMul(const vector &muls) { modSqLoopMul(bufData, muls); } - -u64 Gpu::dataResidue() { return bufResidue(bufData); } -u64 Gpu::checkResidue() { return bufResidue(bufCheck); } +*/ void Gpu::logTimeKernels() { ::logTimeKernels({&carryFused, &carryFusedMul, &fftP, &fftW, &fftH, &fftMiddleIn, &fftMiddleOut, @@ -481,11 +488,11 @@ u64 Gpu::bufResidue(Buffer &buf) { return residueFromRaw(E, N, readBuf); } -static string makeLogStr(int E, int k, u64 res, const StatsInfo &info, u32 nIters = 0) { - int end = nIters ? nIters : (((E - 1) / 1000 + 1) * 1000); - float percent = 100 / float(end); +static string makeLogStr(int E, int k, u64 res, const StatsInfo &info, u32 nIters) { + // int end = nIters ? 
nIters : (((E - 1) / 1000 + 1) * 1000); + float percent = 100 / float(nIters); - int etaMins = (end - k) * info.mean * (1 / 60000.f) + .5f; + int etaMins = (nIters - k) * info.mean * (1 / 60000.f) + .5f; int days = etaMins / (24 * 60); int hours = etaMins / 60 % 24; int mins = etaMins % 60; @@ -498,16 +505,16 @@ static string makeLogStr(int E, int k, u64 res, const StatsInfo &info, u32 nIter return buf; } -static void doLog(int E, int k, long timeCheck, u64 res, bool checkOK, Stats &stats) { +static void doLog(int E, int k, long timeCheck, u64 res, bool checkOK, Stats &stats, u32 nIters) { log("%s %s (check %.2fs)\n", checkOK ? "OK" : "EE", - makeLogStr(E, k, res, stats.getStats()).c_str(), + makeLogStr(E, k, res, stats.getStats(), nIters).c_str(), timeCheck * .001f); stats.reset(); } -static void doSmallLog(int E, int k, u64 res, Stats &stats) { - log(" %s\n", makeLogStr(E, k, res, stats.getStats()).c_str()); +static void doSmallLog(int E, int k, u64 res, Stats &stats, u32 nIters, u32 nAcc) { + log(" %s\n", makeLogStr(E, k, res, stats.getStats(), nIters).c_str()); stats.reset(); } @@ -616,8 +623,8 @@ pair, vector> Gpu::seedPRP(u32 E, u32 B1) { return make_pair(check, base); } -static vector kselect(u32 E, u32 B1, u32 B2) { - if (!B1) { return vector(); } +static vector kselect(u32 E, u32 B1, u32 B2) { + if (!B1) { return vector(E); } Primes primes(B2 + 1); vector covered(E); @@ -636,13 +643,15 @@ static vector kselect(u32 E, u32 B1, u32 B2) { } } } - + return on; + /* vector ret; for (u32 k = 0; k < E; ++k) { if (on[k]) { ret.push_back(k); } } return ret; + */ } -static auto asSet(const vector &v) { return unordered_set(v.begin(), v.end()); } +// static auto asSet(const vector &v) { return unordered_set(v.begin(), v.end()); } PRPResult Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { u32 N = this->getFFTSize(); @@ -650,7 +659,7 @@ PRPResult Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { if (B1 != 0 && B2 == 0) { B2 = E; } log("PRP M(%d), FFT 
%dK, %.2f bits/word, B1 %u, B2 %u\n", E, N/1024, E / float(N), B1, B2); - future> ksetFuture; + future> ksetFuture; if (B1 != 0) { ksetFuture = async(launch::async, kselect, E, B1, B2); } if (!PRPState::exists(E)) { @@ -679,14 +688,14 @@ PRPResult Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { int startK = k; - unordered_set kset; + vector kset; if (ksetFuture.valid()) { log("Please wait for P-1 trial points selection..\n"); ksetFuture.wait(); - kset = asSet(ksetFuture.get()); + kset = ksetFuture.get(); } - log("Selected %u P-1 trial points\n", u32(kset.size())); + log("Selected %u P-1 trial points\n", countOnBits(kset)); Signal signal; Stats stats; @@ -699,10 +708,12 @@ PRPResult Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { int nGcdAcc = 0; u64 finalRes64 = 0; + u32 nTotalIters = ((kEnd - 1) / blockSize + 1) * blockSize; while (true) { assert(k % blockSize == 0); + u32 nAcc = 0; if (k < kEnd && k + blockSize >= kEnd) { - nGcdAcc += this->dataLoopAcc(k, kEnd, kset); + nAcc = dataLoopAcc(k, kEnd, kset); auto words = this->roundtripData(); finalRes64 = residue(words); isPrime = (words == base || words == bitNeg(base)); @@ -711,10 +722,11 @@ PRPResult Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { int itersLeft = blockSize - (kEnd - k); // assert(itersLeft > 0); - if (itersLeft > 0) { nGcdAcc += this->dataLoopAcc(kEnd, kEnd + itersLeft, kset); } + if (itersLeft > 0) { nAcc += dataLoopAcc(kEnd, kEnd + itersLeft, kset); } } else { - nGcdAcc += this->dataLoopAcc(k, k + blockSize, kset); + nAcc = dataLoopAcc(k, k + blockSize, kset); } + nGcdAcc += nAcc; k += blockSize; u64 res64 = this->dataResidue(); @@ -740,7 +752,7 @@ PRPResult Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { if (!doCheck) { this->updateCheck(); if (k % 10000 == 0) { - doSmallLog(E, k, res64, stats); + doSmallLog(E, k, res64, stats, nTotalIters, nAcc); /* doSmallLog takes nIters (block-rounded total iterations) before nAcc (this block's accumulation count) */ if (args.timeKernels) { this->logTimeKernels(); } } continue; @@ -752,7 +764,7 @@ PRPResult
Gpu::isPrimePRP(u32 E, const Args &args, u32 B1, u32 B2) { bool doSave = (k < kEnd || k >= kEnd + blockSize) && ok; if (doSave) { PRPState{k, B1, blockSize, res64, check, base}.save(E); } - doLog(E, k, timer.deltaMillis(), res64, ok, stats); + doLog(E, k, timer.deltaMillis(), res64, ok, stats, nTotalIters); bool wantGCD = ok && (nGcdAcc > 10000 || doStop); if (wantGCD) { diff --git a/Gpu.h b/Gpu.h index 5e4fdfaf..78aefed1 100644 --- a/Gpu.h +++ b/Gpu.h @@ -100,15 +100,15 @@ class Gpu { vector writeData(const vector &v); vector writeCheck(const vector &v); - u64 dataResidue(); - u64 checkResidue(); + u64 dataResidue() { return bufResidue(bufData); } + u64 checkResidue() { return bufResidue(bufCheck); } bool doCheck(int blockSize); void updateCheck(); - // returns nb. of Ks selected for GCD accumulation. - u32 dataLoopAcc(u32 kBegin, u32 kEnd, const unordered_set &kset); - void dataLoopMul(const vector &muls); + void dataLoopMul(const vector &muls) { modSqLoopMul(bufData, muls); } + void dataLoopAcc(const vector &accs) { modSqLoopAcc(bufData, accs); } + u32 dataLoopAcc(u32 begin, u32 end, const vector &kset); void finish(); diff --git a/checkpoint.cpp b/checkpoint.cpp index e02cef29..ba81cc34 100644 --- a/checkpoint.cpp +++ b/checkpoint.cpp @@ -36,72 +36,6 @@ static bool read(FILE *fi, u32 nWords, vector *v) { return fread(v->data(), nWords * sizeof(u32), 1, fi); } -struct PRPState_v5 { - static constexpr const char *HEADER_R = R"(OWL 5 -Comment: %255[^ -] -Type: PRP -Exponent: %u -Iteration: %u -PRP-block-size: %u -Residue-64: 0x%016llx -Errors: %d -End-of-header: -\0)"; - - u32 k; - u32 blockSize; - int nErrors; - u64 res64; - vector check; - - bool load(u32 E) { - u32 nWords = (E - 1)/32 + 1; - u32 nBytes = (E - 1)/8 + 1; - - auto fi{openRead(fileName(E, ""))}; - if (!fi) { return false; } - - char buf[256]; - u32 fileE; - if (!(fscanf(fi.get(), HEADER_R, buf, &fileE, &k, &blockSize, &res64, &nErrors) == 6)) { return false; } - assert(E == fileE); - check = 
vector(nWords); - if (!fread(check.data(), nBytes, 1, fi.get())) { return false; } - return true; - } -}; - -bool PRPState::load_v5(u32 E) { - PRPState_v5 v5; - if (!v5.load(E)) { return false; } - - k = v5.k; - blockSize = v5.blockSize; - // nErrors = v5.nErrors; - res64 = v5.res64; - check = move(v5.check); - return true; -} - -bool PRPState::load_v6(u32 E) { - const char *HEADER = "OWL PRP 6 %u %u %u %016llx %u\n"; - u32 nWords = (E - 1) / 32 + 1; - if (auto fi{openRead(fileName(E, ".prp"))}) { - char line[256]; - u32 fileE; - u32 nErrors; - bool ok = fgets(line, sizeof(line), fi.get()) - && sscanf(line, HEADER, &fileE, &k, &blockSize, &res64, &nErrors) == 5 - && read(fi.get(), nWords, &check); - if (ok) { - assert(E == fileE); - return true; - } - } - return load_v5(E); -} - void PRPState::loadInt(u32 E, u32 wantB1, u32 iniBlockSize) { u32 nWords = (E - 1) / 32 + 1; string name = fileName(E, SUFFIX); @@ -127,13 +61,6 @@ void PRPState::loadInt(u32 E, u32 wantB1, u32 iniBlockSize) { return; } } - - if (load_v6(E)) { - if (wantB1 != 0) { log("B1 mismatch: using B1=0 from from savefile\n"); } - base = vector(nWords); - base[0] = 3; - return; - } log("PRP savefile not found '%s'\n", fileName(E, SUFFIX).c_str()); assert(false); @@ -152,4 +79,4 @@ string PRPState::durableName() { return k && (k % 20'000'000 == 0) ? "."s + to_string(k/1'000'000)+"M" : ""s; } -bool PRPState::exists(u32 E) { return openRead(fileName(E, SUFFIX)) || openRead(fileName(E,".prp")); } +bool PRPState::exists(u32 E) { return bool(openRead(fileName(E, SUFFIX))); } diff --git a/checkpoint.h b/checkpoint.h index fc683b5e..201508d9 100644 --- a/checkpoint.h +++ b/checkpoint.h @@ -16,8 +16,6 @@ class PRPState { static constexpr const char *HEADER = "OWL PRP 7 %u %u %u %u %016llx\n"; static constexpr const char *SUFFIX = ""; - bool load_v5(u32 E); - bool load_v6(u32 E); void loadInt(u32 E, u32 B1, u32 iniBlockSize); bool saveImpl(u32 E, const string &name); string durableName();