Skip to content

Commit b1bddb5

Browse files
committed
scrypt: reduce CPU usage (flush/sync)
and make some minimal keccak changes (for scrypt-jane)
1 parent 9aace79 commit b1bddb5

6 files changed

+160
-134
lines changed

scrypt-jane.cpp

+35-37
Original file line numberDiff line numberDiff line change
@@ -514,10 +514,10 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
514514
cuda_scrypt_done(thr_id, nxt);
515515

516516
cuda_scrypt_DtoH(thr_id, cuda_X[nxt], nxt, false);
517-
cuda_scrypt_flush(thr_id, nxt);
518517

519-
if(!cuda_scrypt_sync(thr_id, cur)) {
520-
return -1;
518+
//cuda_scrypt_flush(thr_id, nxt);
519+
if(!cuda_scrypt_sync(thr_id, nxt)) {
520+
break;
521521
}
522522

523523
memcpy(Xbuf[cur].ptr, cuda_X[cur], 128 * throughput);
@@ -562,51 +562,49 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
562562
cuda_scrypt_serialize(thr_id, nxt);
563563
pre_keccak512(thr_id, nxt, nonce[nxt], throughput);
564564
cuda_scrypt_core(thr_id, nxt, N);
565-
cuda_scrypt_flush(thr_id, nxt); // required
565+
//cuda_scrypt_flush(thr_id, nxt);
566+
if (!cuda_scrypt_sync(thr_id, nxt)) {
567+
break;
568+
}
566569

567570
post_keccak512(thr_id, nxt, nonce[nxt], throughput);
568571
cuda_scrypt_done(thr_id, nxt);
569572

570573
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true);
571-
cuda_scrypt_flush(thr_id, nxt); // seems required here
572-
573-
if (!cuda_scrypt_sync(thr_id, cur)) {
574-
return -1;
574+
//cuda_scrypt_flush(thr_id, nxt); // made by cuda_scrypt_sync
575+
if (!cuda_scrypt_sync(thr_id, nxt)) {
576+
break;
575577
}
576578
}
577579

578-
if(iteration > 0)
580+
for (int i=0; iteration > 0 && i<throughput; i++)
579581
{
580-
for(int i=0;i<throughput;++i) {
581-
volatile unsigned char *hashc = (unsigned char *)(&hash[cur][8*i]);
582-
583-
if (hash[cur][8*i+7] <= Htarg && fulltest(&hash[cur][8*i], ptarget))
584-
{
585-
uint32_t _ALIGN(64) thash[8], tdata[20];
586-
uint32_t tmp_nonce = nonce[cur] + i;
582+
if (hash[cur][8*i+7] <= Htarg && fulltest(&hash[cur][8*i], ptarget))
583+
{
584+
uint32_t _ALIGN(64) thash[8], tdata[20];
585+
uint32_t tmp_nonce = nonce[cur] + i;
587586

588-
for(int z=0;z<20;z++)
589-
tdata[z] = bswap_32x4(pdata[z]);
590-
tdata[19] = bswap_32x4(tmp_nonce);
587+
for(int z=0;z<19;z++)
588+
tdata[z] = bswap_32x4(pdata[z]);
589+
tdata[19] = bswap_32x4(tmp_nonce);
591590

592-
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128);
593-
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), N);
594-
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32);
591+
scrypt_pbkdf2_1((unsigned char *)tdata, 80, (unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128);
592+
scrypt_ROMix_1((scrypt_mix_word_t *)(Xbuf[cur].ptr + 128 * i), (scrypt_mix_word_t *)(Ybuf.ptr), (scrypt_mix_word_t *)(Vbuf.ptr), N);
593+
scrypt_pbkdf2_1((unsigned char *)tdata, 80, Xbuf[cur].ptr + 128 * i, 128, (unsigned char *)thash, 32);
595594

596-
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
597-
{
598-
*hashes_done = n - pdata[19];
599-
pdata[19] = tmp_nonce;
600-
scrypt_free(&Vbuf);
601-
scrypt_free(&Ybuf);
602-
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]);
603-
delete[] data[0]; delete[] data[1];
604-
gettimeofday(tv_end, NULL);
605-
return 1;
606-
} else {
607-
applog(LOG_WARNING, "GPU #%d: %s result does not validate on CPU! (i=%d, s=%d)",
608-
device_map[thr_id], device_name[thr_id], i, cur);
609-
}
595+
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
596+
{
597+
*hashes_done = n - pdata[19];
598+
pdata[19] = tmp_nonce;
599+
scrypt_free(&Vbuf);
600+
scrypt_free(&Ybuf);
601+
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]);
602+
delete[] data[0]; delete[] data[1];
603+
gettimeofday(tv_end, NULL);
604+
return 1;
605+
} else {
606+
applog(LOG_WARNING, "GPU #%d: %s result does not validate on CPU! (i=%d, s=%d)",
607+
device_map[thr_id], device_name[thr_id], i, cur);
610608
}
611609
}
612610
}
@@ -615,7 +613,7 @@ int scanhash_scrypt_jane(int thr_id, uint32_t *pdata, const uint32_t *ptarget, u
615613
nxt = (nxt+1)&1;
616614
++iteration;
617615
} while (n <= max_nonce && !work_restart[thr_id].restart);
618-
616+
out:
619617
scrypt_free(&Vbuf);
620618
scrypt_free(&Ybuf);
621619
scrypt_free(&Xbuf[0]); scrypt_free(&Xbuf[1]);

scrypt.cpp

+10-7
Original file line numberDiff line numberDiff line change
@@ -802,9 +802,8 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsign
802802
cuda_scrypt_done(thr_id, nxt);
803803

804804
cuda_scrypt_DtoH(thr_id, X[nxt], nxt, false);
805-
cuda_scrypt_flush(thr_id, nxt);
806-
807-
if(!cuda_scrypt_sync(thr_id, cur))
805+
//cuda_scrypt_flush(thr_id, nxt);
806+
if(!cuda_scrypt_sync(thr_id, nxt))
808807
{
809808
result = -1;
810809
break;
@@ -858,15 +857,19 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata, const uint32_t *ptarget, unsign
858857
pre_sha256(thr_id, nxt, nonce[nxt], throughput);
859858

860859
cuda_scrypt_core(thr_id, nxt, N);
861-
cuda_scrypt_flush(thr_id, nxt); // required here ?
860+
// cuda_scrypt_flush(thr_id, nxt);
861+
if (!cuda_scrypt_sync(thr_id, nxt)) {
862+
printf("error\n");
863+
result = -1;
864+
break;
865+
}
862866

863867
post_sha256(thr_id, nxt, throughput);
864868
cuda_scrypt_done(thr_id, nxt);
865869

866870
cuda_scrypt_DtoH(thr_id, hash[nxt], nxt, true);
867-
cuda_scrypt_flush(thr_id, nxt); // required here ?
868-
869-
if (!cuda_scrypt_sync(thr_id, cur)) {
871+
// cuda_scrypt_flush(thr_id, nxt);
872+
if (!cuda_scrypt_sync(thr_id, nxt)) {
870873
printf("error\n");
871874
result = -1;
872875
break;

0 commit comments

Comments
 (0)