// plotter_disk.hpp (from a fork of xorinox/chiapos)
// Copyright 2018 Chia Network Inc
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SRC_CPP_PLOTTER_DISK_HPP_
#define SRC_CPP_PLOTTER_DISK_HPP_
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <map>
#include <algorithm>
#include <vector>
#include <string>
#include <utility>
#include "util.hpp"
#include "encoding.hpp"
#include "calculate_bucket.hpp"
#include "sort_on_disk.hpp"
#include "pos_constants.hpp"
#include "hellman.hpp"
// During backprop and compress, the read pointer is ahead of the write pointer
// Note that the larger the offset, the higher these values must be
const uint32_t kReadMinusWrite = 2048;
const uint32_t kCachedPositionsSize = 8192;
// Distance between matching entries is stored in the offset
const uint32_t kOffsetSize = 11;
// Max matches a single entry can have, used for hardcoded memory allocation
const uint32_t kMaxMatchesSingleEntry = 30;
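// Note: an offset occupies kOffsetSize = 11 bits and so can reach 2^11 - 1 = 2047,
// which is why the read pointer must stay at least kReadMinusWrite = 2048 entries
// ahead of the write pointer during backprop and compress.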
// Results of phase 3. These are passed into Phase 4, so the checkpoint tables
// can be properly built.
struct Phase3Results {
// Pointers to each table start byte in the plot file
vector<uint64_t> plot_table_begin_pointers;
// Pointers to each table start byte in the final file
vector<uint64_t> final_table_begin_pointers;
// Number of entries written for f7
uint64_t final_entries_written;
uint32_t right_entry_size_bits;
uint32_t header_size;
};
const Bits empty_bits;
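// A minimal usage sketch for the class below (illustrative only; the id and memo
// bytes are placeholders, not values taken from this codebase):
//
//   uint8_t id[kIdLen] = {0};    // 32-byte unique plot id
//   uint8_t memo[] = {1, 2, 3};  // arbitrary memo stored in the plot header
//   DiskPlotter plotter;
//   plotter.CreatePlotDisk("plot.dat", 30, memo, sizeof(memo), id, kIdLen);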
class DiskPlotter {
public:
// This method creates a plot on disk with the filename. A temporary file, filename + ".tmp",
// is created and will be larger than the final plot file. This file is deleted at the end of
// the process.
void CreatePlotDisk(std::string filename, uint8_t k, uint8_t* memo,
uint32_t memo_len, uint8_t* id, uint32_t id_len) {
std::cout << std::endl << "Starting plotting progress into file " << filename << "." << std::endl;
std::cout << "Memo: " << Util::HexStr(memo, memo_len) << std::endl;
std::cout << "ID: " << Util::HexStr(id, id_len) << std::endl;
std::cout << "Plot size is: " << static_cast<int>(k) << std::endl;
// These variables are used in the WriteParkToFile method. They are preallocated here
// to save time.
first_line_point_bytes = new uint8_t[CalculateLinePointSize(k)];
park_stubs_bytes = new uint8_t[CalculateStubsSize(k)];
park_deltas_bytes = new uint8_t[CalculateMaxDeltasSize(k, 1)];
assert(id_len == kIdLen);
assert(k >= kMinPlotSize);
assert(k <= kMaxPlotSize);
std::string plot_filename = filename + ".tmp";
std::cout << std::endl << "Starting phrase 0/4: Hellman Attacks extra storage..." << std::endl;
Timer extra_storage_timer;
std::vector<uint64_t> extra_metadata;
Timer hellman_timer;
BuildExtraStorage(k, id, plot_filename, extra_metadata);
hellman_timer.PrintElapsed("Time for phase 0 = ");
std::cout << std::endl << "Starting phase 1/4: Forward Propagation..." << std::endl;
Timer p1;
Timer all_phases;
std::vector<uint64_t> results = WritePlotFile(plot_filename, k, id, memo, memo_len);
p1.PrintElapsed("Time for phase 1 =");
std::cout << std::endl << "Starting phase 2/4: Backpropagation..." << std::endl;
Timer p2;
Backpropagate(filename, plot_filename, k, id, memo, memo_len, results);
p2.PrintElapsed("Time for phase 2 =");
std::cout << std::endl << "Starting phase 3/4: Compression..." << std::endl;
Timer p3;
Phase3Results res = CompressTables(k, results, filename, plot_filename, id, memo, memo_len, extra_metadata);
p3.PrintElapsed("Time for phase 3 =");
std::cout << std::endl << "Starting phase 4/4: Write Checkpoint tables..." << std::endl;
Timer p4;
WriteCTables(k, k + 1, filename, plot_filename, res);
p4.PrintElapsed("Time for phase 4 =");
int removed = remove(plot_filename.c_str());
assert(removed == 0);
extra_storage_timer.PrintElapsed("Time for extra storage = ");
all_phases.PrintElapsed("Total time =");
delete[] first_line_point_bytes;
delete[] park_stubs_bytes;
delete[] park_deltas_bytes;
}
static uint32_t GetMaxEntrySize(uint8_t k, uint8_t table_index, bool phase_1_size) {
switch (table_index) {
case 1:
// Represents f1, x
return Util::ByteAlign(k + kExtraBits + k) / 8;
case 2:
case 3:
case 4:
case 5:
case 6:
if (phase_1_size)
// If we are in phase 1, use the max size, with metadata.
// Represents f, pos, offset, and metadata
return Util::ByteAlign(k + kExtraBits + (k + 1) + kOffsetSize +
k * kVectorLens[table_index + 1]) / 8;
else
// If we are past phase 1, we can use a smaller size, the smaller between
// phases 2 and 3. Represents either:
// a: sort_key, pos, offset or
// b: line_point, sort_key
return Util::ByteAlign(max(static_cast<uint32_t>(k + 1 + (k + 1) + kOffsetSize),
static_cast<uint32_t>(2 * k + (k + 1)))) / 8;
case 7:
default:
// Represents line_point, f7
return Util::ByteAlign(3 * k) / 8;
}
}
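// As a worked example of GetMaxEntrySize: for k = 26, a table-1 entry takes
// Util::ByteAlign(26 + kExtraBits + 26) / 8 bytes; assuming kExtraBits = 6 (its
// value in pos_constants.hpp), that is ByteAlign(58) / 8 = 64 / 8 = 8 bytes per
// (y, x) pair during phase 1.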
// Calculates the size of one C3 park. This will store bits for each f7 between
// two C1 checkpoints, depending on how many times that f7 is present. For low
// values of k, we need extra space to account for the additional variability.
static uint32_t CalculateC3Size(uint8_t k) {
if (k < 20) {
return floor(Util::ByteAlign(8 * kCheckpoint1Interval) / 8);
} else {
// TODO(alex): tighten this bound, based on formula
return Util::ByteAlign(kC3BitsPerEntry * kCheckpoint1Interval) / 8;
}
}
static uint32_t CalculateLinePointSize(uint8_t k) {
return Util::ByteAlign(2*k) / 8;
}
// This is the full size of the deltas section in a park. However, it will not be fully filled
static uint32_t CalculateMaxDeltasSize(uint8_t k, uint8_t table_index) {
if (table_index == 1) {
return Util::ByteAlign((kEntriesPerPark - 1) * kMaxAverageDeltaTable1) / 8;
}
if (table_index == 2) {
return Util::ByteAlign(std::floor((kEntriesPerPark - 1) * (kMaxAverageDelta + 1))) / 8;
}
return Util::ByteAlign((kEntriesPerPark - 1) * kMaxAverageDelta) / 8;
}
static uint32_t CalculateStubsSize(uint32_t k) {
return Util::ByteAlign((kEntriesPerPark - 1) * (k - kStubMinusBits)) / 8;
}
static uint32_t CalculateParkSize(uint8_t k, uint8_t table_index) {
return CalculateLinePointSize(k) + CalculateStubsSize(k) + CalculateMaxDeltasSize(k, table_index);
}
private:
uint8_t* first_line_point_bytes;
uint8_t* park_stubs_bytes;
uint8_t* park_deltas_bytes;
// Writes the plot file header to a file
uint32_t WriteHeader(std::ofstream &plot_file, uint8_t k, uint8_t* id, uint8_t* memo, uint32_t memo_len) {
// 19 bytes - "Proof of Space Plot" (utf-8)
// 32 bytes - unique plot id
// 1 byte - k
// 2 bytes - format description length
// x bytes - format description
// 2 bytes - memo length
// x bytes - memo
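// As a worked example: with a 48-byte memo and an n-byte format description, the
// fixed fields take 19 + 32 + 1 + 2 + 2 + 48 = 104 bytes, so the header occupies
// 104 + n bytes, followed by the 80 zeroed bytes written below as placeholders
// for ten 8-byte table pointers.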
std::string header_text = "Proof of Space Plot";
plot_file.write(header_text.data(), header_text.size());
plot_file.write(reinterpret_cast<char*>(id), kIdLen);
uint8_t k_buffer[1];
k_buffer[0] = k;
plot_file.write(reinterpret_cast<char*>(k_buffer), 1);
uint8_t size_buffer[2];
Bits(kFormatDescription.size(), 16).ToBytes(size_buffer);
plot_file.write(reinterpret_cast<char*>(size_buffer), 2);
plot_file.write(kFormatDescription.data(), kFormatDescription.size());
Bits(memo_len, 16).ToBytes(size_buffer);
plot_file.write(reinterpret_cast<char*>(size_buffer), 2);
plot_file.write(reinterpret_cast<char*>(memo), memo_len);
uint8_t pointers[10*8];
memset(pointers, 0, 10*8);
plot_file.write(reinterpret_cast<char*>(pointers), 10*8);
uint32_t bytes_written = header_text.size() + kIdLen + 1 + 2 + kFormatDescription.size()
+ 2 + memo_len + 10*8;
std::cout << "Wrote: " << bytes_written << std::endl;
return bytes_written;
}
// This is Phase 1, or forward propagation. During this phase, all of the 7 tables,
// and f functions, are evaluated. The result is an intermediate plot file that is
// several times larger than what the final file will be, but that has all of the
// proofs of space in it. First, F1 is computed, which is special since it uses
// AES256, and each encryption provides multiple output values. Then, the rest of the
// f functions are computed, and a sort on disk happens for each table.
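// As a concrete sketch of the flow: for k = 26, f1 is evaluated on all 2^26 values
// of x; then, for each table_index from 1 to 6, the table is sorted by y on disk,
// adjacent kBC-wide buckets are matched, and each match emits one entry of the
// next table.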
std::vector<uint64_t> WritePlotFile(std::string plot_filename, uint8_t k, uint8_t* id,
uint8_t* memo, uint32_t memo_len) {
// Note that the plot file is not the final file that will be stored on disk,
// it is only present during plotting.
std::ofstream plot_file(plot_filename, std::ios::out | std::ios::trunc | std::ios::binary);
if (!plot_file.is_open()) {
throw std::string("File not opened correct");
}
uint32_t header_size = WriteHeader(plot_file, k, id, memo, memo_len);
std::cout << "Computing table 1" << std::endl;
Timer f1_start_time;
F1Calculator f1(k, id);
uint64_t x = 0;
uint32_t entry_size_bytes = GetMaxEntrySize(k, 1, true);
// The max value our input (x) can take. A proof of space is 64 of these x values.
uint64_t max_value = ((uint64_t)1 << (k)) - 1;
uint8_t* buf = new uint8_t[entry_size_bytes];
// These are used for sorting on disk. The sort on disk code needs to know how
// many elements are in each bucket.
std::vector<uint64_t> bucket_sizes(kNumSortBuckets, 0);
std::vector<uint64_t> right_bucket_sizes(kNumSortBuckets, 0);
uint32_t bucket_log = floor(log2(kNumSortBuckets));
// Instead of computing f1(1), f1(2), etc. for each x, we compute them in batches
// to increase CPU efficiency.
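// (Each call to CalculateBuckets below returns 2 << (kBatchSizes - 1), i.e.
// 2^kBatchSizes, consecutive (y, x) pairs, so one loop iteration covers a whole
// batch of x values.)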
for (uint64_t lp = 0; lp <= (((uint64_t)1) << (k-kBatchSizes)); lp++) {
// For each pair x, y in the batch
for (auto kv : f1.CalculateBuckets(Bits(x, k), 2 << (kBatchSizes - 1))) {
// TODO(mariano): fix inefficient memory alloc here
(std::get<0>(kv) + std::get<1>(kv)).ToBytes(buf);
// We write the x, y pair
plot_file.write(reinterpret_cast<char*>(buf), entry_size_bytes);
bucket_sizes[SortOnDiskUtils::ExtractNum(buf, entry_size_bytes, 0, bucket_log)] += 1;
if (x + 1 > max_value) {
break;
}
++x;
}
if (x + 1 > max_value) {
break;
}
}
delete[] buf;
// A zero entry is the end of table symbol.
Util::WriteZeroesStack(plot_file, entry_size_bytes);
plot_file.close();
f1_start_time.PrintElapsed("F1 complete, Time = ");
// Begin byte of the f1 table
uint64_t begin_byte = header_size;
// Total number of entries in the current table (f1)
uint64_t total_table_entries = ((uint64_t)1) << k;
// This will contain the start bytes (into the plot file) for each table
std::vector<uint64_t> plot_table_begin_pointers(9, 0);
plot_table_begin_pointers[1] = begin_byte;
// Store positions to previous tables, in k+1 bits. This is because we may have
// more than 2^k entries in some of the tables, so we need an extra bit.
uint8_t pos_size = k + 1;
uint32_t right_entry_size_bytes = 0;
// Number of buckets that y values will be put into.
double num_buckets = ((uint64_t)1 << (k + kExtraBits)) / static_cast<double>(kBC) + 1;
// Memory to be used for sorting
uint8_t* memory = new uint8_t[kMemorySize];
// For tables 1 through 6, sort the table, calculate matches, and write
// the next table. This is the left table index.
for (uint8_t table_index = 1; table_index < 7; table_index++) {
Timer table_timer;
uint8_t metadata_size = kVectorLens[table_index + 1] * k;
// Determines how many bytes the entries in our left and right tables will take up.
uint32_t entry_size_bytes = GetMaxEntrySize(k, table_index, true);
right_entry_size_bytes = GetMaxEntrySize(k, table_index + 1, true);
uint64_t begin_byte_next = begin_byte + (entry_size_bytes * (total_table_entries + 1));
std::cout << "Computing table " << int{table_index + 1} << " at position 0x"
<< std::hex << begin_byte_next << std::dec << std::endl;
total_table_entries = 0;
std::cout << "\tSorting table " << int{table_index} << std::endl;
// Performs a sort on the left table.
Timer sort_timer;
FileDisk d(plot_filename);
Sorting::SortOnDisk(d, begin_byte, begin_byte_next, entry_size_bytes,
0, bucket_sizes, memory, kMemorySize);
d.Close();
sort_timer.PrintElapsed("\tSort time:");
Timer computation_pass_timer;
// Streams to read and write to tables. We will have handles to two tables. We will
// read through the left table, compute matches, and evaluate f for matching entries,
// writing results to the right table.
std::ifstream left_reader(plot_filename, std::fstream::in | std::fstream::binary);
std::fstream right_writer(plot_filename, std::fstream::out | std::fstream::in | std::fstream::binary);
left_reader.seekg(begin_byte);
right_writer.seekp(begin_byte_next);
FxCalculator f(k, table_index + 1, id);
// This is a sliding window of entries, since things in bucket i can match with things in bucket
// i + 1. At the end of each bucket, we find matches between the two previous buckets.
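// For example, an entry with y = 3 * kBC + 5 falls in bucket 3; matches are only
// ever computed between adjacent buckets (2 with 3, or 3 with 4), so holding two
// buckets in memory at a time is enough.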
std::vector<PlotEntry> bucket_L;
std::vector<PlotEntry> bucket_R;
uint64_t bucket = 0;
uint64_t pos = 0; // Position into the left table
bool end_of_table = false; // We finished all entries in the left table
uint64_t matches = 0; // Total matches
// Buffers for storing a left or a right entry, used for disk IO
uint8_t* left_buf = new uint8_t[entry_size_bytes];
uint8_t* right_buf = new uint8_t[right_entry_size_bytes];
Bits zero_bits(0, metadata_size);
// Start at left table pos = 0 and iterate through the whole table. Note that the left table
// will already be sorted by y
while (!end_of_table) {
PlotEntry left_entry;
left_entry.right_metadata = 0;
// Reads a left entry from disk
left_reader.read(reinterpret_cast<char*>(left_buf), entry_size_bytes);
if (table_index == 1) {
// For table 1, we only have y and metadata
left_entry.y = Util::SliceInt64FromBytes(left_buf, entry_size_bytes,
0, k + kExtraBits);
left_entry.left_metadata = Util::SliceInt128FromBytes(left_buf, entry_size_bytes,
k + kExtraBits, metadata_size);
} else {
// For tables 2-6, we also have pos and offset, but we don't use them here.
left_entry.y = Util::SliceInt64FromBytes(left_buf, entry_size_bytes, 0, k + kExtraBits);
if (metadata_size <= 128) {
left_entry.left_metadata = Util::SliceInt128FromBytes(left_buf, entry_size_bytes,
k + kExtraBits + pos_size + kOffsetSize,
metadata_size);
} else {
// Large metadatas that don't fit into 128 bits. (k > 32).
left_entry.left_metadata = Util::SliceInt128FromBytes(left_buf, entry_size_bytes,
k + kExtraBits + pos_size
+ kOffsetSize, 128);
left_entry.right_metadata = Util::SliceInt128FromBytes(left_buf, entry_size_bytes,
k + kExtraBits + pos_size
+ kOffsetSize + 128,
metadata_size - 128);
}
}
// This is not the pos that was read from disk, but the position of the entry we read, within the L table.
left_entry.pos = pos;
end_of_table = (left_entry.y == 0 && left_entry.left_metadata == 0 && left_entry.right_metadata == 0);
uint64_t y_bucket = left_entry.y / kBC;
// Keep reading left entries into bucket_L and R, until we run out of things
if (y_bucket == bucket) {
bucket_L.emplace_back(std::move(left_entry));
} else if (y_bucket == bucket + 1) {
bucket_R.emplace_back(std::move(left_entry));
} else {
// This is reached when we have finished adding stuff to bucket_R and bucket_L,
// so now we can compare entries in both buckets to find matches. If two entries match,
// the result is written to the right table.
if (bucket_L.size() > 0 && bucket_R.size() > 0) {
// Compute all matches between the two buckets, and return indices into each bucket
std::vector<std::pair<uint16_t, uint16_t> > match_indexes = f.FindMatches(bucket_L, bucket_R);
for (auto& indeces : match_indexes) {
PlotEntry& L_entry = bucket_L[std::get<0>(indeces)];
PlotEntry& R_entry = bucket_R[std::get<1>(indeces)];
std::pair<Bits, Bits> f_output;
// Computes the output pair (fx, new_metadata)
if (metadata_size <= 128) {
f_output = f.CalculateBucket(Bits(L_entry.y, k + kExtraBits),
Bits(R_entry.y, k + kExtraBits),
Bits(L_entry.left_metadata, metadata_size),
Bits(R_entry.left_metadata, metadata_size));
} else {
// Metadata does not fit into 128 bits
f_output = f.CalculateBucket(Bits(L_entry.y, k + kExtraBits),
Bits(R_entry.y, k + kExtraBits),
Bits(L_entry.left_metadata, 128)
+ Bits(L_entry.right_metadata, metadata_size - 128),
Bits(R_entry.left_metadata, 128)
+ Bits(R_entry.right_metadata, metadata_size - 128));
}
// fx/y, which will be used for sorting and matching
Bits& new_entry = std::get<0>(f_output);
++matches;
++total_table_entries;
if (table_index + 1 == 7) {
// We only need k instead of k + kExtraBits bits for the last table
new_entry = new_entry.Slice(0, k);
}
// Position in the previous table
new_entry += Bits(L_entry.pos, pos_size);
// Offset for matching entry
if (R_entry.pos - L_entry.pos > 2000) {
std::cout << "Offset: " << R_entry.pos - L_entry.pos << std::endl;
}
new_entry.AppendValue(R_entry.pos - L_entry.pos, kOffsetSize);
// New metadata which will be used to compute the next f
new_entry += std::get<1>(f_output);
// Fill with 0s if entry is not long enough
new_entry.AppendValue(0, right_entry_size_bytes * 8 - new_entry.GetSize());
new_entry.ToBytes(right_buf);
// Writes the new entry into the right table
right_writer.write(reinterpret_cast<char*>(right_buf), right_entry_size_bytes);
// Computes sort bucket, so we can sort the table by y later, more easily
right_bucket_sizes[SortOnDiskUtils::ExtractNum(right_buf, right_entry_size_bytes, 0,
floor(log2(kNumSortBuckets)))] += 1;
}
}
if (y_bucket == bucket + 2) {
// We saw a bucket that is 2 more than the current, so we just set L = R, and R = [entry]
bucket_L = bucket_R;
bucket_R = std::vector<PlotEntry>();
bucket_R.emplace_back(std::move(left_entry));
++bucket;
} else {
// We saw a bucket that is more than 2 past the current one, so we set L = [entry], and R = []
bucket = y_bucket;
bucket_L = std::vector<PlotEntry>();
bucket_L.emplace_back(std::move(left_entry));
bucket_R = std::vector<PlotEntry>();
}
}
// Increase the read pointer in the left table, by one
++pos;
}
// Total matches found in the left table
std::cout << "\tTotal matches: " << matches << ". Per bucket: "
<< (matches / num_buckets) << std::endl;
// Writes the 0 entry (EOT)
memset(right_buf, 0, right_entry_size_bytes);
Bits(0, right_entry_size_bytes * 8).ToBytes(right_buf);
right_writer.write(reinterpret_cast<char*>(right_buf), right_entry_size_bytes);
// Writes the start of the table to the header, so we can resume plotting if it
// is interrupted.
right_writer.seekp(header_size - 8 * (12 - table_index));
uint8_t pointer_buf[8];
Bits(begin_byte_next, 8*8).ToBytes(pointer_buf);
right_writer.write(reinterpret_cast<char*>(pointer_buf), 8);
// Resets variables
plot_table_begin_pointers[table_index + 1] = begin_byte_next;
begin_byte = begin_byte_next;
bucket_sizes = right_bucket_sizes;
right_bucket_sizes = std::vector<uint64_t>(kNumSortBuckets, 0);
left_reader.close();
right_writer.close();
delete[] left_buf;
delete[] right_buf;
computation_pass_timer.PrintElapsed("\tComputation pass time:");
table_timer.PrintElapsed("Forward propagation table time:");
}
// Pointer to the end of the last table + 1, used for spare space for disk sorting
plot_table_begin_pointers[8] = plot_table_begin_pointers[7] +
(right_entry_size_bytes * (total_table_entries + 1));
delete[] memory;
std::cout << "Final plot table begin pointers: " << std::endl;
for (uint8_t i = 1; i <= 8; i++) {
std::cout << "\tTable " << int{i} << " 0x"
<< std::hex << plot_table_begin_pointers[i] << std::dec << std::endl;
}
return plot_table_begin_pointers;
}
// Backpropagate takes as input a file on which forward propagation has been done.
// The purpose of backpropagate is to eliminate any dead entries that don't contribute
// to final values in f7, to minimize disk usage. A sort on disk is applied to each table,
// so that they are sorted by position.
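// For example, if no right-table entry references position 57 of the left table
// (either as its pos or as pos + offset), that left entry can never appear in a
// proof of space, so it is dropped; the new_positions window below maps old
// positions of the surviving entries to their renumbered positions.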
void Backpropagate(std::string filename, std::string plot_filename, uint8_t k,
uint8_t* id, uint8_t* memo, uint32_t memo_len, const std::vector<uint64_t>& results) {
std::vector<uint64_t> plot_table_begin_pointers = results;
// An extra bit is used, since we may have more than 2^k entries in a table. (After pruning, each table will
// have 0.8*2^k or fewer entries.)
uint8_t pos_size = k + 1;
std::vector<uint64_t> bucket_sizes_pos(kNumSortBuckets, 0);
// The space after the end of table 7 is spare space that we can use for sorting
uint64_t spare_pointer = plot_table_begin_pointers[8];
// Memory to be used for sorting
uint8_t* memory = new uint8_t[kMemorySize];
// Iterates through each table (with a left and right pointer), starting at 6 & 7.
for (uint8_t table_index = 7; table_index > 1; --table_index) {
Timer table_timer;
// We will have reader and writer for both tables.
std::ifstream left_reader(plot_filename, std::ios::in | std::ios::binary);
std::ofstream left_writer(plot_filename, std::ios::in | std::ios::out | std::ios::binary);
std::ifstream right_reader(plot_filename, std::ios::in | std::ios::binary);
std::ofstream right_writer(plot_filename, std::ios::in | std::ios::out | std::ios::binary);
std::cout << "Backpropagating on table " << int{table_index} << std::endl;
std::vector<uint64_t> new_bucket_sizes_pos(kNumSortBuckets, 0);
uint16_t left_metadata_size = kVectorLens[table_index] * k;
// The entry that we are reading (includes metadata)
uint16_t left_entry_size_bytes = GetMaxEntrySize(k, table_index - 1, true);
// The entry that we are writing (no metadata)
uint16_t new_left_entry_size_bytes = GetMaxEntrySize(k, table_index - 1, false);
// The right entries which we read and write (they already have no metadata, since they have
// been pruned in the previous iteration)
uint16_t right_entry_size_bytes = GetMaxEntrySize(k, table_index, false);
left_writer.flush();
right_writer.flush();
// Doesn't sort table 7, since it's already sorted by pos6 (position into table 6).
// The reason we sort, is so we can iterate through both tables at once. For example,
// if we read a right entry (pos, offset) = (456, 2), the next one might be (458, 19),
// and in the left table, we are reading entries around pos 450, etc..
if (table_index != 7) {
std::cout << "\tSorting table " << int{table_index} << " starting at "
<< plot_table_begin_pointers[table_index] << std::endl;
Timer sort_timer;
FileDisk d(plot_filename);
Sorting::SortOnDisk(d, plot_table_begin_pointers[table_index], spare_pointer,
right_entry_size_bytes,
0, bucket_sizes_pos, memory, kMemorySize);
d.Close();
sort_timer.PrintElapsed("\tSort time:");
}
Timer computation_pass_timer;
left_reader.seekg(plot_table_begin_pointers[table_index - 1]);
left_writer.seekp(plot_table_begin_pointers[table_index - 1]);
right_reader.seekg(plot_table_begin_pointers[table_index]);
right_writer.seekp(plot_table_begin_pointers[table_index]);
left_writer.flush();
right_writer.flush();
// We will divide by 2, so it must be even.
assert(kCachedPositionsSize % 2 == 0);
// used_positions marks which L-table positions are referenced in table R; the rest will be pruned
bool used_positions[kCachedPositionsSize];
memset(used_positions, 0, sizeof(used_positions));
bool should_read_entry = true;
// Cache for when we read a right entry that is too far forward
uint64_t cached_entry_sort_key = 0; // For table_index == 7, y is here
uint64_t cached_entry_pos = 0;
uint64_t cached_entry_offset = 0;
uint64_t left_entry_counter = 0; // Total left entries written
// Sliding window map, from old position to new position (after pruning)
uint64_t new_positions[kCachedPositionsSize];
// Sort keys represent the ordering of entries, sorted by (y, pos, offset),
// but using less bits (only k+1 instead of 2k + 9, etc.)
// This is a map from old position to array of sort keys (one for each R entry with this pos)
Bits old_sort_keys[kReadMinusWrite][kMaxMatchesSingleEntry];
// Map from old position to other positions that it matches with
uint64_t old_offsets[kReadMinusWrite][kMaxMatchesSingleEntry];
// Map from old position to count (number of times it appears)
uint16_t old_counters[kReadMinusWrite];
for (uint32_t i = 0; i < kReadMinusWrite; i++) {
old_counters[i] = 0;
}
bool end_of_right_table = false;
uint64_t current_pos = 0; // This is the current pos that we are looking for in the L table
uint64_t end_of_table_pos = 0;
uint64_t greatest_pos = 0; // This is the greatest position we have seen in R table
// Buffers for reading and writing to disk
uint8_t* left_entry_buf = new uint8_t[left_entry_size_bytes];
uint8_t* new_left_entry_buf = new uint8_t[new_left_entry_size_bytes];
uint8_t* right_entry_buf = new uint8_t[right_entry_size_bytes];
// Go through all right entries; after the right table ends, keep going until the lagging write pointer catches up
while (!end_of_right_table || (current_pos - end_of_table_pos <= kReadMinusWrite)) {
old_counters[current_pos % kReadMinusWrite] = 0;
// Resets used positions after a while, so we use little memory
if ((current_pos - kReadMinusWrite) % (kCachedPositionsSize / 2) == 0) {
if ((current_pos - kReadMinusWrite) % kCachedPositionsSize == 0) {
for (uint32_t i = kCachedPositionsSize / 2; i < kCachedPositionsSize; i++) {
used_positions[i] = false;
}
} else {
for (uint32_t i = 0; i < kCachedPositionsSize / 2; i++) {
used_positions[i] = false;
}
}
}
// Only runs this code if we are still reading the right table, or we still need to read
// more left table entries (current_pos <= greatest_pos), otherwise, it skips to the
// writing of the final R table entries
if (!end_of_right_table || current_pos <= greatest_pos) {
uint64_t entry_sort_key = 0;
uint64_t entry_pos = 0;
uint64_t entry_offset = 0;
while (!end_of_right_table) {
if (should_read_entry) {
// Need to read another entry at the current position
right_reader.read(reinterpret_cast<char *>(right_entry_buf),
right_entry_size_bytes);
if (table_index == 7) {
// This is actually y for table 7
entry_sort_key = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
0, k);
entry_pos = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
k, pos_size);
entry_offset = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
k + pos_size, kOffsetSize);
} else {
entry_pos = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
0, pos_size);
entry_offset = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
pos_size, kOffsetSize);
entry_sort_key = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
pos_size + kOffsetSize, k + 1);
}
} else if (cached_entry_pos == current_pos) {
// We have a cached entry at this position
entry_sort_key = cached_entry_sort_key;
entry_pos = cached_entry_pos;
entry_offset = cached_entry_offset;
} else {
// The cached entry is at a later pos, so we don't read any more R entries,
// read more L entries instead.
break;
}
should_read_entry = true; // By default, read another entry
if (entry_pos + entry_offset > greatest_pos) {
// Greatest L pos that we should look for
greatest_pos = entry_pos + entry_offset;
}
if (entry_sort_key == 0 && entry_pos == 0 && entry_offset == 0) {
// Table R has ended, don't read any more (but keep writing)
end_of_right_table = true;
end_of_table_pos = current_pos;
break;
} else if (entry_pos == current_pos) {
// The current L position is the current R entry
// Marks the two matching entries as used (pos and pos+offset)
used_positions[entry_pos % kCachedPositionsSize] = true;
used_positions[(entry_pos + entry_offset) % kCachedPositionsSize] = true;
uint64_t old_write_pos = entry_pos % kReadMinusWrite;
if (table_index == 7) {
// Stores the sort key for this R entry, which is just y (so k bits)
old_sort_keys[old_write_pos][old_counters[old_write_pos]] = Bits(entry_sort_key, k);
} else {
// Stores the sort key for this R entry
old_sort_keys[old_write_pos][old_counters[old_write_pos]] = Bits(entry_sort_key, k + 1);
}
// Stores the other matching pos for this R entry (pos6 + offset)
old_offsets[old_write_pos][old_counters[old_write_pos]] = entry_pos + entry_offset;
++old_counters[old_write_pos];
} else {
// Don't read any more right entries for now, because we haven't caught up on the
// left table yet
should_read_entry = false;
cached_entry_sort_key = entry_sort_key;
cached_entry_pos = entry_pos;
cached_entry_offset = entry_offset;
break;
}
}
// Reads a left entry
left_reader.read(reinterpret_cast<char *>(left_entry_buf),
left_entry_size_bytes);
// If this left entry is used, we rewrite it. If it's not used, we ignore it.
if (used_positions[current_pos % kCachedPositionsSize]) {
uint64_t entry_y = Util::SliceInt64FromBytes(left_entry_buf, left_entry_size_bytes,
0, k + kExtraBits);
uint64_t entry_metadata;
if (table_index > 2) {
// For tables 2-6, the entry is: f, pos, offset, metadata
entry_pos = Util::SliceInt64FromBytes(left_entry_buf, left_entry_size_bytes,
k + kExtraBits, pos_size);
entry_offset = Util::SliceInt64FromBytes(left_entry_buf, left_entry_size_bytes,
k + kExtraBits + pos_size, kOffsetSize);
} else {
// For table 1, the entry is: f, metadata
entry_metadata = Util::SliceInt128FromBytes(left_entry_buf, left_entry_size_bytes,
k + kExtraBits, left_metadata_size);
}
Bits new_left_entry;
if (table_index > 2) {
// The new left entry is slightly different. Metadata is dropped, to save space,
// and the counter of the entry is written (sort_key). We use this instead of
// (y + pos + offset) since it's smaller.
new_left_entry += Bits(entry_pos, pos_size);
new_left_entry += Bits(entry_offset, kOffsetSize);
new_left_entry += Bits(left_entry_counter, k + 1);
// If we are not taking up all the bits, make sure they are zeroed
if (Util::ByteAlign(new_left_entry.GetSize()) < new_left_entry_size_bytes * 8) {
memset(new_left_entry_buf, 0, new_left_entry_size_bytes);
}
} else {
// For table one entries, we don't care about sort key, only y and x.
new_left_entry += Bits(entry_y, k + kExtraBits);
new_left_entry += Bits(entry_metadata, left_metadata_size);
// std::cout << "Writing X:" << entry_metadata.GetValue() << std::endl;
}
new_left_entry.ToBytes(new_left_entry_buf);
left_writer.write(reinterpret_cast<char *>(new_left_entry_buf), new_left_entry_size_bytes);
new_bucket_sizes_pos[SortOnDiskUtils::ExtractNum(new_left_entry_buf, new_left_entry_size_bytes,
0, floor(log2(kNumSortBuckets)))] += 1;
// Mapped positions, so we can rewrite the R entry properly
new_positions[current_pos % kCachedPositionsSize] = left_entry_counter;
// Counter for new left entries written
++left_entry_counter;
}
}
// Write pointer lags behind the read pointer
int64_t write_pointer_pos = current_pos - kReadMinusWrite + 1;
// Only write entries for write_pointer_pos, if we are above 0, and there are actually R entries
// for that pos.
if (write_pointer_pos >= 0 && used_positions[write_pointer_pos % kCachedPositionsSize]) {
uint64_t new_pos = new_positions[write_pointer_pos % kCachedPositionsSize];
Bits new_pos_bin(new_pos, pos_size);
// There may be multiple R entries that share the write_pointer_pos, so write all of them
for (uint32_t counter = 0; counter < old_counters[write_pointer_pos % kReadMinusWrite]; counter++) {
// Creates and writes the new right entry, with the cached data
uint64_t new_offset_pos = new_positions[old_offsets[write_pointer_pos % kReadMinusWrite]
[counter] % kCachedPositionsSize];
Bits& new_right_entry = old_sort_keys[write_pointer_pos % kReadMinusWrite][counter];
new_right_entry += new_pos_bin;
new_right_entry.AppendValue(new_offset_pos - new_pos, kOffsetSize);
if (Util::ByteAlign(new_right_entry.GetSize()) < right_entry_size_bytes * 8) {
memset(right_entry_buf, 0, right_entry_size_bytes);
}
new_right_entry.ToBytes(right_entry_buf);
right_writer.write(reinterpret_cast<char*>(right_entry_buf),
right_entry_size_bytes);
}
}
++current_pos;
}
std::cout << "\tWrote left entries: " << left_entry_counter << std::endl;
computation_pass_timer.PrintElapsed("\tComputation pass time:");
table_timer.PrintElapsed("Total backpropagation time::");
Bits(0, right_entry_size_bytes * 8).ToBytes(right_entry_buf);
right_writer.write(reinterpret_cast<char*>(right_entry_buf), right_entry_size_bytes);
Bits(0, new_left_entry_size_bytes * 8).ToBytes(new_left_entry_buf);
left_writer.write(reinterpret_cast<char*>(new_left_entry_buf), new_left_entry_size_bytes);
left_reader.close();
left_writer.close();
right_reader.close();
right_writer.close();
bucket_sizes_pos = new_bucket_sizes_pos;
delete[] left_entry_buf;
delete[] new_left_entry_buf;
delete[] right_entry_buf;
}
delete[] memory;
}
// This writes a number of entries into a file, in the final, optimized format. The park contains
// a checkpoint value (which is a 2k-bit line point), as well as EPP (entries per park) entries.
// These entries are each divided into a stub and delta section. The stub bits are encoded as is, but
// the delta bits are optimized into a variable encoding scheme. Since we have many entries in each
// park, we can approximate how much space each park will take.
// Format is: [2k bits of first_line_point] [EPP-1 stubs] [Deltas size] [EPP-1 deltas].... [first_line_point] ...
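// For instance, the checkpoint line point always occupies exactly 2k bits, each
// of the EPP - 1 stubs occupies k - kStubMinusBits bits, and the deltas section
// is prefixed by its encoded size in a 2-byte field (written as encoded_size
// below).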
void WriteParkToFile(std::ofstream &writer, uint64_t table_start, uint64_t park_index, uint32_t park_size_bytes,
uint128_t first_line_point, const std::vector<uint8_t>& park_deltas,
const std::vector<uint64_t>& park_stubs, uint8_t k, uint8_t table_index) {
// Parks are fixed size, so we know where to start writing. The deltas will not go over
// into the next park.
writer.seekp(table_start + park_index * park_size_bytes);
Bits first_line_point_bits(first_line_point, 2*k);
memset(first_line_point_bytes, 0, CalculateLinePointSize(k));
first_line_point_bits.ToBytes(first_line_point_bytes);
writer.write((const char*)first_line_point_bytes, CalculateLinePointSize(k));
// We use ParkBits instead of Bits since it allows storing more data
ParkBits park_stubs_bits;
for (uint64_t stub : park_stubs) {
park_stubs_bits.AppendValue(stub, (k - kStubMinusBits));
}
uint32_t stubs_size = CalculateStubsSize(k);
memset(park_stubs_bytes, 0, stubs_size);
park_stubs_bits.ToBytes(park_stubs_bytes);
writer.write((const char*)park_stubs_bytes, stubs_size);
// The stubs are random so they don't need encoding. But deltas are more likely to
// be small, so we can compress them
double R = kRValues[table_index - 1];
ParkBits deltas_bits = Encoding::ANSEncodeDeltas(park_deltas, R);
deltas_bits.ToBytes(park_deltas_bytes);
uint16_t encoded_size = deltas_bits.GetSize() / 8;
if (encoded_size + 2 > CalculateMaxDeltasSize(k, table_index))
std::cout << encoded_size + 2 << " " << CalculateMaxDeltasSize(k, table_index) << "\n";
assert(encoded_size + 2 < CalculateMaxDeltasSize(k, table_index));
writer.write((const char*)&encoded_size, 2);
writer.write((const char*)park_deltas_bytes, encoded_size);
}
// Compresses the plot file tables into the final file. In order to do this, entries must be
// reorganized from the (pos, offset) bucket sorting order, to a more free line_point sorting
// order. In (pos, offset) ordering, we store two pointers to the previous table, (x, y), which
// are very close together, by storing (x, y-x), or (pos, offset), which can be done in about k + 8 bits,
// since y is in the next bucket after x. In order to decrease this, we store the actual entries from the
// previous table (e1, e2), instead of pos, offset pointers, and sort the entire table by (e1, e2).
// Then, the deltas between each (e1, e2) can be stored, which require around k bits.
// Converting into this format requires a few passes and sorts on disk. It also assumes that the
// backpropagation step happened, so there will be no more dropped entries. See the design
// document for more details on the algorithm.
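// As a sketch of the line point idea (the packing this codebase actually uses
// lives in encoding.hpp): two k-bit values x >= y can be combined into a single
// 2k-bit value, e.g. x * (x - 1) / 2 + y; after sorting, consecutive line points
// differ by deltas of roughly k bits, which is what the parks store.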
Phase3Results CompressTables(uint8_t k, vector<uint64_t> plot_table_begin_pointers, std::string filename,
std::string plot_filename, uint8_t* id, uint8_t* memo, uint32_t memo_len,
std::vector<uint64_t>& extra_metadata) {
// In this phase we open a new file, where the final contents of the plot will be stored.
std::ofstream header_writer(filename, std::ios::out | std::ios::trunc | std::ios::binary);
if (!header_writer.is_open()) {
throw std::string("Final file not opened correct");
}
uint32_t header_size = WriteHeader(header_writer, k, id, memo, memo_len);
uint8_t pos_size = k + 1;
std::vector<uint64_t> final_table_begin_pointers(12, 0);
final_table_begin_pointers[1] = header_size;
header_writer.seekp(header_size - 10*8);
uint8_t table_1_pointer_bytes[8*8];
Bits(final_table_begin_pointers[1], 8*8).ToBytes(table_1_pointer_bytes);
header_writer.write((const char*)table_1_pointer_bytes, 8);
header_writer.close();
uint64_t spare_pointer = plot_table_begin_pointers[8];
uint8_t* memory = new uint8_t[kMemorySize];
uint64_t final_entries_written = 0;
uint32_t right_entry_size_bytes = 0;
// Iterates through all tables, starting at 1, with L and R pointers.
// For each table, R entries are rewritten with line points. Then, the right table is
// sorted by line_point. After this, the right table entries are rewritten as (sort_key, new_pos),
// where new_pos is the position in the table, where it's sorted by line_point, and the line_points
// are written to disk to a final table. Finally, table_i is sorted by sort_key. This allows us to
// compare to the next table.
F1Calculator f1(k, id);
f1.ReloadKey();
for (uint8_t table_index = 1; table_index < 7; table_index++) {
Timer table_timer;
Timer computation_pass_1_timer;
std::cout << "Compressing tables " << int{table_index} << " and " << int{table_index + 1} << std::endl;
std::ifstream left_reader(plot_filename, std::ios::in | std::ios::binary);
std::ifstream right_reader(plot_filename, std::ios::in | std::ios::binary);
std::ofstream right_writer(plot_filename, std::ios::in | std::ios::out | std::ios::binary);
// The park size must be constant, for simplicity, but must be big enough to store EPP entries.
// Entry deltas are encoded with variable length, and thus there is no guarantee that they
// won't overflow into the next park. It is only different (larger) for table 1.
uint32_t park_size_bytes = CalculateParkSize(k, table_index);
std::vector<uint64_t> bucket_sizes(kNumSortBuckets, 0);
uint32_t left_y_size = k + kExtraBits;
// Sort key for table 7 is just y, which is k bits. For all other tables it can
// be higher than 2^k and therefore k+1 bits are used.
uint32_t right_sort_key_size = table_index == 6 ? k : k + 1;
uint32_t left_entry_size_bytes = GetMaxEntrySize(k, table_index, false);
right_entry_size_bytes = GetMaxEntrySize(k, table_index + 1, false);
left_reader.seekg(plot_table_begin_pointers[table_index]);
right_reader.seekg(plot_table_begin_pointers[table_index + 1]);
right_writer.seekp(plot_table_begin_pointers[table_index + 1]);
bool should_read_entry = true;
std::vector<uint64_t> left_new_pos(kCachedPositionsSize);
Bits old_sort_keys[kReadMinusWrite][kMaxMatchesSingleEntry];
uint64_t old_offsets[kReadMinusWrite][kMaxMatchesSingleEntry];
uint16_t old_counters[kReadMinusWrite];
for (uint32_t i = 0; i < kReadMinusWrite; i++) {
old_counters[i] = 0;
}
bool end_of_right_table = false;
uint64_t current_pos = 0;
uint64_t end_of_table_pos = 0;
uint64_t greatest_pos = 0;
uint8_t* right_entry_buf = new uint8_t[right_entry_size_bytes];
uint8_t* left_entry_disk_buf = new uint8_t[left_entry_size_bytes];
uint64_t entry_sort_key, entry_pos, entry_offset;
uint64_t cached_entry_sort_key = 0;
uint64_t cached_entry_pos = 0;
uint64_t cached_entry_offset = 0;
// Similar algorithm as Backprop, to read both L and R tables simultaneously
while (!end_of_right_table || (current_pos - end_of_table_pos <= kReadMinusWrite)) {
old_counters[current_pos % kReadMinusWrite] = 0;
if (!end_of_right_table || current_pos <= greatest_pos) {
while (!end_of_right_table) {
if (should_read_entry) {
// The right entries are in the format from backprop, (sort_key, pos, offset)
right_reader.read(reinterpret_cast<char *>(right_entry_buf),
right_entry_size_bytes);
entry_sort_key = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
0, right_sort_key_size);
entry_pos = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
right_sort_key_size, pos_size);
entry_offset = Util::SliceInt64FromBytes(right_entry_buf, right_entry_size_bytes,
right_sort_key_size + pos_size, kOffsetSize);
} else if (cached_entry_pos == current_pos) {
entry_sort_key = cached_entry_sort_key;
entry_pos = cached_entry_pos;
entry_offset = cached_entry_offset;
} else {