Skip to content

Commit

Permalink
fix: verify tool will advance past invalid_iterator error
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesPiechota committed Nov 29, 2024
1 parent c92d936 commit 2c75cec
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
20 changes: 13 additions & 7 deletions apps/arweave/src/ar_data_sync.erl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
get_tx_offset/1, get_tx_offset_data_in_range/2, has_data_root/2,
request_tx_data_removal/3, request_data_removal/4, record_disk_pool_chunks_count/0,
record_chunk_cache_size_metric/0, is_chunk_cache_full/0, is_disk_space_sufficient/1,
get_chunk_by_byte/2, get_chunk_seek_offset/1, read_chunk/4, read_data_path/2,
get_chunk_by_byte/2, advance_chunks_index_cursor/1, get_chunk_seek_offset/1,
read_chunk/4, read_data_path/2,
increment_chunk_cache_size/0, decrement_chunk_cache_size/0,
get_chunk_padded_offset/1, get_chunk_metadata_range/3,
get_merkle_rebase_threshold/0, should_store_in_chunk_storage/3]).
Expand Down Expand Up @@ -529,6 +530,16 @@ get_chunk_by_byte(ChunksIndex, Byte) ->
{ok, Key, FullMetaData}
end.

%% @doc Compute the cursor to resume from when get_chunk_by_byte/2 returns
%% {error, invalid_iterator}, so there is no chunk end offset to advance by.
%%
%% get_chunk_by_byte/2 looks for a key with the same prefix or the next prefix.
%% Therefore, if there is no such key, it does not make sense to look for any
%% key smaller than the prefix + 2 in the next iteration.
%%
%% Returns the first offset of the prefix bucket that lies two buckets past the
%% one containing Cursor.
advance_chunks_index_cursor(Cursor) ->
	%% Number of offsets that share a single key prefix. An integer shift is used
	%% instead of trunc(math:pow(2, N)) so the arithmetic stays exact and never
	%% goes through a float.
	PrefixSpaceSize = 1 bsl (?OFFSET_KEY_BITSIZE - ?OFFSET_KEY_PREFIX_BITSIZE),
	%% Round Cursor down to the start of its bucket, then skip two whole buckets.
	((Cursor div PrefixSpaceSize) + 2) * PrefixSpaceSize.

read_chunk(Offset, ChunkDataDB, ChunkDataKey, StoreID) ->
case ar_kv:get(ChunkDataDB, ChunkDataKey) of
not_found ->
Expand Down Expand Up @@ -1270,12 +1281,7 @@ handle_cast({remove_range, End, Cursor, Ref, PID}, State) ->
end,
{noreply, State};
{error, invalid_iterator} ->
%% get_chunk_by_byte looks for a key with the same prefix or the next prefix.
%% Therefore, if there is no such key, it does not make sense to look for any
%% key smaller than the prefix + 2 in the next iteration.
PrefixSpaceSize =
trunc(math:pow(2, ?OFFSET_KEY_BITSIZE - ?OFFSET_KEY_PREFIX_BITSIZE)),
NextCursor = ((Cursor div PrefixSpaceSize) + 2) * PrefixSpaceSize,
NextCursor = advance_chunks_index_cursor(Cursor),
gen_server:cast(self(), {remove_range, End, NextCursor, Ref, PID}),
{noreply, State};
{error, Reason} ->
Expand Down
5 changes: 4 additions & 1 deletion apps/arweave/src/ar_verify_chunks.erl
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ verify_chunks({IntervalEnd, IntervalStart}, Intervals, State) ->

verify_chunk({error, Reason}, _Intervals, State) ->
#state{ cursor = Cursor } = State,
log_error(get_chunk_error, Cursor, ?DATA_CHUNK_SIZE, [{reason, Reason}], State);
NextCursor = ar_data_sync:advance_chunks_index_cursor(Cursor),
RangeSkipped = NextCursor - Cursor,
State2 = log_error(get_chunk_error, Cursor, RangeSkipped, [{reason, Reason}], State),
State2#state{ cursor = NextCursor };
verify_chunk({ok, _Key, MetaData}, Intervals, State) ->
{AbsoluteOffset, _ChunkDataKey, _TXRoot, _DataRoot, _TXPath,
_TXRelativeOffset, ChunkSize} = MetaData,
Expand Down

0 comments on commit 2c75cec

Please sign in to comment.