forked from simdjson/simdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFuzzUtils.h
166 lines (141 loc) · 4.97 KB
/
FuzzUtils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#ifndef SIMDJSON_FUZZUTILS_H
#define SIMDJSON_FUZZUTILS_H
#include <cstdint>
#include <vector>
#include <string_view>
#include <cstring> //memcpy
// Reinterprets a pointer to any object type as a pointer to read-only bytes.
// The address is unchanged; only the pointee type differs.
template <typename T> inline const std::uint8_t* as_bytes(const T* data) {
  // go through void* so that only static_cast is needed
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
// Reinterprets a pointer to any object type as a pointer to read-only chars.
// The address is unchanged; only the pointee type differs.
template <typename T> inline const char* as_chars(const T* data) {
  // go through void* so that only static_cast is needed
  const void* const untyped = data;
  return static_cast<const char*>(untyped);
}
// Cuts the buffer [Data, Data+Size) into pieces at every occurrence of the
// four byte, human readable separator "\n~~\n" (nice when debugging fuzz data).
// See https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#magic-separator
// for background. Note: memmem is deliberately avoided, it is not standard C++.
//
// The returned views point into the caller's buffer; nothing is copied. The
// result always has at least one element: whatever follows the last separator
// (possibly empty), or the whole input if there is no separator at all.
inline std::vector<std::string_view> split(const char* Data, size_t Size) {
  using namespace std::literals;
  constexpr auto separator = "\n~~\n"sv;
  const std::string_view input(Data, Size);

  std::vector<std::string_view> pieces;
  // offset of the first byte that has not been assigned to a piece yet
  std::string_view::size_type begin = 0;
  for (;;) {
    const auto hit = input.find(separator, begin);
    if (hit == std::string_view::npos) {
      break;
    }
    pieces.push_back(input.substr(begin, hit - begin));
    begin = hit + separator.size();
  }
  // the tail after the last separator
  pieces.push_back(input.substr(begin));
  return pieces;
}
// Generic helper to split fuzz data into usable parts, like ints etc.
// It is a cursor (Data, Size) over a caller owned buffer; every getter
// consumes bytes from the front.
// Note that it does not throw. Instead, when a request cannot be satisfied,
// it sets the data pointer to null and the size to zero. Test the object in a
// boolean context to find out whether that has happened.
struct FuzzData {
  // data may not be null, even if size is zero.
  FuzzData(const uint8_t* data,
           size_t size) : Data(data),Size(size){}

  // Returns an int in the inclusive range [Min,Max], derived from the next
  // one, two or four input bytes depending on how wide the range is.
  // If the input is too short, the object is marked as failed (see get())
  // and Min is returned.
  template<int Min, int Max>
  int getInt() {
    static_assert (Min<Max,"min must be <max");
    // make this constexpr, can't overflow because that is UB and is forbidden
    // in constexpr evaluation
    constexpr int range=(Max-Min)+1;
    constexpr unsigned int urange=range;
    // don't use std::uniform_int_distribution, we don't want to pay for
    // over consumption of random data. Accept the slightly non-uniform distribution.
    // The width is chosen at compile time, so only the get<>() that is
    // actually needed gets instantiated.
    if constexpr (range<256) {
      return Min+static_cast<int>(get<uint8_t>()%urange);
    } else if constexpr (range<65536) {
      return Min+static_cast<int>(get<uint16_t>()%urange);
    } else {
      return Min+static_cast<int>(get<uint32_t>()%urange);
    }
  }

  // Reads a T by copying the next sizeof(T) bytes verbatim (so in the byte
  // order of the machine running the fuzzer) and advances past them.
  // If fewer than sizeof(T) bytes remain, the rest of the input is dropped,
  // the object is marked as failed and a value initialized T is returned.
  template<typename T>
  T get() {
    constexpr auto Nbytes=sizeof(T);
    T ret{};
    if(Size<Nbytes) {
      //don't throw, signal with null instead.
      Data=nullptr;
      Size=0;
      return ret;
    }
    std::memcpy(&ret,Data,Nbytes);
    Data+=Nbytes;
    Size-=Nbytes;
    return ret;
  }

  // gets a string view with length in [Min,Max]. The length is drawn with
  // getInt<Min,Max>() and that many bytes are then consumed from the input.
  // The view points into the underlying buffer, nothing is copied.
  // If the length could not be read, or fewer bytes than the drawn length
  // remain, the object is marked as failed and an empty view is returned.
  template<int Min, int Max>
  std::string_view get_stringview() {
    static_assert (Min>=0,"Min must be positive");
    const int len=getInt<Min,Max>();
    const unsigned int ulen=static_cast<unsigned int>(len);
    // Data is null here if getInt() ran out of input; keep that failure
    // sticky instead of handing out a zero length view. Otherwise a request
    // for exactly the remaining bytes can be fulfilled, hence <= and not <.
    if(Data!=nullptr && ulen<=Size) {
      const std::string_view ret(chardata(),ulen);
      Data+=ulen;
      Size-=ulen;
      return ret;
    }
    //mark that there is too little data to fulfill the request
    Data=nullptr;
    Size=0;
    return {};
  }

  // consumes the rest of the data as a string view (possibly empty).
  // This never marks the object as failed.
  std::string_view remainder_as_stringview() {
    std::string_view ret{chardata(),Size};
    Data+=Size;
    Size=0;
    return ret;
  }

  // split the remainder of the data into string views, using split().
  // All remaining data is consumed. Unlike split(), an exhausted input gives
  // an empty vector rather than a vector holding one empty view.
  std::vector<std::string_view> splitIntoStrings() {
    std::vector<std::string_view> ret;
    if(Size>0) {
      ret=split(chardata(),Size);
      // all data consumed.
      Data+=Size;
      Size=0;
    }
    return ret;
  }

  //are we good? false once a request has failed (Data was set to null).
  explicit operator bool() const { return Data!=nullptr;}

  //we are a URBG
  // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator
  //The type G satisfies UniformRandomBitGenerator if Given
  // T, the type named by G::result_type
  // g, a value of type G
  //
  // The following expressions must be valid and have their specified effects
  // Expression Return type Requirements
  // G::result_type T T is an unsigned integer type
  using result_type=uint8_t;
  // G::min() T Returns the smallest value that G's operator() may return. The value is strictly less than G::max(). The function must be constexpr.
  static constexpr result_type min() {return 0;}
  // G::max() T Returns the largest value that G's operator() may return. The value is strictly greater than G::min(). The function must be constexpr.
  static constexpr result_type max() {return 255;}
  // g() T Returns a value in the closed interval [G::min(), G::max()]. Has amortized constant complexity.
  // Yields the next input byte. Once the input is exhausted it yields the
  // values of a wrapping counter instead; it does not mark the object as failed.
  result_type operator()() {
    if(Size==0) {
      // return something varying, otherwise uniform_int_distribution may get
      // stuck
      return failcount++;
    }
    const result_type ret=Data[0];
    Data++;
    Size--;
    return ret;
  }

  // returns a pointer to data as const char* to avoid those cstyle casts
  const char* chardata() const {return static_cast<const char*>(static_cast<const void*>(Data));}

  // members
  // next unread byte; null once a request has failed
  const uint8_t* Data;
  // number of unread bytes starting at Data
  size_t Size;
  // what operator() hands out after the input is exhausted
  uint8_t failcount=0;
};
#endif // SIMDJSON_FUZZUTILS_H