forked from simdjson/simdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlsx.cpp
136 lines (111 loc) · 4.54 KB
/
lsx.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#ifndef SIMDJSON_SRC_LSX_CPP
#define SIMDJSON_SRC_LSX_CPP
#ifndef SIMDJSON_CONDITIONAL_INCLUDE
#include <base.h>
#endif // SIMDJSON_CONDITIONAL_INCLUDE
#include <simdjson/lsx.h>
#include <simdjson/lsx/implementation.h>
#include <simdjson/lsx/begin.h>
#include <generic/amalgamated.h>
#include <generic/stage1/amalgamated.h>
#include <generic/stage2/amalgamated.h>
//
// Stage 1
//
namespace simdjson {
namespace lsx {
// Creates an LSX dom_parser_implementation, stores it in `dst`, and sizes it.
//
// capacity  - forwarded to set_capacity() on the new parser.
// max_depth - forwarded to set_max_depth() on the new parser.
// dst       - receives ownership of the new parser; any previous pointee is released.
//
// Returns MEMALLOC when the non-throwing allocation fails, the first non-zero
// error reported by set_capacity() / set_max_depth(), or SUCCESS otherwise.
// When one of the setters fails, `dst` still owns the newly created parser.
simdjson_warn_unused error_code implementation::create_dom_parser_implementation(
  size_t capacity,
  size_t max_depth,
  std::unique_ptr<internal::dom_parser_implementation>& dst
) const noexcept {
  // nothrow-new keeps this function noexcept: failure shows up as a null pointer.
  dst.reset(new (std::nothrow) dom_parser_implementation());
  if (dst == nullptr) {
    return MEMALLOC;
  }

  const auto capacity_status = dst->set_capacity(capacity);
  if (capacity_status) {
    return capacity_status;
  }

  const auto depth_status = dst->set_max_depth(max_depth);
  if (depth_status) {
    return depth_status;
  }

  return SUCCESS;
}
namespace {
using namespace simd;
// Classifies the bytes of `in` and returns two 64-bit masks: `ws` for the JSON
// whitespace characters (' ', '\t', '\n', '\r') and `op` for the structural
// operators (, : [ ] { }). Presumably bit i of each mask corresponds to byte i
// of the block — confirm against simd8x64::eq.
//
// Both masks are produced the same way: every input byte selects an entry from a
// small table, and the selected entry is compared for equality with the input
// byte itself. A bit can therefore only be set where the table holds exactly
// that byte value.
simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
// Inspired by haswell.
// LSX uses the low 5 bits of the selector as the index. For the 6 operators (:,[]{}), the unique 5 bits are bits [6:2].
// The ASCII white-space and operators have these values: (char, hex, unique-5bits)
// (' ', 20, 00000) ('\t', 09, 01001) ('\n', 0A, 01010) ('\r', 0D, 01101)
// (',', 2C, 01011) (':', 3A, 01110) ('[', 5B, 10110) ('{', 7B, 11110) (']', 5D, 10111) ('}', 7D, 11111)
//
// Whitespace table: entries 0, 9, 10 and 13 are the low nibbles of 0x20, 0x09,
// 0x0A and 0x0D. Entry 0 is ' ' rather than 0, so a NUL byte (which presumably
// also selects entry 0 — confirm lookup_16 indexing) does not compare equal.
const simd8<uint8_t> ws_table = simd8<uint8_t>::repeat_16(
' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0
);
// Operator tables, selected by (byte >> 2). The "lo" table covers selector values
// 0..15 and the "hi" table 16..31: ',' -> 0x2C>>2 = 11, ':' -> 0x3A>>2 = 14,
// '[' -> 0x5B>>2 = 22, ']' -> 0x5D>>2 = 23, '{' -> 0x7B>>2 = 30, '}' -> 0x7D>>2 = 31.
// Entry 0 of the "lo" table is 1, presumably so that a NUL byte (selector 0) does
// not match a zero table entry.
// NOTE(review): with entry 0 == 1, byte 0x01 (also selector 0) looks like it would
// compare equal and be reported as an operator — confirm this is intended/harmless.
const simd8<uint8_t> op_table_lo = simd8<uint8_t>::repeat_16(
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, ':', 0
);
const simd8<uint8_t> op_table_hi = simd8<uint8_t>::repeat_16(
0, 0, 0, 0, 0, 0, '[', ']', 0, 0, 0, 0, 0, 0, '{', '}'
);
// Look each of the four chunks up in the whitespace table, then compare the
// looked-up bytes with the input to obtain the whitespace mask.
uint64_t ws = in.eq({
in.chunks[0].lookup_16(ws_table),
in.chunks[1].lookup_16(ws_table),
in.chunks[2].lookup_16(ws_table),
in.chunks[3].lookup_16(ws_table)
});
// Same idea for operators, but the selector is the chunk shifted right by 2 and
// the shuffle draws from the hi/lo table pair.
uint64_t op = in.eq({
__lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[0].shr<2>()),
__lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[1].shr<2>()),
__lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[2].shr<2>()),
__lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[3].shr<2>())
});
// Brace-initialize the result from the two masks (whitespace first, operators second).
return { ws, op };
}
// Reports whether the block `input` is ASCII: the block is first folded with
// reduce_or(), and the folded vector is then tested with its own is_ascii().
simdjson_inline bool is_ascii(const simd8x64<uint8_t>& input) {
  const auto folded = input.reduce_or();
  return folded.is_ascii();
}
// Combines two saturating subtractions into one vector:
//   prev2 - 0x60 (saturating): only bytes of the form 111_____ end up >= 0x80;
//   prev3 - 0x70 (saturating): only bytes of the form 1111____ end up >= 0x80.
// The two results are OR-ed together and returned as-is; this function does not
// itself reduce the result to a boolean mask.
simdjson_inline simd8<uint8_t> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
  const simd8<uint8_t> from_three_byte_lead = prev2.saturating_sub(0xe0u - 0x80);
  const simd8<uint8_t> from_four_byte_lead = prev3.saturating_sub(0xf0u - 0x80);
  return from_three_byte_lead | from_four_byte_lead;
}
} // unnamed namespace
} // namespace lsx
} // namespace simdjson
//
// Stage 2
//
//
// Implementation-specific overrides
//
namespace simdjson {
namespace lsx {
// Minifies the JSON in [buf, buf + len) by delegating to the generic stage-1
// minifier instantiated with 64 (presumably the number of bytes handled per
// step — confirm against the generic implementation).
//
// dst and dst_len are handed to the minifier unchanged; dst_len is passed by
// reference (presumably it receives the output length — confirm).
// Returns whatever error_code the minifier reports.
simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
  const auto status = lsx::stage1::json_minifier::minify<64>(buf, len, dst, dst_len);
  return status;
}
// Stage 1 entry point: records the input buffer and its length on this parser,
// then runs the generic structural indexer (instantiated with 64) over it.
//
// _buf / _len - input bytes; stored in this->buf / this->len before indexing.
// streaming   - stage1_mode forwarded unchanged to the indexer.
// Returns the error_code produced by the indexer.
simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept {
  // The indexer receives *this as well, so the members must be set first.
  buf = _buf;
  len = _len;
  return lsx::stage1::json_structural_indexer::index<64>(this->buf, this->len, *this, streaming);
}
// Validates the `len` bytes starting at `buf` by delegating to the generic
// stage-1 UTF-8 validator, and returns that validator's verdict.
simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  const bool verdict = lsx::stage1::generic_validate_utf8(buf, len);
  return verdict;
}
// Stage 2 entry point: runs the generic tape builder over this parser, writing
// into `_doc`. The tape builder is instantiated with `false` here; stage2_next
// uses `true`.
// Returns the error_code reported by the tape builder.
simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
  const auto status = stage2::tape_builder::parse_document<false>(*this, _doc);
  return status;
}
// Variant of stage 2 that instantiates the generic tape builder with `true`
// (stage2 uses `false`); presumably used to parse the next document of a
// stream — confirm against the tape builder.
// Returns the error_code reported by the tape builder.
simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
  const auto status = stage2::tape_builder::parse_document<true>(*this, _doc);
  return status;
}
// Forwards to the LSX string parser, passing src, dst and allow_replacement
// through unchanged, and returns the pointer it yields (presumably the end of
// the written output, or null on failure — confirm against stringparsing).
simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept {
  uint8_t *const parsed = lsx::stringparsing::parse_string(src, dst, allow_replacement);
  return parsed;
}
// Forwards to the LSX "wobbly" string parser, passing src and dst through
// unchanged, and returns the pointer it yields (presumably the end of the
// written output, or null on failure — confirm against stringparsing).
simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept {
  uint8_t *const parsed = lsx::stringparsing::parse_wobbly_string(src, dst);
  return parsed;
}
// Full parse: runs stage 1 in regular mode over [_buf, _buf + _len) and, only if
// that succeeds, runs stage 2 into `_doc`.
// Returns the stage-1 error when stage 1 fails, otherwise the result of stage 2.
simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {
  if (const auto stage1_error = stage1(_buf, _len, stage1_mode::regular)) {
    return stage1_error;  // stage 2 is skipped when stage 1 reports an error
  }
  return stage2(_doc);
}
} // namespace lsx
} // namespace simdjson
#include <simdjson/lsx/end.h>
#endif // SIMDJSON_SRC_LSX_CPP