Generated by https://github.com/ocxtal/insn_bench_aarch64 (commit: unknown).
Measuring CPU frequency, assuming the latency of a 64-bit addition is 1 cycle:
- 3199.79 MHz
- 3199.83 MHz
- 3199.73 MHz
instruction | latency | throughput |
---|---|---|
ldr (imm; ofs = 0) | 3.00 | 3.00 |
ldr (imm; ofs = 16) | 3.00 | 3.00 |
ldr (imm; pre, ofs = 0) | 3.00 | 3.00 |
ldr (imm; pre, ofs = 16) | 3.00 | - |
ldr (imm; post, ofs = 0) | 3.00 | 3.00 |
ldr (imm; post, ofs = 16) | 3.00 | - |
ldr (ptr fwd.; imm; pre, ofs = 16) | 1.00 | - |
ldr (ptr fwd.; imm; post, ofs = 16) | 1.00 | - |
ldr (imm; ofs = 0; unaligned) | 4.00 | 3.00 |
ldr (imm; ofs = 16; unaligned) | 4.00 | 3.00 |
ldr (imm; ofs = 0; cross-cache) | 4.01 | 3.00 |
ldr (imm; ofs = 0; cross-page) | 31.00 | 0.03 |
ldr (reg) | 4.00 | 3.00 |
ldr (reg; <<3) | 4.00 | 3.00 |
ldr (literal) | - | 3.00 |
ldr (reg; unaligned) | 4.00 | 3.00 |
ldr (reg; <<3; unaligned) | 4.00 | 3.00 |
ldrb (imm; ofs = 0) | 4.00 | 2.94 |
ldrb (imm; ofs = 16) | 4.00 | 2.95 |
ldrb (reg) | 3.00 | 3.00 |
ldrb (imm; ofs = 0; unaligned) | 4.00 | 2.94 |
ldrb (imm; ofs = 16; unaligned) | 4.01 | 2.95 |
ldrb (reg; unaligned) | 4.00 | 3.00 |
ldrsb (imm; ofs = 0) | 4.00 | 2.95 |
ldrsb (imm; ofs = 16) | 4.00 | 2.93 |
ldrsb (reg) | 4.00 | 3.00 |
ldrsb (imm; ofs = 0; unaligned) | 4.00 | 2.94 |
ldrsb (imm; ofs = 16; unaligned) | 4.00 | 2.93 |
ldrsb (reg; unaligned) | 4.01 | 3.00 |
ldrh (imm; ofs = 0) | 4.00 | 2.94 |
ldrh (imm; ofs = 16) | 4.00 | 2.95 |
ldrh (reg) | 3.00 | 3.00 |
ldrh (imm; ofs = 0; unaligned) | 4.00 | 2.94 |
ldrh (imm; ofs = 16; unaligned) | 4.00 | 2.93 |
ldrh (reg; unaligned) | 4.00 | 3.00 |
ldrsh (imm; ofs = 0) | 4.01 | 2.95 |
ldrsh (imm; ofs = 16) | 4.00 | 2.94 |
ldrsh (reg) | 4.00 | 3.00 |
ldrsh (imm; ofs = 0; unaligned) | 4.00 | 2.94 |
ldrsh (imm; ofs = 16; unaligned) | 4.00 | 2.94 |
ldrsh (reg; unaligned) | 4.00 | 3.00 |
ldrsw (imm; ofs = 0) | 4.00 | 2.95 |
ldrsw (imm; ofs = 16) | 4.01 | 2.93 |
ldrsw (reg) | 4.00 | 3.00 |
ldrsw (literal) | - | 3.00 |
ldrsw (imm; ofs = 0; unaligned) | 4.00 | 2.93 |
ldrsw (imm; ofs = 16; unaligned) | 4.00 | 2.94 |
ldrsw (reg; unaligned) | 4.00 | 3.00 |
ldur (ofs = 0) | 3.00 | 3.00 |
ldur (ofs = 16) | 3.00 | 3.00 |
ldur (ofs = 0; unaligned) | 4.00 | 3.00 |
ldur (ofs = 16; unaligned) | 4.00 | 3.00 |
ldurb (ofs = 0) | 4.00 | 2.94 |
ldurb (ofs = 16) | 4.00 | 2.95 |
ldurb (ofs = 0; unaligned) | 4.00 | 2.95 |
ldurb (ofs = 16; unaligned) | 4.00 | 2.96 |
ldurh (ofs = 0) | 4.00 | 2.94 |
ldurh (ofs = 16) | 4.00 | 2.95 |
ldurh (ofs = 0; unaligned) | 4.00 | 2.96 |
ldurh (ofs = 16; unaligned) | 4.00 | 2.96 |
ldursb (ofs = 0) | 4.00 | 2.94 |
ldursb (ofs = 16) | 4.00 | 2.94 |
ldursb (ofs = 0; unaligned) | 4.00 | 2.95 |
ldursb (ofs = 16; unaligned) | 4.00 | 2.94 |
ldursh (ofs = 0) | 4.00 | 2.96 |
ldursh (ofs = 16) | 4.00 | 2.94 |
ldursh (ofs = 0; unaligned) | 4.00 | 2.95 |
ldursh (ofs = 16; unaligned) | 4.00 | 2.96 |
ldursw (ofs = 0) | 4.00 | 2.96 |
ldursw (ofs = 16) | 4.00 | 2.94 |
ldursw (ofs = 0; unaligned) | 4.00 | 2.95 |
ldursw (ofs = 16; unaligned) | 4.00 | 2.95 |
ldp (x; ofs = 0; 1st elem) | 3.00 | 2.98 |
ldp (x; ofs = 0; 2nd elem) | 4.00 | 2.98 |
ldp (w; ofs = 0; 1st elem) | 4.00 | 2.43 |
ldp (w; ofs = 0; 2nd elem) | 4.00 | 2.43 |
ldp (x; ofs = 0; 1st elem; unaligned) | 4.00 | 2.93 |
ldp (x; ofs = 0; 2nd elem; unaligned) | 4.00 | 2.88 |
ldp (w; ofs = 0; 1st elem; unaligned) | 4.00 | 2.43 |
ldp (w; ofs = 0; 2nd elem; unaligned) | 4.00 | 2.43 |
ldpsw (ofs = 0; 1st elem) | 4.00 | 2.43 |
ldpsw (ofs = 0; 2nd elem) | 4.00 | 2.43 |
ldpsw (ofs = 0; 1st elem; unaligned) | 4.00 | 2.43 |
ldpsw (ofs = 0; 2nd elem; unaligned) | 4.00 | 2.43 |
ldnp | - | 0.89 |
instruction | latency | throughput |
---|---|---|
str (imm; ofs = 0) | - | 2.00 |
str (imm; ofs = 16) | - | 2.00 |
str (imm; pre, ofs = 0) | - | 2.00 |
str (imm; pre, ofs = 16) | - | 2.00 |
str (imm; post, ofs = 0) | - | 2.00 |
str (imm; post, ofs = 16) | - | 2.00 |
str (imm; ofs = 0; unaligned) | - | 2.00 |
str (imm; ofs = 16; unaligned) | - | 2.00 |
str (imm; pre, ofs = 0; unaligned) | - | 2.00 |
str (imm; pre, ofs = 16; unaligned) | - | 2.00 |
str (imm; post, ofs = 0; unaligned) | - | 2.00 |
str (imm; post, ofs = 16; unaligned) | - | 2.00 |
str (imm; ofs = 0; cross-cache) | - | 2.00 |
str (imm; pre, ofs = 0; cross-cache) | - | 2.00 |
str (imm; post, ofs = 0; cross-cache) | - | 2.00 |
str (imm; ofs = 0; cross-page) | - | 0.03 |
str (imm; pre, ofs = 0; cross-page) | - | 0.03 |
str (imm; post, ofs = 0; cross-page) | - | 0.03 |
str (reg) | - | 2.00 |
str (reg; <<3) | - | 2.00 |
strb (reg) | - | 2.00 |
strh (reg) | - | 2.00 |
stur (imm; ofs = 0) | - | 2.00 |
sturb (imm; ofs = 0) | - | 2.00 |
sturh (imm; ofs = 0) | - | 2.00 |
stp (ofs = 0) | - | 2.00 |
instruction | latency | throughput |
---|---|---|
str -> ldr (ofs = 0 -> ofs = 0) | 5.82 | 1.72 |
str -> ldur (ofs = 0 -> ofs = 1) | 5.86 | 1.81 |
stur -> ldr (ofs = 1 -> ofs = 1) | 5.85 | 1.81 |
stur -> ldur (ofs = 1 -> ofs = 1) | 5.84 | 1.73 |
stur -> ldur (cross-cache -> aligned) | 5.84 | 1.79 |
stur -> ldur (aligned -> cross-cache) | 7.22 | 1.75 |
stur -> ldur (cross-cache -> cross-cache) | 7.23 | 1.76 |
stp -> ldp (ofs = 0 -> ofs = 0) | 6.93 | 1.17 |
stp -> ldp (ofs = 0 -> ofs = 0; swap) | 6.97 | 1.17 |
stp -> ldp (ofs = 0 -> ofs = 8) | 6.99 | 1.07 |
stp -> ldp (ofs = 8 -> ofs = 0) | 7.00 | 1.07 |
instruction | latency | throughput |
---|---|---|
b (pc+4) | 1.01 | 0.99 |
b (pc+8) | 1.01 | 0.99 |
b (pc+4) // add (chain) | 1.01 | 0.99 |
b (pc+4) // add x 2 (chain) | 2.00 | 0.50 |
adr -> br (pc+4) | 1.01 | 0.99 |
adr -> br (pc+4) // add (chain) | 1.01 | 0.99 |
adr -> br (pc+4) // add x 2 (chain) | 2.00 | 0.50 |
bl-ret | 2.01 | 0.50 |
bl-ret // add (chain) | 2.01 | 0.50 |
bl-ret // add x 2 (chain) | 2.01 | 0.50 |
bl-ret // add x 3 (chain) | 3.00 | 0.33 |
blr-ret | 2.01 | 0.50 |
blr-ret // add (chain) | 2.01 | 0.50 |
blr-ret // add x 2 (chain) | 2.01 | 0.50 |
blr-ret // add x 3 (chain) | 3.00 | 0.33 |
cbz (pc+4; taken) | 1.01 | 0.99 |
cbz (pc+4; taken) // add (chain) | 1.01 | 0.99 |
cbz (pc+4; taken) // add x 2 (chain) | 2.00 | 0.50 |
cbz (pc+4; taken) // b (pc+4) | - | 0.50 |
cbz (pc+4; not taken) | 0.50 | 1.99 |
cbz (pc+4; not taken) // add (chain) | 1.00 | 1.00 |
cbz (pc+4; not taken) // add x 2 (chain) | 2.00 | 0.50 |
cbz (pc+4; not taken) // b (pc+4) | - | 1.00 |
cbnz (pc+4; taken) | 1.01 | 0.99 |
cbnz (pc+4; not taken) | 0.50 | 1.99 |
tbz (pc+4; taken) | 1.01 | 0.99 |
tbz (pc+4; not taken) | 0.50 | 1.99 |
tbnz (pc+4; taken) | 1.01 | 0.99 |
tbnz (pc+4; not taken) | 0.50 | 1.99 |
adds -> b.eq (pc+4; taken) | 1.01 | 0.99 |
adds -> b.eq (pc+4; not taken) | 1.11 | 0.90 |
fcmp -> b.eq (pc+4; taken) | 1.01 | 0.99 |
fcmp -> b.ne (pc+4; not taken) | 1.00 | 1.00 |
and -> cbz (pc+4; full random) | 6.98 | 0.14 |
and -> cbnz (pc+4; full random) | 6.97 | 0.14 |
and -> cbz (pc+4; full random) // add (chain) | 7.25 | 0.14 |
and -> cbnz (pc+4; full random) // add (chain) | 7.24 | 0.14 |
and -> cbz (pc+4; full random) // add x 2 (chain) | 7.48 | 0.13 |
and -> cbnz (pc+4; full random) // add x 2 (chain) | 7.47 | 0.13 |
tbz (pc+4; full random) | 6.88 | 0.15 |
tbnz (pc+4; full random) | 6.87 | 0.15 |
tbz (pc+4; full random) // add (chain) | 6.99 | 0.14 |
tbnz (pc+4; full random) // add (chain) | 6.97 | 0.14 |
tbz (pc+4; full random) // add x 2 (chain) | 7.27 | 0.14 |
tbnz (pc+4; full random) // add x 2 (chain) | 7.25 | 0.14 |
tbz (pc+4; full random) | 13.00 | 0.08 |
tbnz (pc+4; full random) | 13.00 | 0.08 |
tbz (pc+4; full random) // add (chain) | 12.75 | 0.08 |
tbnz (pc+4; full random) // add (chain) | 12.76 | 0.08 |
tbz (pc+4; full random) // add x 2 (chain) | 12.66 | 0.08 |
tbnz (pc+4; full random) // add x 2 (chain) | 12.63 | 0.08 |
instruction | latency | throughput |
---|---|---|
nop | - | 8.00 |
mov (x -> x) | 0.13 | 8.00 |
mov (x -> x; chain) | - | 7.90 |
mov (v.b -> v.b) | 1.75 | 8.00 |
mov (v.b -> v.b; chain) | - | 0.57 |
mov / movz (imm; 0x00) | - | 8.00 |
mov / movz (imm; 0x1ffc) | - | 7.65 |
mov / movz (imm; 0x1ffc<<16) | - | 7.65 |
mov (mask imm; 0x1ffffffffffc) | - | 7.64 |
mov / movn (imm; 0x1ffc) | - | 7.65 |
mov / movn (imm; 0x1ffc<<16) | - | 7.66 |
movk (0x00) | - | 6.00 |
movk (0x1ffc) | - | 6.00 |
movk (0x1ffc<<16) | - | 6.00 |
eor (reg; clearing idiom) | 1.00 | 6.00 |
sub (reg; clearing idiom) | 1.00 | 6.00 |
eor.b (clearing idiom) | 2.00 | 4.00 |
sub.b (clearing idiom) | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
add (reg) | 1.00 | 6.01 |
add (reg<<2) | 2.00 | 3.00 |
add (reg<<17) | 2.00 | 3.00 |
add (reg>>17) | 2.00 | 3.00 |
add (reg>>17; signed) | 2.00 | 3.00 |
add (imm) | 1.00 | 6.00 |
add (imm<<12) | 1.00 | 6.00 |
adds (reg) | 1.00 | 3.00 |
adds (reg<<2) | 2.00 | 1.50 |
adds (reg<<17) | 2.00 | 1.50 |
adds (reg>>17) | 2.00 | 1.50 |
adds (imm) | 1.00 | 3.00 |
adds (imm<<12) | 1.00 | 3.00 |
adc | 1.00 | 3.00 |
adcs | 1.00 | 2.63 |
sub (reg) | 1.00 | 6.00 |
sub (reg<<2) | 2.00 | 3.00 |
sub (imm) | 1.00 | 6.01 |
sub (imm<<12) | 1.00 | 6.00 |
subs (reg) | 1.00 | 3.00 |
subs (reg<<2) | 2.00 | 1.50 |
subs (imm) | 1.00 | 3.00 |
subs (imm<<12) | 1.00 | 3.00 |
sbc | 1.00 | 3.00 |
sbcs | 1.00 | 2.63 |
sub | - | 6.00 |
adr | - | 2.00 |
adrp | - | 2.00 |
neg (reg) | 1.00 | 6.01 |
neg (reg<<2) | 2.00 | 3.00 |
negs (reg) | 1.00 | 3.00 |
negs (reg<<2) | 2.00 | 1.50 |
ngc | 1.00 | 3.00 |
ngcs | 1.00 | 2.63 |
instruction | latency | throughput |
---|---|---|
mul | 3.00 | 2.00 |
mneg | 3.00 | 2.00 |
madd | 3.00 | 1.00 |
msub | 3.00 | 1.00 |
smull | 3.00 | 2.00 |
smnegl | 3.00 | 2.00 |
smaddl | 3.00 | 1.00 |
smsubl | 3.00 | 1.00 |
smulh | 3.00 | 2.00 |
umull | 3.00 | 2.00 |
umnegl | 3.00 | 2.00 |
umaddl | 3.00 | 1.00 |
umsubl | 3.00 | 1.00 |
umulh | 3.00 | 2.00 |
instruction | latency | throughput |
---|---|---|
sdiv | 7.00 | 0.50 |
udiv | 7.00 | 0.50 |
instruction | latency | throughput |
---|---|---|
sxtb | 1.00 | 6.00 |
sxth | 1.00 | 6.01 |
sxtw | 1.00 | 6.00 |
uxtb | 1.00 | 6.00 |
uxth | 1.00 | 6.00 |
instruction | latency | throughput |
---|---|---|
lsl (reg) / lslv | 1.00 | 6.00 |
lsl (imm) | 1.00 | 6.00 |
lsr (reg) / lsrv | 1.00 | 6.00 |
lsr (imm) | 1.00 | 6.00 |
asr (reg) / asrv | 1.00 | 6.00 |
asr (imm) | 1.00 | 6.00 |
ror (imm) | 1.00 | 6.00 |
ror (reg) / rorv | 1.00 | 6.01 |
extr (imm; >>1) | 1.00 | 6.00 |
extr (imm; >>17) | 1.00 | 6.00 |
bfc | 1.00 | 1.00 |
bfi | 1.00 | 1.00 |
bfm | 1.00 | 1.00 |
bfxil | 1.00 | 1.00 |
sbfm | 1.00 | 6.00 |
sbfx | 1.00 | 6.00 |
sbfiz | 1.00 | 6.00 |
ubfm | 1.00 | 6.00 |
ubfx | 1.00 | 6.01 |
ubfiz | 1.00 | 6.00 |
bic (reg) | 1.00 | 6.00 |
bic (reg<<2) | 2.00 | 3.00 |
bics (reg) | 1.00 | 3.00 |
bics (reg<<2) | 2.00 | 1.50 |
rbit | 1.00 | 6.00 |
rev (rev16) | 1.00 | 6.00 |
rev (rev32) | 1.00 | 6.00 |
rev (rev64) | 1.00 | 6.00 |
clz | 1.00 | 6.00 |
cls | 1.00 | 6.00 |
instruction | latency | throughput |
---|---|---|
and (reg) | 1.00 | 6.00 |
and (reg<<2) | 2.00 | 3.00 |
and (reg<<17) | 2.00 | 3.00 |
and (reg>>17) | 2.00 | 3.00 |
and (reg>>17; signed) | 2.00 | 3.00 |
and (reg>>17; rotate) | 2.00 | 3.00 |
and (mask imm) | 1.00 | 6.00 |
ands (reg) | 1.00 | 3.00 |
ands (reg<<2) | 2.00 | 1.50 |
ands (reg<<17) | 2.00 | 1.50 |
ands (reg>>17) | 2.00 | 1.50 |
ands (reg>>17; signed) | 2.00 | 1.50 |
ands (reg>>17; rotate) | 2.00 | 1.50 |
ands (mask imm) | 1.00 | 3.00 |
orr (reg) | 1.00 | 5.99 |
orr (reg<<2) | 2.00 | 3.00 |
orr (mask imm) | 1.00 | 6.00 |
orn (reg) | 1.00 | 6.01 |
orn (reg<<2) | 2.00 | 3.00 |
eor (reg) | 1.00 | 6.00 |
eor (reg<<2) | 2.00 | 3.00 |
eon (reg) | 1.00 | 6.00 |
eon (reg<<2) | 2.00 | 3.00 |
mvn (reg) | 1.00 | 6.00 |
mvn (reg<<2) | 2.00 | 3.00 |
instruction | latency | throughput |
---|---|---|
ccmn (reg; eq) | 0.99 | 2.66 |
ccmn (reg; lt) | 1.00 | 2.66 |
ccmn (imm; eq) | 0.99 | 2.66 |
ccmn (imm; lt) | 0.99 | 2.66 |
ccmp (reg; eq) | 0.99 | 2.66 |
ccmp (reg; lt) | 1.00 | 2.64 |
ccmp (imm; eq) | 1.00 | 2.65 |
ccmp (imm; lt) | 0.99 | 2.66 |
tst (reg) | 0.99 | 2.67 |
tst (reg<<2) | 1.99 | 1.32 |
tst (imm) | 0.99 | 2.67 |
rmif | 0.99 | 2.66 |
setf8 | 0.99 | 2.64 |
setf16 | 1.00 | 2.65 |
cfinv | 1.00 | 2.65 |
instruction | latency | throughput |
---|---|---|
csinc (eq) | 1.00 | 3.00 |
csinc (lt) | 1.00 | 3.00 |
cinc (eq) | 1.00 | 3.00 |
cinc (lt) | 1.00 | 3.00 |
csinv (eq) | 1.00 | 3.00 |
csinv (lt) | 1.00 | 3.00 |
cset (eq) | - | 3.00 |
cset (lt) | - | 3.00 |
csetm (eq) | - | 3.00 |
csetm (lt) | - | 3.00 |
cinv (eq) | 1.00 | 3.00 |
cinv (lt) | 1.00 | 3.00 |
csneg (eq) | 1.00 | 3.00 |
csneg (lt) | 1.00 | 3.00 |
cneg (eq) | 1.00 | 3.00 |
cneg (lt) | 1.00 | 3.00 |
instruction | latency | throughput |
---|---|---|
crc32x | 3.00 | 1.00 |
crc32cx | 3.00 | 1.00 |
sha1c | 4.99 | 0.25 |
sha1h | 2.00 | 1.00 |
sha1m | 5.01 | 0.25 |
sha1p | 5.00 | 0.25 |
sha1su0 | 2.00 | 1.00 |
sha1su1 | 2.00 | 1.00 |
sha256h | 5.00 | 0.50 |
sha256h2 | 5.00 | 0.50 |
sha256su0 | 2.00 | 1.00 |
sha256su1 | 3.00 | 1.00 |
sha512h | 3.00 | 0.50 |
sha512h2 | 3.00 | 0.50 |
sha512su0 | 2.00 | 1.00 |
sha512su1 | 2.00 | 1.00 |
aese | 3.00 | 4.01 |
aesd | 3.00 | 4.01 |
aesmc | 2.00 | 4.01 |
aesimc | 2.00 | 4.01 |
sm3partw1 | n/a | n/a |
sm3partw2 | n/a | n/a |
sm3ss1 | n/a | n/a |
sm3tt1a ([0]) | n/a | n/a |
sm3tt1a ([3]) | n/a | n/a |
sm3tt1b ([0]) | n/a | n/a |
sm3tt1b ([3]) | n/a | n/a |
sm3tt2a ([0]) | n/a | n/a |
sm3tt2a ([3]) | n/a | n/a |
sm3tt2b ([0]) | n/a | n/a |
sm3tt2b ([3]) | n/a | n/a |
sm4e | n/a | n/a |
sm4ekey | n/a | n/a |
instruction | latency | throughput |
---|---|---|
casal | 21.06 | 0.05 |
caspal | 18.02 | 0.06 |
casalb | 21.05 | 0.05 |
casalh | 21.06 | 0.05 |
ldaddal | 21.99 | 0.05 |
ldaddalb | 22.00 | 0.05 |
ldaddalh | 22.00 | 0.05 |
ldclral | 21.99 | 0.05 |
ldclralb | 22.00 | 0.05 |
ldclralh | 22.00 | 0.05 |
ldsetal | 21.99 | 0.05 |
ldsetalb | 21.99 | 0.05 |
ldsetalh | 22.00 | 0.05 |
ldeoral | 21.99 | 0.05 |
ldeoralb | 21.99 | 0.05 |
ldeoralh | 22.00 | 0.05 |
ldumaxal | 20.98 | 0.05 |
ldumaxalb | 21.00 | 0.05 |
ldumaxalh | 20.99 | 0.05 |
ldsmaxal | 20.99 | 0.05 |
ldsmaxalb | 20.99 | 0.05 |
ldsmaxalh | 21.00 | 0.05 |
lduminal | 20.99 | 0.05 |
lduminalb | 21.00 | 0.05 |
lduminalh | 20.99 | 0.05 |
ldsminal | 21.00 | 0.05 |
ldsminalb | 20.99 | 0.05 |
ldsminalh | 20.99 | 0.05 |
instruction | latency | throughput |
---|---|---|
ldr.q (imm; ofs = 0) | 5.00 | 3.00 |
ldr.q (imm; ofs = 16) | 5.00 | 3.00 |
ldr.q (imm; ofs = 0; unaligned) | 5.00 | 3.00 |
ldr.q (imm; ofs = 16; unaligned) | 5.00 | 3.00 |
ldur.q (imm; ofs = 0) | 5.00 | 3.00 |
ldur.q (imm; ofs = 16) | 5.00 | 3.00 |
ldur.q (imm; ofs = 0; unaligned) | 5.00 | 3.00 |
ldur.q (imm; ofs = 16; unaligned) | 5.00 | 3.00 |
ldp.q (ofs = 0; 1st elem) | 5.00 | 1.50 |
ldp.q (ofs = 0; 2nd elem) | 5.08 | 1.50 |
ldp.q (ofs = 0; unaligned) | 5.08 | 1.50 |
ldnp.q | - | 1.50 |
ld1.b (multi; 1 reg) | 5.00 | 3.00 |
ld1.b (multi; 2 reg) | 5.08 | 1.50 |
ld1.b (multi; 3 reg) | 5.56 | 1.00 |
ld1.b (multi; 4 reg) | 5.99 | 0.75 |
ld1.h (multi; 1 reg) | 5.00 | 3.00 |
ld1.h (multi; 2 reg) | 5.08 | 1.50 |
ld1.h (multi; 3 reg) | 5.56 | 1.00 |
ld1.h (multi; 4 reg) | 6.00 | 0.75 |
ld1.s (multi; 1 reg) | 5.00 | 3.00 |
ld1.s (multi; 2 reg) | 5.08 | 1.50 |
ld1.s (multi; 3 reg) | 5.57 | 1.00 |
ld1.s (multi; 4 reg) | 6.00 | 0.75 |
ld1.d (multi; 1 reg) | 4.99 | 3.00 |
ld1.d (multi; 2 reg) | 5.09 | 1.50 |
ld1.d (multi; 3 reg) | 5.56 | 1.00 |
ld1.d (multi; 4 reg) | 5.99 | 0.75 |
ld2.b (multi) | 7.00 | 1.00 |
ld3.b (multi) | 7.46 | 1.00 |
ld4.b (multi) | 10.19 | 0.50 |
ld2.h (multi) | 7.00 | 1.00 |
ld3.h (multi) | 7.47 | 1.00 |
ld4.h (multi) | 10.19 | 0.50 |
ld2.s (multi) | 7.00 | 1.00 |
ld3.s (multi) | 7.46 | 1.00 |
ld4.s (multi) | 10.19 | 0.50 |
ld2.d (multi) | 7.01 | 1.00 |
ld3.d (multi) | 7.47 | 1.00 |
ld4.d (multi) | 10.19 | 0.50 |
ld1.b (single; [15]) | 7.00 | 3.00 |
ld2.b (single; [15]) | 6.99 | 1.99 |
ld3.b (single; [15]) | 7.47 | 1.00 |
ld4.b (single; [15]) | 7.30 | 0.96 |
ld1.h (single; [7]) | 6.99 | 3.00 |
ld2.h (single; [7]) | 7.00 | 1.99 |
ld3.h (single; [7]) | 7.48 | 1.00 |
ld4.h (single; [7]) | 7.32 | 0.98 |
ld1.s (single; [3]) | 7.00 | 3.00 |
ld2.s (single; [3]) | 7.00 | 2.00 |
ld3.s (single; [3]) | 7.47 | 1.00 |
ld4.s (single; [3]) | 7.30 | 0.97 |
ld1.d (single; [1]) | 7.00 | 3.00 |
ld2.d (single; [1]) | 6.99 | 2.00 |
ld3.d (single; [1]) | 7.47 | 1.00 |
ld4.d (single; [1]) | 7.80 | 1.00 |
ld1r.b | 7.00 | 3.00 |
ld2r.b | 7.00 | 2.00 |
ld3r.b | 7.47 | 1.00 |
ld4r.b | 7.30 | 1.00 |
ld1r.h | 7.00 | 3.00 |
ld2r.h | 6.99 | 2.00 |
ld3r.h | 7.47 | 1.00 |
ld4r.h | 7.31 | 1.00 |
ld1r.s | 7.00 | 3.00 |
ld2r.s | 7.00 | 2.00 |
ld3r.s | 7.47 | 1.00 |
ld4r.s | 7.30 | 1.00 |
ld1r.d | 6.99 | 3.00 |
ld2r.d | 6.99 | 2.00 |
ld3r.d | 7.47 | 1.00 |
ld4r.d | 7.79 | 1.00 |
instruction | latency | throughput |
---|---|---|
str.q (imm; ofs = 0) | - | 2.00 |
str.q (imm; ofs = 16) | - | 2.00 |
str.q (imm; pre, ofs = 0) | - | 2.00 |
str.q (imm; pre, ofs = 16) | - | 2.00 |
str.q (imm; post, ofs = 0) | - | 2.00 |
str.q (imm; post, ofs = 16) | - | 2.00 |
str.q (imm; ofs = 0; unaligned) | - | 2.00 |
str.q (imm; ofs = 16; unaligned) | - | 2.00 |
str.q (imm; pre, ofs = 0; unaligned) | - | 2.00 |
str.q (imm; pre, ofs = 16; unaligned) | - | 2.00 |
str.q (imm; post, ofs = 0; unaligned) | - | 2.00 |
str.q (imm; post, ofs = 16; unaligned) | - | 2.00 |
stur.q (imm; ofs = 0) | - | 2.00 |
stp.q (ofs = 0) | - | 1.00 |
st1.b (multi) | - | 2.00 |
st2.b (multi) | - | 1.00 |
st3.b (multi) | - | 0.67 |
st4.b (multi) | - | 0.47 |
st1.h (multi) | - | 2.00 |
st2.h (multi) | - | 1.00 |
st3.h (multi) | - | 0.67 |
st4.h (multi) | - | 0.47 |
st1.s (multi) | - | 2.00 |
st2.s (multi) | - | 1.00 |
st3.s (multi) | - | 0.67 |
st4.s (multi) | - | 0.47 |
st1.d (multi) | - | 2.00 |
st2.d (multi) | - | 1.00 |
st3.d (multi) | - | 0.67 |
st4.d (multi) | - | 0.47 |
st1.b (single; [15]) | - | 2.00 |
st2.b (single; [15]) | - | 2.00 |
st3.b (single; [15]) | - | 2.00 |
st4.b (single; [15]) | - | 1.87 |
st1.h (single; [7]) | - | 2.00 |
st2.h (single; [7]) | - | 2.00 |
st3.h (single; [7]) | - | 2.00 |
st4.h (single; [7]) | - | 1.86 |
st1.s (single; [3]) | - | 2.00 |
st2.s (single; [3]) | - | 2.00 |
st3.s (single; [3]) | - | 2.00 |
st4.s (single; [3]) | - | 1.86 |
st1.d (single; [1]) | - | 2.00 |
st2.d (single; [1]) | - | 2.00 |
st3.d (single; [1]) | - | 1.00 |
st4.d (single; [1]) | - | 1.00 |
instruction | latency | throughput |
---|---|---|
str.q -> ldr.q (ofs = 0 -> ofs = 0) | 6.25 | 1.57 |
str.q -> ldr.q (ofs = 0 -> ofs = 1) | 6.27 | 1.58 |
str.q -> ldr.q (ofs = 1 -> ofs = 0) | 6.27 | 1.61 |
str.q -> ldr.q (ofs = 1 -> ofs = 1) | 6.26 | 1.59 |
stp.s -> ldr.q (ofs = 0) | 13.32 | 0.98 |
stp.d -> ldr.q (ofs = 0) | 13.48 | 0.99 |
stp.q -> ldr.q (ofs = 0 -> 1st reg) | 6.50 | 0.74 |
stp.q -> ldr.q (ofs = 16 -> 2nd reg) | 6.49 | 0.64 |
stp.q -> ldr.q (false dep. ofs = 0 -> 2nd reg) | 1.34 | 0.43 |
stp.q -> ldr.q (false dep. ofs = 16 -> 1st reg) | 1.34 | 0.43 |
st1.b (single) -> ldr.q (ofs = 0) | 13.31 | 0.99 |
st2.b (single) -> ldr.q (ofs = 0) | 13.29 | 0.76 |
st3.b (single) -> ldr.q (ofs = 0) | 13.31 | 0.54 |
st4.b (single) -> ldr.q (ofs = 0) | 14.09 | 0.40 |
st1.h (single) -> ldr.q (ofs = 0) | 13.31 | 0.99 |
st2.h (single) -> ldr.q (ofs = 0) | 13.36 | 0.76 |
st3.h (single) -> ldr.q (ofs = 0) | 13.26 | 0.54 |
st4.h (single) -> ldr.q (ofs = 0) | 14.12 | 0.40 |
st1.s (single) -> ldr.q (ofs = 0) | 13.28 | 0.99 |
st2.s (single) -> ldr.q (ofs = 0) | 13.32 | 0.76 |
st3.s (single) -> ldr.q (ofs = 0) | 13.26 | 0.54 |
st4.s (single) -> ldr.q (ofs = 0) | 14.09 | 0.45 |
st1.d (single) -> ldr.q (ofs = 0) | 13.37 | 0.99 |
st2.d (single) -> ldr.q (ofs = 0) | 13.48 | 0.80 |
st3.d (single) -> ldr.q (ofs = 0) | 12.61 | 0.51 |
st4.d (single) -> ldr.q (ofs = 0) | 11.86 | 0.50 |
st1.b (multi; 1 reg) -> ldr.q (1st reg) | 6.25 | 1.60 |
st1.b (multi; 2 regs) -> ldr.q (2nd reg) | 6.49 | 0.73 |
st1.b (multi; 3 regs) -> ldr.q (3rd reg) | 5.90 | 0.45 |
st1.b (multi; 4 regs) -> ldr.q (4th reg) | 5.66 | 0.38 |
st1.b (multi; 2 regs) -> ldr.q (false dep. ofs = 16 -> 1st reg) | 1.40 | 0.73 |
st1.b (multi; 3 regs) -> ldr.q (false dep. ofs = 32 -> 1st reg) | 2.85 | 0.33 |
st1.b (multi; 4 regs) -> ldr.q (false dep. ofs = 48 -> 1st reg) | 2.34 | 0.33 |
st1.b (multi; 2 regs) -> ldr.q (false dep. ofs = 0 -> 2nd reg) | 1.39 | 0.73 |
st1.b (multi; 3 regs) -> ldr.q (false dep. ofs = 0 -> 3rd reg) | 2.97 | 0.33 |
st1.b (multi; 4 regs) -> ldr.q (false dep. ofs = 0 -> 4th reg) | 2.34 | 0.33 |
st1.b (multi; 2 regs) -> ldr.q (false dep. ofs = 0 -> 2nd reg; unaligned) | 1.34 | 0.75 |
st1.b (multi; 3 regs) -> ldr.q (false dep. ofs = 0 -> 3rd reg; unaligned) | 2.69 | 0.30 |
st1.b (multi; 4 regs) -> ldr.q (false dep. ofs = 0 -> 4th reg; unaligned) | 2.49 | 0.37 |
st1.b (multi; 2 regs) -> ldr.q (false dep. ofs = 0 -> 2nd reg; unaligned; cross-cache) | 5.16 | 0.15 |
st1.b (multi; 3 regs) -> ldr.q (false dep. ofs = 0 -> 3rd reg; unaligned; cross-cache) | 3.34 | 0.32 |
st1.b (multi; 4 regs) -> ldr.q (false dep. ofs = 0 -> 4th reg; unaligned; cross-cache) | 8.66 | 0.09 |
st2.b (multi) -> ldr.q (ofs = 16) | 12.72 | 0.62 |
st3.b (multi) -> ldr.q (ofs = 32) | 13.11 | 0.49 |
st4.b (multi) -> ldr.q (ofs = 48) | 14.63 | 0.34 |
st2.h (multi) -> ldr.q (ofs = 16) | 12.72 | 0.62 |
st3.h (multi) -> ldr.q (ofs = 32) | 13.23 | 0.49 |
st4.h (multi) -> ldr.q (ofs = 48) | 14.59 | 0.34 |
st2.s (multi) -> ldr.q (ofs = 16) | 12.75 | 0.62 |
st3.s (multi) -> ldr.q (ofs = 32) | 13.26 | 0.49 |
st4.s (multi) -> ldr.q (ofs = 48) | 14.54 | 0.34 |
st2.d (multi) -> ldr.q (ofs = 16) | 12.81 | 0.62 |
st3.d (multi) -> ldr.q (ofs = 32) | 13.08 | 0.49 |
st4.d (multi) -> ldr.q (ofs = 48) | 14.57 | 0.34 |
instruction | latency | throughput |
---|---|---|
movi.b (0x00) | - | 8.01 |
movi.h (0x00) | - | 8.00 |
movi.h (0x00<<8) | - | 8.00 |
movi.s (0x00) | - | 8.00 |
movi.s (0x00<<8) | - | 8.00 |
movi.b (0xff) | - | 4.00 |
movi.h (0xff) | - | 4.00 |
movi.h (0xff<<8) | - | 4.01 |
movi.s (0xff) | - | 4.01 |
movi.s (0xff<<8) | - | 4.00 |
mvni.h (0x00) | - | 4.00 |
mvni.s (0x00) | - | 4.00 |
mvni.h (0x11) | - | 4.00 |
mvni.s (0x11) | - | 4.00 |
mvni.h (0x11<<8) | - | 4.00 |
mvni.s (0x11<<8) | - | 4.01 |
mov.s (v.s[0] <-> w) | 12.00 | - |
mov.d (v.d[0] <-> x) | 12.00 | - |
mov.s (v.s[3] <-> w) | 12.00 | - |
mov.d (v.d[1] <-> x) | 11.99 | - |
smov.b (v.b[0] <-> x) | 12.00 | - |
smov.h (v.h[0] <-> x) | 12.00 | - |
smov.s (v.s[0] <-> x) | 12.00 | - |
umov.b (v.b[0] <-> w) | 11.99 | - |
umov.h (v.h[0] <-> w) | 12.00 | - |
umov.s (v.s[0] <-> w) | 12.00 | - |
umov.d (v.d[0] <-> x) | 12.00 | - |
fmov.d (v.d[0] <-> x) | 12.00 | - |
dup.b (vec; lane = 0) | 2.00 | 4.01 |
dup.b (vec; lane = 15) | 2.00 | 4.01 |
dup.h (vec; lane = 7) | 2.00 | 4.00 |
dup.s (vec; lane = 3) | 2.00 | 4.00 |
dup.d (vec; lane = 1) | 2.00 | 4.00 |
dup.b (elem) | 6.00 | 3.00 |
dup.h (elem) | 6.00 | 3.00 |
dup.s (elem) | 6.00 | 3.00 |
dup.d (elem) | 6.00 | 3.00 |
xtn.h | 2.00 | 4.01 |
xtn.s | 2.00 | 4.00 |
xtn.d | 2.00 | 4.00 |
sqxtn.h (scl) | 3.00 | 4.00 |
sqxtn.s (scl) | 3.00 | 4.00 |
sqxtn.d (scl) | 3.00 | 4.00 |
sqxtn.h (vec) | 3.00 | 4.00 |
sqxtn.s (vec) | 3.00 | 4.00 |
sqxtn.d (vec) | 3.00 | 4.00 |
sqxtn2.h (vec) | 3.00 | 4.00 |
sqxtn2.s (vec) | 3.00 | 4.00 |
sqxtn2.d (vec) | 3.00 | 4.01 |
uqxtn.h (scl) | 3.00 | 4.00 |
uqxtn.s (scl) | 3.00 | 4.00 |
uqxtn.d (scl) | 3.00 | 4.01 |
uqxtn.h (vec) | 3.00 | 4.00 |
uqxtn.s (vec) | 3.00 | 4.00 |
uqxtn.d (vec) | 3.00 | 4.00 |
uqxtn2.h (vec) | 3.00 | 4.00 |
uqxtn2.s (vec) | 3.00 | 4.00 |
uqxtn2.d (vec) | 3.00 | 4.00 |
sqxtun.h (scl) | 3.00 | 4.00 |
sqxtun.s (scl) | 3.00 | 4.01 |
sqxtun.d (scl) | 3.00 | 4.00 |
sqxtun.h (vec) | 3.00 | 4.00 |
sqxtun.s (vec) | 3.00 | 4.00 |
sqxtun.d (vec) | 3.00 | 4.00 |
sqxtun2.h (vec) | 3.00 | 4.01 |
sqxtun2.s (vec) | 3.00 | 4.00 |
sqxtun2.d (vec) | 3.00 | 4.00 |
sxtl.b (vec) | 2.00 | 4.00 |
sxtl.h (vec) | 2.00 | 4.00 |
sxtl.s (vec) | 2.00 | 4.00 |
sxtl2.b (vec) | 2.00 | 4.00 |
sxtl2.h (vec) | 2.00 | 4.00 |
sxtl2.s (vec) | 2.00 | 4.00 |
uxtl.b (vec) | 2.00 | 4.00 |
uxtl.h (vec) | 2.00 | 4.00 |
uxtl.s (vec) | 2.00 | 4.00 |
uxtl2.b (vec) | 2.00 | 4.00 |
uxtl2.h (vec) | 2.00 | 4.00 |
uxtl2.s (vec) | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
add.b | 2.00 | 4.00 |
add.h | 2.00 | 4.00 |
add.s | 2.00 | 4.00 |
add.d | 2.00 | 4.00 |
sqadd.b | 3.00 | 4.00 |
sqadd.h | 3.00 | 4.00 |
sqadd.s | 3.00 | 4.00 |
sqadd.d | 3.00 | 4.00 |
uqadd.b | 3.00 | 4.00 |
uqadd.h | 3.00 | 4.01 |
uqadd.s | 3.00 | 4.00 |
uqadd.d | 3.00 | 4.00 |
suqadd.b (scl) | 3.00 | 4.00 |
suqadd.h (scl) | 3.00 | 4.00 |
suqadd.s (scl) | 3.00 | 4.00 |
suqadd.d (scl) | 3.00 | 4.01 |
suqadd.b (vec) | 3.00 | 4.00 |
suqadd.h (vec) | 3.00 | 4.01 |
suqadd.s (vec) | 3.00 | 4.01 |
suqadd.d (vec) | 3.00 | 4.00 |
usqadd.b (scl) | 3.00 | 4.01 |
usqadd.h (scl) | 3.00 | 4.00 |
usqadd.s (scl) | 3.00 | 4.00 |
usqadd.d (scl) | 3.00 | 4.00 |
usqadd.b (vec) | 3.00 | 4.00 |
usqadd.h (vec) | 3.00 | 4.00 |
usqadd.s (vec) | 3.00 | 4.01 |
usqadd.d (vec) | 3.00 | 4.00 |
sub.b | 2.00 | 4.01 |
sub.h | 2.00 | 4.00 |
sub.s | 2.00 | 4.00 |
sub.d | 2.00 | 4.01 |
sqsub.b | 3.00 | 4.00 |
sqsub.h | 3.00 | 4.01 |
sqsub.s | 3.00 | 4.00 |
sqsub.d | 3.00 | 4.00 |
uqsub.b | 3.00 | 4.00 |
uqsub.h | 3.00 | 4.00 |
uqsub.s | 3.00 | 4.01 |
uqsub.d | 3.00 | 4.01 |
abs.b | 3.00 | 4.00 |
abs.h | 3.00 | 4.00 |
abs.s | 3.00 | 4.00 |
abs.d | 3.00 | 4.00 |
sqabs.b | 3.00 | 4.00 |
sqabs.h | 3.00 | 4.00 |
sqabs.s | 3.00 | 4.01 |
sqabs.d | 3.00 | 4.00 |
neg.b | 2.00 | 4.01 |
neg.h | 2.00 | 4.00 |
neg.s | 2.00 | 4.00 |
neg.d | 2.00 | 4.01 |
sqneg.b | 3.00 | 4.00 |
sqneg.h | 3.00 | 4.00 |
sqneg.s | 3.00 | 4.00 |
sqneg.d | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
saddl.b | 2.00 | 4.00 |
saddl.h | 2.00 | 4.00 |
saddl.s | 2.00 | 4.00 |
saddl2.b | 2.00 | 4.00 |
saddl2.h | 2.00 | 4.00 |
saddl2.s | 2.00 | 4.00 |
ssubl.b | 2.00 | 4.00 |
ssubl.h | 2.00 | 4.00 |
ssubl.s | 2.00 | 4.00 |
ssubl2.b | 2.00 | 4.00 |
ssubl2.h | 2.00 | 4.00 |
ssubl2.s | 2.00 | 4.00 |
usubl.b | 2.00 | 4.00 |
usubl.h | 2.00 | 4.00 |
usubl.s | 2.00 | 4.00 |
usubl2.b | 2.00 | 4.00 |
usubl2.h | 2.00 | 4.00 |
usubl2.s | 2.00 | 4.00 |
saddlp.b | 2.00 | 4.00 |
saddlp.h | 2.00 | 4.00 |
saddlp.s | 2.00 | 4.00 |
saddlv.b | 3.00 | 4.00 |
saddlv.h | 3.00 | 4.01 |
saddlv.s | 3.00 | 4.00 |
saddw.h | 2.00 | 4.00 |
saddw.s | 2.00 | 4.01 |
saddw.d | 2.00 | 4.00 |
saddw2.h | 2.00 | 4.01 |
saddw2.s | 2.00 | 4.00 |
saddw2.d | 2.00 | 4.00 |
uaddw.h | 2.00 | 4.00 |
uaddw.s | 2.00 | 4.00 |
uaddw.d | 2.00 | 4.00 |
uaddw2.h | 2.00 | 4.00 |
uaddw2.s | 2.00 | 4.00 |
uaddw2.d | 2.00 | 4.00 |
ssubw.h | 2.00 | 4.00 |
ssubw.s | 2.00 | 4.01 |
ssubw.d | 2.00 | 4.00 |
ssubw2.h | 2.00 | 4.00 |
ssubw2.s | 2.00 | 4.00 |
ssubw2.d | 2.00 | 4.00 |
usubw.h | 2.00 | 4.00 |
usubw.s | 2.00 | 4.00 |
usubw.d | 2.00 | 4.00 |
usubw2.h | 2.00 | 4.00 |
usubw2.s | 2.00 | 4.01 |
usubw2.d | 2.00 | 4.00 |
addhn.h | 3.00 | 4.01 |
addhn.s | 3.00 | 4.01 |
addhn.d | 3.00 | 4.00 |
addhn2.h | 3.00 | 4.00 |
addhn2.s | 3.00 | 4.00 |
addhn2.d | 3.00 | 4.01 |
subhn.h | 3.00 | 4.00 |
subhn.s | 3.00 | 4.00 |
subhn.d | 3.00 | 4.00 |
subhn2.h | 3.00 | 4.00 |
subhn2.s | 3.00 | 4.00 |
subhn2.d | 3.00 | 4.00 |
raddhn.h | 3.00 | 4.00 |
raddhn.s | 3.00 | 4.00 |
raddhn.d | 3.00 | 4.00 |
raddhn2.h | 3.00 | 4.00 |
raddhn2.s | 3.00 | 4.00 |
raddhn2.d | 3.00 | 4.00 |
rsubhn.h | 3.00 | 4.00 |
rsubhn.s | 3.00 | 4.01 |
rsubhn.d | 3.00 | 4.00 |
rsubhn2.h | 3.00 | 4.00 |
rsubhn2.s | 3.00 | 4.00 |
rsubhn2.d | 3.00 | 4.00 |
shadd.b | 2.00 | 4.00 |
shadd.h | 2.00 | 4.00 |
shadd.s | 2.00 | 4.00 |
shsub.b | 2.00 | 4.00 |
shsub.h | 2.00 | 4.00 |
shsub.s | 2.00 | 4.00 |
uhadd.b | 2.00 | 4.01 |
uhadd.h | 2.00 | 4.00 |
uhadd.s | 2.00 | 4.00 |
uhsub.b | 2.00 | 4.00 |
uhsub.h | 2.00 | 4.00 |
uhsub.s | 2.00 | 4.00 |
srhadd.b | 2.00 | 4.00 |
srhadd.h | 2.00 | 4.00 |
srhadd.s | 2.00 | 4.00 |
urhadd.b | 2.00 | 4.00 |
urhadd.h | 2.00 | 4.00 |
urhadd.s | 2.00 | 4.00 |
addp.b | 2.00 | 4.00 |
addp.h | 2.00 | 4.00 |
addp.s | 2.00 | 4.00 |
addp.d | 2.00 | 4.00 |
addv.b | 3.00 | 4.00 |
addv.h | 3.00 | 4.00 |
addv.s | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
smax.b | 2.00 | 4.00 |
smax.h | 2.00 | 4.00 |
smax.s | 2.00 | 4.00 |
smin.b | 2.00 | 4.00 |
smin.h | 2.00 | 4.00 |
smin.s | 2.00 | 4.01 |
smaxp.b | 2.00 | 4.00 |
smaxp.h | 2.00 | 4.00 |
smaxp.s | 2.00 | 4.00 |
sminp.b | 2.00 | 4.00 |
sminp.h | 2.00 | 4.00 |
sminp.s | 2.00 | 4.00 |
smaxv.b | 3.00 | 4.00 |
smaxv.h | 3.00 | 4.01 |
smaxv.s | 3.00 | 4.01 |
sminv.b | 3.00 | 4.00 |
sminv.h | 3.00 | 4.00 |
sminv.s | 3.00 | 4.00 |
umax.b | 2.00 | 4.00 |
umax.h | 2.00 | 4.00 |
umax.s | 2.00 | 4.00 |
umin.b | 2.00 | 4.00 |
umin.h | 2.00 | 4.00 |
umin.s | 2.00 | 4.00 |
umaxp.b | 2.00 | 4.00 |
umaxp.h | 2.00 | 4.00 |
umaxp.s | 2.00 | 4.00 |
uminp.b | 2.00 | 4.01 |
uminp.h | 2.00 | 4.00 |
uminp.s | 2.00 | 4.00 |
umaxv.b | 3.00 | 4.00 |
umaxv.h | 3.00 | 4.00 |
umaxv.s | 3.00 | 4.00 |
uminv.b | 3.00 | 4.00 |
uminv.h | 3.00 | 4.00 |
uminv.s | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
sabd.b | 3.00 | 4.00 |
sabd.h | 3.00 | 4.00 |
sabd.s | 3.00 | 4.00 |
uabd.b | 3.00 | 4.00 |
uabd.h | 3.00 | 4.00 |
uabd.s | 3.00 | 4.00 |
sabdl.b | 3.00 | 4.00 |
sabdl.h | 3.00 | 4.00 |
sabdl.s | 3.00 | 4.00 |
sabdl2.b | 3.00 | 4.01 |
sabdl2.h | 3.00 | 4.00 |
sabdl2.s | 3.00 | 4.00 |
uabdl.b | 3.00 | 4.01 |
uabdl.h | 3.00 | 4.00 |
uabdl.s | 3.00 | 4.00 |
uabdl2.b | 3.00 | 4.00 |
uabdl2.h | 3.00 | 4.00 |
uabdl2.s | 3.00 | 4.01 |
instruction | latency | throughput |
---|---|---|
pmul.b | 3.00 | 4.00 |
pmull.b | 3.00 | 4.00 |
pmull.d | 3.00 | 4.00 |
pmull2.d | 6.00 | 2.00 |
pmull2.d | 5.00 | 2.00 |
mul.b (vec) | 3.00 | 4.01 |
mul.h (vec) | 3.00 | 4.00 |
mul.s (vec) | 3.00 | 4.00 |
mul.h (elem; [0]) | 3.00 | 4.00 |
mul.h (elem; [7]) | 3.00 | 4.00 |
mul.s (elem; [0]) | 3.00 | 4.00 |
mul.s (elem; [3]) | 3.00 | 4.01 |
smull.b (vec) | 3.00 | 4.00 |
smull.h (vec) | 3.00 | 4.01 |
smull.s (vec) | 3.00 | 4.00 |
smull2.b (vec) | 3.00 | 4.00 |
smull2.h (vec) | 3.00 | 4.00 |
smull2.s (vec) | 3.00 | 4.00 |
smull.h (elem; [0]) | 3.00 | 4.01 |
smull.h (elem; [7]) | 3.00 | 4.00 |
smull.s (elem; [0]) | 3.00 | 4.00 |
smull.s (elem; [3]) | 3.00 | 4.00 |
smull2.h (elem; [0]) | 3.00 | 4.01 |
smull2.h (elem; [7]) | 3.00 | 4.00 |
smull2.s (elem; [0]) | 3.00 | 4.00 |
smull2.s (elem; [3]) | 3.00 | 4.00 |
umull.b (vec) | 3.00 | 4.00 |
umull.h (vec) | 3.00 | 4.00 |
umull.s (vec) | 3.00 | 4.00 |
umull2.b (vec) | 3.00 | 4.00 |
umull2.h (vec) | 3.00 | 4.00 |
umull2.s (vec) | 3.00 | 4.00 |
umull.h (elem; [0]) | 3.00 | 4.00 |
umull.h (elem; [7]) | 3.00 | 4.00 |
umull.s (elem; [0]) | 3.00 | 4.00 |
umull.s (elem; [3]) | 3.00 | 4.00 |
umull2.h (elem; [0]) | 3.00 | 4.00 |
umull2.h (elem; [7]) | 3.00 | 4.01 |
umull2.s (elem; [0]) | 3.00 | 4.00 |
umull2.s (elem; [3]) | 3.00 | 4.00 |
sqdmull.h (vec) | 3.00 | 4.01 |
sqdmull.s (vec) | 3.00 | 4.00 |
sqdmull2.h (vec) | 3.00 | 4.00 |
sqdmull2.s (vec) | 3.00 | 4.00 |
sqdmull.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmull.h (elem; v.h[7]) | 3.01 | 4.00 |
sqdmull.s (elem; v.s[0]) | 3.00 | 4.00 |
sqdmull.s (elem; v.s[3]) | 3.00 | 4.00 |
sqdmull2.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmull2.h (elem; v.h[7]) | 3.00 | 4.00 |
sqdmull2.s (elem; v.s[0]) | 3.00 | 4.00 |
sqdmull2.s (elem; v.s[3]) | 3.00 | 4.00 |
sqdmulh.h (vec) | 3.00 | 4.00 |
sqdmulh.s (vec) | 3.00 | 4.00 |
sqdmulh.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmulh.h (elem; v.h[7]) | 3.00 | 4.00 |
sqdmulh.s (elem; v.s[0]) | 3.00 | 4.00 |
sqdmulh.s (elem; v.s[3]) | 3.00 | 4.00 |
sqrdmulh.h (vec) | 3.00 | 4.01 |
sqrdmulh.s (vec) | 3.00 | 4.00 |
sqrdmulh.h (elem; v.h[0]) | 3.00 | 4.00 |
sqrdmulh.h (elem; v.h[7]) | 3.00 | 4.00 |
sqrdmulh.s (elem; v.s[0]) | 3.00 | 4.00 |
sqrdmulh.s (elem; v.s[3]) | 3.00 | 4.00 |
pmul.b | 3.00 | 4.01 |
pmull.b | 3.00 | 4.00 |
pmull.d | 3.00 | 4.00 |
pmull2.b | 3.00 | 4.00 |
pmull2.d | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
mla.b (vec) | 3.00 | 4.00 |
mla.h (vec) | 3.00 | 4.00 |
mla.s (vec) | 3.00 | 4.00 |
mla.b (vec; acc. fwd.) | 3.00 | - |
mla.h (vec; acc. fwd.) | 3.00 | - |
mla.s (vec; acc. fwd.) | 3.00 | - |
mla.h (elem; [0]) | 3.00 | 4.00 |
mla.h (elem; [7]) | 3.00 | 4.00 |
mla.s (elem; [0]) | 3.00 | 4.00 |
mla.s (elem; [3]) | 3.00 | 4.00 |
smlal.h (vec) | 3.00 | 4.00 |
smlal.s (vec) | 3.00 | 4.01 |
smlal2.h (vec) | 3.00 | 4.00 |
smlal2.s (vec) | 3.00 | 4.00 |
smlal.h (vec; acc. fwd.) | 3.00 | - |
smlal.s (vec; acc. fwd.) | 3.00 | - |
smlal2.h (vec; acc. fwd.) | 3.00 | - |
smlal2.s (vec; acc. fwd.) | 3.00 | - |
smlal.h (elem; v.h[0]) | 3.00 | 4.00 |
smlal.h (elem; v.h[7]) | 3.00 | 4.00 |
smlal.s (elem; v.s[0]) | 3.00 | 4.00 |
smlal.s (elem; v.s[3]) | 3.00 | 4.01 |
smlal2.h (elem; v.h[0]) | 3.00 | 4.00 |
smlal2.h (elem; v.h[7]) | 3.00 | 4.00 |
smlal2.s (elem; v.s[0]) | 3.00 | 4.00 |
smlal2.s (elem; v.s[3]) | 3.00 | 4.00 |
umlal.h (vec) | 3.00 | 4.00 |
umlal.s (vec) | 3.00 | 4.00 |
umlal2.h (vec) | 3.00 | 4.01 |
umlal2.s (vec) | 3.00 | 4.00 |
umlal.h (vec; acc. fwd.) | 3.00 | - |
umlal.s (vec; acc. fwd.) | 3.00 | - |
umlal2.h (vec; acc. fwd.) | 3.00 | - |
umlal2.s (vec; acc. fwd.) | 3.00 | - |
umlal.h (elem; v.h[0]) | 3.00 | 4.00 |
umlal.h (elem; v.h[7]) | 3.00 | 4.00 |
umlal.s (elem; v.s[0]) | 3.00 | 4.00 |
umlal.s (elem; v.s[3]) | 3.00 | 4.01 |
umlal2.h (elem; v.h[0]) | 3.00 | 4.00 |
umlal2.h (elem; v.h[7]) | 3.00 | 4.00 |
umlal2.s (elem; v.s[0]) | 3.00 | 4.00 |
umlal2.s (elem; v.s[3]) | 3.00 | 4.00 |
sqdmlal.h (vec) | 3.00 | 4.00 |
sqdmlal.s (vec) | 3.00 | 4.00 |
sqdmlal2.h (vec) | 3.00 | 4.00 |
sqdmlal2.s (vec) | 3.00 | 4.00 |
sqdmlal.h (vec; acc. fwd.) | 3.00 | - |
sqdmlal.s (vec; acc. fwd.) | 3.00 | - |
sqdmlal2.h (vec; acc. fwd.) | 3.00 | - |
sqdmlal2.s (vec; acc. fwd.) | 3.00 | - |
sqdmlal.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmlal.h (elem; v.h[7]) | 3.00 | 4.00 |
sqdmlal.s (elem; v.s[0]) | 3.00 | 4.00 |
sqdmlal.s (elem; v.s[3]) | 3.00 | 4.00 |
sqdmlal2.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmlal2.h (elem; v.h[7]) | 3.00 | 4.00 |
sqdmlal2.s (elem; v.s[0]) | 3.00 | 4.00 |
sqdmlal2.s (elem; v.s[3]) | 3.00 | 4.00 |
sqrdmlah.h (vec) | 3.00 | 4.00 |
sqrdmlah.s (vec) | 3.00 | 4.00 |
sqrdmlah.h (vec; acc. fwd.) | 3.00 | - |
sqrdmlah.s (vec; acc. fwd.) | 3.00 | - |
sqrdmlah.h (elem; v.h[0]) | 3.00 | 4.00 |
sqrdmlah.h (elem; v.h[7]) | 3.00 | 4.00 |
sqrdmlah.s (elem; v.s[0]) | 3.00 | 4.00 |
sqrdmlah.s (elem; v.s[3]) | 3.00 | 4.00 |
mls.b (vec) | 3.00 | 4.00 |
mls.h (vec) | 3.00 | 4.00 |
mls.s (vec) | 3.00 | 4.01 |
mls.b (vec; acc. fwd.) | 3.00 | - |
mls.h (vec; acc. fwd.) | 3.00 | - |
mls.s (vec; acc. fwd.) | 3.00 | - |
mls.h (elem; [0]) | 3.00 | 4.00 |
mls.h (elem; [7]) | 3.00 | 4.00 |
mls.s (elem; [0]) | 3.00 | 4.00 |
mls.s (elem; [3]) | 3.00 | 4.01 |
smlsl.h (vec) | 3.00 | 4.01 |
smlsl.s (vec) | 3.00 | 4.00 |
smlsl2.h (vec) | 3.00 | 4.00 |
smlsl2.s (vec) | 3.00 | 4.00 |
smlsl.h (vec; acc. fwd.) | 3.00 | - |
smlsl.s (vec; acc. fwd.) | 3.00 | - |
smlsl2.h (vec; acc. fwd.) | 3.00 | - |
smlsl2.s (vec; acc. fwd.) | 3.00 | - |
smlsl.h (elem; v.h[0]) | 3.00 | 4.00 |
smlsl.h (elem; v.h[7]) | 3.00 | 4.00 |
smlsl.s (elem; v.s[0]) | 3.00 | 4.00 |
smlsl.s (elem; v.s[3]) | 3.00 | 4.00 |
smlsl2.h (elem; v.h[0]) | 3.00 | 4.01 |
smlsl2.h (elem; v.h[7]) | 3.00 | 4.00 |
smlsl2.s (elem; v.s[0]) | 3.00 | 4.00 |
smlsl2.s (elem; v.s[3]) | 3.00 | 4.00 |
umlsl.h (vec) | 3.00 | 4.00 |
umlsl.s (vec) | 3.00 | 4.00 |
umlsl2.h (vec) | 3.00 | 4.01 |
umlsl2.s (vec) | 3.00 | 4.00 |
umlsl.h (vec; acc. fwd.) | 3.00 | - |
umlsl.s (vec; acc. fwd.) | 3.00 | - |
umlsl2.h (vec; acc. fwd.) | 3.00 | - |
umlsl2.s (vec; acc. fwd.) | 3.00 | - |
umlsl.h (elem; v.h[0]) | 3.00 | 4.00 |
umlsl.h (elem; v.h[7]) | 3.00 | 4.01 |
umlsl.s (elem; v.s[0]) | 3.00 | 4.00 |
umlsl.s (elem; v.s[3]) | 3.00 | 4.00 |
umlsl2.h (elem; v.h[0]) | 3.00 | 4.00 |
umlsl2.h (elem; v.h[7]) | 3.00 | 4.00 |
umlsl2.s (elem; v.s[0]) | 3.00 | 4.00 |
umlsl2.s (elem; v.s[3]) | 3.00 | 4.00 |
sqdmlsl.h (vec) | 3.00 | 4.01 |
sqdmlsl.s (vec) | 3.00 | 4.00 |
sqdmlsl2.h (vec) | 3.00 | 4.00 |
sqdmlsl2.s (vec) | 3.00 | 4.00 |
sqdmlsl.h (vec; acc. fwd.) | 3.00 | - |
sqdmlsl.s (vec; acc. fwd.) | 3.00 | - |
sqdmlsl2.h (vec; acc. fwd.) | 3.00 | - |
sqdmlsl2.s (vec; acc. fwd.) | 3.00 | - |
sqdmlsl.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmlsl.h (elem; v.h[7]) | 3.00 | 4.00 |
sqdmlsl.s (elem; v.s[0]) | 3.00 | 4.00 |
sqdmlsl.s (elem; v.s[3]) | 3.00 | 4.00 |
sqdmlsl2.h (elem; v.h[0]) | 3.00 | 4.00 |
sqdmlsl2.h (elem; v.h[7]) | 3.00 | 4.01 |
sqdmlsl2.s (elem; v.s[0]) | 3.00 | 4.01 |
sqdmlsl2.s (elem; v.s[3]) | 3.00 | 4.00 |
sqrdmlsh.h (vec) | 3.00 | 4.01 |
sqrdmlsh.s (vec) | 3.00 | 4.01 |
sqrdmlsh.h (vec; acc. fwd.) | 3.00 | - |
sqrdmlsh.s (vec; acc. fwd.) | 3.00 | - |
sqrdmlsh.h (elem; v.h[0]) | 3.00 | 4.00 |
sqrdmlsh.h (elem; v.h[7]) | 3.00 | 4.00 |
sqrdmlsh.s (elem; v.s[0]) | 3.00 | 4.00 |
sqrdmlsh.s (elem; v.s[3]) | 3.00 | 4.00 |
sdot.b (vec) | 3.00 | 4.00 |
sdot.b (elem; v.b[0]) | 3.00 | 4.00 |
sdot.b (elem; v.b[3]) | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
saba.b | 3.00 | 4.00 |
saba.h | 3.00 | 4.00 |
saba.s | 3.00 | 4.01 |
saba.b (acc. fwd.) | 3.00 | - |
saba.h (acc. fwd.) | 3.00 | - |
saba.s (acc. fwd.) | 3.00 | - |
uaba.b | 3.00 | 4.00 |
uaba.h | 3.00 | 4.00 |
uaba.s | 3.00 | 4.00 |
uaba.b (acc. fwd.) | 3.00 | - |
uaba.h (acc. fwd.) | 3.00 | - |
uaba.s (acc. fwd.) | 3.00 | - |
sabal.b | 3.00 | 4.00 |
sabal.h | 3.00 | 4.00 |
sabal.s | 3.00 | 4.00 |
sabal2.b | 3.00 | 4.00 |
sabal2.h | 3.00 | 4.00 |
sabal2.s | 3.00 | 4.00 |
uabal.b | 3.00 | 4.01 |
uabal.h | 3.00 | 4.01 |
uabal.s | 3.00 | 4.00 |
uabal2.b | 3.00 | 4.01 |
uabal2.h | 3.00 | 4.00 |
uabal2.s | 3.00 | 4.01 |
sadalp.b | 3.00 | 4.00 |
sadalp.h | 3.00 | 4.00 |
sadalp.s | 3.00 | 4.00 |
uadalp.b | 3.00 | 4.00 |
uadalp.h | 3.00 | 4.00 |
uadalp.s | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
urecpe.s | 3.00 | 1.00 |
ursqrte.s | 3.00 | 1.00 |
instruction | latency | throughput |
---|---|---|
and.b | 2.00 | 4.01 |
orr.b | 2.00 | 4.00 |
orr.h | 2.00 | 4.00 |
orr.s | 2.00 | 4.00 |
orr.h | 2.00 | 4.00 |
orr.s | 2.00 | 4.00 |
orn.b | 2.00 | 4.00 |
eor.b | 2.00 | 4.00 |
eor3.b | 2.00 | 4.00 |
not.b | 2.00 | 4.00 |
mvn.b | 2.00 | 4.01 |
instruction | latency | throughput |
---|---|---|
bic.b (reg) | 2.00 | 4.00 |
bic.h (imm) | 2.00 | 4.00 |
bic.h (imm; <<8) | 2.00 | 4.00 |
bic.s (imm) | 2.00 | 4.00 |
bic.s (imm; <<8) | 2.00 | 4.00 |
bif.b | 2.00 | 4.00 |
bit.b | 2.00 | 4.00 |
bsl.b | 2.00 | 4.01 |
bcax.b | 2.00 | 4.00 |
rax1.d | 2.00 | 4.01 |
xar.d | 2.00 | 4.00 |
rbit.b | 2.00 | 4.01 |
rev16.b | 2.00 | 4.01 |
rev32.b | 2.00 | 4.00 |
rev32.h | 2.00 | 4.01 |
rev64.b | 2.00 | 4.00 |
rev64.h | 2.00 | 4.00 |
rev64.s | 2.00 | 4.00 |
cls.b | 2.00 | 4.00 |
cls.h | 2.00 | 4.00 |
cls.s | 2.00 | 4.01 |
clz.b | 2.00 | 4.00 |
clz.h | 2.00 | 4.00 |
clz.s | 2.00 | 4.00 |
cnt.b | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
shl.b (imm; <<2) | 2.00 | 4.01 |
shl.h (imm; <<2) | 2.00 | 4.00 |
shl.s (imm; <<2) | 2.00 | 4.00 |
shl.d (imm; <<2) | 2.00 | 4.00 |
sshl.b (reg) | 2.00 | 4.00 |
sshl.h (reg) | 2.00 | 4.00 |
sshl.s (reg) | 2.00 | 4.00 |
sshl.d (reg) | 2.00 | 4.00 |
ushl.b (reg) | 2.00 | 4.00 |
ushl.h (reg) | 2.00 | 4.00 |
ushl.s (reg) | 2.00 | 4.00 |
ushl.d (reg) | 2.00 | 4.00 |
sqshl.b (imm; <<2) | 2.00 | 4.00 |
sqshl.h (imm; <<2) | 2.00 | 4.00 |
sqshl.s (imm; <<2) | 2.00 | 4.00 |
sqshl.d (imm; <<2) | 2.00 | 4.00 |
sqshl.b (reg; <<2) | 2.00 | 4.00 |
sqshl.h (reg; <<2) | 2.00 | 4.00 |
sqshl.s (reg; <<2) | 2.00 | 4.00 |
sqshl.d (reg; <<2) | 2.00 | 4.00 |
uqshl.b (imm; <<2) | 2.00 | 4.00 |
uqshl.h (imm; <<2) | 2.00 | 4.00 |
uqshl.s (imm; <<2) | 2.00 | 4.00 |
uqshl.d (imm; <<2) | 2.00 | 4.00 |
uqshl.b (reg; <<2) | 2.00 | 4.00 |
uqshl.h (reg; <<2) | 2.00 | 4.00 |
uqshl.s (reg; <<2) | 2.00 | 4.01 |
uqshl.d (reg; <<2) | 2.00 | 4.00 |
sqshlu.b (<<2) | 2.00 | 4.00 |
sqshlu.h (<<2) | 2.00 | 4.00 |
sqshlu.s (<<2) | 2.00 | 4.00 |
sqshlu.d (<<2) | 2.00 | 4.00 |
srshl.b (reg) | 3.00 | 4.00 |
srshl.h (reg) | 3.00 | 4.00 |
srshl.s (reg) | 3.00 | 4.00 |
srshl.d (reg) | 3.00 | 4.00 |
urshl.b (reg) | 3.00 | 4.00 |
urshl.h (reg) | 3.00 | 4.00 |
urshl.s (reg) | 3.00 | 4.00 |
urshl.d (reg) | 3.00 | 4.01 |
uqrshl.b (reg) | 3.00 | 4.00 |
uqrshl.h (reg) | 3.00 | 4.00 |
uqrshl.s (reg) | 3.00 | 4.01 |
uqrshl.d (reg) | 3.00 | 4.00 |
sqshlu.b (imm; <<2) | 2.00 | 4.00 |
sqshlu.h (imm; <<2) | 2.00 | 4.01 |
sqshlu.s (imm; <<2) | 2.00 | 4.00 |
sqshlu.d (imm; <<2) | 2.00 | 4.01 |
sqrshl.b | 3.00 | 4.00 |
sqrshl.h | 3.00 | 4.00 |
sqrshl.s | 3.00 | 4.00 |
sqrshl.d | 3.00 | 4.01 |
shll.b (<<8) | 2.00 | 4.00 |
shll.h (<<16) | 2.00 | 4.01 |
shll.s (<<32) | 2.00 | 4.01 |
shll2.b (<<8) | 2.00 | 4.00 |
shll2.h (<<16) | 2.00 | 4.00 |
shll2.s (<<32) | 2.00 | 4.01 |
sshll.b (<<2) | 2.00 | 4.00 |
sshll.h (<<2) | 2.00 | 4.00 |
sshll.s (<<2) | 2.00 | 4.00 |
sshll2.b (<<2) | 2.00 | 4.00 |
sshll2.h (<<2) | 2.00 | 4.00 |
sshll2.s (<<2) | 2.00 | 4.00 |
ushll.b (<<2) | 2.00 | 4.00 |
ushll.h (<<2) | 2.00 | 4.01 |
ushll.s (<<2) | 2.00 | 4.01 |
ushll2.b (<<2) | 2.00 | 4.00 |
ushll2.h (<<2) | 2.00 | 4.00 |
ushll2.s (<<2) | 2.00 | 4.00 |
sli.b (vec; <<2) | 2.00 | 4.00 |
sli.h (vec; <<2) | 2.00 | 4.01 |
sli.s (vec; <<2) | 2.00 | 4.00 |
sli.d (vec; <<2) | 2.00 | 4.00 |
sshr.b (imm; >>2) | 2.00 | 4.00 |
sshr.h (imm; >>2) | 2.00 | 4.00 |
sshr.s (imm; >>2) | 2.00 | 4.00 |
sshr.d (imm; >>2) | 2.00 | 4.00 |
ushr.b (imm; >>2) | 2.00 | 4.00 |
ushr.h (imm; >>2) | 2.00 | 4.00 |
ushr.s (imm; >>2) | 2.00 | 4.00 |
ushr.d (imm; >>2) | 2.00 | 4.01 |
srshr.b (imm; >>2) | 3.00 | 4.00 |
srshr.h (imm; >>2) | 3.00 | 4.00 |
srshr.s (imm; >>2) | 3.00 | 4.00 |
srshr.d (imm; >>2) | 3.00 | 4.00 |
urshr.b (imm; >>2) | 3.00 | 4.00 |
urshr.h (imm; >>2) | 3.00 | 4.00 |
urshr.s (imm; >>2) | 3.00 | 4.00 |
urshr.d (imm; >>2) | 3.00 | 4.00 |
ssra.b (imm; >>2) | 3.00 | 4.00 |
ssra.h (imm; >>2) | 3.00 | 4.00 |
ssra.s (imm; >>2) | 3.00 | 4.01 |
ssra.d (imm; >>2) | 3.00 | 4.00 |
usra.b (imm; >>2) | 3.00 | 4.01 |
usra.h (imm; >>2) | 3.00 | 4.00 |
usra.s (imm; >>2) | 3.00 | 4.01 |
usra.d (imm; >>2) | 3.00 | 4.00 |
srsra.b (imm; >>2) | 3.00 | 4.00 |
srsra.h (imm; >>2) | 3.00 | 4.00 |
srsra.s (imm; >>2) | 3.00 | 4.00 |
srsra.d (imm; >>2) | 3.00 | 4.01 |
ursra.b (imm; >>2) | 3.00 | 4.00 |
ursra.h (imm; >>2) | 3.00 | 4.00 |
ursra.s (imm; >>2) | 3.00 | 4.00 |
ursra.d (imm; >>2) | 3.00 | 4.00 |
shrn.h (>>2) | 3.00 | 4.01 |
shrn.s (>>2) | 3.00 | 4.00 |
shrn.d (>>2) | 3.00 | 4.00 |
shrn2.h (>>2) | 3.00 | 4.00 |
shrn2.s (>>2) | 3.00 | 4.00 |
shrn2.d (>>2) | 3.00 | 4.00 |
sqshrn.h (>>2) | 3.00 | 4.00 |
sqshrn.s (>>2) | 3.00 | 4.00 |
sqshrn.d (>>2) | 3.00 | 4.00 |
sqshrn2.h (>>2) | 3.00 | 4.00 |
sqshrn2.s (>>2) | 3.00 | 4.00 |
sqshrn2.d (>>2) | 3.00 | 4.00 |
uqshrn.h (>>2) | 3.00 | 4.00 |
uqshrn.s (>>2) | 3.00 | 4.00 |
uqshrn.d (>>2) | 3.00 | 4.01 |
uqshrn2.h (>>2) | 3.00 | 4.00 |
uqshrn2.s (>>2) | 3.00 | 4.00 |
uqshrn2.d (>>2) | 3.00 | 4.00 |
sqshrun.h (>>2) | 3.00 | 4.00 |
sqshrun.s (>>2) | 3.00 | 4.00 |
sqshrun.d (>>2) | 3.00 | 4.00 |
sqshrun2.h (>>2) | 3.00 | 4.00 |
sqshrun2.s (>>2) | 3.00 | 4.01 |
sqshrun2.d (>>2) | 3.00 | 4.00 |
rshrn.h (>>2) | 3.00 | 4.00 |
rshrn.s (>>2) | 3.00 | 4.00 |
rshrn.d (>>2) | 3.00 | 4.00 |
rshrn2.h (>>2) | 3.00 | 4.00 |
rshrn2.s (>>2) | 3.00 | 4.00 |
rshrn2.d (>>2) | 3.00 | 4.01 |
sqrshrn.h (>>2) | 3.00 | 4.00 |
sqrshrn.s (>>2) | 3.00 | 4.00 |
sqrshrn.d (>>2) | 3.00 | 4.00 |
sqrshrn2.h (>>2) | 3.00 | 4.00 |
sqrshrn2.s (>>2) | 3.00 | 4.00 |
sqrshrn2.d (>>2) | 3.00 | 4.00 |
uqrshrn.h (>>2) | 3.00 | 4.00 |
uqrshrn.s (>>2) | 3.00 | 4.00 |
uqrshrn.d (>>2) | 3.00 | 4.00 |
uqrshrn2.h (>>2) | 3.00 | 4.00 |
uqrshrn2.s (>>2) | 3.00 | 4.00 |
uqrshrn2.d (>>2) | 3.00 | 4.00 |
sqrshrun.h (>>2) | 3.00 | 4.00 |
sqrshrun.s (>>2) | 3.00 | 4.00 |
sqrshrun.d (>>2) | 3.00 | 4.00 |
sqrshrun2.h (>>2) | 3.00 | 4.00 |
sqrshrun2.s (>>2) | 3.00 | 4.00 |
sqrshrun2.d (>>2) | 3.00 | 4.00 |
sri.b (vec; >>2) | 2.00 | 4.00 |
sri.h (vec; >>2) | 2.00 | 4.00 |
sri.s (vec; >>2) | 2.00 | 4.00 |
sri.d (vec; >>2) | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
cmeq.b (reg) | 2.00 | 4.00 |
cmeq.h (reg) | 2.00 | 4.00 |
cmeq.s (reg) | 2.00 | 4.00 |
cmeq.d (reg) | 2.00 | 4.00 |
cmeq.b (zero) | 2.00 | 4.00 |
cmeq.h (zero) | 2.00 | 4.00 |
cmeq.s (zero) | 2.00 | 4.00 |
cmeq.d (zero) | 2.00 | 4.00 |
cmge.b (reg) | 2.00 | 4.01 |
cmge.h (reg) | 2.00 | 4.01 |
cmge.s (reg) | 2.00 | 4.01 |
cmge.d (reg) | 2.00 | 4.00 |
cmge.b (zero) | 2.00 | 4.00 |
cmge.h (zero) | 2.00 | 4.00 |
cmge.s (zero) | 2.00 | 4.00 |
cmge.d (zero) | 2.00 | 4.00 |
cmgt.b (reg) | 2.00 | 4.00 |
cmgt.h (reg) | 2.00 | 4.00 |
cmgt.s (reg) | 2.00 | 4.00 |
cmgt.d (reg) | 2.00 | 4.00 |
cmgt.b (zero) | 2.00 | 4.00 |
cmgt.h (zero) | 2.00 | 4.00 |
cmgt.s (zero) | 2.00 | 4.00 |
cmgt.d (zero) | 2.00 | 4.00 |
cmle.b (zero) | 2.00 | 4.00 |
cmle.h (zero) | 2.00 | 4.01 |
cmle.s (zero) | 2.00 | 4.00 |
cmle.d (zero) | 2.00 | 4.01 |
cmlt.b (zero) | 2.00 | 4.00 |
cmlt.h (zero) | 2.00 | 4.00 |
cmlt.s (zero) | 2.00 | 4.00 |
cmlt.d (zero) | 2.00 | 4.00 |
cmhi.b (reg) | 2.00 | 4.01 |
cmhi.h (reg) | 2.00 | 4.00 |
cmhi.s (reg) | 2.00 | 4.00 |
cmhi.d (reg) | 2.00 | 4.00 |
cmhs.b (reg) | 2.00 | 4.00 |
cmhs.h (reg) | 2.00 | 4.00 |
cmhs.s (reg) | 2.00 | 4.00 |
cmhs.d (reg) | 2.00 | 4.01 |
cmtst.b (reg) | 2.00 | 4.00 |
cmtst.h (reg) | 2.00 | 4.00 |
cmtst.s (reg) | 2.00 | 4.01 |
cmtst.d (reg) | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
ext.b (>>1) | 2.00 | 4.00 |
ext.b (>>15) | 2.00 | 4.00 |
tbl (len == 1) | 2.00 | 4.00 |
tbl (len == 2) | 2.00 | 4.00 |
tbl (len == 3) | 4.00 | 2.00 |
tbl (len == 4) | 4.00 | 1.33 |
tbx (len == 1) | 2.00 | 4.00 |
tbx (len == 2) | 4.00 | 2.00 |
tbx (len == 3) | 6.00 | 1.33 |
tbx (len == 4) | 8.00 | 0.98 |
trn1.b | 2.00 | 4.00 |
trn2.b | 2.00 | 4.00 |
trn1.h | 2.00 | 4.00 |
trn2.h | 2.00 | 4.00 |
trn1.s | 2.00 | 4.00 |
trn2.s | 2.00 | 4.01 |
trn1.d | 2.00 | 4.00 |
trn2.d | 2.00 | 4.00 |
zip1.b | 2.00 | 4.00 |
zip2.b | 2.00 | 4.00 |
zip1.h | 2.00 | 4.00 |
zip2.h | 2.00 | 4.01 |
zip1.s | 2.00 | 4.01 |
zip2.s | 2.00 | 4.00 |
zip1.d | 2.00 | 4.01 |
zip2.d | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
fadd.h (scl) | 3.00 | 4.00 |
fadd.s (scl) | 3.00 | 4.00 |
fadd.d (scl) | 3.00 | 4.00 |
fadd.h (vec) | 3.00 | 4.00 |
fadd.s (vec) | 3.00 | 4.00 |
fadd.d (vec) | 3.00 | 4.00 |
faddp.h (scl) | 3.00 | 4.00 |
faddp.s (scl) | 3.00 | 4.00 |
faddp.d (scl) | 3.00 | 4.00 |
faddp.h (vec) | 3.00 | 4.00 |
faddp.s (vec) | 3.00 | 4.00 |
faddp.d (vec) | 3.00 | 4.01 |
fsub.h (scl) | 3.00 | 4.00 |
fsub.s (scl) | 3.00 | 4.00 |
fsub.d (scl) | 3.00 | 4.00 |
fsub.h (vec) | 3.00 | 4.00 |
fsub.s (vec) | 3.00 | 4.00 |
fsub.d (vec) | 3.00 | 4.00 |
fcadd.h (deg = 90) | 3.00 | 4.00 |
fcadd.s (deg = 90) | 3.00 | 4.01 |
fcadd.d (deg = 90) | 3.00 | 4.00 |
fabs.h (scl) | 2.00 | 4.00 |
fabs.s (scl) | 2.00 | 4.01 |
fabs.d (scl) | 2.00 | 4.00 |
fabs.h (vec) | 2.00 | 4.00 |
fabs.s (vec) | 2.00 | 4.00 |
fabs.d (vec) | 2.00 | 4.00 |
fabd.h (scl) | 3.00 | 4.00 |
fabd.s (scl) | 3.00 | 4.00 |
fabd.d (scl) | 3.00 | 4.00 |
fabd.h (vec) | 3.00 | 4.00 |
fabd.s (vec) | 3.00 | 4.00 |
fabd.d (vec) | 3.00 | 4.00 |
fneg.h (scl) | 2.00 | 4.00 |
fneg.s (scl) | 2.00 | 4.00 |
fneg.d (scl) | 2.00 | 4.00 |
fneg.h (vec) | 2.00 | 4.00 |
fneg.s (vec) | 2.00 | 4.00 |
fneg.d (vec) | 2.00 | 4.01 |
instruction | latency | throughput |
---|---|---|
fmax.h (scl) | 2.00 | 4.01 |
fmax.s (scl) | 2.00 | 4.00 |
fmax.d (scl) | 2.00 | 4.00 |
fmax.h (vec) | 2.00 | 4.00 |
fmax.s (vec) | 2.00 | 4.00 |
fmax.d (vec) | 2.00 | 4.00 |
fmaxp.h (scl) | 2.00 | 4.00 |
fmaxp.s (scl) | 2.00 | 4.00 |
fmaxp.d (scl) | 2.00 | 4.00 |
fmaxp.h (vec) | 2.00 | 4.00 |
fmaxp.s (vec) | 2.00 | 4.00 |
fmaxp.d (vec) | 2.00 | 4.00 |
fmaxv.h | 3.00 | 4.01 |
fmaxv.s | 3.00 | 4.01 |
fmaxnm.h (scl) | 2.00 | 4.00 |
fmaxnm.s (scl) | 2.00 | 4.00 |
fmaxnm.d (scl) | 2.00 | 4.01 |
fmaxnm.h (vec) | 2.00 | 4.00 |
fmaxnm.s (vec) | 2.00 | 4.00 |
fmaxnm.d (vec) | 2.00 | 4.00 |
fmaxnmp.h (scl) | 2.00 | 4.00 |
fmaxnmp.s (scl) | 2.00 | 4.01 |
fmaxnmp.d (scl) | 2.00 | 4.00 |
fmaxnmp.h (vec) | 2.00 | 4.00 |
fmaxnmp.s (vec) | 2.00 | 4.00 |
fmaxnmp.d (vec) | 2.00 | 4.00 |
fmaxnmv.h | 3.00 | 4.00 |
fmaxnmv.s | 3.00 | 4.00 |
fmin.h (scl) | 2.00 | 4.00 |
fmin.s (scl) | 2.00 | 4.00 |
fmin.d (scl) | 2.00 | 4.00 |
fmin.h (vec) | 2.00 | 4.00 |
fmin.s (vec) | 2.00 | 4.00 |
fmin.d (vec) | 2.00 | 4.00 |
fminp.h (scl) | 2.00 | 4.00 |
fminp.s (scl) | 2.00 | 4.00 |
fminp.d (scl) | 2.00 | 4.00 |
fminp.h (vec) | 2.00 | 4.00 |
fminp.s (vec) | 2.00 | 4.00 |
fminp.d (vec) | 2.00 | 4.00 |
fminv.h | 3.00 | 4.00 |
fminv.s | 3.00 | 4.00 |
fminnm.h (scl) | 2.00 | 4.01 |
fminnm.s (scl) | 2.00 | 4.00 |
fminnm.d (scl) | 2.00 | 4.01 |
fminnm.h (vec) | 2.00 | 4.00 |
fminnm.s (vec) | 2.00 | 4.01 |
fminnm.d (vec) | 2.00 | 4.00 |
fminnmp.h (scl) | 2.00 | 4.00 |
fminnmp.s (scl) | 2.00 | 4.00 |
fminnmp.d (scl) | 2.00 | 4.00 |
fminnmp.h (vec) | 2.00 | 4.00 |
fminnmp.s (vec) | 2.00 | 4.00 |
fminnmp.d (vec) | 2.00 | 4.00 |
fminnmv.h | 3.00 | 4.01 |
fminnmv.s | 3.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
fmul.h (scl) | 4.00 | 4.00 |
fmul.s (scl) | 4.00 | 4.00 |
fmul.d (scl) | 4.00 | 4.00 |
fmul.h (vec) | 4.00 | 4.01 |
fmul.s (vec) | 4.00 | 4.00 |
fmul.d (vec) | 4.00 | 4.00 |
fmulx.h (scl) | 4.00 | 4.00 |
fmulx.s (scl) | 4.00 | 4.01 |
fmulx.d (scl) | 4.00 | 4.00 |
fmulx.h (vec) | 4.00 | 4.00 |
fmulx.s (vec) | 4.00 | 4.00 |
fmulx.d (vec) | 4.00 | 4.01 |
fnmul.h (scl) | 4.00 | 4.00 |
fnmul.s (scl) | 4.00 | 4.00 |
fnmul.d (scl) | 4.00 | 4.01 |
instruction | latency | throughput |
---|---|---|
fmla.h (vec) | 4.00 | 4.00 |
fmla.s (vec) | 4.00 | 4.00 |
fmla.d (vec) | 4.00 | 4.00 |
fmla.h (vec; acc. fwd.) | 4.00 | - |
fmla.s (vec; acc. fwd.) | 4.00 | - |
fmla.d (vec; acc. fwd.) | 4.00 | - |
fmla.h (elem; [7]) | 4.00 | 3.43 |
fmla.s (elem; [3]) | 4.00 | 3.43 |
fmla.d (elem; [1]) | 4.00 | 3.43 |
fmlal.h (vec) | 4.00 | 4.00 |
fmlal2.h (vec) | 4.00 | 4.00 |
fmlal.h (vec; acc. fwd.) | 4.00 | - |
fmlal2.h (vec; acc. fwd.) | 4.00 | - |
fmls.h (vec) | 4.00 | 4.00 |
fmls.s (vec) | 4.00 | 4.00 |
fmls.d (vec) | 4.00 | 4.00 |
fmls.h (vec; acc. fwd.) | 4.00 | - |
fmls.s (vec; acc. fwd.) | 4.00 | - |
fmls.d (vec; acc. fwd.) | 4.00 | - |
fmls.h (elem; [7]) | 4.00 | 3.42 |
fmls.s (elem; [3]) | 4.00 | 3.43 |
fmls.d (elem; [1]) | 4.00 | 3.43 |
fmlsl.h (vec) | 4.00 | 4.00 |
fmlsl2.h (vec) | 4.00 | 4.00 |
fmlsl.h (vec; acc. fwd.) | 4.00 | - |
fmlsl2.h (vec; acc. fwd.) | 4.00 | - |
fmadd.h | 4.00 | 4.00 |
fmadd.s | 4.00 | 4.01 |
fmadd.d | 4.00 | 4.01 |
fmadd.h (acc. fwd.) | 4.00 | - |
fmadd.s (acc. fwd.) | 4.00 | - |
fmadd.d (acc. fwd.) | 4.00 | - |
fmsub.h | 4.00 | 4.00 |
fmsub.s | 4.00 | 4.00 |
fmsub.d | 4.00 | 4.00 |
fmsub.h (acc. fwd.) | 4.00 | - |
fmsub.s (acc. fwd.) | 4.00 | - |
fmsub.d (acc. fwd.) | 4.00 | - |
fnmadd.h | 4.00 | 4.00 |
fnmadd.s | 4.00 | 4.00 |
fnmadd.d | 4.00 | 4.00 |
fnmadd.h (acc. fwd.) | 4.00 | - |
fnmadd.s (acc. fwd.) | 4.00 | - |
fnmadd.d (acc. fwd.) | 4.00 | - |
fnmsub.h | 4.00 | 4.00 |
fnmsub.s | 4.00 | 4.00 |
fnmsub.d | 4.00 | 4.00 |
fnmsub.h (acc. fwd.) | 4.00 | - |
fnmsub.s (acc. fwd.) | 4.00 | - |
fnmsub.d (acc. fwd.) | 4.00 | - |
fcmla.h (vec; deg = 0) | 4.00 | 4.00 |
fcmla.s (vec; deg = 0) | 4.00 | 4.01 |
fcmla.d (vec; deg = 0) | 4.00 | 4.00 |
fcmla.h (vec; deg = 90) | 4.00 | 4.00 |
fcmla.s (vec; deg = 90) | 4.00 | 4.00 |
fcmla.d (vec; deg = 90) | 4.00 | 4.00 |
fcmla.h (vec; deg = 0; acc. fwd.) | 4.00 | - |
fcmla.s (vec; deg = 0; acc. fwd.) | 4.00 | - |
fcmla.d (vec; deg = 0; acc. fwd.) | 4.00 | - |
fcmla.h (vec; deg = 90; acc. fwd.) | 4.00 | - |
fcmla.s (vec; deg = 90; acc. fwd.) | 4.00 | - |
fcmla.d (vec; deg = 90; acc. fwd.) | 4.00 | - |
fcmla.h (elem; deg = 0; v.h[7]) | 4.00 | 3.43 |
fcmla.s (elem; deg = 0; v.s[3]) | 4.00 | 3.43 |
fcmla.h (elem; deg = 90; v.h[7]) | 4.00 | 3.43 |
fcmla.s (elem; deg = 90; v.s[3]) | 4.00 | 3.43 |
instruction | latency | throughput |
---|---|---|
frecpe.h (scl) | 3.00 | 1.00 |
frecpe.s (scl) | 3.00 | 1.00 |
frecpe.d (scl) | 3.00 | 1.00 |
frecpe.h (vec) | 3.00 | 1.00 |
frecpe.s (vec) | 3.00 | 1.00 |
frecpe.d (vec) | 3.00 | 1.00 |
frecps.h (scl) | 4.00 | 4.00 |
frecps.s (scl) | 4.00 | 4.00 |
frecps.d (scl) | 4.00 | 4.00 |
frecps.h (vec) | 4.00 | 4.00 |
frecps.s (vec) | 4.00 | 4.00 |
frecps.d (vec) | 4.00 | 4.00 |
frecpx.h (scl) | 3.00 | 1.00 |
frecpx.s (scl) | 3.00 | 1.00 |
frecpx.d (scl) | 3.00 | 1.00 |
instruction | latency | throughput |
---|---|---|
fsqrt.h (scl) | 8.00 | 0.50 |
fsqrt.s (scl) | 10.00 | 0.50 |
fsqrt.d (scl) | 13.00 | 0.50 |
fsqrt.h (vec) | 8.00 | 0.50 |
fsqrt.s (vec) | 10.00 | 0.50 |
fsqrt.d (vec) | 13.00 | 0.50 |
frsqrte.h (scl) | 3.00 | 1.00 |
frsqrte.s (scl) | 3.00 | 1.00 |
frsqrte.d (scl) | 3.00 | 1.00 |
frsqrte.h (vec) | 3.00 | 1.00 |
frsqrte.s (vec) | 3.00 | 1.00 |
frsqrte.d (vec) | 3.00 | 1.00 |
frsqrts.h (scl) | 4.00 | 4.00 |
frsqrts.s (scl) | 4.00 | 4.01 |
frsqrts.d (scl) | 4.00 | 4.00 |
frsqrts.h (vec) | 4.00 | 4.00 |
frsqrts.s (vec) | 4.00 | 4.00 |
frsqrts.d (vec) | 4.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
facge.h (scl) | 2.00 | 4.00 |
facge.s (scl) | 2.00 | 4.00 |
facge.d (scl) | 2.00 | 4.00 |
facge.h (vec) | 2.00 | 4.00 |
facge.s (vec) | 2.00 | 4.00 |
facge.d (vec) | 2.00 | 4.00 |
facgt.h (scl) | 2.00 | 4.00 |
facgt.s (scl) | 2.00 | 4.00 |
facgt.d (scl) | 2.00 | 4.01 |
facgt.h (vec) | 2.00 | 4.01 |
facgt.s (vec) | 2.00 | 4.00 |
facgt.d (vec) | 2.00 | 4.00 |
fcmp.h (reg) | 2.00 | 1.00 |
fcmp.h (zero) | 2.00 | 1.00 |
fcmp.s (reg) | 2.00 | 1.00 |
fcmp.s (zero) | 2.00 | 1.00 |
fcmp.d (reg) | 2.00 | 1.00 |
fcmp.d (zero) | 2.00 | 1.00 |
fcmpe.h (reg) | 2.00 | 1.00 |
fcmpe.h (zero) | 2.00 | 1.00 |
fcmpe.s (reg) | 2.00 | 1.00 |
fcmpe.s (zero) | 2.00 | 1.00 |
fcmpe.d (reg) | 2.00 | 1.00 |
fcmpe.d (zero) | 2.00 | 1.00 |
fccmp.h (eq) | 2.00 | 1.00 |
fccmp.h (le) | 2.00 | 1.00 |
fccmp.s (eq) | 2.00 | 1.00 |
fccmp.s (le) | 2.00 | 1.00 |
fccmp.d (eq) | 2.00 | 1.00 |
fccmp.d (le) | 2.00 | 1.00 |
fccmpe.h (eq) | 2.00 | 1.00 |
fccmpe.h (le) | 2.00 | 1.00 |
fccmpe.s (eq) | 2.00 | 1.00 |
fccmpe.s (le) | 2.00 | 1.00 |
fccmpe.d (eq) | 2.00 | 1.00 |
fccmpe.d (le) | 2.00 | 1.00 |
fcmeq.h (scl) | 2.00 | 4.01 |
fcmeq.s (scl) | 2.00 | 4.00 |
fcmeq.d (scl) | 2.00 | 4.00 |
fcmeq.h (vec) | 2.00 | 4.00 |
fcmeq.s (vec) | 2.00 | 4.00 |
fcmeq.d (vec) | 2.00 | 4.00 |
fcmeq.h (scl; zero) | 2.00 | 4.00 |
fcmeq.s (scl; zero) | 2.00 | 4.00 |
fcmeq.d (scl; zero) | 2.00 | 4.00 |
fcmeq.h (vec; zero) | 2.00 | 4.00 |
fcmeq.s (vec; zero) | 2.00 | 4.00 |
fcmeq.d (vec; zero) | 2.00 | 4.00 |
fcmge.h (scl) | 2.00 | 4.00 |
fcmge.s (scl) | 2.00 | 4.00 |
fcmge.d (scl) | 2.00 | 4.01 |
fcmge.h (vec) | 2.00 | 4.01 |
fcmge.s (vec) | 2.00 | 4.00 |
fcmge.d (vec) | 2.00 | 4.00 |
fcmge.h (scl; zero) | 2.00 | 4.00 |
fcmge.s (scl; zero) | 2.00 | 4.00 |
fcmge.d (scl; zero) | 2.00 | 4.01 |
fcmge.h (vec; zero) | 2.00 | 4.00 |
fcmge.s (vec; zero) | 2.00 | 4.00 |
fcmge.d (vec; zero) | 2.00 | 4.00 |
fcmgt.h (scl) | 2.00 | 4.00 |
fcmgt.s (scl) | 2.00 | 4.01 |
fcmgt.d (scl) | 2.00 | 4.00 |
fcmgt.h (vec) | 2.00 | 4.00 |
fcmgt.s (vec) | 2.00 | 4.01 |
fcmgt.d (vec) | 2.00 | 4.00 |
fcmgt.h (scl; zero) | 2.00 | 4.00 |
fcmgt.s (scl; zero) | 2.00 | 4.00 |
fcmgt.d (scl; zero) | 2.00 | 4.00 |
fcmgt.h (vec; zero) | 2.00 | 4.00 |
fcmgt.s (vec; zero) | 2.00 | 4.00 |
fcmgt.d (vec; zero) | 2.00 | 4.00 |
fcmle.h (scl; zero) | 2.00 | 4.00 |
fcmle.s (scl; zero) | 2.00 | 4.00 |
fcmle.d (scl; zero) | 2.00 | 4.00 |
fcmle.h (vec; zero) | 2.00 | 4.01 |
fcmle.s (vec; zero) | 2.00 | 4.00 |
fcmle.d (vec; zero) | 2.00 | 4.01 |
fcmlt.h (scl; zero) | 2.00 | 4.00 |
fcmlt.s (scl; zero) | 2.00 | 4.00 |
fcmlt.d (scl; zero) | 2.00 | 4.00 |
fcmlt.h (vec; zero) | 2.00 | 4.00 |
fcmlt.s (vec; zero) | 2.00 | 4.00 |
fcmlt.d (vec; zero) | 2.00 | 4.00 |
instruction | latency | throughput |
---|---|---|
fcsel.h (eq) | 2.00 | 2.00 |
fcsel.h (le) | 2.00 | 2.00 |
fcsel.s (eq) | 2.00 | 2.00 |
fcsel.s (le) | 2.00 | 2.00 |
fcsel.d (eq) | 2.00 | 2.00 |
fcsel.d (le) | 2.00 | 2.00 |
instruction | latency | throughput |
---|---|---|
scvtf.h (scl; >>2) | 8.00 | 3.00 |
scvtf.s (scl; >>2) | 7.99 | 3.00 |
scvtf.d (scl; >>2) | 8.00 | 3.00 |
scvtf.h (scl; int) | 8.00 | 3.00 |
scvtf.s (scl; int) | 8.00 | 3.00 |
scvtf.d (scl; int) | 7.99 | 3.00 |
scvtf.h (vec; >>2) | 3.00 | 4.00 |
scvtf.s (vec; >>2) | 3.00 | 4.00 |
scvtf.d (vec; >>2) | 3.00 | 4.00 |
scvtf.h (vec; int) | 3.00 | 4.00 |
scvtf.s (vec; int) | 3.00 | 4.00 |
scvtf.d (vec; int) | 3.00 | 4.00 |
fcvt (h -> s) | 3.00 | 4.00 |
fcvt (h -> d) | 3.00 | 4.01 |
fcvt (s -> h) | 3.00 | 4.01 |
fcvt (s -> d) | 3.00 | 4.00 |
fcvt (d -> h) | 3.00 | 4.00 |
fcvt (d -> s) | 3.00 | 4.00 |
fcvtl (h -> s) | 3.00 | 4.01 |
fcvtl (s -> d) | 3.00 | 4.00 |
fcvtl2 (h -> s) | 3.00 | 4.00 |
fcvtl2 (s -> d) | 3.00 | 4.00 |
fcvtn (s -> h) | 3.00 | 4.00 |
fcvtn (d -> s) | 3.00 | 4.01 |
fcvtn2 (s -> h) | 3.00 | 4.00 |
fcvtn2 (d -> s) | 3.00 | 4.00 |
fcvtxn | 3.00 | 4.00 |
fcvtxn2 | 3.00 | 4.00 |
fcvtas.h (scl) | 3.00 | 4.00 |
fcvtas.s (scl) | 3.00 | 4.00 |
fcvtas.d (scl) | 3.00 | 4.00 |
fcvtas.h (vec) | 3.00 | 4.00 |
fcvtas.s (vec) | 3.00 | 4.00 |
fcvtas.d (vec) | 3.00 | 4.00 |
fcvtas.h (scl -> reg) | 10.00 | 2.00 |
fcvtas.s (scl -> reg) | 10.00 | 2.00 |
fcvtas.d (scl -> reg) | 10.00 | 2.00 |
fcvtau.h (scl) | 3.00 | 4.00 |
fcvtau.s (scl) | 3.00 | 4.00 |
fcvtau.d (scl) | 3.00 | 4.00 |
fcvtau.h (vec) | 3.00 | 4.00 |
fcvtau.s (vec) | 3.00 | 4.00 |
fcvtau.d (vec) | 3.00 | 4.00 |
fcvtau.h (scl -> reg) | 10.00 | 2.00 |
fcvtau.s (scl -> reg) | 10.00 | 2.00 |
fcvtau.d (scl -> reg) | 10.00 | 2.00 |
fjcvtzs | 10.00 | 1.00 |
frinta.h (scl) | 3.00 | 4.00 |
frinta.s (scl) | 3.00 | 4.00 |
frinta.d (scl) | 3.00 | 4.00 |
frinta.h (vec) | 3.00 | 4.00 |
frinta.s (vec) | 3.00 | 4.00 |
frinta.d (vec) | 3.00 | 4.01 |
frinti.h (scl) | 3.00 | 4.00 |
frinti.s (scl) | 3.00 | 4.00 |
frinti.d (scl) | 3.00 | 4.00 |
frinti.h (vec) | 3.00 | 4.00 |
frinti.s (vec) | 3.00 | 4.00 |
frinti.d (vec) | 3.00 | 4.00 |
frintm.h (scl) | 3.00 | 4.00 |
frintm.s (scl) | 3.00 | 4.01 |
frintm.d (scl) | 3.00 | 4.00 |
frintm.h (vec) | 3.00 | 4.00 |
frintm.s (vec) | 3.00 | 4.00 |
frintm.d (vec) | 3.00 | 4.00 |
frintn.h (scl) | 3.00 | 4.00 |
frintn.s (scl) | 3.00 | 4.00 |
frintn.d (scl) | 3.00 | 4.00 |
frintn.h (vec) | 3.00 | 4.00 |
frintn.s (vec) | 3.00 | 4.00 |
frintn.d (vec) | 3.00 | 4.00 |
frintp.h (scl) | 3.00 | 4.00 |
frintp.s (scl) | 3.00 | 4.00 |
frintp.d (scl) | 3.00 | 4.00 |
frintp.h (vec) | 3.00 | 4.00 |
frintp.s (vec) | 3.00 | 4.00 |
frintp.d (vec) | 3.00 | 4.00 |
frintx.h (scl) | 3.00 | 4.01 |
frintx.s (scl) | 3.00 | 4.00 |
frintx.d (scl) | 3.00 | 4.00 |
frintx.h (vec) | 3.00 | 4.00 |
frintx.s (vec) | 3.00 | 4.00 |
frintx.d (vec) | 3.00 | 4.00 |
frintz.h (scl) | 3.00 | 4.00 |
frintz.s (scl) | 3.00 | 4.00 |
frintz.d (scl) | 3.00 | 4.00 |
frintz.h (vec) | 3.00 | 4.00 |
frintz.s (vec) | 3.00 | 4.00 |
frintz.d (vec) | 3.00 | 4.00 |