Skip to content

Commit

Permalink
Categorize dispatch name better for linalg.generic cases (iree-org#16677)
Browse files Browse the repository at this point in the history

This could be quite useful for viewing trace captures.
  • Loading branch information
antiagainst authored Apr 18, 2024
1 parent d284154 commit ff624dd
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,22 @@ static std::string summarizeLinalgOp(linalg::LinalgOp op) {
}
}

// Categorize linalg.generic ops better.
if (prefix.empty() && isa<linalg::GenericOp>(op)) {
if (llvm::all_of(op.getIndexingMapsArray(),
[](AffineMap m) { return m.isIdentity(); })) {
prefix = "elementwise";
} else if (llvm::all_of(op.getIndexingMapsArray(),
[](AffineMap m) { return m.isMinorIdentity(); })) {
// We have checked that this is not pure elementwise in the above.
prefix = "broadcast";
} else if (linalg::isaContractionOpInterface(op)) {
prefix = "contract";
} else if (linalg::isaConvolutionOpInterface(op)) {
prefix = "conv";
}
}

if (prefix.empty()) {
// By default, use the op name as prefix.
auto opName = op->getName().getStringRef();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-opt --allow-unregistered-dialect --split-input-file --iree-flow-annotate-dispatches %s | FileCheck %s
// RUN: iree-opt --split-input-file --iree-flow-annotate-dispatches %s | FileCheck %s

// Dispatches containing some ops get a heuristics-driven summary in their name.
// This also tests symbol reference renaming.
Expand Down Expand Up @@ -88,7 +88,7 @@ flow.executable private @ex {
// Dispatch key op with multiple datatypes should be reflected in summary.

flow.executable private @ex {
// CHECK: flow.executable.export public @dispatch_generic_4x8_i32xf32
// CHECK: flow.executable.export public @dispatch_elementwise_4x8_i32xf32
flow.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !flow.dispatch.tensor<writeonly:tensor<4x8xf32>>) {
Expand Down Expand Up @@ -211,7 +211,7 @@ flow.executable private @ex {
#map = affine_map<(d0, d1) -> (d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
flow.executable private @ex {
// CHECK: flow.executable.export public @ex_unpack_generic_384x512_f32_pack
// CHECK: flow.executable.export public @ex_unpack_broadcast_384x512_f32_pack
flow.executable.export public @ex
builtin.module {
func.func @ex(%arg0: !flow.dispatch.tensor<readonly:tensor<24x32x16x16xf32>>, %arg1: !flow.dispatch.tensor<readonly:tensor<512xf32>>, %arg2: !flow.dispatch.tensor<writeonly:tensor<24x512x16x1xf32>>) {
Expand Down Expand Up @@ -266,3 +266,80 @@ flow.executable private @ex {
}
}
}

// -----

// Tests that a matmul-shaped linalg.generic gets the "contract" prefix.
// The three indexing maps form the classic matmul access pattern:
//   LHS (d0, d2), RHS (d2, d1), result (d0, d1), with d2 the reduction dim.
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>

flow.executable private @ex {
// Expected summary: 16x32 matches the result shape, 8 the reduction extent,
// f32 the element type.
// CHECK: flow.executable.export public @dispatch_contract_16x32x8_f32
flow.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !flow.dispatch.tensor<readwrite:tensor<16x32xf32>>) {
%0 = tensor.empty() : tensor<16x8xf32>
%1 = tensor.empty() : tensor<8x32xf32>
%init = flow.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [16, 32], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x32xf32>> -> tensor<16x32xf32>
// Body is a multiply-accumulate (mulf then addf into %out), which together
// with the maps above should match the contraction-op interface.
%2 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
ins(%0, %1 : tensor<16x8xf32>, tensor<8x32xf32>) outs(%init : tensor<16x32xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%3 = arith.mulf %in, %in_0 : f32
%4 = arith.addf %out, %3 : f32
linalg.yield %4 : f32
} -> tensor<16x32xf32>
flow.dispatch.tensor.store %2, %arg0, offsets = [0, 0], sizes = [16, 32], strides = [1, 1] : tensor<16x32xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x32xf32>>
return
}
}
}

// -----

// Tests that a convolution-shaped linalg.generic gets the "conv" prefix.
// The additive index expressions (d1 + d5, d2 + d6) in the input map are the
// sliding-window access pattern the convolution-op interface matcher looks for.
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d5, d2 + d6, d3)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d5, d6, d3, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4)>

flow.executable private @ex {
// Expected summary: presumably the five parallel result dims (2x3x4x2x3)
// followed by the two reduction dims (2x2) — confirm against the pass's
// shape-printing logic.
// CHECK: flow.executable.export public @dispatch_conv_2x3x4x2x3x2x2_f32
flow.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !flow.dispatch.tensor<readwrite:tensor<2x3x4x2x3xf32>>) {
%0 = tensor.empty() : tensor<2x4x5x2xf32>
%1 = tensor.empty() : tensor<2x2x2x3xf32>
%init = flow.dispatch.tensor.load %arg0, offsets = [0, 0, 0, 0, 0], sizes = [2, 3, 4, 2, 3], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<2x3x4x2x3xf32>> -> tensor<2x3x4x2x3xf32>
// Multiply-accumulate body over the two reduction (window) dimensions.
%2 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
ins(%0, %1 : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) outs(%init : tensor<2x3x4x2x3xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%3 = arith.mulf %in, %in_0 : f32
%4 = arith.addf %out, %3 : f32
linalg.yield %4 : f32
} -> tensor<2x3x4x2x3xf32>
flow.dispatch.tensor.store %2, %arg0, offsets = [0, 0, 0, 0, 0], sizes = [2, 3, 4, 2, 3], strides = [1, 1, 1, 1, 1] : tensor<2x3x4x2x3xf32> -> !flow.dispatch.tensor<readwrite:tensor<2x3x4x2x3xf32>>
return
}
}
}

// -----

// Tests that a linalg.generic whose indexing maps are all identity and whose
// iterators are all parallel gets the "elementwise" prefix.
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>

flow.executable private @ex {
// Expected summary: result shape 8x16x32 plus element type f32.
// CHECK: flow.executable.export public @dispatch_elementwise_8x16x32_f32
flow.executable.export public @dispatch
builtin.module {
func.func @dispatch(%arg0: !flow.dispatch.tensor<readwrite:tensor<8x16x32xf32>>) {
%0 = tensor.empty() : tensor<8x16x32xf32>
%init = flow.dispatch.tensor.load %arg0, offsets = [0, 0, 0], sizes = [8, 16, 32], strides = [1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<8x16x32xf32>> -> tensor<8x16x32xf32>
// The same identity map is used for both the input and the output operand;
// the body (maximumf) is a pure per-element computation.
%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<8x16x32xf32>) outs(%init : tensor<8x16x32xf32>) {
^bb0(%in: f32, %out: f32):
%3 = arith.maximumf %in, %out : f32
linalg.yield %3 : f32
} -> tensor<8x16x32xf32>
flow.dispatch.tensor.store %2, %arg0, offsets = [0, 0, 0], sizes = [8, 16, 32], strides = [1, 1, 1] : tensor<8x16x32xf32> -> !flow.dispatch.tensor<readwrite:tensor<8x16x32xf32>>
return
}
}
}
2 changes: 1 addition & 1 deletion tools/test/executable_benchmarks.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
// reduced/simplified. Dynamic shapes, for example, will usually stop a dispatch
// from being benchmarkable without explicit shape arguments.

// CHECK: BM_abs_dispatch_0_vmvx_bytecode_fb_abs_dispatch_0_generic
// CHECK: BM_abs_dispatch_0_vmvx_bytecode_fb_abs_dispatch_0_elementwise
func.func @abs(%input : tensor<f32>) -> (tensor<f32>) {
%result = math.absf %input : tensor<f32>
return %result : tensor<f32>
Expand Down
2 changes: 1 addition & 1 deletion tools/test/executable_configurations.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ func.func @abs(%input : tensor<f32>) -> tensor<f32> {
// CHECK: IR Dump Before SerializeExecutablesPass
// CHECK: hal.executable public @abs_dispatch_0
// CHECK: hal.executable.variant public @vmvx_bytecode_fb
// CHECK: vm.func private @abs_dispatch_0_generic
// CHECK: vm.func private @abs_dispatch_0_elementwise
2 changes: 1 addition & 1 deletion tools/test/executable_sources.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ func.func @abs(%input : tensor<f32>) -> (tensor<f32>) {
// CHECK: IR Dump Before SerializeExecutablesPass
// CHECK: hal.executable public @abs_dispatch_0
// CHECK: hal.executable.variant public @vmvx_bytecode_fb
// CHECK: vm.func private @abs_dispatch_0_generic
// CHECK: vm.func private @abs_dispatch_0_elementwise

0 comments on commit ff624dd

Please sign in to comment.