Skip to content

Commit

Permalink
[OSPP] Add MatMul and Conv2d optimization pass. (buddy-compiler#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
WLFJ authored Oct 30, 2022
1 parent ff35d75 commit 2737425
Show file tree
Hide file tree
Showing 10 changed files with 650 additions and 37 deletions.
62 changes: 62 additions & 0 deletions examples/MLIRLinalg/linalg-conv2d_nchw_fchw.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
module {
// External declaration (no body) of a printer taking an unranked f32 memref.
// NOTE(review): presumably resolved at run time from the shared libraries
// passed to the runner via -shared-libs — confirm.
func.func private @printMemrefF32(memref<*xf32>)
// Allocates a rank-4 f32 memref with the four requested dynamic extents,
// stores `%value` into every element, and returns the buffer. The buffer is
// not freed here. Note: despite the "2d" in the name, the buffer is rank-4.
func.func @alloc_2d_filled_f32(%dim0: index, %dim1: index, %dim2: index, %dim3: index, %value: f32) -> memref<?x?x?x?xf32> {
  %lb = arith.constant 0 : index
  %step = arith.constant 1 : index
  %buffer = memref.alloc(%dim0, %dim1, %dim2, %dim3) : memref<?x?x?x?xf32>
  // Visit every (i, j, k, l) coordinate, outermost dimension first.
  scf.for %i = %lb to %dim0 step %step {
    scf.for %j = %lb to %dim1 step %step {
      scf.for %k = %lb to %dim2 step %step {
        scf.for %l = %lb to %dim3 step %step {
          memref.store %value, %buffer[%i, %j, %k, %l] : memref<?x?x?x?xf32>
        }
      }
    }
  }
  return %buffer : memref<?x?x?x?xf32>
}
// Thin wrapper around linalg.conv_2d_nchw_fchw: %input and %kernel are the
// operands and %result is the output buffer, which is updated in place
// (the function itself returns nothing).
func.func @conv_2d_nchw_fchw(%input: memref<?x?x?x?xf32>, %kernel: memref<?x?x?x?xf32>, %result: memref<?x?x?x?xf32>) {
  linalg.conv_2d_nchw_fchw
    ins(%input, %kernel : memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
    outs(%result : memref<?x?x?x?xf32>)
  return
}
// Entry point: builds a 2x2x7x7 image and a 2x2x4x4 filter filled with 1.0,
// a 2x2x4x4 output filled with 0.0, runs the convolution wrapper, prints the
// output buffer, and then frees all three buffers.
func.func @main() {
  // Fill values: 1.0 for the inputs (image, filter), 0.0 for the output.
  %one = arith.constant 1.000000e+00 : f32
  %zero = arith.constant 0.000000e+00 : f32

  // Image extents.
  %img_n = arith.constant 2 : index
  %img_c = arith.constant 2 : index
  %img_h = arith.constant 7 : index
  %img_w = arith.constant 7 : index

  // Filter extents.
  %flt_f = arith.constant 2 : index
  %flt_c = arith.constant 2 : index
  %flt_h = arith.constant 4 : index
  %flt_w = arith.constant 4 : index

  // Output extents (spatial size 7 - 4 + 1 = 4).
  %out_n = arith.constant 2 : index
  %out_f = arith.constant 2 : index
  %out_h = arith.constant 4 : index
  %out_w = arith.constant 4 : index

  // Allocate and initialise the three buffers.
  %image = call @alloc_2d_filled_f32(%img_n, %img_c, %img_h, %img_w, %one) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
  %filter = call @alloc_2d_filled_f32(%flt_f, %flt_c, %flt_h, %flt_w, %one) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>
  %output = call @alloc_2d_filled_f32(%out_n, %out_f, %out_h, %out_w, %zero) : (index, index, index, index, f32) -> memref<?x?x?x?xf32>

  // Run the convolution; the result lands in %output.
  call @conv_2d_nchw_fchw(%image, %filter, %output) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> ()

  // The printer takes an unranked memref, so erase the rank before the call.
  %unranked_output = memref.cast %output : memref<?x?x?x?xf32> to memref<*xf32>
  call @printMemrefF32(%unranked_output) : (memref<*xf32>) -> ()

  // Release the buffers.
  memref.dealloc %output : memref<?x?x?x?xf32>
  memref.dealloc %image : memref<?x?x?x?xf32>
  memref.dealloc %filter : memref<?x?x?x?xf32>
  return
}
}

74 changes: 37 additions & 37 deletions examples/MLIRLinalg/linalg-matmul.mlir
Original file line number Diff line number Diff line change
@@ -1,47 +1,47 @@
module {
func.func private @printMemrefF32(memref<*xf32>)

func.func @alloc_2d_filled_f32(%arg0: index, %arg1: index, %arg2: f32) -> memref<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
scf.for %arg3 = %c0 to %arg0 step %c1 {
scf.for %arg4 = %c0 to %arg1 step %c1 {
memref.store %arg2, %0[%arg3, %arg4] : memref<?x?xf32>
}
module{
func.func private @printMemrefF32(memref<*xf32>)

func.func @matmul(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>) {
linalg.matmul
ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
outs(%c:memref<?x?xf32>)
return
}
return %0 : memref<?x?xf32>
}

func.func @matmul(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
linalg.matmul ins (%arg0, %arg1: memref<?x?xf32>, memref<?x?xf32>)
outs (%arg2: memref<?x?xf32>)
return
}
func.func @main(){
// Set up dims.
%cM = arith.constant 4 : index
%cN = arith.constant 4 : index
%cK = arith.constant 4 : index

func.func @main() {
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%c5 = arith.constant 5 : index
// Set Init Value.
%cf1 = arith.constant 1.0 : f32

// Initial data of input and output.
%cst = arith.constant 1.000000e+00 : f32
%cst_0 = arith.constant 0.000000e+00 : f32
%A = memref.alloc(%cM, %cK) : memref<?x?xf32>
%B = memref.alloc(%cK, %cN) : memref<?x?xf32>
%C = memref.alloc(%cM, %cN) : memref<?x?xf32>

%input1 = call @alloc_2d_filled_f32(%c5, %c3, %cst) : (index, index, f32) -> memref<?x?xf32>
%input2 = call @alloc_2d_filled_f32(%c3, %c2, %cst) : (index, index, f32) -> memref<?x?xf32>
%output = call @alloc_2d_filled_f32(%c5, %c2, %cst_0) : (index, index, f32) -> memref<?x?xf32>
linalg.fill
ins(%cf1 : f32)
outs(%A:memref<?x?xf32>)

call @matmul(%input1, %input2, %output) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()
linalg.fill
ins(%cf1 : f32)
outs(%B:memref<?x?xf32>)

// Print output.
%print_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
call @printMemrefF32(%print_output) : (memref<*xf32>) -> ()
linalg.fill
ins(%cf1 : f32)
outs(%C:memref<?x?xf32>)

memref.dealloc %input1 : memref<?x?xf32>
memref.dealloc %input2 : memref<?x?xf32>
memref.dealloc %output : memref<?x?xf32>
call @matmul(%A, %B, %C) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()

return
}
// Print output.
%print_C = memref.cast %C : memref<?x?xf32> to memref<*xf32>
call @printMemrefF32(%print_C) : (memref<*xf32>) -> ()

memref.dealloc %C : memref<?x?xf32>
memref.dealloc %B : memref<?x?xf32>
memref.dealloc %A : memref<?x?xf32>
return
}
}
61 changes: 61 additions & 0 deletions examples/MLIRLinalg/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,64 @@ linalg-matmul-run:
-convert-vector-to-llvm -convert-memref-to-llvm -convert-arith-to-llvm \
-convert-func-to-llvm -reconcile-unrealized-casts | \
${MLIR_CPU_RUNNER} ${OPT_FLAG} -e main -entry-point-result=void -shared-libs=${MLIR_RUNNER_UTILS} -shared-libs=${MLIR_C_RUNNER_UTILS}

# Run the matmul-optimize pass on linalg-matmul.mlir and write the resulting
# IR to ./log.mlir.
linalg-matmul-optimize-lower:
	@${BUDDY_OPT} linalg-matmul.mlir \
		${MLIR_OPT_OPTIONS} \
		--matmul-optimize="vec-size=16 kernel-m=2 kernel-n=4" \
		-o ./log.mlir

# Run matmul-optimize on linalg-matmul.mlir, apply the conversion passes down
# to LLVM, and translate the result to LLVM IR in log.ll.
linalg-matmul-optimize-translate:
	@${BUDDY_OPT} linalg-matmul.mlir \
		${MLIR_OPT_OPTIONS} \
		--matmul-optimize="vec-size=16 kernel-m=2 kernel-n=4" \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-convert-memref-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll

# Run matmul-optimize on linalg-matmul.mlir, apply the conversion passes down
# to LLVM, and pipe the result into the CPU runner with `main` as entry point.
linalg-matmul-optimize-run:
	@${BUDDY_OPT} linalg-matmul.mlir \
		${MLIR_OPT_OPTIONS} \
		--matmul-optimize="vec-size=16 kernel-m=2 kernel-n=4" \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-convert-memref-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_CPU_RUNNER} ${OPT_FLAG} \
		-e main -entry-point-result=void \
		-shared-libs=${MLIR_RUNNER_UTILS} \
		-shared-libs=${MLIR_C_RUNNER_UTILS}

# Lower the linalg ops in linalg-conv2d_nchw_fchw.mlir to loops and write the
# resulting IR to ./log.mlir.
linalg-conv2d_nchw_fchw-lower:
	@${MLIR_OPT} ./linalg-conv2d_nchw_fchw.mlir \
		-convert-linalg-to-loops \
		-o ./log.mlir

# Apply the conversion passes down to LLVM on linalg-conv2d_nchw_fchw.mlir
# (no optimization pass) and translate the result to LLVM IR in log.ll.
linalg-conv2d_nchw_fchw-translate:
	@${MLIR_OPT} ./linalg-conv2d_nchw_fchw.mlir \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-convert-memref-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll

# Apply the conversion passes down to LLVM on linalg-conv2d_nchw_fchw.mlir
# (no optimization pass) and pipe the result into the CPU runner with `main`
# as entry point.
linalg-conv2d_nchw_fchw-run:
	@${MLIR_OPT} linalg-conv2d_nchw_fchw.mlir \
		${MLIR_OPT_OPTIONS} \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-convert-memref-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_CPU_RUNNER} ${OPT_FLAG} \
		-e main -entry-point-result=void \
		-shared-libs=${MLIR_RUNNER_UTILS} \
		-shared-libs=${MLIR_C_RUNNER_UTILS}

# Run the conv-optimize pass on linalg-conv2d_nchw_fchw.mlir and write the
# resulting IR to ./log.mlir.
# The pass options match the -optimize-translate and -optimize-run targets
# (kernel-n=3), so the dumped IR corresponds to what those targets process.
linalg-conv2d_nchw_fchw-optimize-lower:
	@${BUDDY_OPT} ./linalg-conv2d_nchw_fchw.mlir \
		--conv-optimize="kernel-m=2 kernel-n=3 vec-size=16" \
		-o ./log.mlir

# Run conv-optimize on linalg-conv2d_nchw_fchw.mlir, apply the conversion
# passes down to LLVM, and translate the result to LLVM IR in log.ll.
linalg-conv2d_nchw_fchw-optimize-translate:
	@${BUDDY_OPT} ./linalg-conv2d_nchw_fchw.mlir \
		--conv-optimize="kernel-m=2 kernel-n=3 vec-size=16" \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-convert-memref-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_TRANSLATE} --mlir-to-llvmir -o log.ll

# Run conv-optimize on linalg-conv2d_nchw_fchw.mlir, apply the conversion
# passes down to LLVM, and pipe the result into the CPU runner with `main` as
# entry point.
linalg-conv2d_nchw_fchw-optimize-run:
	@${BUDDY_OPT} ./linalg-conv2d_nchw_fchw.mlir \
		${MLIR_OPT_OPTIONS} \
		--conv-optimize="kernel-m=2 kernel-n=3 vec-size=16" \
		-convert-linalg-to-loops \
		-lower-affine \
		-convert-scf-to-cf \
		-convert-vector-to-llvm \
		-convert-memref-to-llvm \
		-convert-arith-to-llvm \
		-convert-func-to-llvm \
		-reconcile-unrealized-casts | \
	${MLIR_CPU_RUNNER} ${OPT_FLAG} \
		-e main -entry-point-result=void \
		-shared-libs=${MLIR_RUNNER_UTILS} \
		-shared-libs=${MLIR_C_RUNNER_UTILS}

2 changes: 2 additions & 0 deletions lib/Conversion/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ add_subdirectory(LowerBud)
add_subdirectory(LowerDIP)
add_subdirectory(LowerRVV)
add_subdirectory(LowerDAP)
add_subdirectory(MatMulOptimization)
add_subdirectory(ConvOptimization)
add_subdirectory(LowerVectorExp)
3 changes: 3 additions & 0 deletions lib/Conversion/ConvOptimization/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build the ConvOptimization library from its single source file.
add_mlir_library(ConvOptimization ConvOptimize.cpp)
Loading

0 comments on commit 2737425

Please sign in to comment.