forked from buddy-compiler/buddy-mlir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconv2d-buddy-template
82 lines (69 loc) · 2.95 KB
/
conv2d-buddy-template
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// Maps (d0, d1) to d0 + d1 - 1. @main applies it to (output size, filter
// size) to obtain the side length of the image buffer.
#map0 = affine_map<(d0, d1) -> (d0 + d1 - 1)>
module {
// External (bodiless) function declarations.
// NOTE(review): presumably resolved against a runtime support library when the
// program is executed — confirm which library provides them.
//
// @printMemrefF32 takes an unranked f32 memref; in this file it is referenced
// only from a commented-out call in @main.
func.func private @printMemrefF32(memref<*xf32>)
// @print_flops takes an f64; it is declared but not called anywhere in this file.
func.func private @print_flops(f64)
// @rtclock returns an f64; @main calls it before and after the timed loop and
// subtracts the two values.
func.func private @rtclock() -> f64
// Allocates a dynamically shaped 2-D f32 memref and sets every element to the
// same value.
//
//   %arg0 - size of the first dimension
//   %arg1 - size of the second dimension
//   %arg2 - value stored into every element
//
// Returns the freshly allocated memref. Nothing in this function frees it;
// @main releases its buffers with memref.dealloc.
func.func @alloc_2d_filled_f32(%arg0: index, %arg1: index, %arg2: f32) -> memref<?x?xf32> {
  %buf = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  // Loop bounds: iterate [0, %arg0) x [0, %arg1) with unit stride.
  %lb = arith.constant 0 : index
  %step = arith.constant 1 : index
  scf.for %row = %lb to %arg0 step %step {
    scf.for %col = %lb to %arg1 step %step {
      memref.store %arg2, %buf[%row, %col] : memref<?x?xf32>
    }
  }
  return %buf : memref<?x?xf32>
}
// Thin wrapper around a single linalg.conv_2d on dynamically shaped f32
// memrefs.
//
//   %arg0 - first input operand  (@main passes the image here)
//   %arg1 - second input operand (@main passes the filter here)
//   %arg2 - output operand, written in place
//
// Kept as its own function so the convolution is the only operation inside
// the call that @main times.
func.func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
  linalg.conv_2d
    ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>)
    outs(%arg2 : memref<?x?xf32>)
  return
}
// Benchmark entry point: allocates square filter / image / output buffers,
// calls @conv_2d a fixed number of times between two @rtclock readings, and
// prints (operation count / elapsed time) as an f64.
//
// TEMPLATE_FILTER and TEMPLATE_OUTPUT are placeholders, not valid MLIR tokens.
// NOTE(review): presumably a driver script substitutes integer literals for
// them before this file is compiled — confirm against the build scripts.
func.func @main() {
  // Fill values: 1.0 for the image and the filter, 0.0 for the output.
  %cst = arith.constant 1.000000e+00 : f32
  %cst_0 = arith.constant 0.000000e+00 : f32

  // Side lengths of the (square) filter and output buffers.
  %current_filter = arith.constant TEMPLATE_FILTER : index
  %current_output = arith.constant TEMPLATE_OUTPUT : index
  // Image side length = output + filter - 1 (see #map0).
  %current_image = affine.apply #map0(%current_output, %current_filter)

  // Filter.
  %filter = call @alloc_2d_filled_f32(%current_filter, %current_filter, %cst) : (index, index, f32) -> memref<?x?xf32>
  // Image.
  %image = call @alloc_2d_filled_f32(%current_image, %current_image, %cst) : (index, index, f32) -> memref<?x?xf32>
  // Output.
  %output = call @alloc_2d_filled_f32(%current_output, %current_output, %cst_0) : (index, index, f32) -> memref<?x?xf32>

  // Number of timed repetitions.
  %reps = arith.constant 20 : index

  // Record start time.
  %t_start = call @rtclock() : () -> f64
  // Run the convolution %reps times; the same output buffer is reused for
  // every call.
  affine.for %arg0 = 0 to %reps {
    func.call @conv_2d(%image, %filter, %output) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> ()
  }
  // Record end time.
  %t_end = call @rtclock() : () -> f64
  // Total running time of all repetitions.
  // NOTE(review): the unit is whatever @rtclock returns (presumably seconds) — confirm.
  %t = arith.subf %t_end, %t_start : f64

  // Debug aid: uncomment both lines to dump the output buffer.
  // %print_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
  // call @printMemrefF32(%print_output) : (memref<*xf32>) -> ()

  // Operation count for one call: 2 * [filter size]^2 * [output size]^2.
  %c2 = arith.constant 2 : index
  %flops1 = arith.muli %current_output, %current_output : index
  %flops2 = arith.muli %current_filter, %current_filter : index
  %flops3 = arith.muli %c2, %flops2 : index
  %flops4 = arith.muli %flops1, %flops3 : index

  // Scale by the repetition count, convert index -> i64 -> f64, and divide by
  // the elapsed time.
  %num_flops = arith.muli %reps, %flops4 : index
  %num_flops_i = arith.index_cast %num_flops : index to i64
  %num_flops_f = arith.sitofp %num_flops_i : i64 to f64
  %flops = arith.divf %num_flops_f, %t : f64

  // Print the FLOPS.
  vector.print %flops : f64

  // Release the three buffers allocated above.
  memref.dealloc %image : memref<?x?xf32>
  memref.dealloc %filter : memref<?x?xf32>
  memref.dealloc %output : memref<?x?xf32>
  return
}
}