Fix Inductor bench BC change (pytorch#638)

* Fix Inductor bench BC change
* update
* push
* push
yiliu30 · Aug 8, 2024 · 0a3b328
1 parent 34b24f7
commit 0a3b328
Showing 1 changed file with 13 additions and 4 deletions.
diff --git a/torchao/quantization/autoquant.py b/torchao/quantization/autoquant.py
@@ -15,14 +15,19 @@
 from .quant_primitives import (
     safe_int_mm,
 )
-from torchao.utils import TORCH_VERSION_AFTER_2_3
+from torchao.utils import TORCH_VERSION_AFTER_2_3, TORCH_VERSION_AFTER_2_5
 from torchao.quantization.utils import quantize_activation_per_token_absmax
 
 import torch.nn.functional as F
+
 try:
     from torch._inductor.utils import do_bench
-except:
-    from torch._inductor.runtime.runtime_utils import do_bench
+except ImportError:
+    try:
+        from torch._inductor.runtime.runtime_utils import do_bench
+    except ImportError:
+        from torch._inductor.runtime.benchmarking import benchmarker
+        do_bench = benchmarker.benchmark
 
 __all__ = [
     "AutoQuantizableLinearWeight",
@@ -227,9 +232,13 @@ def do_autoquant_bench(op, *args, **kwargs):
         graph = torch.cuda.CUDAGraph()
         with torch.cuda.graph(graph, stream=stream):
             op(*args, **kwargs)
-        if TORCH_VERSION_AFTER_2_3:
+        if TORCH_VERSION_AFTER_2_3 and not TORCH_VERSION_AFTER_2_5:
             from torch._inductor.runtime.runtime_utils import do_bench_gpu
             res = do_bench_gpu(lambda: graph.replay(), warmup=warmup, rep=rep, return_mode="median")
+        elif TORCH_VERSION_AFTER_2_5 and torch.cuda.is_available():
+            from torch._inductor.runtime.benchmarking import benchmarker
+            do_bench_gpu = benchmarker.benchmark_gpu
+            res = do_bench_gpu(lambda: graph.replay(), warmup=warmup, rep=rep, return_mode="median")
         else:
             res = do_bench(lambda: graph.replay(), warmup=warmup, rep=rep, return_mode="median")
     return res