Run test_torchinductor_opinfo CPU tests if triton not installed (pytorch#88934)

These tests are currently not run because normal CI workers don't have
triton installed.

Pull Request resolved: pytorch#88934
Approved by: https://github.com/ngimel
peterbell10 authored and pytorchmergebot committed Nov 14, 2022
1 parent 072920c commit 8371bb8
Showing 3 changed files with 43 additions and 28 deletions.
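In short, the commit moves the HAS_CPU/HAS_CUDA capability probes into a shared torch/testing/_internal/inductor_utils.py module and gates each device variant with a skip decorator instead of aborting the whole run when triton is missing. A minimal sketch of the before/after pattern, assuming simplified test bodies (ExampleInductorTest and the test names are illustrative, not from the diff; the skip messages mirror those in the diff below):

import unittest

from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

class ExampleInductorTest(unittest.TestCase):
    # Before this commit the whole module refused to run unless triton was
    # importable, so CPU-only CI workers never executed any of these tests.
    @unittest.skipIf(not HAS_CPU, "Skipped! Supported CPU compiler not found")
    def test_cpu_path(self):
        pass

    # Only the CUDA variant actually needs triton.
    @unittest.skipIf(not HAS_CUDA, "Skipped! Triton not found")
    def test_cuda_path(self):
        pass

if __name__ == "__main__":
    unittest.main()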
20 changes: 2 additions & 18 deletions test/inductor/test_torchinductor.py
@@ -20,7 +20,6 @@
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn import functional as F
from torch.testing._internal.common_utils import (
    IS_FBCODE,
    TEST_WITH_ASAN,
    TEST_WITH_ROCM,
    TestCase as TorchTestCase,
@@ -41,7 +40,7 @@
from torch._inductor.compile_fx import compile_fx, complex_memory_overlap
from torch._inductor.ir import IndexingDiv, ModularIndexing
from torch._inductor.sizevars import SizeVarAllocator
from torch._inductor.utils import has_torchvision_roi_align, has_triton, timed
from torch._inductor.utils import has_torchvision_roi_align, timed

# This will only pass on pytorch builds newer than roughly 5/15/2022
assert get_decompositions([torch.ops.aten.trace])
@@ -53,25 +52,10 @@
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")

HAS_CPU = False
try:
    from subprocess import CalledProcessError

    from torch._inductor.codecache import CppCodeCache

    CppCodeCache.load("")
    HAS_CPU = not IS_FBCODE
except (
    CalledProcessError,
    OSError,
    torch._inductor.exc.InvalidCxxCompiler,
    torch._inductor.exc.CppCompileError,
):
    pass
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

aten = torch.ops.aten

HAS_CUDA = has_triton()
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")

torch._inductor.config.triton.autotune = False # too slow
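The requires_cuda helper kept in this file is a small functools idiom worth spelling out: functools.partial pre-binds skipIf's condition and reason, turning it into a reusable decorator factory. A standalone sketch under that assumption (the hard-coded HAS_CUDA stub stands in for the probe in inductor_utils):

import functools
import unittest

HAS_CUDA = False  # stand-in for the triton probe in inductor_utils

# partial pre-binds skipIf's condition and reason; calling requires_cuda()
# then returns exactly the decorator unittest.skipIf(...) would.
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")

class Example(unittest.TestCase):
    @requires_cuda()
    def test_needs_cuda(self):
        self.assertTrue(HAS_CUDA)

if __name__ == "__main__":
    unittest.main()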
28 changes: 18 additions & 10 deletions test/inductor/test_torchinductor_opinfo.py
@@ -16,20 +16,22 @@
    onlyNativeDeviceTypes,
    OpDTypes,
    ops,
    skipCPUIf,
    skipCUDAIf,
)
from torch.testing._internal.common_methods_invocations import op_db
from torch.testing._internal.common_utils import (
    dtype_abbrs,
    run_tests,
    skipCUDAMemoryLeakCheckIf,
    skipIfCrossRef,
    skipIfTorchDynamo,
    suppress_warnings,
    TEST_WITH_ROCM,
    TestCase,
)
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

try:
from torch._inductor.utils import has_triton

try:
    from .test_torchinductor import check_model, check_model_cuda
except ImportError:
@@ -120,6 +122,7 @@ def process(device_type):

inductor_skips["cpu"] = {
"linalg.ldl_solve": {b8, f16, f32, f64, i32, i64}, # segfault
"linalg.ldl_factor": {f32, f64}, # flaky
"__rdiv__": {b8, f16, f32, f64, i32, i64}, # flaky
}

@@ -169,6 +172,8 @@ def process(device_type):
"argwhere": {b8, f16, f32, f64, i32, i64},
"bernoulli": {f32, f64},
"bincount": {i32, i64},
"cdouble": {b8, f16, f32, f64, i32, i64},
"cfloat": {b8, f16, f32, f64, i32, i64},
"chalf": {b8, f16, f32, f64, i32, i64},
"cholesky": {f32, f64},
"combinations": {b8, f16, f32, f64, i32, i64},
Expand Down Expand Up @@ -209,11 +214,10 @@ def process(device_type):
"linalg.lstsq.grad_oriented": {f32, f64},
"linalg.matrix_rank": {f32, f64},
"linalg.matrix_rank.hermitian": {f32, f64},
"linalg.lu_solve": {f32, f64},
"lu_solve": {f32, f64},
"lu_unpack": {f32, f64},
"linalg.pinv.singular": {f32, f64},
"logdet": {f32, f64},
"masked.norm": {f16},
"masked.normalize": {f16},
"masked_fill": {f16},
"masked_scatter": {f16, f32, f64},
"masked_select": {b8, f16, f32, f64, i32, i64},
@@ -225,8 +229,8 @@
"nan_to_num": {f16},
"nanquantile": {f32, f64},
"nn.functional.avg_pool1d": {i64},
"nn.functional.avg_pool2d": {i64},
"nn.functional.adaptive_avg_pool2d": {f16},
"nn.functional.avg_pool2d": {i64, f64},
"nn.functional.adaptive_avg_pool2d": {f16, f64},
"nn.functional.ctc_loss": {f32, f64},
"nn.functional.gaussian_nll_loss": {f32, f64},
"nn.functional.gelu": {f64},
@@ -243,6 +247,7 @@
"quantile": {f32, f64},
"rand_like": {f16, f32, f64},
"randint_like": {f16, f32, f64, i32, i64},
"randint": {f16, f32, f64, i32, i64},
"randn_like": {f16, f32, f64},
"repeat_interleave": {b8, f16, f32, f64, i32, i64},
"scatter_add": {f16},
@@ -455,6 +460,10 @@ class TestInductorOpInfo(TestCase):
    @skipCUDAMemoryLeakCheckIf(
        True
    )  # inductor kernels failing this test intermittently
    @skipCUDAIf(not HAS_CUDA, "Skipped! Triton not found")
    @skipCPUIf(not HAS_CPU, "Skipped! Supported CPU compiler not found")
    @skipIfTorchDynamo("Test uses dynamo already")
    @skipIfCrossRef
    @_ops(op_db[START:END])
    @patch("torch._dynamo.config.raise_on_unsafe_aot_autograd", True)
    def test_comprehensive(self, device, dtype, op):
@@ -599,5 +608,4 @@ def fn(*args, **kwargs):
instantiate_device_type_tests(TestInductorOpInfo, globals())

if __name__ == "__main__":
    if has_triton() and not TEST_WITH_ROCM:
        run_tests()
    run_tests()
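The skipCPUIf/skipCUDAIf decorators above work per device because instantiate_device_type_tests generates a separate CPU and CUDA test class, and each decorator applies only to the matching generated class. A small self-contained sketch of that mechanism (ExampleTest and test_simple are made-up names; the imports are real):

from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests,
    skipCPUIf,
    skipCUDAIf,
)
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

class ExampleTest(TestCase):
    # Only the generated CPU copy honors skipCPUIf, and only the CUDA copy
    # honors skipCUDAIf, so a missing triton install no longer hides the
    # CPU run.
    @skipCUDAIf(not HAS_CUDA, "Skipped! Triton not found")
    @skipCPUIf(not HAS_CPU, "Skipped! Supported CPU compiler not found")
    def test_simple(self, device):
        pass

# Generates ExampleTestCPU.test_simple_cpu and ExampleTestCUDA.test_simple_cuda.
instantiate_device_type_tests(ExampleTest, globals())

if __name__ == "__main__":
    run_tests()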
23 changes: 23 additions & 0 deletions torch/testing/_internal/inductor_utils.py
@@ -0,0 +1,23 @@
from subprocess import CalledProcessError

from torch._inductor.codecache import CppCodeCache
from torch._inductor.utils import has_triton
from torch.testing._internal.common_utils import (
    IS_FBCODE,
    TEST_WITH_ROCM,
)
import torch

HAS_CPU = False
try:
    # Probe for a usable C++ toolchain by compiling an empty source file.
    CppCodeCache.load("")
    HAS_CPU = not IS_FBCODE
except (
    CalledProcessError,
    OSError,
    torch._inductor.exc.InvalidCxxCompiler,
    torch._inductor.exc.CppCompileError,
):
    pass

# CUDA inductor tests require triton and are not run on ROCm.
HAS_CUDA = has_triton() and not TEST_WITH_ROCM
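With the probes centralized in one module, any test file can consume the same flags. A throwaway snippet (not part of the commit) showing what the module exports on a given machine:

# Quick check of what the shared capability probes report locally.
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

print(f"inductor CPU tests enabled (C++ compiler found): {HAS_CPU}")
print(f"inductor CUDA tests enabled (triton, non-ROCm):  {HAS_CUDA}")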
