Revert D25607505: Add formulas and basic tests
Test Plan: revert-hammer

Differential Revision: D25607505 (pytorch@70f5905)

Original commit changeset: fe2315d58768

fbshipit-source-id: 519d7426a6f32f0db51c4f360e5d5a79dbaac99d
samestep authored and facebook-github-bot committed Apr 14, 2021
1 parent ed03a07 commit 817fd93
Showing 6 changed files with 17 additions and 680 deletions.
253 changes: 0 additions & 253 deletions test/test_autograd.py
@@ -21,9 +21,6 @@
# Autograd tests use double as the default dtype
torch.set_default_dtype(torch.double)

# TODO(alband) Remove this when this flag is not needed anymore
torch._C._set_forward_AD_enabled(True)

from torch import nn
from torch._six import inf, nan
from torch.autograd.function import once_differentiable
@@ -6974,13 +6971,6 @@ def foo(a):
self.assertEqual(vhp, torch.mm(v.unsqueeze(0), hes).squeeze(0))

class TestAutogradForwardMode(TestCase):
def tearDown(self):
# Ensure that a failing test won't make others fail
while fwAD._current_level >= 0:
fwAD.exit_dual_level()

super().tearDown()

def test_forward_level_cleanup(self):
def get_tensor_and_weak_ref():
# Create a new Tensor and weak reference
@@ -7026,249 +7016,6 @@ def test_size_check(self):

dual = fwAD.make_dual(foo, tangent[1:])

# The following test functions want to ensure all the following behaviors:
# - Ensure that default level system in the python binding works
# - Ensure that only level 0 exists and nesting is properly disabled
# - Ensure that printing works fine
# - Ensure that basic packing/unpacking works
# - Ensure that advanced packing/unpacking works
# - For memory / version counter share
# - For backward AD (regular ops)
# - Ensure that view + inplace for both modes work fine
# - Ensure we do proper cleanup on exit of a level

def test_default_level(self):
foo = torch.rand(2)
bar = torch.rand(2)

with fwAD.dual_level():
baz = fwAD.make_dual(foo, bar)
baz_primal, baz_tangent = fwAD.unpack_dual(baz)
self.assertEqual(baz_primal, foo)
# We don't actually need to enforce that these two are the exact same python
# object, feel free to relax in the future
self.assertIs(baz_tangent, bar)

baz_primal, baz_tangent = fwAD.unpack_dual(baz)
self.assertEqual(baz_primal, foo)
self.assertEqual(baz_tangent, None)

def test_nested_level(self):
with fwAD.dual_level() as level:
# For now only level 0 exists
self.assertEqual(level, 0)

with fwAD.dual_level():
with self.assertRaisesRegex(RuntimeError, "Nested forward mode AD is not supported at the moment"):
nest_level = fwAD.enter_dual_level()

def test_print(self):
with fwAD.dual_level() as level:
a = torch.rand(3)
self.assertFalse("tangent=" in str(a))

b = fwAD.make_dual(a, torch.rand(3))
self.assertFalse("tangent=" in str(a))
self.assertTrue("tangent=" in str(b))

b_primal, b_tangent = fwAD.unpack_dual(b)
self.assertFalse("tangent=" in str(b_primal))
self.assertFalse("tangent=" in str(b_tangent))

def test_basic_packing_unpacking(self):
foo = torch.rand(2)
bar = torch.rand(2)

with fwAD.dual_level():
baz = fwAD.make_dual(foo, bar)
baz_primal, baz_tangent = fwAD.unpack_dual(baz)
self.assertEqual(baz_primal, foo)
self.assertIs(baz_tangent, bar)

# Check that packing/unpacking did not change the input
foo_primal, foo_tangent = fwAD.unpack_dual(foo)
self.assertEqual(foo_primal, foo)
self.assertIsNone(foo_tangent)

def test_advanced_packing_unpacking(self):
foo = torch.rand(2)
bar = torch.ones(2)

# Memory and version counter check
with fwAD.dual_level():
dual = fwAD.make_dual(foo, bar)

# Ensure that they are sharing memory and version counter
self.assertEqual(dual.storage().data_ptr(), foo.storage().data_ptr())

# Ensure we properly share the version counter
self.assertEqual(foo._version, dual._version)
foo.add_(1)
self.assertEqual(foo._version, dual._version)

# Unpacking should only create aliases as well
dual_primal, dual_tangent = fwAD.unpack_dual(dual)
self.assertEqual(dual_primal.storage().data_ptr(), foo.storage().data_ptr())
self.assertEqual(dual_tangent.storage().data_ptr(), bar.storage().data_ptr())
# And the tangent is actually re-used as-is so it is still the same Tensor
self.assertIs(dual_tangent, bar)

# Ensure we properly share the version counter
self.assertEqual(foo._version, dual_primal._version)
foo.add_(1)
self.assertEqual(foo._version, dual_primal._version)
self.assertEqual(bar._version, dual_tangent._version)
bar.add_(1)
self.assertEqual(bar._version, dual_tangent._version)

# backward mode check
with fwAD.dual_level():
foo.requires_grad_()
bar.requires_grad_()

# Check that backward gradients properly propagate through packing/unpacking
dual = fwAD.make_dual(foo, bar)
p, t = fwAD.unpack_dual(dual)

gfoo, gbar = torch.autograd.grad(p.sum(), (foo, bar), retain_graph=True, allow_unused=True)
self.assertEqual(gfoo, torch.ones_like(foo))
self.assertIsNone(gbar)

gfoo, gbar = torch.autograd.grad(t.sum(), (foo, bar), retain_graph=True, allow_unused=True)
self.assertIsNone(gfoo)
self.assertEqual(gbar, torch.ones_like(bar))

# Check that forward gradients are not impacted by detach
detached_dual = dual.detach()
out = detached_dual * 2
p, t = fwAD.unpack_dual(out)
self.assertFalse(p.requires_grad)
self.assertFalse(t.requires_grad)
self.assertEqual(p, foo * 2)
self.assertEqual(t, bar * 2)

# Check that forward gradients are not impacted by no_grad
with torch.no_grad():
out = dual * 3
p, t = fwAD.unpack_dual(out)
self.assertFalse(p.requires_grad)
self.assertFalse(t.requires_grad)
self.assertEqual(p, foo * 3)
self.assertEqual(t, bar * 3)

# Check that forward gradients are not impacted by inplace detach
dual = dual.clone()
dual.detach_()
out = dual * 2
p, t = fwAD.unpack_dual(out)
self.assertFalse(p.requires_grad)
self.assertFalse(t.requires_grad)
self.assertEqual(p, foo * 2)
self.assertEqual(t, bar * 2)

def test_view_inplace_non_differentiable_views(self):
original_foo = torch.rand(2)
original_bar = torch.ones(2)

# Do clones to be able to compare the values updated inplace
# with the original content of these Tensors
foo = original_foo.clone()
bar = original_bar.clone()

with fwAD.dual_level():
# Note that in this test, we use "update" to mean computing the right tangent for the dual
# All the inplace operations here are expected to update the primal value of the Tensors but
# not always their tangents.
# Also, all mentions of "non differentiable view" here mean non forward differentiable view
# unless specified otherwise.
# See note [Forward Grad View/inplace] for more details on how these views work.

# Check that inplace ops do not update non-differentiable views
# Non differentiable view
dual = fwAD.make_dual(foo, bar)
dual *= 2
# Check that non differentiable view's tangent was not updated
self.assertIsNone(fwAD.unpack_dual(foo)[1])
# Check that the computed result is correct
self.assertEqual(bar, original_bar * 2)
self.assertEqual(fwAD.unpack_dual(dual)[1], original_bar * 2)
self.assertEqual(foo, original_foo * 2)
self.assertEqual(fwAD.unpack_dual(dual)[0], original_foo * 2)
# Other non differentiable view
dual_primal, dual_tangent = fwAD.unpack_dual(dual)
self.assertIsNone(fwAD.unpack_dual(dual_primal)[1])
self.assertIsNone(fwAD.unpack_dual(dual_tangent)[1])
dual_primal *= 2
# Ensure dual's tangent did not change
self.assertEqual(fwAD.unpack_dual(dual)[0], original_foo * 4)
self.assertEqual(fwAD.unpack_dual(dual)[1], original_bar * 2)
dual_tangent *= 2
# Ensure dual's primal did not change
self.assertEqual(fwAD.unpack_dual(dual)[0], original_foo * 4)
self.assertEqual(fwAD.unpack_dual(dual)[1], original_bar * 4)


def test_view_inplace_differentiable_views(self):
original_foo = torch.rand(2)
original_bar = torch.ones(2)

# Do clones to be able to compare the values updated inplace
# with the original content of these Tensors
foo = original_foo.clone()
bar = original_bar.clone()

with fwAD.dual_level():
# Check that inplace ops do update differentiable view but stop at non differentiable ones
# A non differentiable view
dual = fwAD.make_dual(foo, bar)
# A differentiable view
view = dual.narrow(0, 0, 1)
view *= 2
# Check that non differentiable view was not updated
self.assertIsNone(fwAD.unpack_dual(foo)[1])
# Check that differentiable view was updated
self.assertEqual(fwAD.unpack_dual(dual)[1], torch.tensor([2., 1.]))
self.assertEqual(fwAD.unpack_dual(view)[1], torch.tensor([2.]))

# Check that we track differentiable view even for Tensors that are not dual
baz = torch.rand(2)
baz += dual
self.assertEqual(fwAD.unpack_dual(baz)[1], fwAD.unpack_dual(dual)[1])
# Updates through a view should be tracked as well
baz = torch.rand(2)
baz[0] = dual[0]
self.assertEqual(fwAD.unpack_dual(baz)[1][0], fwAD.unpack_dual(dual)[1][0])
# Unused values get a gradient of 0
self.assertEqual(fwAD.unpack_dual(baz)[1][1], 0.)

# Check that backward non-differentiable views don't prevent gradient update
baz = torch.rand(2)
view = baz.detach()
view += dual
self.assertEqual(fwAD.unpack_dual(baz)[1], fwAD.unpack_dual(dual)[1])

def test_grad_cleanup(self):
foo = torch.rand(2)
bar = torch.rand(2)
baz = torch.rand(2)

with fwAD.dual_level():
dual = fwAD.make_dual(foo, bar)
self.assertIsNone(fwAD.unpack_dual(foo)[1])
self.assertIs(fwAD.unpack_dual(dual)[1], bar)

self.assertIsNone(fwAD.unpack_dual(dual)[1])

with fwAD.dual_level():
self.assertIsNone(fwAD.unpack_dual(foo)[1])
new_dual = fwAD.make_dual(foo, baz)

dual_primal, dual_tangent = fwAD.unpack_dual(dual)
new_dual_primal, new_dual_tangent = fwAD.unpack_dual(new_dual)
self.assertEqual(dual_primal, new_dual_primal)
self.assertIsNone(dual_tangent)
self.assertEqual(new_dual_tangent, baz)


# Generic device type autograd tests.
class TestAutogradDeviceType(TestCase):
34 changes: 0 additions & 34 deletions tools/autograd/derivatives.yaml
@@ -97,24 +97,6 @@
# like 'grad_output', and (2) the gradient to multiply with is always
# called 'grad' (even though it really is a grad-grad).
#
# You can also add a forward derivative definition by defining a formula for
# a returned value (in general "result" if the name is not specified). This
# formula works the same way as the backward one and advanced implementations
# should also be placed in the FunctionsManual file.
# This formula should compute a single Jacobian vector product using the (primal)
# value of the argument "foo_p", its forward grad "foo_t" and the result of the
# function as "result".
# Note that the forward derivative can be automatically generated in two cases:
# - if your function is linear (NOT affine or multi-linear), then you can
# specify so by just using the string "auto_linear" for the formula.
# - if your function is applied element wise (and has a single input), you
# can specify so by just using the string "auto_element_wise" for the formula.
#
# Note that to avoid unpacking overhead, functions taking TensorList as inputs
# will always have their forward grad formula called. This function is responsible
# for checking whether any computation is needed and should return an undefined Tensor when
# there is nothing to do. You can check "cat_forward" for a full example.
#
# NB: There are a number of gradient definitions in here which are bogus
# (implemented using zeros_like). These gradients are (hopefully) not
# used by our frontend. You MUST check the frontend code; search for
@@ -178,20 +160,13 @@
# in Declarations.yaml
- name: abs(Tensor self) -> Tensor
self: grad * self.sgn()
result: auto_element_wise

- name: acos(Tensor self) -> Tensor
self: grad * -((-self * self + 1).rsqrt()).conj()

- name: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
self: handle_r_to_c(self.scalar_type(), grad)
other: handle_r_to_c(other.scalar_type(), maybe_multiply(grad, alpha.conj()))
result: self_t + maybe_multiply(other_t, alpha)

- name: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
self: handle_r_to_c(self.scalar_type(), grad)
other: handle_r_to_c(other.scalar_type(), maybe_multiply(grad, alpha.conj()))
result: self_t.add_(maybe_multiply(other_t, alpha))

- name: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
self: handle_r_to_c(self.scalar_type(), grad)
@@ -341,7 +316,6 @@

- name: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
self: grad
result: auto_linear

- name: _coalesce(Tensor self) -> Tensor
self: grad
@@ -816,12 +790,6 @@
- name: mul.Tensor(Tensor self, Tensor other) -> Tensor
self: mul_tensor_backward(grad, other, self.scalar_type())
other: mul_tensor_backward(grad, self, other.scalar_type())
result: other_t * self_p.conj() + self_t * other_p.conj()

- name: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
self: mul_tensor_backward(grad, other, self.scalar_type())
other: mul_tensor_backward(grad, self, other.scalar_type())
result: self_t.mul_(other_p.conj()).add_(other_t * (self_p / other_p).conj())

- name: mul.Scalar(Tensor self, Scalar other) -> Tensor
self: mul_tensor_backward(grad, at::scalar_to_tensor(other), self.scalar_type())
@@ -1010,7 +978,6 @@

- name: select.int(Tensor(a) self, int dim, int index) -> Tensor(a)
self: select_backward(grad, self.sizes(), dim, index)
result: auto_linear

- name: sigmoid(Tensor self) -> Tensor
self: sigmoid_backward(grad, result)
@@ -1035,7 +1002,6 @@

- name: slice.Tensor(Tensor(a) self, int dim=0, int? start=0, int? end=9223372036854775807, int step=1) -> Tensor(a)
self: slice_backward_wrapper(grad, self.sizes(), dim, start, end, step)
result: auto_linear

- name: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
self: slogdet_backward(grad, self, sign, logabsdet)
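
For context, the two entries below restate (verbatim from the removed lines above in this diff) how the forward-derivative syntax described in the comment block at the top of derivatives.yaml works: the result: field holds the Jacobian-vector product written in terms of primals (self_p, other_p) and tangents (self_t, other_t), or one of the auto_* shorthands. This is an illustrative sketch only; the inline comments are added here and are not part of the original file.

- name: abs(Tensor self) -> Tensor
  self: grad * self.sgn()                          # backward formula, multiplies the incoming grad
  result: auto_element_wise                        # forward formula generated automatically for element-wise ops

- name: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
  self: handle_r_to_c(self.scalar_type(), grad)
  other: handle_r_to_c(other.scalar_type(), maybe_multiply(grad, alpha.conj()))
  result: self_t + maybe_multiply(other_t, alpha)  # explicit JVP: tangent of (self + alpha * other)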