docs/master/notes/autograd.html



<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Autograd mechanics &mdash; PyTorch master documentation</title>
  

    <link rel="canonical" href="https://pytorch.org/docs/stable/notes/autograd.html"/>
  

  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <!-- <link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/jit.css" type="text/css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="../_static/katex-math.css" type="text/css" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="Broadcasting semantics" href="broadcasting.html" />
    <link rel="prev" title="Automatic Mixed Precision examples" href="amp_examples.html" /> 

  
  <script src="../_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <div class="ecosystem-dropdown">
              <a id="dropdownMenuButton" data-toggle="ecosystem-dropdown">
                Ecosystem
              </a>
              <div class="ecosystem-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/hub">
                  <span class=dropdown-title>Models (Beta)</span>
                  <p>Discover, publish, and reuse pre-trained models</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem">
                  <span class="dropdown-title">Tools &amp; Libraries</span>
                  <p>Explore the ecosystem of tools and libraries</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li class="active">
            <a href="https://pytorch.org/docs/stable/index.html">Docs</a>
          </li>

          <li>
            <div class="resources-dropdown">
              <a id="resourcesDropdownButton" data-toggle="resources-dropdown">
                Resources
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/resources">
                  <span class=dropdown-title>Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/features">
                  <span class=dropdown-title>About</span>
                  <p>Learn about PyTorch’s features and capabilities</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <a href="https://github.com/pytorch/pytorch">Github</a>
          </li>
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
    </div>

  </div>
</div>


<body class="pytorch-body">

   
    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">
            

                <div class="version">
                  <a href='http://pytorch.org/docs/versions.html'>1.6.0a0+f2f8027 &#x25BC;</a>
                </div>
              
            
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search Docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

            
          </div>

          
<div>
  <a style="color:#F05732" href="https://pytorch.org/docs/stable/notes/autograd.html">
    You are viewing unstable developer preview docs.
    Click here to view docs for latest stable release.
  </a>
</div>

            
              <p class="caption"><span class="caption-text">Notes</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="amp_examples.html">Automatic Mixed Precision examples</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Autograd mechanics</a></li>
<li class="toctree-l1"><a class="reference internal" href="broadcasting.html">Broadcasting semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="cpu_threading_torchscript_inference.html">CPU threading and TorchScript inference</a></li>
<li class="toctree-l1"><a class="reference internal" href="cuda.html">CUDA semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="ddp.html">Distributed Data Parallel</a></li>
<li class="toctree-l1"><a class="reference internal" href="extending.html">Extending PyTorch</a></li>
<li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="large_scale_deployments.html">Features for large-scale deployments</a></li>
<li class="toctree-l1"><a class="reference internal" href="multiprocessing.html">Multiprocessing best practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="randomness.html">Reproducibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="serialization.html">Serialization semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="windows.html">Windows FAQ</a></li>
</ul>
<p class="caption"><span class="caption-text">Language Bindings</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/cppdocs/">C++ API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../packages.html">Javadoc</a></li>
</ul>
<p class="caption"><span class="caption-text">Python API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../torch.html">torch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../nn.html">torch.nn</a></li>
<li class="toctree-l1"><a class="reference internal" href="../nn.functional.html">torch.nn.functional</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensors.html">torch.Tensor</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensor_attributes.html">Tensor Attributes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensor_view.html">Tensor Views</a></li>
<li class="toctree-l1"><a class="reference internal" href="../autograd.html">torch.autograd</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cuda.html">torch.cuda</a></li>
<li class="toctree-l1"><a class="reference internal" href="../amp.html">torch.cuda.amp</a></li>
<li class="toctree-l1"><a class="reference internal" href="../distributed.html">torch.distributed</a></li>
<li class="toctree-l1"><a class="reference internal" href="../distributions.html">torch.distributions</a></li>
<li class="toctree-l1"><a class="reference internal" href="../hub.html">torch.hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="../jit.html">torch.jit</a></li>
<li class="toctree-l1"><a class="reference internal" href="../nn.init.html">torch.nn.init</a></li>
<li class="toctree-l1"><a class="reference internal" href="../onnx.html">torch.onnx</a></li>
<li class="toctree-l1"><a class="reference internal" href="../optim.html">torch.optim</a></li>
<li class="toctree-l1"><a class="reference internal" href="../quantization.html">Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../rpc.html">Distributed RPC Framework</a></li>
<li class="toctree-l1"><a class="reference internal" href="../random.html">torch.random</a></li>
<li class="toctree-l1"><a class="reference internal" href="../sparse.html">torch.sparse</a></li>
<li class="toctree-l1"><a class="reference internal" href="../storage.html">torch.Storage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../bottleneck.html">torch.utils.bottleneck</a></li>
<li class="toctree-l1"><a class="reference internal" href="../checkpoint.html">torch.utils.checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cpp_extension.html">torch.utils.cpp_extension</a></li>
<li class="toctree-l1"><a class="reference internal" href="../data.html">torch.utils.data</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dlpack.html">torch.utils.dlpack</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model_zoo.html">torch.utils.model_zoo</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tensorboard.html">torch.utils.tensorboard</a></li>
<li class="toctree-l1"><a class="reference internal" href="../type_info.html">Type Info</a></li>
<li class="toctree-l1"><a class="reference internal" href="../named_tensor.html">Named Tensors</a></li>
<li class="toctree-l1"><a class="reference internal" href="../name_inference.html">Named Tensors operator coverage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../__config__.html">torch.__config__</a></li>
</ul>
<p class="caption"><span class="caption-text">Libraries</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="http://pytorch.org/xla/">PyTorch on XLA Devices</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/elastic/">PyTorch Elastic (torchelastic)</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/audio">torchaudio</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/text">torchtext</a></li>
<li class="toctree-l1"><a class="reference internal" href="../torchvision/index.html">torchvision</a></li>
</ul>
<p class="caption"><span class="caption-text">Community</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../community/contribution_guide.html">PyTorch Contribution Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../community/governance.html">PyTorch Governance</a></li>
<li class="toctree-l1"><a class="reference internal" href="../community/persons_of_interest.html">PyTorch Governance | Persons of Interest</a></li>
</ul>

            
        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">
          

<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">
    
      <li>
        <a href="../index.html">
          
            Docs
          
        </a> &gt;
      </li>

        
      <li>Autograd mechanics</li>
    
    
      <li class="pytorch-breadcrumbs-aside">
        
            
            <a href="../_sources/notes/autograd.rst.txt" rel="nofollow"><img src="../_static/images/view-page-source-icon.svg" alt="View page source"></a>
          
        
      </li>
    
  </ul>

  
</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">

        
          <div class="rst-content">
          
            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
              
  <div class="section" id="autograd-mechanics">
<span id="id1"></span><h1>Autograd mechanics<a class="headerlink" href="#autograd-mechanics" title="Permalink to this headline">¶</a></h1>
<p>This note will present an overview of how autograd works and records the
operations. It’s not strictly necessary to understand all this, but we recommend
getting familiar with it, as it will help you write more efficient, cleaner
programs, and can aid you in debugging.</p>
<div class="section" id="excluding-subgraphs-from-backward">
<span id="excluding-subgraphs"></span><h2>Excluding subgraphs from backward<a class="headerlink" href="#excluding-subgraphs-from-backward" title="Permalink to this headline">¶</a></h2>
<p>Every Tensor has a flag: <code class="xref py py-attr docutils literal notranslate"><span class="pre">requires_grad</span></code> that allows for fine grained
exclusion of subgraphs from gradient computation and can increase efficiency.</p>
<div class="section" id="requires-grad">
<span id="excluding-requires-grad"></span><h3><code class="docutils literal notranslate"><span class="pre">requires_grad</span></code><a class="headerlink" href="#requires-grad" title="Permalink to this headline">¶</a></h3>
<p>If there’s a single input to an operation that requires gradient, its output
will also require gradient. Conversely, the output won’t require gradient only
if none of the inputs require it. Backward computation is never performed in
subgraphs where no Tensor requires gradients.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>  <span class="c1"># requires_grad=False by default</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">y</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>  <span class="c1"># requires_grad=False by default</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">z</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">((</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">x</span> <span class="o">+</span> <span class="n">y</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">requires_grad</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">a</span> <span class="o">+</span> <span class="n">z</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span><span class="o">.</span><span class="n">requires_grad</span>
<span class="go">True</span>
</pre></div>
</div>
<p>This is especially useful when you want to freeze part of your model, or you
know in advance that you’re not going to use gradients w.r.t. some parameters.
For example if you want to finetune a pretrained CNN, it’s enough to switch the
<code class="xref py py-attr docutils literal notranslate"><span class="pre">requires_grad</span></code> flags in the frozen base, and no intermediate buffers will
be saved, until the computation gets to the last layer, where the affine
transform will use weights that require gradient, and the output of the network
will also require them.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">model</span> <span class="o">=</span> <span class="n">torchvision</span><span class="o">.</span><span class="n">models</span><span class="o">.</span><span class="n">resnet18</span><span class="p">(</span><span class="n">pretrained</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">for</span> <span class="n">param</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
    <span class="n">param</span><span class="o">.</span><span class="n">requires_grad</span> <span class="o">=</span> <span class="kc">False</span>
<span class="c1"># Replace the last fully-connected layer</span>
<span class="c1"># Parameters of newly constructed modules have requires_grad=True by default</span>
<span class="n">model</span><span class="o">.</span><span class="n">fc</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">512</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>

<span class="c1"># Optimize only the classifier</span>
<span class="n">optimizer</span> <span class="o">=</span> <span class="n">optim</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">fc</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="mf">1e-2</span><span class="p">,</span> <span class="n">momentum</span><span class="o">=</span><span class="mf">0.9</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="how-autograd-encodes-the-history">
<span id="how-autograd-encodes-history"></span><h2>How autograd encodes the history<a class="headerlink" href="#how-autograd-encodes-the-history" title="Permalink to this headline">¶</a></h2>
<p>Autograd is a reverse automatic differentiation system.  Conceptually,
autograd keeps a graph recording all of the operations that created
the data as you execute operations, giving you a directed acyclic graph
whose leaves are the input tensors and roots are the output tensors.
By tracing this graph from roots to leaves, you can automatically
compute the gradients using the chain rule.</p>
<p>Internally, autograd represents this graph as a graph of
<code class="xref py py-class docutils literal notranslate"><span class="pre">Function</span></code> objects (really expressions), which can be
<code class="xref py py-meth docutils literal notranslate"><span class="pre">apply()</span></code> ed to compute the result of
evaluating the graph.  When computing the forwards pass, autograd
simultaneously performs the requested computations and builds up a graph
representing the function that computes the gradient (the <code class="docutils literal notranslate"><span class="pre">.grad_fn</span></code>
attribute of each <a class="reference internal" href="../tensors.html#torch.Tensor" title="torch.Tensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">torch.Tensor</span></code></a> is an entry point into this graph).
When the forwards pass is completed, we evaluate this graph in the
backwards pass to compute the gradients.</p>
<p>An important thing to note is that the graph is recreated from scratch at every
iteration, and this is exactly what allows for using arbitrary Python control
flow statements, that can change the overall shape and size of the graph at
every iteration. You don’t have to encode all possible paths before you
launch the training - what you run is what you differentiate.</p>
</div>
<div class="section" id="in-place-operations-with-autograd">
<h2>In-place operations with autograd<a class="headerlink" href="#in-place-operations-with-autograd" title="Permalink to this headline">¶</a></h2>
<p>Supporting in-place operations in autograd is a hard matter, and we discourage
their use in most cases. Autograd’s aggressive buffer freeing and reuse makes
it very efficient and there are very few occasions when in-place operations
actually lower memory usage by any significant amount. Unless you’re operating
under heavy memory pressure, you might never need to use them.</p>
<p>There are two main reasons that limit the applicability of in-place operations:</p>
<ol class="arabic simple">
<li><p>In-place operations can potentially overwrite values required to compute
gradients.</p></li>
<li><p>Every in-place operation actually requires the implementation to rewrite the
computational graph. Out-of-place versions simply allocate new objects and
keep references to the old graph, while in-place operations require
changing the creator of all inputs to the <code class="xref py py-class docutils literal notranslate"><span class="pre">Function</span></code> representing
this operation. This can be tricky, especially if there are many Tensors
that reference the same storage (e.g. created by indexing or transposing),
and in-place functions will actually raise an error if the storage of
modified inputs is referenced by any other <code class="xref py py-class docutils literal notranslate"><span class="pre">Tensor</span></code>.</p></li>
</ol>
</div>
<div class="section" id="in-place-correctness-checks">
<h2>In-place correctness checks<a class="headerlink" href="#in-place-correctness-checks" title="Permalink to this headline">¶</a></h2>
<p>Every tensor keeps a version counter that is incremented every time it is
marked dirty in any operation. When a Function saves any tensors for backward,
the version counter of their containing Tensor is saved as well. Once you access
<code class="docutils literal notranslate"><span class="pre">self.saved_tensors</span></code>, the current version counter is checked, and if it is
greater than the saved value an error is raised. This ensures that if you’re using in-place
functions and not seeing any errors, you can be sure that the computed
gradients are correct.</p>
</div>
<div class="section" id="multithreaded-autograd">
<h2>Multithreaded Autograd<a class="headerlink" href="#multithreaded-autograd" title="Permalink to this headline">¶</a></h2>
<p>The autograd engine is responsible for running all the backward operations
necessary to compute the backward pass. This section will describe all the details
that can help you make the best use of it in a multithreaded environment. (This is
relevant only for PyTorch 1.6+, as the behavior in previous versions was different.)</p>
<p>Users can train their models with multithreaded code (e.g. Hogwild training) without
blocking on the concurrent backward computations. Example code could be:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="c1"># Define a train function to be used in different threads</span>
<span class="k">def</span> <span class="nf">train_fn</span><span class="p">():</span>
    <span class="n">x</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="n">requires_grad</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="c1"># forward</span>
    <span class="n">y</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span> <span class="o">+</span> <span class="mi">3</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">x</span> <span class="o">+</span> <span class="mi">4</span><span class="p">)</span> <span class="o">*</span> <span class="mf">0.5</span>
    <span class="c1"># backward</span>
    <span class="n">y</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
    <span class="c1"># potential optimizer update</span>


<span class="c1"># Users write their own threading code to drive the train_fn</span>
<span class="n">threads</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
    <span class="n">p</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">train_fn</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">())</span>
    <span class="n">p</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>
    <span class="n">threads</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>

<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">threads</span><span class="p">:</span>
    <span class="n">p</span><span class="o">.</span><span class="n">join</span><span class="p">()</span>
</pre></div>
</div>
<p>Note some behaviors that users should be aware of:</p>
<p>When you run <code class="docutils literal notranslate"><span class="pre">backward()</span></code> or <code class="docutils literal notranslate"><span class="pre">grad()</span></code> via the Python or C++ API in multiple
threads on CPU, you should expect to see extra concurrency instead of
all the backward calls being serialized in a specific order during execution
(the behavior before PyTorch 1.6).</p>
<p>If you are calling <code class="docutils literal notranslate"><span class="pre">backward()</span></code> on multiple threads concurrently but with
shared inputs (e.g. Hogwild CPU training), then since parameters are automatically
shared across threads, gradient accumulation might become non-deterministic on
backward calls across threads, because two backward calls might access and try
to accumulate the same <code class="docutils literal notranslate"><span class="pre">.grad</span></code> attribute. This is technically not safe, and
it might result in a race condition and the result might be invalid to use.</p>
<p>But this is the expected pattern if you are using the multithreading approach to
drive the whole training process with shared parameters. Users who use
multithreading should have the threading model in mind and should expect this
to happen. Users can use the functional API <a class="reference internal" href="../autograd.html#torch.autograd.grad" title="torch.autograd.grad"><code class="xref py py-func docutils literal notranslate"><span class="pre">torch.autograd.grad()</span></code></a> to
calculate the gradients instead of <code class="docutils literal notranslate"><span class="pre">backward()</span></code> to avoid non-determinism.</p>
<p>If part of the autograd graph is shared between threads, e.g. you run the first
part of the forward pass in a single thread and then run the second part in multiple
threads, then the first part of the graph is shared. In this case, different threads
executing <code class="docutils literal notranslate"><span class="pre">grad()</span></code> or <code class="docutils literal notranslate"><span class="pre">backward()</span></code> on the same graph might run into
one thread destroying the graph on the fly, in which case the other thread
will crash. Autograd will error out to the user, similar to calling
<code class="docutils literal notranslate"><span class="pre">backward()</span></code> twice without <code class="docutils literal notranslate"><span class="pre">retain_graph=True</span></code>, and let the user know
they should use <code class="docutils literal notranslate"><span class="pre">retain_graph=True</span></code>.</p>
<p>Since Autograd allows the caller thread to drive its backward execution for
potential parallelism, it’s important that we ensure thread safety on CPU with
parallel backwards that share part/whole of the GraphTask.</p>
<p>Custom Python <code class="docutils literal notranslate"><span class="pre">autograd.function</span></code> is automatically thread safe because of the GIL.
For built-in C++ Autograd Nodes (e.g. AccumulateGrad, CopySlices) and custom
<code class="docutils literal notranslate"><span class="pre">autograd::Function</span></code>, the Autograd Engine uses thread mutex locking to protect
thread safety on autograd Nodes that might have state write/read.</p>
<p>Autograd relies on the user to write thread safe C++ hooks. If you want the hook
to be correctly applied in a multithreaded environment, you will need to write
proper thread locking code to ensure the hooks are thread safe.</p>
</div>
</div>


             </article>
             
            </div>
            <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="broadcasting.html" class="btn btn-neutral float-right" title="Broadcasting semantics" accesskey="n" rel="next">Next <img src="../_static/images/chevron-right-orange.svg" class="next-page" alt=""></a>
      
      
        <a href="amp_examples.html" class="btn btn-neutral" title="Automatic Mixed Precision examples" accesskey="p" rel="prev"><img src="../_static/images/chevron-right-orange.svg" class="previous-page" alt=""> Previous</a>
      
    </div>
  

    <hr>

  
  <div role="contentinfo">
    <p>
        &copy; Copyright 2019, Torch Contributors.

    </p>
  </div>
    
      <div>
        Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>
     

</footer>

          </div>
        </div>

        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
<li><a class="reference internal" href="#">Autograd mechanics</a><ul>
<li><a class="reference internal" href="#excluding-subgraphs-from-backward">Excluding subgraphs from backward</a><ul>
<li><a class="reference internal" href="#requires-grad"><code class="docutils literal notranslate"><span class="pre">requires_grad</span></code></a></li>
</ul>
</li>
<li><a class="reference internal" href="#how-autograd-encodes-the-history">How autograd encodes the history</a></li>
<li><a class="reference internal" href="#in-place-operations-with-autograd">In-place operations with autograd</a></li>
<li><a class="reference internal" href="#in-place-correctness-checks">In-place correctness checks</a></li>
<li><a class="reference internal" href="#multithreaded-autograd">Multithreaded Autograd</a></li>
</ul>
</li>
</ul>

            </div>
          </div>
        </div>
      </section>
    </div>

  
       <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
         <script src="../_static/jquery.js"></script>
         <script src="../_static/underscore.js"></script>
         <script src="../_static/doctools.js"></script>
         <script src="../_static/language_data.js"></script>
     

  <script type="text/javascript" src="../_static/js/vendor/popper.min.js"></script>
  <script type="text/javascript" src="../_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script type="text/javascript" src="../_static/js/theme.js"></script>

  <script type="text/javascript">
      // Runs once the DOM is ready and switches on the theme's navigation.
      // NOTE(review): the `true` flag presumably selects the sticky-sidebar
      // behaviour; confirm against the theme's Navigation.enable implementation.
      jQuery(function enableThemeNavigation() {
          var themeNavigation = SphinxRtdTheme.Navigation;
          themeNavigation.enable(true);
      });
  </script>
 
<script>
  // Google Analytics loader. Records the global name 'ga' under
  // window.GoogleAnalyticsObject and, unless window.ga already exists, installs
  // a stub that appends each call's `arguments` to ga.q. It then stamps the
  // current time on ga.l, creates an async script element pointing at
  // analytics.js, and inserts it before the first script element in the document.
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  // Issue the 'create' and 'send' commands through `ga`.
  // NOTE(review): presumably these register a tracker for property UA-90545585-1
  // and report a pageview; confirm against the analytics.js command reference.
  ga('create', 'UA-90545585-1', 'auto');
  ga('send', 'pageview');

</script>

<script async src="https://www.googletagmanager.com/gtag/js?id=UA-117752657-2"></script>

<script>
  // Reuse the page-wide dataLayer queue when one already exists; otherwise
  // start with an empty one.
  if (!window.dataLayer) {
    window.dataLayer = [];
  }

  // Global helper: appends the raw `arguments` object of every call (not a
  // copied array) to the dataLayer queue.
  function gtag() {
    window.dataLayer.push(arguments);
  }

  // Queue a 'js' entry carrying the current time, followed by the 'config'
  // entry for this property ID.
  gtag('js', new Date());
  gtag('config', 'UA-117752657-2');
</script>

<img height="1" width="1" style="border-style:none;" alt="" src="https://www.googleadservices.com/pagead/conversion/795629140/?label=txkmCPmdtosBENSssfsC&amp;guid=ON&amp;script=0"/>


  <!-- Begin Footer -->

  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo"></a>
      </div>

      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank">GitHub Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col follow-us-col">
          <ul>
            <li class="list-title">Stay Connected</li>
            <li>
              <div id="mc_embed_signup">
                <form
                  action="https://twitter.us14.list-manage.com/subscribe/post?u=75419c71fe0a935e53dfa4a3f&amp;id=91d0dccd39"
                  method="post"
                  id="mc-embedded-subscribe-form"
                  name="mc-embedded-subscribe-form"
                  class="email-subscribe-form validate"
                  target="_blank"
                  novalidate>
                  <div id="mc_embed_signup_scroll" class="email-subscribe-form-fields-wrapper">
                    <div class="mc-field-group">
                      <label for="mce-EMAIL" style="display:none;">Email Address</label>
                      <input type="email" value="" name="EMAIL" class="required email" id="mce-EMAIL" placeholder="Email Address">
                    </div>

                    <div id="mce-responses" class="clear">
                      <div class="response" id="mce-error-response" style="display:none"></div>
                      <div class="response" id="mce-success-response" style="display:none"></div>
                    </div>    <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups-->

                    <div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_75419c71fe0a935e53dfa4a3f_91d0dccd39" tabindex="-1" value=""></div>

                    <div class="clear">
                      <input type="submit" value="" name="subscribe" id="mc-embedded-subscribe" class="button email-subscribe-button">
                    </div>
                  </div>
                </form>
              </div>

            </li>
          </ul>

          <div class="footer-social-icons">
            <a href="https://www.facebook.com/pytorch" target="_blank" class="facebook"></a>
            <a href="https://twitter.com/pytorch" target="_blank" class="twitter"></a>
            <a href="https://www.youtube.com/pytorch" target="_blank" class="youtube"></a>
          </div>
        </div>
      </div>
    </div>
  </footer>

  <div class="cookie-banner-wrapper">
  <div class="container">
    <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
    <img class="close-button" src="../_static/images/pytorch-x.svg" alt="Close">
  </div>
</div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
        </div>
      </div>
    </div>

    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <a href="https://pytorch.org/features">Features</a>
          </li>

          <li>
            <a href="https://pytorch.org/ecosystem">Ecosystem</a>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/hub">PyTorch Hub</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li class="active">
            <a href="https://pytorch.org/docs/stable/index.html">Docs</a>
          </li>

          <li>
            <a href="https://pytorch.org/resources">Resources</a>
          </li>

          <li>
            <a href="https://github.com/pytorch/pytorch">GitHub</a>
          </li>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script type="text/javascript" src="../_static/js/vendor/anchor.min.js"></script>

  <script type="text/javascript">
    // Once the DOM is ready, wire up the interactive pieces of the page.
    $(function initPytorchDocsPage() {
      // Bind each helper in turn; the call order is kept exactly as it was.
      // The helper objects are not defined in this inline script and must
      // already exist as globals when this callback runs.
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Links cannot be created inside code blocks, so instead mark every
      // article link that wraps inline code with the "has-code" class.
      $("article.pytorch-article a span.pre")
        .closest("a")
        .addClass("has-code");
    });
  </script>
</body>
</html>