first commit

yaolubrain · Sep 10, 2019 · db4cf84 · db4cf84
commit db4cf84
Show file tree

Hide file tree

Showing 90 changed files with 2,511 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,17 @@
+# C++/CUDA Extensions in PyTorch
+
+An example of writing a C++ extension for PyTorch. See
+[here](http://pytorch.org/tutorials/advanced/cpp_extension.html) for the accompanying tutorial.
+
+There are a few "sights" you can metaphorically visit in this repository:
+
+- Inspect the C++ and CUDA extensions in the `cpp/` and `cuda/` folders,
+- Build C++ and/or CUDA extensions by going into the `cpp/` or `cuda/` folder and executing `python setup.py install`,
+- JIT-compile C++ and/or CUDA extensions by going into the `cpp/` or `cuda/` folder and calling `python jit.py`, which will JIT-compile the extension and load it,
+- Benchmark Python vs. C++ vs. CUDA by running `python benchmark.py {py, cpp, cuda} [--cuda]`,
+- Run gradient checks on the code by running `python grad_check.py {py, cpp, cuda} [--cuda]`,
+- Run output checks on the code by running `python check.py {forward, backward} [--cuda]`.
+
+## Authors
+
+[Peter Goldsborough](https://github.com/goldsborough)
diff --git a/__pycache__/pred_layer.cpython-37.pyc b/__pycache__/pred_layer.cpython-37.pyc
diff --git a/cpp/.blsolver.py.swn b/cpp/.blsolver.py.swn
diff --git a/cpp/.blsolver.py.swo b/cpp/.blsolver.py.swo
diff --git a/cpp/.blsolver.py.swp b/cpp/.blsolver.py.swp
diff --git a/cpp/.conv4d.cpp.swp b/cpp/.conv4d.cpp.swp
diff --git a/cpp/.conv4d.py.swo b/cpp/.conv4d.py.swo
diff --git a/cpp/.conv4d.py.swp b/cpp/.conv4d.py.swp
diff --git a/cpp/.cost_volume.cpp.swp b/cpp/.cost_volume.cpp.swp
diff --git a/cpp/.grad_check.py.swp b/cpp/.grad_check.py.swp
diff --git a/cpp/.im2col.cpp.swp b/cpp/.im2col.cpp.swp
diff --git a/cpp/.test.py.swo b/cpp/.test.py.swo
diff --git a/cpp/.test.py.swp b/cpp/.test.py.swp
diff --git a/cpp/.vscode/settings.json b/cpp/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "files.associations": {
+        "array": "cpp"
+    }
+}
diff --git a/cpp/__init__.py b/cpp/__init__.py
diff --git a/cpp/__pycache__/__init__.cpython-36.pyc b/cpp/__pycache__/__init__.cpython-36.pyc
diff --git a/cpp/__pycache__/__init__.cpython-37.pyc b/cpp/__pycache__/__init__.cpython-37.pyc
diff --git a/cpp/__pycache__/blsolver.cpython-36.pyc b/cpp/__pycache__/blsolver.cpython-36.pyc
diff --git a/cpp/__pycache__/blsolver.cpython-37.pyc b/cpp/__pycache__/blsolver.cpython-37.pyc
diff --git a/cpp/__pycache__/conv4d.cpython-37.pyc b/cpp/__pycache__/conv4d.cpython-37.pyc
diff --git a/cpp/__pycache__/gradcheck.cpython-37.pyc b/cpp/__pycache__/gradcheck.cpython-37.pyc
diff --git a/cpp/__pycache__/im2col.cpython-36.pyc b/cpp/__pycache__/im2col.cpython-36.pyc
diff --git a/cpp/__pycache__/lltm.cpython-36.pyc b/cpp/__pycache__/lltm.cpython-36.pyc
diff --git a/cpp/__pycache__/sgmflow.cpython-36.pyc b/cpp/__pycache__/sgmflow.cpython-36.pyc
diff --git a/cpp/__pycache__/sgmflow.cpython-37.pyc b/cpp/__pycache__/sgmflow.cpython-37.pyc
diff --git a/cpp/build/lib.linux-x86_64-3.7/conv4d_cpp.cpython-37m-x86_64-linux-gnu.so b/cpp/build/lib.linux-x86_64-3.7/conv4d_cpp.cpython-37m-x86_64-linux-gnu.so
diff --git a/cpp/build/lib.linux-x86_64-3.7/sgmflow_cpp.cpython-37m-x86_64-linux-gnu.so b/cpp/build/lib.linux-x86_64-3.7/sgmflow_cpp.cpython-37m-x86_64-linux-gnu.so
diff --git a/cpp/build/temp.linux-x86_64-3.7/conv4d.o b/cpp/build/temp.linux-x86_64-3.7/conv4d.o
diff --git a/cpp/build/temp.linux-x86_64-3.7/cost_volume.o b/cpp/build/temp.linux-x86_64-3.7/cost_volume.o
diff --git a/cpp/build/temp.linux-x86_64-3.7/prop_hori_neg.o b/cpp/build/temp.linux-x86_64-3.7/prop_hori_neg.o
diff --git a/cpp/build/temp.linux-x86_64-3.7/prop_hori_pos.o b/cpp/build/temp.linux-x86_64-3.7/prop_hori_pos.o
diff --git a/cpp/build/temp.linux-x86_64-3.7/prop_vert_neg.o b/cpp/build/temp.linux-x86_64-3.7/prop_vert_neg.o
diff --git a/cpp/build/temp.linux-x86_64-3.7/prop_vert_pos.o b/cpp/build/temp.linux-x86_64-3.7/prop_vert_pos.o
diff --git a/cpp/build/temp.linux-x86_64-3.7/sgmflow.o b/cpp/build/temp.linux-x86_64-3.7/sgmflow.o
diff --git a/cpp/common.h b/cpp/common.h
@@ -0,0 +1,40 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+using namespace std;
+
+/**
+ * Rational squashing function: 0.5 * x / (1 + |x|) + 0.5.
+ *
+ * Returns 0.5 at x == 0 and moves towards 0 / 1 as x goes to -inf / +inf.
+ * Note that this is NOT the exp-based logistic 1 / (1 + exp(-x)); no
+ * exponential is evaluated.
+ *
+ * @param x  input value.
+ * @return   the squashed value, converted back to scalar_t.
+ */
+template <typename scalar_t>
+scalar_t sigmoid(scalar_t x) {
+  const auto magnitude = std::abs(x);
+  const auto denominator = 1 + magnitude;
+  return 0.5 * x / denominator + 0.5;
+}
+
+/**
+ * Numerically stable softplus: log(1 + exp(x)).
+ *
+ * For x > 0 the identity log(1 + exp(x)) = x + log1p(exp(-x)) is used, so
+ * the exponential can only underflow (to 0) and the result degrades
+ * gracefully to x instead of becoming +inf. For x <= 0, log1p keeps the tiny
+ * exp(x) term that a plain log(1 + exp(x)) would round away.
+ *
+ * @param x  input value.
+ * @return   softplus(x), converted back to scalar_t.
+ */
+template <typename scalar_t>
+scalar_t softplus(scalar_t x) {
+  if (x > 0) {
+    return x + std::log1p(std::exp(-x));
+  }
+  return std::log1p(std::exp(x));
+}
+
+/**
+ * Signum of `val`: +1 when val is above zero, -1 when below, 0 otherwise.
+ *
+ * Both comparisons are false for a value that is neither above nor below
+ * zero, so such a value yields 0.
+ */
+template <typename scalar_t>
+scalar_t sign(scalar_t val) {
+    const scalar_t zero = scalar_t(0);
+    const int above = zero < val;
+    const int below = val < zero;
+    return above - below;
+}
+
+/**
+ * Decodes a flat label into a pair of offsets {h, w}.
+ *
+ * Labels are laid out row-major over rows of width (2*max_offset_w + 1);
+ * the row index is shifted by -max_offset_h and the column index by
+ * -max_offset_w. This is the inverse of hw2label().
+ *
+ * @return two-element vector: [0] = h offset, [1] = w offset.
+ */
+inline std::vector<int> label2hw(int l, int max_offset_h, int max_offset_w) {
+  const int row_width = 2*max_offset_w + 1;
+  const int row = l / row_width;
+  const int col = l % row_width;
+  return {row - max_offset_h, col - max_offset_w};
+}
+
+/**
+ * Encodes a pair of offsets (h, w) as a flat label.
+ *
+ * Each offset is shifted by its max_offset_* value and the pair is flattened
+ * row-major over rows of width (2*max_offset_w + 1). Inverse of label2hw().
+ */
+inline int hw2label(int h, int w, int max_offset_h, int max_offset_w) {
+  const int row = h + max_offset_h;
+  const int col = w + max_offset_w;
+  const int row_width = 2*max_offset_w + 1;
+  return row*row_width + col;
+}
+
+
+
+#endif
diff --git a/cpp/conv4d.cpp b/cpp/conv4d.cpp
@@ -0,0 +1,227 @@
+#include <limits>
+#include <torch/extension.h>
+#include <torch/csrc/autograd/variable.h>
+#include "common.h"
+#include "cost_volume.h"
+
+
+/**
+ * Naive CPU 4-D convolution over the last four axes of a 6-D tensor.
+ *
+ * `inputs` is indexed as [B, C_in, U, V, H, W] and `weight` as
+ * [C_out, C_in, ksize, ksize, ksize, ksize]. Both are walked through raw
+ * pointers, so they are made contiguous first. Kernel taps that land outside
+ * the input extent are skipped, i.e. they contribute zero.
+ *
+ * @param inputs           6-D floating-point input tensor.
+ * @param weight           kernel tensor; read with the same scalar type as `inputs`.
+ * @param input_channels   not used here; C_in is taken from inputs.size(1).
+ * @param output_channels  number of output channels C_out.
+ * @param ksize            kernel extent along each of the four convolved axes.
+ * @param stride           only enters the output-size formula.
+ * @param padding          only enters the output-size formula.
+ * @return zero-initialised-then-accumulated tensor of shape
+ *         [B, C_out, U_out, V_out, H_out, W_out].
+ *
+ * NOTE(review): the tap loops never apply `stride` or `padding`, so the
+ * arithmetic is that of a stride-1 convolution centred on each output
+ * position; the output-size formula is kept as-is for compatibility.
+ * NOTE(review): for even `ksize` the tap loops run ksize+1 times per axis and
+ * the weight offset steps past that axis -- presumably only odd kernel sizes
+ * are intended; confirm with callers.
+ */
+at::Tensor conv4d_forward(
+    at::Tensor inputs,
+    at::Tensor weight,
+    int input_channels,
+    int output_channels,
+    int ksize,
+    int stride,
+    int padding) {
+
+  // The pointer arithmetic below assumes dense row-major storage.
+  inputs = inputs.contiguous();
+  weight = weight.contiguous();
+
+  const int64_t B_in = inputs.size(0);
+  const int64_t C_in = inputs.size(1);
+  const int64_t U_in = inputs.size(2);
+  const int64_t V_in = inputs.size(3);
+  const int64_t H_in = inputs.size(4);
+  const int64_t W_in = inputs.size(5);
+
+  const int64_t B_out = B_in;
+  const int64_t C_out = output_channels;
+  const int64_t U_out = (U_in - ksize / 2) / stride + padding;
+  const int64_t V_out = (V_in - ksize / 2) / stride + padding;
+  const int64_t H_out = (H_in - ksize / 2) / stride + padding;
+  const int64_t W_out = (W_in - ksize / 2) / stride + padding;
+
+  // Element strides are kept in 64 bits: 6-D volumes overflow `int` quickly.
+  const int64_t in_channel_stride = U_in * V_in * H_in * W_in;
+  const int64_t out_channel_stride = U_out * V_out * H_out * W_out;
+  const int64_t kernel_size = ksize;
+  const int64_t taps_per_kernel = kernel_size * kernel_size * kernel_size * kernel_size;
+  const int64_t radius = ksize / 2;
+
+  at::Tensor outputs = torch::zeros({B_out, C_out, U_out, V_out, H_out, W_out}, inputs.type());
+
+  AT_DISPATCH_FLOATING_TYPES(inputs.type(), "forward", ([&] {
+
+    auto inputs_data = inputs.data<scalar_t>();
+    auto outputs_data = outputs.data<scalar_t>();
+    auto weight_data = weight.data<scalar_t>();
+
+    for (int64_t i = 0; i < B_out; ++i) {
+
+      auto input = inputs_data + i * C_in * in_channel_stride;
+      auto output = outputs_data + i * C_out * out_channel_stride;
+
+      for (int64_t u = 0; u < U_out; ++u) {
+        for (int64_t v = 0; v < V_out; ++v) {
+          for (int64_t h = 0; h < H_out; ++h) {
+            for (int64_t w = 0; w < W_out; ++w) {
+
+              // Offset of (u, v, h, w) inside one output channel.
+              const int64_t out_pos = ((u * V_out + v) * H_out + h) * W_out + w;
+
+              for (int64_t du = -radius; du <= radius; ++du) {
+                const int64_t u1 = u + du;
+                if (u1 < 0 || u1 >= U_in) {
+                  continue;
+                }
+                for (int64_t dv = -radius; dv <= radius; ++dv) {
+                  const int64_t v1 = v + dv;
+                  if (v1 < 0 || v1 >= V_in) {
+                    continue;
+                  }
+                  for (int64_t dh = -radius; dh <= radius; ++dh) {
+                    const int64_t h1 = h + dh;
+                    if (h1 < 0 || h1 >= H_in) {
+                      continue;
+                    }
+                    for (int64_t dw = -radius; dw <= radius; ++dw) {
+                      const int64_t w1 = w + dw;
+                      if (w1 < 0 || w1 >= W_in) {
+                        continue;
+                      }
+
+                      // Offset of the tapped element inside one INPUT channel;
+                      // it must be built from the input extents.
+                      const int64_t in_pos = ((u1 * V_in + v1) * H_in + h1) * W_in + w1;
+
+                      // Offset of this tap inside one (c_out, c_in) kernel.
+                      const int64_t tap = (((du + radius) * kernel_size
+                                          + (dv + radius)) * kernel_size
+                                          + (dh + radius)) * kernel_size
+                                          + (dw + radius);
+
+                      for (int64_t c_out = 0; c_out < C_out; ++c_out) {
+                        for (int64_t c_in = 0; c_in < C_in; ++c_in) {
+
+                          const int64_t out_idx = c_out * out_channel_stride + out_pos;
+                          const int64_t in_idx = c_in * in_channel_stride + in_pos;
+                          const int64_t weight_idx = (c_out * C_in + c_in) * taps_per_kernel + tap;
+
+                          output[out_idx] += input[in_idx] * weight_data[weight_idx];
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+  }));
+
+  return outputs;
+}
+
+
+/**
+ * Backward pass of conv4d_forward: gradients w.r.t. the input and the kernel.
+ *
+ * `grad_outputs` is indexed as [B, C_out, U_out, V_out, H_out, W_out],
+ * `inputs` as [B, C_in, U_in, V_in, H_in, W_in] and `weight` as
+ * [C_out, C_in, ksize, ksize, ksize, ksize]. All three are walked through
+ * raw pointers, so they are made contiguous first. The loop nest mirrors the
+ * forward pass tap for tap.
+ *
+ * Input extents are read from `inputs` itself rather than re-derived from
+ * the output extents, so the gradient buffer always matches the tensor that
+ * is indexed alongside it.
+ *
+ * @param grad_outputs     gradient of the loss w.r.t. the forward output.
+ * @param inputs           the tensor that was passed to conv4d_forward.
+ * @param weight           the kernel that was passed to conv4d_forward.
+ * @param input_channels   not used; C_in is taken from inputs.size(1).
+ * @param output_channels  not used; C_out is taken from grad_outputs.size(1).
+ * @param ksize            kernel extent along each of the four convolved axes.
+ * @param stride           not used by the loop nest.
+ * @param padding          not used by the loop nest.
+ * @return {grad_inputs, grad_weight}; grad_inputs has the extents of `inputs`,
+ *         grad_weight is created with zeros_like(weight).
+ *
+ * NOTE(review): as in the forward pass, even `ksize` makes the tap loops run
+ * ksize+1 times per axis -- presumably only odd kernel sizes are intended.
+ */
+std::vector<at::Tensor> conv4d_backward(
+    at::Tensor grad_outputs,
+    at::Tensor inputs,
+    at::Tensor weight,
+    int input_channels,
+    int output_channels,
+    int ksize,
+    int stride,
+    int padding) {
+
+  // The pointer arithmetic below assumes dense row-major storage; gradients
+  // handed over by autograd are not guaranteed to be contiguous.
+  grad_outputs = grad_outputs.contiguous();
+  inputs = inputs.contiguous();
+  weight = weight.contiguous();
+
+  const int64_t B_out = grad_outputs.size(0);
+  const int64_t C_out = grad_outputs.size(1);
+  const int64_t U_out = grad_outputs.size(2);
+  const int64_t V_out = grad_outputs.size(3);
+  const int64_t H_out = grad_outputs.size(4);
+  const int64_t W_out = grad_outputs.size(5);
+
+  const int64_t B_in = B_out;
+  const int64_t C_in = inputs.size(1);
+  const int64_t U_in = inputs.size(2);
+  const int64_t V_in = inputs.size(3);
+  const int64_t H_in = inputs.size(4);
+  const int64_t W_in = inputs.size(5);
+
+  // Element strides are kept in 64 bits: 6-D volumes overflow `int` quickly.
+  const int64_t in_channel_stride = U_in * V_in * H_in * W_in;
+  const int64_t out_channel_stride = U_out * V_out * H_out * W_out;
+  const int64_t kernel_size = ksize;
+  const int64_t taps_per_kernel = kernel_size * kernel_size * kernel_size * kernel_size;
+  const int64_t radius = ksize / 2;
+
+  at::Tensor grad_inputs = torch::zeros({B_in, C_in, U_in, V_in, H_in, W_in}, grad_outputs.type());
+  at::Tensor grad_weight = torch::zeros_like(weight);
+
+  AT_DISPATCH_FLOATING_TYPES(grad_outputs.type(), "backward", ([&] {
+
+    auto grad_inputs_data = grad_inputs.data<scalar_t>();
+    auto grad_outputs_data = grad_outputs.data<scalar_t>();
+    auto grad_weight_data = grad_weight.data<scalar_t>();
+    auto inputs_data = inputs.data<scalar_t>();
+    auto weight_data = weight.data<scalar_t>();
+
+    for (int64_t i = 0; i < B_out; ++i) {
+
+      auto grad_input = grad_inputs_data + i * C_in * in_channel_stride;
+      auto grad_output = grad_outputs_data + i * C_out * out_channel_stride;
+      auto input = inputs_data + i * C_in * in_channel_stride;
+
+      for (int64_t u = 0; u < U_out; ++u) {
+        for (int64_t v = 0; v < V_out; ++v) {
+          for (int64_t h = 0; h < H_out; ++h) {
+            for (int64_t w = 0; w < W_out; ++w) {
+
+              // Offset of (u, v, h, w) inside one output channel.
+              const int64_t out_pos = ((u * V_out + v) * H_out + h) * W_out + w;
+
+              for (int64_t du = -radius; du <= radius; ++du) {
+                const int64_t u1 = u + du;
+                if (u1 < 0 || u1 >= U_in) {
+                  continue;
+                }
+                for (int64_t dv = -radius; dv <= radius; ++dv) {
+                  const int64_t v1 = v + dv;
+                  if (v1 < 0 || v1 >= V_in) {
+                    continue;
+                  }
+                  for (int64_t dh = -radius; dh <= radius; ++dh) {
+                    const int64_t h1 = h + dh;
+                    if (h1 < 0 || h1 >= H_in) {
+                      continue;
+                    }
+                    for (int64_t dw = -radius; dw <= radius; ++dw) {
+                      const int64_t w1 = w + dw;
+                      if (w1 < 0 || w1 >= W_in) {
+                        continue;
+                      }
+
+                      // Offset of the tapped element inside one INPUT channel;
+                      // it must be built from the input extents.
+                      const int64_t in_pos = ((u1 * V_in + v1) * H_in + h1) * W_in + w1;
+
+                      // Offset of this tap inside one (c_out, c_in) kernel.
+                      const int64_t tap = (((du + radius) * kernel_size
+                                          + (dv + radius)) * kernel_size
+                                          + (dh + radius)) * kernel_size
+                                          + (dw + radius);
+
+                      for (int64_t c_out = 0; c_out < C_out; ++c_out) {
+                        for (int64_t c_in = 0; c_in < C_in; ++c_in) {
+
+                          const int64_t out_idx = c_out * out_channel_stride + out_pos;
+                          const int64_t in_idx = c_in * in_channel_stride + in_pos;
+                          const int64_t weight_idx = (c_out * C_in + c_in) * taps_per_kernel + tap;
+
+                          grad_input[in_idx] += grad_output[out_idx] * weight_data[weight_idx];
+                          grad_weight_data[weight_idx] += input[in_idx] * grad_output[out_idx];
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+  }));
+
+
+  return {grad_inputs, grad_weight};
+}
+
+
+// Python bindings for this extension: registers the cost-volume and conv4d
+// forward/backward entry points on module `m`.
+// NOTE(review): compute_cost_volume_forward/backward are not defined in this
+// file -- presumably declared in cost_volume.h; TORCH_EXTENSION_NAME is
+// likewise expected to come from the build. Confirm both.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("compute_cost_volume_forward", &compute_cost_volume_forward, "match forward");
+  m.def("compute_cost_volume_backward", &compute_cost_volume_backward, "match backward");
+  m.def("conv4d_forward", &conv4d_forward, "conv4d forward");
+  m.def("conv4d_backward", &conv4d_backward, "conv4d backward");
+}
+
diff --git a/cpp/conv4d.cpp.egg-info/PKG-INFO b/cpp/conv4d.cpp.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: conv4d.cpp
+Version: 0.0.0
+Summary: UNKNOWN
+Home-page: UNKNOWN
+Author: UNKNOWN
+Author-email: UNKNOWN
+License: UNKNOWN
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/cpp/conv4d.cpp.egg-info/SOURCES.txt b/cpp/conv4d.cpp.egg-info/SOURCES.txt
@@ -0,0 +1,7 @@
+conv4d.cpp
+cost_volume.cpp
+setup.py
+conv4d.cpp.egg-info/PKG-INFO
+conv4d.cpp.egg-info/SOURCES.txt
+conv4d.cpp.egg-info/dependency_links.txt
+conv4d.cpp.egg-info/top_level.txt
diff --git a/cpp/conv4d.cpp.egg-info/dependency_links.txt b/cpp/conv4d.cpp.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/cpp/conv4d.cpp.egg-info/top_level.txt b/cpp/conv4d.cpp.egg-info/top_level.txt
@@ -0,0 +1 @@
+conv4d_cpp