Merge branch 'master' of github.com:yuanming-hu/taichi_lang
yuanming-hu committed Sep 30, 2019
2 parents 68bdc69 + 37df384 commit 5436b1f
Showing 10 changed files with 107 additions and 79 deletions.
37 changes: 30 additions & 7 deletions taichi_lang/README.md
@@ -1,24 +1,35 @@
# The DiffSim Programming Language
### High-Performance Differentiable Physical Simulation
# The **Taichi** Programming Language
### High-Performance Computing on Spatially Sparse Data Structures

# [Python Frontend Tutorial](https://github.com/yuanming-hu/taichi_lang/blob/master/python/README.md#the-taichi-python-frontend)

# Installation
Supports Ubuntu 14.04/16.04/18.04, Arch Linux, and Mac OS X. GPU support requires CUDA 9.0+.

- Execute `python3 -m pip install astpretty astor pytest opencv-python pybind11==2.2.4`
- Install `taichi` with the [installation script](https://taichi.readthedocs.io/en/latest/installation.html#ubuntu-arch-linux-and-mac-os-x). **Checkout branch `llvm-debug`**.
- Put this repo under `taichi/projects/`
- Install `taichi` with the [installation script](https://taichi.readthedocs.io/en/latest/installation.html#ubuntu-arch-linux-and-mac-os-x). **Checkout branch `llvm`**.
- (Optional) To use the experimental LLVM backend, make sure you have LLVM 8 built from source with:
```
mkdir build
cd build
cmake .. -DLLVM_ENABLE_RTTI:BOOL=ON -DBUILD_SHARED_LIBS:BOOL=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" -DLLVM_ENABLE_ASSERTIONS=ON
make -j 8
sudo make install
```
- Execute `ti install https://github.com/yuanming-hu/taichi_lang` to install the DSL project
- Add the following line to your `~/.bashrc` or `~/.zshrc` for the Python frontend.
```bash
export PYTHONPATH=$TAICHI_REPO_DIR/projects/taichi_lang/python:$PYTHONPATH
```
- Execute `source ~/.bashrc` (or `source ~/.zshrc`) to reload shell config.
- Execute `ti build` to build.
- Execute `ti test` to run all the tests; the full suite may take around 20 minutes. (A quick smoke test is sketched after this list.)
- Check out `examples` for runnable DiffSim examples. Run them with `python3`.
- Check out `examples` for runnable examples. Run them with `python3`.
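
A quick way to confirm the frontend is on the path is a tiny program in the style of the bundled examples. The sketch below is illustrative only: the `import taichi_lang as ti` name is assumed from the `projects/taichi_lang/python` path added above, and the `ti.var`/`ti.layout` declarations follow the style used in `examples/`.

```python
import taichi_lang as ti        # assumed import name, per projects/taichi_lang/python on PYTHONPATH

real = ti.f32
ti.set_default_fp(real)         # as in examples/diffmpm_benchmark.py

n = 16
x = ti.var(dt=real)             # a 1-D global tensor (declaration style assumed from the examples)

@ti.layout
def place():
    ti.root.dense(ti.i, n).place(x)

@ti.kernel
def fill():
    for i in range(n):
        x[i] = 2.0 * i

fill()
print(x[3])                     # expect 6.0
```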

# Folder Structure
Key folders are
- *examples* : example programs written in DiffSim
- *examples* : example programs written in Taichi
- *cpp*: benchmarking examples in the SIGGRAPH Asia paper (mpm_benchmark.cpp, smoke_renderer.cpp, cnn.cpp)
- *fem*: the FEM benchmark
- *include*: language runtime
- *src*: the compiler implementation (The functionality is briefly documented in each file)
- *analysis*: static analysis passes
@@ -32,3 +43,15 @@ Key folders are
# Troubleshooting
- Run in debug mode to see if there is any illegal memory access;
- Disable compiler optimizations to quickly confirm that the issue is not caused by an optimization pass (see the sketch below).
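
These switches are reachable from the Python frontend through `ti.cfg`, which mirrors `CompileConfig` (see `src/python_bindings.cpp` below). A sketch only: `enable_profiler`, `lower_access`, and `simplify_after_lower_access` are the fields visibly bound in this commit, so treating `debug` as bound the same way is an assumption.

```python
# assumption: CompileConfig::debug is exposed on ti.cfg like the fields bound in python_bindings.cpp
ti.cfg.debug = True                         # run with debug checks to catch illegal memory accesses
ti.cfg.simplify_after_lower_access = False  # back off a simplification pass to rule out optimization issues
```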

# Bibtex
```
@inproceedings{hu2019taichi,
title={Taichi: A Language for High-Performance Computation on Spatially Sparse Data Structures},
author={Hu, Yuanming and Li, Tzu-Mao and Anderson, Luke and Ragan-Kelley, Jonathan and Durand, Fr\'edo},
booktitle={SIGGRAPH Asia 2019 Technical Papers},
pages={201},
year={2019},
organization={ACM}
}
```
42 changes: 35 additions & 7 deletions taichi_lang/examples/diffmpm_benchmark.py
@@ -7,6 +7,7 @@

real = ti.f32
ti.set_default_fp(real)
ti.cfg.enable_profiler = False

dim = 2
n_particles = 6400
@@ -211,6 +212,38 @@ def backward():
set_v.grad()
return init_v.grad[None]

def benchmark():
print('Also check "nvprof --print-gpu-trace python3 diffmpm_benchmark.py" for more accurate results')
iters = 100000
for i in range(1):
p2g(0)
grid_op()
g2p(0)
t = time.time()
ti.runtime.sync()
for i in range(iters):
# clear_grid()
p2g(0)
grid_op()
g2p(0)
ti.runtime.sync()
print('forward ', (time.time() - t) / iters * 1000 * 3, 'ms')
ti.profiler_print()

for i in range(1):
p2g.grad(0)
grid_op.grad()
g2p.grad(0)
t = time.time()
ti.runtime.sync()
for i in range(iters):
# clear_grid()
g2p.grad(0)
grid_op.grad()
p2g.grad(0)
ti.runtime.sync()
print('backward ', (time.time() - t) / iters * 1000 * 3, 'ms')
ti.profiler_print()

def main():
# initialization
@@ -225,13 +258,8 @@ def main():


set_v()
for i in range(1024):
# clear_grid()
p2g(0)
grid_op()
g2p(0)
ti.profiler_print()

benchmark()

losses = []
img_count = 0
for i in range(30):
53 changes: 5 additions & 48 deletions taichi_lang/examples/liquid.py
@@ -438,60 +438,25 @@ def main():
ax = fig.add_subplot(111, projection='3d')

losses = []
for iter in range(100):
for iter in range(501):
ti.clear_all_gradients()
l = forward()
losses.append(l)
loss.grad[None] = 1
backward()
print('i=', iter, 'loss=', l)
learning_rate = 30
learning_rate = 10

for i in range(n_actuators):
for j in range(n_sin_waves):
# print(weights.grad[i, j])
weights[i, j] -= learning_rate * weights.grad[i, j]
bias[i] -= learning_rate * bias.grad[i]

if iter % 20 == 0 and iter > 0:
if iter % 50 == 0:
# visualize
forward()
print("Dumping particles...")
for s in range(7, steps, 2):
'''
print(s)
img = np.zeros((res[1] * res[0] * 3,), dtype=np.float32)
splat(s)
copy_back_and_clear(img)
img = img.reshape(res[1], res[0], 3)
img = np.sqrt(img)
cv2.imshow('img', img)
cv2.waitKey(1)
'''

'''
xs, ys, zs = [], [], []
aas, bs, cs = [], [], []
for i in range(n_particles):
if particle_type[i] == 0:
xs.append(x[s, i][0])
ys.append(x[s, i][2])
zs.append(x[s, i][1])
else:
aas.append(x[s, i][0])
bs.append(x[s, i][2])
cs.append(x[s, i][1])
ax.scatter(aas, bs, cs, marker='o')
ax.scatter(xs, ys, zs, marker='o')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_zlim(0, 1)
plt.draw()
plt.pause(0.001)
plt.cla()
'''


def to255(x):
return int(max(min(x * 255, 255), 0))
xs, ys, zs = [], [], []
@@ -527,15 +492,7 @@ def to255(x):
cs.append(color)
data = np.array(xs + ys + zs + us + vs + ws + cs, dtype=np.float32)
data.tofile(open('{}/{:04}.bin'.format(folder, s), 'wb'))



# ti.profiler_print()
plt.title("Optimization of Initial Velocity")
plt.ylabel("Loss")
plt.xlabel("Gradient Descent Iterations")
plt.plot(losses)
plt.show()
print("Particles dumped")


if __name__ == '__main__':
2 changes: 1 addition & 1 deletion taichi_lang/examples/render_diffmpm3d.py
@@ -1,4 +1,4 @@
import os

for i in range(7, 512, 2):
os.system('python3 diffmpm_renderer.py snow 0080 {}'.format(i))
os.system('python3 diffmpm_renderer.py snow 0000 {}'.format(i))
2 changes: 1 addition & 1 deletion taichi_lang/python/README.md
@@ -1,6 +1,6 @@
# The Taichi Python Frontend

Make sure you also check out the DiffSym paper (section "Language design" and "Appendix A") to learn more about the language.
Make sure you also check out the DiffTaichi paper (section "Language design" and "Appendix A") to learn more about the language.

## Global Tensors
- Every global variable is an N-dimensional tensor. Global scalars are treated as 0-D tensors.
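
For example, a global scalar is declared like any other tensor and then addressed with the empty index `None`, which is how the examples in this commit use `loss.grad[None]` and `init_v.grad[None]`. A sketch in the declaration style of `examples/` (assumed API of this era):

```python
import taichi_lang as ti   # assumed import name

loss = ti.var(dt=ti.f32)   # a 0-D global tensor, i.e. a global scalar

@ti.layout
def place():
    ti.root.place(loss)    # placed with no dense index

loss[None] = 0.0           # 0-D tensors are read and written through the empty index None
print(loss[None])
```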
3 changes: 3 additions & 0 deletions taichi_lang/python/taichi_lang/impl.py
@@ -130,6 +130,9 @@ def clear(self):
def get_tape(self, loss=None):
from .tape import Tape
return Tape(self, loss)

def sync(self):
self.prog.synchronize()


pytaichi = PyTaichi()
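
`PyTaichi.sync()` is what `ti.runtime.sync()` in `examples/diffmpm_benchmark.py` resolves to. Kernel launches return asynchronously, so wall-clock timing has to synchronize before reading the clock, and the new `enable_profiler` flag can be turned off when timing by hand. A self-contained sketch under the same API assumptions as the smoke test above (`import taichi_lang as ti`, `ti.var`/`ti.layout`):

```python
import time
import taichi_lang as ti            # assumed import name

ti.set_default_fp(ti.f32)
ti.cfg.enable_profiler = False      # timing by hand, so skip GPUProfiler instrumentation

n = 2 ** 20
x = ti.var(dt=ti.f32)

@ti.layout
def place():
    ti.root.dense(ti.i, n).place(x)

@ti.kernel
def scale():
    for i in range(n):
        x[i] = x[i] * 1.0001

scale()                             # first call compiles the kernel; keep it out of the timed region
ti.runtime.sync()                   # drain queued work before starting the clock
t = time.time()
for i in range(1000):
    scale()
ti.runtime.sync()                   # launches are asynchronous: wait before stopping the clock
print('1000 launches took', time.time() - t, 's')
```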
41 changes: 27 additions & 14 deletions taichi_lang/src/backends/gpu.cpp
@@ -16,12 +16,14 @@ class GPUIRCodeGen : public IRVisitor {
bool debug;
int grid_dim;
int for_stmt_counter;
CompileConfig cfg;
std::set<SNode *> ldg;

GPUIRCodeGen(GPUCodeGen *codegen) : codegen(codegen), loopgen(codegen) {
current_struct_for = nullptr;
current_scratch_pads = nullptr;
debug = codegen->prog->config.debug;
cfg = codegen->prog->config;
debug = cfg.debug;
grid_dim = loopgen.grid_dim;
for_stmt_counter = 0;
}
@@ -229,14 +231,16 @@
emit("");

// generate the list
emit(R"(GPUProfiler::get_instance().start("{}_list_gen");)",
codegen->func_name);
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().start("{}_list_gen");)",
codegen->func_name);

std::reverse(path.begin(), path.end());
for (auto &s : path) {
emit("{}(context);", loopgen.listgen_func_name(s, listgen_suffix));
}
emit(R"(GPUProfiler::get_instance().stop();)");
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().stop();)");

emit("");

@@ -260,10 +264,13 @@
}
emit("");
emit("reset_execution_tail<{}><<<1, 1>>>();", leaf->node_type_name);
emit(R"(GPUProfiler::get_instance().start("{}");)", current_func_name());
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().start("{}");)",
current_func_name());
emit("{}_kernel<<<{}, blockDim>>>(context);", current_func_name(),
grid_dim);
emit(R"(GPUProfiler::get_instance().stop();)");
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().stop();)");
emit("");
if (debug) {
emit("cudaEventRecord(stop);");
@@ -345,10 +352,12 @@
}
emit("gpu_runtime_init();");
int num_blocks = (end - begin + block_size - 1) / block_size;
emit(R"(GPUProfiler::get_instance().start("{}");)", codegen->func_name);
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().start("{}");)", codegen->func_name);
emit("{}_kernel<<<{}, {}>>>(context);", current_func_name(), num_blocks,
block_size);
emit(R"(GPUProfiler::get_instance().stop();)");
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().stop();)");
emit("}}");
} else {
auto for_stmt = for_stmt_->as<StructForStmt>();
@@ -918,22 +927,26 @@
emit("");

// generate the list
emit(R"(GPUProfiler::get_instance().start("{}_list_gen");)",
codegen->func_name);
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().start("{}_list_gen");)",
codegen->func_name);

std::reverse(path.begin(), path.end());
for (auto &s : path) {
emit("{}(context);", loopgen.listgen_func_name(s));
}
emit(R"(GPUProfiler::get_instance().stop();)");
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().stop();)");

emit("");

emit("reset_execution_tail<{}><<<1, 1>>>();", leaf->node_type_name);
emit(R"(GPUProfiler::get_instance().start("clear_{}");)",
snode->node_type_name);
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().start("clear_{}");)",
snode->node_type_name);
emit("{}_kernel<<<{}, blockDim>>>(context);", codegen->func_name, grid_dim);
emit(R"(GPUProfiler::get_instance().stop();)");
if (cfg.enable_profiler)
emit(R"(GPUProfiler::get_instance().stop();)");

emit("");
emit("}}");
4 changes: 3 additions & 1 deletion taichi_lang/src/python_bindings.cpp
@@ -77,6 +77,7 @@ PYBIND11_MODULE(taichi_lang_core, m) {
.def_readwrite("simplify_after_lower_access",
&CompileConfig::simplify_after_lower_access)
.def_readwrite("lower_access", &CompileConfig::lower_access)
.def_readwrite("enable_profiler", &CompileConfig::enable_profiler)
.def_readwrite("gradient_dt", &CompileConfig::gradient_dt);

m.def("default_compile_config",
@@ -87,7 +88,8 @@
.def(py::init<>())
.def("clear_all_gradients", &Program::clear_all_gradients)
.def("profiler_print", &Program::profiler_print)
.def("profiler_print", &Program::profiler_clear);
.def("profiler_print", &Program::profiler_clear)
.def("synchronize", &Program::synchronize);

m.def("get_current_program", get_current_program,
py::return_value_policy::reference);
1 change: 1 addition & 0 deletions taichi_lang/src/util.cpp
@@ -386,6 +386,7 @@ CompileConfig::CompileConfig() {
simplify_after_lower_access = true;
attempt_vectorized_load_cpu = true;
gradient_dt = DataType::f32;
enable_profiler = true;
}

std::string CompileConfig::compiler_name() {
1 change: 1 addition & 0 deletions taichi_lang/src/util.h
@@ -346,6 +346,7 @@ struct CompileConfig {
bool use_llvm;
bool print_struct_llvm_ir;
bool print_kernel_llvm_ir;
bool enable_profiler;
DataType gradient_dt;
std::string extra_flags;

