Merge branch 'master' into merge_icode
reyoung authored Sep 14, 2016
2 parents 9a9de92 + 487dc67 commit c7762da
Showing 20 changed files with 483 additions and 127 deletions.
18 changes: 9 additions & 9 deletions doc/ui/api/trainer_config_helpers/layers.rst
@@ -245,10 +245,10 @@ addto_layer
:members: addto_layer
:noindex:

-convex_comb_layer
+linear_comb_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
-:members: convex_comb_layer
+:members: linear_comb_layer
:noindex:

interpolation_layer
@@ -280,7 +280,13 @@ tensor_layer
.. automodule:: paddle.trainer_config_helpers.layers
:members: tensor_layer
:noindex:


cos_sim
-------
.. automodule:: paddle.trainer_config_helpers.layers
:members: cos_sim
:noindex:

trans_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
@@ -341,12 +347,6 @@ rank_cost
:members: rank_cost
:noindex:

-cos_sim
--------
-.. automodule:: paddle.trainer_config_helpers.layers
-:members: cos_sim
-:noindex:

crf_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
16 changes: 7 additions & 9 deletions paddle/cuda/src/hl_cuda_cudnn.cc
@@ -150,7 +150,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)


// APIs available after R4:
-#if CUDNN_VERSION >= 4000
+#if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining) \
__macro(cudnnBatchNormalizationForwardInference) \
@@ -999,7 +999,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedVar) {
-#if CUDNN_VERSION >= 4000
+#if CUDNN_VERSION >= 4007
if ((NULL != runningMean && NULL == runningInvVar) ||
(NULL == runningMean && NULL != runningInvVar)) {
LOG(FATAL) << "runningMean and runningInvVar can be NULL "
@@ -1024,7 +1024,7 @@ void hl_batch_norm_forward_training(hl_tensor_descriptor inputDesc,

CHECK_SYNC("hl_batch_norm_forward_training failed");
#else
-LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
+LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
@@ -1039,7 +1039,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real *estimatedMean,
real *estimatedInvVar,
double epsilon) {
-#if CUDNN_VERSION >= 4000
+#if CUDNN_VERSION >= 4007
cudnnTensorDescriptor_t xDesc = GET_TENSOR_DESCRIPTOR(inputDesc);
cudnnTensorDescriptor_t yDesc = GET_TENSOR_DESCRIPTOR(outputDesc);
cudnnTensorDescriptor_t bnDesc = GET_TENSOR_DESCRIPTOR(bnParamDesc);
@@ -1053,7 +1053,7 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,

CHECK_SYNC("hl_batch_norm_forward_inference failed");
#else
-LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
+LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
@@ -1071,7 +1071,7 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
double epsilon,
real *savedMean,
real *savedInvVar) {
-#if CUDNN_VERSION >= 4000
+#if CUDNN_VERSION >= 4007
if ((NULL != savedMean && NULL == savedInvVar) ||
(NULL == savedMean && NULL != savedInvVar)) {
LOG(FATAL) << "savedMean and savedVar can be NULL "
@@ -1087,16 +1087,14 @@ void hl_batch_norm_backward(hl_tensor_descriptor inputDesc,
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
CHECK_CUDNN(dynload::cudnnBatchNormalizationBackward(
t_resource.cudnn_handle, mode, &alpha, &beta,
-#if CUDNN_VERSION >= 5000
&alpha, &beta,
-#endif
xDesc, input, dyDesc, outGrad, dxDesc, inGrad,
bnDesc, scale, scaleGrad, biasGrad, epsilon,
savedMean, savedInvVar));

CHECK_SYNC("hl_batch_norm_backward failed");
#else
-LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4000. "
+LOG(FATAL) << "CudnnBatchNorm requires cudnn version >= 4007. "
<< "But cudnn lib version is " << g_cudnn_lib_version;
#endif
}
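The version-guard bump above (4000 to 4007) makes cuDNN R4.0.7 the minimum for these batch-norm wrappers, and the backward path now passes both alpha/beta pairs unconditionally. A minimal sketch of the same compile-time gating pattern, assuming only that cudnn.h defines CUDNN_VERSION; illustrative, not PaddlePaddle code:

#include <cudnn.h>
#include <cstdio>

// Compile the batch-norm path only when the cuDNN headers report at least
// version 4007 (R4.0.7); otherwise fall back to a runtime error message.
void batchNormOrDie() {
#if CUDNN_VERSION >= 4007
  std::printf("cuDNN %d: batch-norm entry points available\n", CUDNN_VERSION);
  // ... the cudnnBatchNormalization* calls would go here ...
#else
  std::printf("CudnnBatchNorm requires cudnn version >= 4007, built with %d\n",
              CUDNN_VERSION);
#endif
}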
1 change: 1 addition & 0 deletions paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -277,6 +277,7 @@ void NeuralNetwork::getState(MachineState& machineState) {
}

void NeuralNetwork::backward(const UpdateCallback& callback) {
gLayerStackTrace.pop("");  // tell the layer stack trace that the backward pass has started
FOR_EACH_R(layer, layers_) {
REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
if ((*layer)->needGradient()) {
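The single added line above relies on the tracer's push/pop bookkeeping: forward() pushes each layer's name as it executes, and popping an empty string tells the tracer the forward pass is finished before the layers are walked in reverse. A hypothetical sketch of that bookkeeping (simplified; not Paddle's actual CustomStackTrace, which is templated and per-thread):

#include <iostream>
#include <string>
#include <vector>

struct LayerTrace {
  std::vector<std::string> stack;
  bool forwarding = true;

  void push(const std::string& name) { stack.push_back(name); }

  // pop("") marks the forward pass as finished; pop(name) unwinds one layer.
  void pop(const std::string& name) {
    if (name.empty()) { forwarding = false; return; }
    if (!stack.empty() && stack.back() == name) stack.pop_back();
  }

  void dump() const {
    std::cerr << (forwarding ? "Forwarding " : "Backwarding ");
    for (const auto& n : stack) std::cerr << n << ", ";
    std::cerr << std::endl;
  }
};

int main() {
  LayerTrace trace;
  trace.push("fc1");
  trace.push("relu1");
  trace.pop("");  // forward done; later dumps are attributed to backward
  trace.dump();   // prints: Backwarding fc1, relu1,
}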
16 changes: 9 additions & 7 deletions paddle/gserver/layers/ConvexCombinationLayer.cpp
@@ -21,18 +21,20 @@ limitations under the License. */
namespace paddle {

/**
-* @brief A layer for convex weighted average of vectors,
+* @brief A layer for weighted sum of vectors,
* which is used in NEURAL MACHINE TRANSLATION BY JOINTLY LEARNING TO ALIGN AND
* TRANSLATE
-* - Input: the first input contains the convex weights (batchSize x weightDim),
-* and the shape of second input is (batchSize x (weightdim*dataDim)).
-* - Output: the shape of output is (batchSize x dataDim).
+* - Input: the size of the first input is weightDim,
+* and the size of the second input is weightDim * dataDim.
+* - Output: the size of the output is dataDim.
* \f[
-* out[i][j] = \sum_{j}(in0(i, j) * in1(i,j + i * dataDim)),
-* i = 0,1,...,(batchSize-1); j = 0, 1,...,(dataDim-1)
+* out(j) = \sum_{i}(in0(i) * in1(i * dataDim + j)),
+* i = 0,1,...,(weightDim-1); j = 0, 1,...,(dataDim-1)
* \f]
* Note that the above computation is for one sample. Multiple samples are
* processed in one batch.
*
-* The config file api is convex_comb_layer.
+* The config file api is linear_comb_layer.
*/
class ConvexCombinationLayer : public Layer {
protected:
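Per sample, the renamed layer forms a linear combination of weightDim row vectors of length dataDim, exactly the corrected formula above. A standalone sketch of that computation (linearComb is an illustrative helper, not part of the Paddle API):

#include <vector>

// out(j) = sum_i weights(i) * vectors(i * dataDim + j):
// a linear combination of weightDim vectors, each of length dataDim.
std::vector<float> linearComb(const std::vector<float>& weights,
                              const std::vector<float>& vectors,
                              size_t dataDim) {
  std::vector<float> out(dataDim, 0.0f);
  for (size_t i = 0; i < weights.size(); ++i) {
    for (size_t j = 0; j < dataDim; ++j) {
      out[j] += weights[i] * vectors[i * dataDim + j];
    }
  }
  return out;
}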
4 changes: 2 additions & 2 deletions paddle/gserver/layers/CosSimLayer.cpp
@@ -48,7 +48,7 @@ void CosSimLayer::forward(PassType passType) {
REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
MatrixPtr prevOut1 = getInputValue(0);
MatrixPtr prevOut2 = getInputValue(1);
-outV->cosSim(*prevOut1, *prevOut2, kCosSimScale_);
+outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale());
}
}

@@ -59,7 +59,7 @@ void CosSimLayer::backward(const UpdateCallback& callback) {

outG->cosSimDerivative(*this->getOutputValue(), *getInputValue(0),
*getInputValue(1), *getInputGrad(0),
-*getInputGrad(1), kCosSimScale_);
+*getInputGrad(1), config_.cos_scale());
}
}

4 changes: 1 addition & 3 deletions paddle/gserver/layers/CosSimLayer.h
@@ -36,16 +36,14 @@ namespace paddle {
class CosSimLayer : public Layer {
public:
explicit CosSimLayer(const LayerConfig& config)
-: Layer(config), kCosSimScale_(5.0f) {}
+: Layer(config) {}

~CosSimLayer() {}

bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);

-const real kCosSimScale_;
};

} // namespace paddle
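The two changes above replace the hard-coded kCosSimScale_ = 5.0f with the cos_scale value read from the layer config, so the scale is now configurable per layer. What the forward pass computes is a scaled cosine similarity; a standalone sketch (scaledCosSim is an illustrative helper, not Paddle code):

#include <cmath>
#include <cstddef>
#include <vector>

// out = scale * (a . b) / (|a| * |b|); a small epsilon guards against
// division by zero for all-zero inputs.
float scaledCosSim(const std::vector<float>& a,
                   const std::vector<float>& b,
                   float scale) {
  float dot = 0.0f, normA = 0.0f, normB = 0.0f;
  for (size_t i = 0; i < a.size(); ++i) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return scale * dot / (std::sqrt(normA) * std::sqrt(normB) + 1e-6f);
}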
18 changes: 0 additions & 18 deletions paddle/gserver/layers/CudnnBatchNormLayer.cpp
@@ -115,29 +115,11 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
create(tmpBiasGrad_, 1, channels_, &betaGrad);
}

-// because of the different api of cudnn v4 and v5.
-if (hl_get_cudnn_lib_version() < 5000) {
-if (weight_->getWGrad()) {
-create(tmpWGrad_, 1, channels_, &gammaGrad);
-}
-if (biases_ && biases_->getWGrad()) {
-create(tmpBiasGrad_, 1, channels_, &betaGrad);
-}
-}

hl_batch_norm_backward(ioDesc_, input, ioDesc_, outGrad,
ioDesc_, inGrad, bnParamDesc_,
gamma, gammaGrad, betaGrad,
EPS, savedMean, savedInvVar);

-// because of the different api of cudnn v4 and v5.
-if (hl_get_cudnn_lib_version() < 5000) {
-if (weight_->getWGrad() && biases_->getWGrad()) {
-weight_->getWGrad()->add(*tmpWGrad_);
-biases_->getWGrad()->add(*tmpBiasGrad_);
-}
-}

{
REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
biases_->getParameterPtr()->incUpdate(callback);
35 changes: 35 additions & 0 deletions paddle/utils/CustomStackTrace.cpp
@@ -14,9 +14,44 @@ limitations under the License. */


#include "CustomStackTrace.h"
#include "CommandLineParser.h"
#include <iostream>

P_DEFINE_bool(layer_stack_error_only_current_thread,
true,
"Dump current thread or whole process layer stack when signal error "
"occurred. true means only dump current thread layer stack");

namespace paddle {

CustomStackTrace<std::string> gLayerStackTrace;

static std::mutex gLayerStackTraceMtx;
void installLayerStackTracer() {
logging::installFailureWriter([](const char* data, int sz) {
std::lock_guard<std::mutex> guard(gLayerStackTraceMtx);
if (!gLayerStackTrace.empty()) {
size_t curTid = -1UL;
std::hash<std::thread::id> hasher;
gLayerStackTrace.dump([&curTid, &hasher](std::thread::id tid,
bool* isForwarding,
const std::string& layerName) {
if (curTid != hasher(tid)) {
if (curTid != -1UL) {
std::cerr << std::endl;
}
curTid = hasher(tid);
std::cerr << "Thread [" << tid << "] ";
if (isForwarding) {
std::cerr << (*isForwarding ? "Forwarding ": "Backwarding ");
}
}
std::cerr << layerName << ", ";
}, FLAGS_layer_stack_error_only_current_thread);
std::cerr << std::endl;
}
std::cerr.write(data, sz);
});
}

} // namespace paddle
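installLayerStackTracer registers a failure writer so that when a fatal log fires, the recorded layer stacks are printed before the failing message itself. A minimal sketch of the same hook, assuming plain glog's InstallFailureWriter rather than Paddle's logging wrapper; the dumped "layer stack" line here is a stand-in:

#include <glog/logging.h>
#include <iostream>

// Called by glog with the fatal message; print extra context first.
static void failureWriter(const char* data, int size) {
  std::cerr << "Thread [main] Forwarding fc1, relu1, " << std::endl;
  std::cerr.write(data, size);
}

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  google::InstallFailureWriter(failureWriter);
  LOG(FATAL) << "boom";  // invokes failureWriter with the message, then aborts
}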