Bert inference perf improve (intel#1555)
Split the big Dense layer into several small Dense layers in TransformerLayer.
Ensure BERT uses MKL for math operations.
dding3 authored Aug 10, 2019
1 parent e63b6ba commit 09c0cc0
Showing 7 changed files with 350 additions and 181 deletions.
@@ -0,0 +1,101 @@
/*
* Copyright 2018 Analytics Zoo Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.intel.analytics.zoo.common

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.reflect.ClassTag

object TensorOperation {

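// Computes the broadcast shape of two tensors: trailing dimensions are matched
// right-to-left and must be equal or 1; e.g. sizes 4x1x3 and 2x3 expand to 4x2x3.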
def expandSize[T: ClassTag](tensor: Tensor[T], other: Tensor[T]): Array[Int] = {
val errorMsg = s"tensor sizes do not match: ${tensor.size.mkString("x")} " +
s"vs ${other.size.mkString("x")}"
val longTensor = if (tensor.dim() > other.dim()) tensor else other
val shortTensor = if (tensor.dim() > other.dim()) other else tensor
val ndim = longTensor.nDimension()
val delta = longTensor.nDimension() - shortTensor.nDimension()
val size = new Array[Int](ndim)
var i = ndim - 1
while (i >= delta) {
require(longTensor.size(i + 1) == shortTensor.size(i + 1 - delta) ||
longTensor.size(i + 1) == 1 ||
shortTensor.size(i + 1 - delta) == 1, errorMsg)
size(i) = math.max(longTensor.size(i + 1), shortTensor.size(i + 1 - delta))
i -= 1
}

while (i >= 0) {
size(i) = longTensor.size(i + 1)
i -= 1
}

size
}

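// Expands tensor2 to the broadcast shape as a zero-strided view; if tensor itself
// needs broadcasting, it is replaced (via set) with a materialized copy at the
// broadcast shape. Returns the expanded view of tensor2.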
def expandTensor[T: ClassTag](tensor: Tensor[T], tensor2: Tensor[T])
(implicit ev: TensorNumeric[T]): Tensor[T] = {
val targetSize = expandSize(tensor, tensor2)
val expandStrides = new Array[Int](targetSize.length)

val expandStridesX = new Array[Int](targetSize.length)
var i = targetSize.length - 1
val delta2 = targetSize.length - tensor2.nDimension
while (i >= delta2) {
if (tensor2.size(i + 1 - delta2) != 1) expandStridesX(i) = tensor2.stride(i + 1 - delta2)
i -= 1
}
val expandX = Tensor[T](
tensor2.storage(),
tensor2.storageOffset(),
targetSize,
expandStridesX
)
if (targetSize.product != tensor.nElement()) {
i = targetSize.length - 1
val delta1 = targetSize.length - tensor.nDimension
while (i >= delta1) {
if (tensor.size(i + 1 - delta1) != 1) expandStrides(i) = tensor.stride(i + 1 - delta1)
i -= 1
}
val tensor1 = Tensor[T](
tensor.storage(),
tensor.storageOffset(),
targetSize,
expandStrides
)
val newTensor = Tensor[T]().resize(targetSize).add(tensor1)
tensor.set(newTensor)
}
expandX
}

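// Broadcasted in-place subtraction: expands both operands to a common shape
// and returns tensor - tensor2 in tensor.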
def subTensor[T: ClassTag](tensor: Tensor[T], tensor2: Tensor[T])
(implicit ev: TensorNumeric[T]): Tensor[T] = {
val expandedTensor = expandTensor(tensor, tensor2).contiguous()
tensor.sub(expandedTensor)
tensor
}

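// Broadcasted in-place division: expands both operands to a common shape
// and returns tensor / tensor2 in tensor.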
def divTensor[T: ClassTag](tensor: Tensor[T], tensor2: Tensor[T])
(implicit ev: TensorNumeric[T]): Tensor[T] = {
val expandedTensor = expandTensor(tensor, tensor2).contiguous()
tensor.div(expandedTensor)
tensor
}
}
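A minimal usage sketch of the new helpers (illustrative only, not part of the commit; the shapes and values below are hypothetical and assume BigDL's Tensor API):

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.zoo.common.TensorOperation

// x: a 2x3 tensor; rowMean: its per-row mean, shape 2x1 (sum keeps the reduced dim).
val x = Tensor[Float](2, 3).rand()
val rowMean = x.sum(2).div(3f)
// Subtracts each row's mean in place, expanding the 2x1 tensor across the last dim.
TensorOperation.subTensor(x, rowMean)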
@@ -77,5 +77,3 @@ object Dense
wRegularizer, bRegularizer, bias, inputShape)
}
}


@@ -16,14 +16,15 @@

package com.intel.analytics.zoo.pipeline.api.keras.layers.internal

import com.intel.analytics.bigdl.nn.{Mean, Sum}
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, TensorModule}
import com.intel.analytics.bigdl.nn.abstractnn.TensorModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.zoo.common.TensorOperation

import scala.reflect.ClassTag

private[zoo] class InternalLayerNorm[T: ClassTag](val nOutput: Int = 768, val eps: Double = 1e-5)
private[zoo] class InternalLayerNorm[T: ClassTag](
val nOutput: Int = 768, val eps: Double = 1e-5)
(implicit ev: TensorNumeric[T]) extends TensorModule[T]{
val weight = Tensor.ones[T](nOutput).view(1, nOutput)
val bias = Tensor[T](nOutput).view(1, nOutput)
@@ -39,12 +40,13 @@ private[zoo] class InternalLayerNorm[T: ClassTag](val nOutput: Int = 768, val ep
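// LayerNorm over the last dimension: output = weight * (x - mean) / sqrt(var + eps) + bias.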
override def updateOutput(input: Tensor[T]): Tensor[T] = {
val dim = input.dim()
val u = input.sum(dim).div(ev.fromType(input.size(dim)))
divInput1 = input.clone().sub(u) // x - u

divInput1 = TensorOperation.subTensor(input.clone(), u)
val square = divInput1.clone().square()
val s = square.sum(square.dim()).div(ev.fromType(square.size(square.dim())))
sqrtInput = s.add(ev.fromType(eps))
divInput2 = sqrtInput.clone().sqrt()
y = divInput1.clone.div(divInput2)
y = TensorOperation.divTensor(divInput1.clone(), divInput2)
output = y.clone().cmul(weight).add(bias)
output
}
@@ -17,7 +17,7 @@
package com.intel.analytics.zoo.pipeline.api.keras.layers.internal

import com.intel.analytics.bigdl.nn.abstractnn.TensorModule
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.{DoubleType, FloatType, Tensor}
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.Shape

@@ -30,18 +30,20 @@ import scala.reflect.ClassTag
* where shift = max_i(x_i).
* Currently only supports applying softmax normalization to the last dim.
*/
private[zoo] class InternalSoftMax[T: ClassTag]()(implicit ev: TensorNumeric[T])
extends TensorModule[T] {
private[zoo] class InternalSoftMax[T: ClassTag]()
(implicit ev: TensorNumeric[T]) extends TensorModule[T] {

override def updateOutput(input: Tensor[T]): Tensor[T] = {
val dim = input.dim()
val sizes = input.size()
val shift = input.max(dim)._1

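// clone() keeps updateOutput from mutating its input in place;
// contiguous() materializes the zero-strided expanded view before the sub.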
val shiftedInput = input.sub(shift.expand(sizes))
val shiftedInput = input.clone().sub(shift.expand(sizes).contiguous())
val exp = shiftedInput.exp()

val sum = exp.sum(dim)
output = exp.div(sum.expand(sizes))
output = exp.div(sum.expand(sizes).contiguous())

output
}

@@ -54,7 +56,7 @@ private[zoo] class InternalSoftMax[T: ClassTag]()(implicit ev: TensorNumeric[T])
}

private[zoo] object InternalSoftMax{
def apply[@specialized(Float, Double) T: ClassTag]()
def apply[T: ClassTag]()
(implicit ev: TensorNumeric[T]) : InternalSoftMax[T] = {
new InternalSoftMax[T]()
}
@@ -130,10 +130,14 @@ private[layers] class TransformerLayer[T: ClassTag](

def multiHeadSelfAttention(x: Variable[T], hiddenSize: Int,
attention_mask: Variable[T] = null): Variable[T] = {
val c = projectionLayer(hiddenSize * 3).from(x)
val query = c.slice(2, 0, hiddenSize)
val key = c.slice(2, hiddenSize, hiddenSize)
val value = c.slice(2, hiddenSize * 2, hiddenSize)
// val c = projectionLayer(hiddenSize * 3).from(x)
// val query = c.slice(2, 0, hiddenSize)
// val key = c.slice(2, hiddenSize, hiddenSize)
// val value = c.slice(2, hiddenSize * 2, hiddenSize)
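// Perf: three separate hiddenSize projections for Q, K and V replace the single
// fused 3 * hiddenSize projection plus slices above.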
val query = projectionLayer(hiddenSize).from(x)
val key = projectionLayer(hiddenSize).from(x)
val value = projectionLayer(hiddenSize).from(x)

val q = splitHeads(query, nHead)
val k = splitHeads(key, nHead, k = true)
val v = splitHeads(value, nHead)