Skip to content

Commit

Permalink
Add randomization to GPU NCE update
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaoyukoon committed Jul 10, 2015
1 parent 6168015 commit 9ea1406
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 30 deletions.
1 change: 1 addition & 0 deletions Math/Math/GPUMatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1959,6 +1959,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
}

template<class ElemType>
void GPUMatrix<ElemType>::AssignSoftmaxSum(const GPUMatrix<ElemType>& a, GPUMatrix<ElemType>& c)
{
Expand Down
36 changes: 6 additions & 30 deletions Math/Math/GPUMatrixCUDAKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3123,6 +3123,7 @@ __global__ void _assignNceDerivative(
}
}
}

template<class ElemType>
__global__ void _assignNceDerivativeNew(
const ElemType* val,
Expand All @@ -3144,13 +3145,8 @@ __global__ void _assignNceDerivativeNew(
// tmp holds the precalculated errors, one per logical thread index n
// c is the output matrix into which the calculated gradients are accumulated

// assume a 1 dimensional thread array
int tx = threadIdx.x; // thread index in thread-block (0-indexed)
int bx = blockIdx.x; // block index (0-indexed)
int bdim = blockDim.x; // number of threads in thread block

// logical single index for this thread
int n = tx + bdim*bx;
int n = threadIdx.x + blockDim.x* blockIdx.x;

int batchId = n / sampleCount;
int total = numRows * sampleCount;
Expand All @@ -3159,47 +3155,27 @@ __global__ void _assignNceDerivativeNew(
{
int wid = (int)val[2 * n];
ElemType er = tmp[n];
//c[n] = a[n] + b[n]; // this thread does one addition
if (inputIndex == 1)
{
for (int j = 0; j < width; j++)
for (int i = 0; i < width; i++)
{
int j = (i + n) % width; //introduce randomization to avoid conflicts
ElemType val = -er * b[IDX2C(j, wid, width)];
atomicAdd(&c[IDX2C(j, batchId, width)], val);
}
}
else if (inputIndex == 2)
{
for (int j = 0; j < width; j++)
for (int i = 0; i < width; i++)
{
int j = (i + n) % width; //introduce randomization to avoid conflicts
ElemType val = -er * a[IDX2C(j, batchId, width)];
atomicAdd(&c[IDX2C(j, wid, width)], val);
}
}
else
atomicAdd(&c[wid], -er);
}
/*
int loadPerBlock = (total + gridDim.x - 1) / gridDim.x;
// find out the items this block is responsible for
int start = loadPerBlock * blockIdx.x;
int end = min(total, loadPerBlock * (blockIdx.x + 1));
// find out the items this block is responsible for
for (int i = start; i < end; i++)
{
int wid = (int)val[2 * i];
ElemType er = tmp[i]; // precalculated error for this output node
if (inputIndex == 3) //bias vector
{
//ElemType val = -er;
atomicAdd(&c[wid], -er);
//c[wid] -= er;
}
}
*/
}
// compute gradients of weights in cross entropy node
template<class ElemType>
Expand Down

0 comments on commit 9ea1406

Please sign in to comment.