Skip to content

Commit

Permalink
Entire matrix A is owned by one Worker_A
Browse files Browse the repository at this point in the history
Computations are done in other workers and sent back to Worker_A
hpl_par.j still does not have spawns, but this is preparing to get there
Code works correctly
  • Loading branch information
ViralBShah committed Jun 20, 2011
1 parent f9bad94 commit f627d98
Showing 2 changed files with 54 additions and 47 deletions.
99 changes: 53 additions & 46 deletions test/bench/hpl_par.j
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
## Based on "Multi-Threading and One-Sided Communication in Parallel LU Factorization"
## http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.138.4361&rank=7

function hpl (A::Matrix, b::Vector)
# This version is written for a shared memory implementation.
# The matrix A is local to the first Worker, which allocates work to other Workers
# All updates to A are carried out by the first Worker. Thus A is not distributed

function hpl_par(A::Matrix, b::Vector)

blocksize = 5

@@ -12,23 +16,53 @@ function hpl (A::Matrix, b::Vector)
B_rows[end] = n
B_cols = [B_rows, [n+1]]
nB = length(B_rows)
#depend = zeros(Bool, nB, nB) # In parallel, depend needs to be able to hold futures
depend = Array(RemoteRef, nB, nB)
depend = zeros(Bool, nB, nB) # In parallel, depend needs to be able to hold futures

## Small matrix case
if nB <= 1
x = A[1:n, 1:n] \ A[:,n+1]
return x
end

## Add a ghost row of dependencies to boostrap the computation
for j=1:nB; depend[1,j] = true; end

for i=1:(nB-1)
## Threads for panel factorizations
K = (B_rows[i]+1):n
I = (B_rows[i]+1):B_rows[i+1]
#(depend[i+1,i], panel_p) = panel_factor(A, I, depend[i,i])
rref_pf = @spawn panel_factor(A[K,I], depend[i,i])
depend[i+1,i] = @spawnlocal spawn_trailing_updates ()
K = I[1]:n
(A_KI, panel_p) = panel_factor(A[K,I], depend[i,i])

## Write the factorized panel back to A
A[K,I] = A_KI

## Panel permutation
panel_p = K[panel_p]
depend[i+1,i] = true

## Apply permutation from pivoting
J = (B_cols[i+1]+1):B_cols[nB+1]
A[K, J] = A[panel_p, J]

## Threads for trailing updates
L_II = tril(A[I,I], -1) + eye(length(I))
K = (I[end]+1):n

for j=(i+1):nB
J = (B_cols[j]+1):B_cols[j+1]

## Do the trailing update (Compute U, and DGEMM - all flops are here)
(A_IJ, A_KJ) = trailing_update(L_II, A[I,J], A[K,I], A[K,J],
depend[i+1,i], depend[i,j])
## Write updates back to A
A[I,J] = A_IJ
A[K,J] = A_KJ
depend[i+1,j] = true
end
end

## Completion of the last diagonal block signals termination
#wait(depend[nB, nB])
@assert depend[nB, nB]

## Solve the triangular system
x = triu(A[1:n,1:n]) \ A[:,n+1]
@@ -40,53 +74,26 @@ end ## hpl()

### Panel factorization ###

function panel_factor(A, col_dep)

## Enforce dependencies
wait(col_dep)

function panel_factor(A_KI, col_dep)

## Factorize a panel
panel_p = lu(A, true) # Economy mode. A is overwritten with factors.
(A_KI, panel_p) = lu(A_KI, true) # Economy mode

return (A, panel_p)
return (A_KI, panel_p)

end ## panel_factor()

### spawn trailing updates ###

function spawn_trailing_updates(A, depend, rref_pf)
### Trailing update ###

(A[K,I], p) = fetch(rref_pf)
panel_p = K[p]

for j=(i+1):nB
J = (B_cols[j]+1):B_cols[j+1]
#depend[i+1,j] = trailing_update(A, I, J, panel_p, depend[i+1,i],depend[i,j])
rref_tu = @spawn trailing_update(A, I, J, panel_p, depend[i+1,i],depend[i,j])
depend[i+1,j] = @spawnlocal (A[I,J], A[K,J]) = fetch(rref_tu)
end

end

### One block column trailing update ###

function trailing_update(A_II, A_IJ, A_KI, A_IJ, panel_p, row_dep, col_dep)

## Enforce dependencies
wait(row_dep)
wait(col_dep)

## Apply permutation from pivoting
K = (I[end]+1):n
A[I[1]:n, J] = A[panel_p, J]
function trailing_update(L_II, A_IJ, A_KI, A_KJ, row_dep, col_dep)

## Apply pivot sequence to Compute blocks of U
L = tril(A_II, -1) + eye(length(I))
A_IJ = L \ A_IJ
## Compute blocks of U
A_IJ = L_II \ A_IJ

## Trailing submatrix update
if !isempty(K)
A_KJ = A_KJ - A_KI*A_IJ
## Trailing submatrix update - All flops are here
if !isempty(A_KJ)
A_KJ = A_KJ - A_KI*A_IJ
end

return (A_IJ, A_KJ)
2 changes: 1 addition & 1 deletion test/bench/hpl_seq.j
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Based on "Multi-Threading and One-Sided Communication in Parallel LU Factorization"
## http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.138.4361&rank=7

function hpl (A::Matrix, b::Vector)
function hpl_seq(A::Matrix, b::Vector)

blocksize = 5

0 comments on commit f627d98

Please sign in to comment.