random_mpi/random_mpi.cpp

# include <cmath>
# include <cstdlib>
# include <ctime>
# include <fstream>
# include <iomanip>
# include <iostream>
# include <mpi.h>

using namespace std;

int main ( int argc, char *argv[] );
int congruence ( int a, int b, int c, int *error );
int i4_gcd ( int i, int j );
int i4_max ( int i1, int i2 );
int i4_min ( int i1, int i2 );
int i4_sign ( int i );
void lcrg_anbn ( int a, int b, int c, int n, int *an, int *bn );
int lcrg_evaluate ( int a, int b, int c, int x );
int power_mod ( int a, int n, int m );
void timestamp ( );

//****************************************************************************80

int main ( int argc, char *argv[] )

//****************************************************************************80
//
//  Purpose:
//
//    MAIN is the main program for RANDOM_MPI.
//
//  Discussion:
//
//    This program demonstrates how P processors can generate the same
//    sequence of random numbers as 1 processor.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    16 June 2016
//
//  Author:
//
//    John Burkardt
//
//  Reference:
//
//    William Gropp, Ewing Lusk, Anthony Skjellum,
//    Using MPI: Portable Parallel Programming with the
//    Message-Passing Interface,
//    Second Edition,
//    MIT Press, 1999,
//    ISBN: 0262571323,
//    LC: QA76.642.G76.
//
{
  int a;
  int an;
  int b;
  int bn;
  int c;
  int error;
  int id;
  int j;
  int k;
  int k_hi;
  int p;
  int u;
  int v;
//
//  Initialize MPI.
//
  MPI_Init ( &argc, &argv );
//
//  Get the number of processors.
//
  MPI_Comm_size ( MPI_COMM_WORLD, &p );
//
//  Get the rank of this processor.
//
  MPI_Comm_rank ( MPI_COMM_WORLD, &id );
//
//  Print a message.
//
  if ( id == 0 )
  {
    timestamp ( );
    cout << "\n";
    cout << "RANDOM_MPI - Master process:\n";
    cout << "  C++ version\n";
    cout << "  The number of processors is P = " << p << "\n";
    cout << "\n";
    cout << "  This program shows how a stream of random numbers\n";
    cout << "  can be computed 'in parallel' in an MPI program.\n";
    cout << "\n";
    cout << "  We assume we are using a linear congruential\n";
    cout << "  random number generator or LCRG, which takes\n";
    cout << "  an integer input and returns a new integer output:\n";
    cout << "\n";
    cout << "    U = ( A * V + B ) mod C\n";
    cout << "\n";
    cout << "  We assume that we want the MPI program to produce\n";
    cout << "  the same sequence of random values as a sequential\n";
    cout << "  program would - but we want each processor to compute\n";
    cout << "  one part of that sequence.\n";
    cout << "\n";
    cout << "  We do this by computing a new LCRG which can compute\n";
    cout << "  every P'th entry of the original one.\n";
    cout << "\n";
    cout << "  Our LCRG works with integers, but it is easy to\n";
    cout << "  turn each integer into a real number between [0,1].\n";
  }
//
//  A, B and C define the linear congruential random number generator.
//
  a = 16807;
  b = 0;
  c = 2147483647;

  if ( id == 0 )
  {
    cout << "\n";
    cout << "  LCRG parameters:\n";
    cout << "\n";
    cout << "  A  = " << a << "\n";
    cout << "  B  = " << b << "\n";
    cout << "  C  = " << c << "\n";
  }

  k_hi = p * 10;
//
//  Processor 0 generates 10 * P random values.
//
  if ( id == 0 )
  {
    cout << "\n";
    cout << "  Let processor 0 generate the entire random number sequence.\n";
    cout << "\n";
    cout << "     K    ID         Input        Output\n";
    cout << "\n";

    k = 0;
    v = 12345;
    cout << "  " << setw(4) << k
         << "  " << setw(4) << id
         << "  " << "            "
         << "  " << setw(12) << v << "\n";

    for ( k = 1; k <= k_hi; k++ )
    {
      u = v;
      v = lcrg_evaluate ( a, b, c, u );
      cout << "  " << setw(4) << k
           << "  " << setw(4) << id
           << "  " << setw(12) << u
           << "  " << setw(12) << v << "\n";
    }
  }
//
//  Processor P now participates by computing the P-th part of the sequence.
//
  lcrg_anbn ( a, b, c, p, &an, &bn );

  if ( id == 0 )
  {
    cout << "\n";
    cout << "  LCRG parameters for P processors:\n";
    cout << "\n";
    cout << "  AN = " << an << "\n";
    cout << "  BN = " << bn << "\n";
    cout << "  C  = " << c << "\n";
    cout << "\n";
    cout << "  Have ALL the processors participate in computing\n";
    cout << "  the same random number sequence.\n";
    cout << "\n";
    cout << "     K    ID         Input        Output\n";
    cout << "\n";
  }
//
//  On System X, just to try to keep the output of the various processors 
//  sorted, I had to resort to a SLEEP command to keep the processors from
//  competing.  Obviously, this is only to make the output readable
//  so we can verify it.  It's NOT what you want to do in a production
//  situation!
//
//  sleep ( id * 2 );
//
//  Use the basis LCRG to get the ID-th value in the sequence.
//
  v = 12345;
  for ( j = 1; j <= id; j++ )
  {
    u = v;
    v = lcrg_evaluate ( a, b, c, u );
  }
  k = id;

  cout << "  " << setw(4) << k
       << "  " << setw(4) << id
       << "  " << "            "
       << "  " << setw(12) << v << "\n";
//
//  Now use the "skipping" LCRG to compute the values with indices
//  ID, ID+P, ID+2P, ...,
//
  for ( k = id + p; k <= k_hi; k = k + p )
  {
    u = v;
    v = lcrg_evaluate ( an, bn, c, u );
    cout << "  " << setw(4) << k
         << "  " << setw(4) << id
         << "  " << setw(12) << u
         << "  " << setw(12) << v << "\n";
  }
//
//  Terminate MPI.
//
  MPI_Finalize ( );
//
//  Terminate.
//
  if ( id == 0 )
  {
    cout << "\n";
    cout << "RANDOM_MPI:\n";
    cout << "  Normal end of execution.\n";
    cout << "\n";
    timestamp ( );
  }
  return 0;
}
//****************************************************************************80

int congruence ( int a, int b, int c, bool *error )

//****************************************************************************80
//
//  Purpose:
//
//    CONGRUENCE solves a congruence of the form ( A * X = C ) mod B.
//
//  Discussion:
//
//    A, B and C are given integers.  The equation is solvable if and only
//    if the greatest common divisor of A and B also divides C.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    22 May 2008
//
//  Author:
//
//    John Burkardt
//
//  Reference:
//
//    Eric Weisstein, editor,
//    CRC Concise Encylopedia of Mathematics,
//    CRC Press, 2002,
//    Second edition,
//    ISBN: 1584883472,
//    LC: QA5.W45.
//
//  Parameters:
//
//    Input, int A, B, C, the coefficients of the Diophantine equation.
//
//    Output, bool *ERROR, error flag, is TRUE if an error occurred..
//
//    Output, int CONGRUENCE, the solution of the Diophantine equation.
//    X will be between 0 and B-1.
//
{
# define N_MAX 100

  int a_copy;
  int a_mag;
  int a_sign;
  int b_copy;
  int b_mag;
  int b_sign;
  int c_copy;
  int g;
  int k;
  int n;
  float norm_new;
  float norm_old;
  int q[N_MAX];
  bool swap;
  int temp;
  int x;
  int xnew;
  int y;
  int ynew;
  int z;
//
//  Defaults for output parameters.
//
  *error = false;
  x = 0;
  y = 0;
//
//  Special cases.
//
  if ( a == 0 && b == 0 && c == 0 )
  {
    x = 0;
    return x;
  }
  else if ( a == 0 && b == 0 && c != 0 )
  {
    *error = true;
    x = 0;
    return x;
  }
  else if ( a == 0 && b != 0 && c == 0 )
  {
    x = 0;
    return x;
  }
  else if ( a == 0 && b != 0 && c != 0 )
  {
    x = 0;
    if ( ( c % b ) != 0 )
    {
      *error = true;
    }
    return x;
  }
  else if ( a != 0 && b == 0 && c == 0 )
  {
    x = 0;
    return x;
  }
  else if ( a != 0 && b == 0 && c != 0 )
  {
    x = c / a;
    if ( ( c % a ) != 0 )
    {
      *error = true;
    }
    return x;
  }
  else if ( a != 0 && b != 0 && c == 0 )
  {
//  g = i4_gcd ( a, b );
//  x = b / g;
    x = 0;
    return x;
  }
//
//  Now handle the "general" case: A, B and C are nonzero.
//
//  Step 1: Compute the GCD of A and B, which must also divide C.
//
  g = i4_gcd ( a, b );

  if ( ( c % g ) != 0 )
  {
    *error = true;
    return x;
  }

  a_copy = a / g;
  b_copy = b / g;
  c_copy = c / g;
//
//  Step 2: Split A and B into sign and magnitude.
//
  a_mag = abs ( a_copy );
  a_sign = i4_sign ( a_copy );
  b_mag = abs ( b_copy );
  b_sign = i4_sign ( b_copy );
//
//  Another special case, A_MAG = 1 or B_MAG = 1.
//
  if ( a_mag == 1 )
  {
    x = a_sign * c_copy;
    return x;
  }
  else if ( b_mag == 1 )
  {
    x = 0;
    return x;
  }
//
//  Step 3: Produce the Euclidean remainder sequence.
//
  if ( b_mag <= a_mag )
  {
    swap = false;
    q[0] = a_mag;
    q[1] = b_mag;
  }
  else
  {
    swap = true;
    q[0] = b_mag;
    q[1] = a_mag;
  }

  n = 3;

  for ( ; ; )
  {
    q[n-1] = ( q[n-3] % q[n-2] );

    if ( q[n-1] == 1 )
    {
      break;
    }

    n = n + 1;

    if ( N_MAX < n )
    {
      *error = true;
      cout << "\n";
      cout << "CONGRUENCE - Fatal error!\n";
      cout << "  Exceeded number of iterations.\n";
      exit ( 1 );
    }
  }
//
//  Step 4: Now go backwards to solve X * A_MAG + Y * B_MAG = 1.
//
  y = 0;
  for ( k = n; 2 <= k; k-- )
  {
    x = y;
    y = ( 1 - x * q[k-2] ) / q[k-1];
  }
//
//  Step 5: Undo the swapping.
//
  if ( swap )
  {
    z = x;
    x = y;
    y = z;
  }
//
//  Step 6: Now apply signs to X and Y so that X * A + Y * B = 1.
//
  x = x * a_sign;
//
//  Step 7: Multiply by C, so that X * A + Y * B = C.
//
  x = x * c_copy;
//
//  Step 8: Now force 0 <= X < B.
//
  x = x % b;
//
//  Step 9: Force positivity.
//
  if ( x < 0 )
  {
    x = x + b;
  }

  return x;
# undef N_MAX
}
//****************************************************************************80

int i4_gcd ( int i, int j )

//****************************************************************************80
//
//  Purpose:
//
//    I4_GCD finds the greatest common divisor of I and J.
//
//  Discussion:
//
//    Only the absolute values of I and J are considered, so that the 
//    result is always nonnegative.
//
//    If I or J is 0, I4_GCD is returned as max ( 1, abs ( I ), abs ( J ) ).
//
//    If I and J have no common factor, I4_GCD is returned as 1.
//
//    Otherwise, using the Euclidean algorithm, I4_GCD is the
//    largest common factor of I and J.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    07 May 2003
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    Input, int I, J, two numbers whose greatest common divisor
//    is desired.
//
//    Output, int I4_GCD, the greatest common divisor of I and J.
//
{
  int ip;
  int iq;
  int ir;
//
//  Return immediately if either I or J is zero.
//
  if ( i == 0 )
  {
    return i4_max ( 1, abs ( j ) );
  }
  else if ( j == 0 )
  {
    return i4_max ( 1, abs ( i ) );
  }
//
//  Set IP to the larger of I and J, IQ to the smaller.
//  This way, we can alter IP and IQ as we go.
//
  ip = i4_max ( abs ( i ), abs ( j ) );
  iq = i4_min ( abs ( i ), abs ( j ) );
//
//  Carry out the Euclidean algorithm.
//
  for ( ; ; )
  {
    ir = ip % iq;

    if ( ir == 0 )
    {
      break;
    }

    ip = iq;
    iq = ir;
  }

  return iq;
}
//****************************************************************************80

int i4_max ( int i1, int i2 )

//****************************************************************************80
//
//  Purpose:
//
//    I4_MAX returns the maximum of two I4's.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    05 May 2003
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    Input, int I1, I2, two integers to be compared.
//
//    Output, int I4_MAX, the larger of I1 and I2.
//
{
  int value;

  if ( i2 < i1 ) 
  {
    value = i1;
  }
  else 
  {
    value = i2;
  }
  return value;
}
//****************************************************************************80

int i4_min ( int i1, int i2 )

//****************************************************************************80
//
//  Purpose:
//
//    I4_MIN returns the smaller of two I4's.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    05 May 2003
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    Input, int I1, I2, two integers to be compared.
//
//    Output, int I4_MIN, the smaller of I1 and I2.
//
{
  int value;

  if ( i1 < i2 ) 
  {
    value = i1;
  }
  else 
  {
    value = i2;
  }
  return value;
}
//****************************************************************************80

int i4_sign ( int i )

//****************************************************************************80
//
//  Purpose:
//
//    I4_SIGN returns the sign of an I4.
//
//  Discussion:
//
//    The sign of 0 and all positive integers is taken to be +1.
//    The sign of all negative integers is -1.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    06 May 2003
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    Input, int I, the integer whose sign is desired.
//
//    Output, int I4_SIGN, the sign of I.
{
  int value;

  if ( i < 0 ) 
  {
    value = -1;
  }
  else
  {
    value = 1;
  }
  return value;
}
//****************************************************************************80

void lcrg_anbn ( int a, int b, int c, int n, int *an, int *bn )

//****************************************************************************80
//
//  Purpose:
//
//    LCRG_ANBN computes the "N-th power" of a linear congruential generator.
//
//  Discussion:
//
//    We are considering a linear congruential random number generator.
//    The LCRG takes as input an integer value called SEED, and returns
//    an updated value of SEED,
//
//      SEED(out) = ( a * SEED(in) + b ) mod c.
//
//    and an associated pseudorandom real value
//
//      U = SEED(out) / c.
//
//    In most cases, a user is content to call the LCRG repeatedly, with
//    the updating of SEED being taken care of automatically.
//
//    The purpose of this routine is to determine the values of AN and BN
//    that describe the LCRG that is equivalent to N applications of the
//    original LCRG.
//
//    One use for such a facility would be to do random number computations
//    in parallel.  If each of N processors is to compute many random values,
//    you can guarantee that they work with distinct random values
//    by starting with a single value of SEED, using the original LCRG to generate
//    the first N-1 "iterates" of SEED, so that you now have N "seed" values,
//    and from now on, applying the N-th power of the LCRG to the seeds.
//
//    If the K-th processor starts from the K-th seed, it will essentially
//    be computing every N-th entry of the original random number sequence,
//    offset by K.  Thus the individual processors will be using a random
//    number stream as good as the original one, and without repeating, and
//    without having to communicate.
//
//    To evaluate the N-th value of SEED directly, we start by ignoring
//    the modular arithmetic, and working out the sequence of calculations
//    as follows:
//
//      SEED(0)   =     SEED.
//      SEED(1)   = a * SEED      + b
//      SEED(2)   = a * SEED(1)   + b = a^2 * SEED           + a * b + b
//      SEED(3)   = a * SEED(2)   + b = a^3 * SEED + a^2 * b + a * b + b
//      ...
//      SEED(N-1) = a * SEED(N-2) + b
//
//      SEED(N) = a * SEED(N-1) + b = a^N * SEED
//                                    + ( a^(n-1) + a^(n-2) + ... + a + 1 ) * b
//
//    or, using the geometric series,
//
//      SEED(N) = a^N * SEED + ( a^N - 1) / ( a - 1 ) * b
//              = AN * SEED + BN
//
//    Thus, from any SEED, we can determine the result of N applications of the
//    original LCRG directly if we can solve
//
//      ( a - 1 ) * BN = ( a^N - 1 ) * b in modular arithmetic,
//
//    and evaluate:
//
//      AN = a^N
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license.
//
//  Modified:
//
//    23 April 2008
//
//  Author:
//
//    John Burkardt
//
//  Reference:
//
//    Barry Wilkinson, Michael Allen,
//    Parallel Programming:
//    Techniques and Applications Using Networked Workstations and Parallel Computers,
//    Prentice Hall,
//    ISBN: 0-13-140563-2,
//    LC: QA76.642.W54.
//
//  Parameters:
//
//    Input, int A, the multiplier for the LCRG.
//
//    Input, int  B, the added value for the LCRG.
//
//    Input, int  C, the base for the modular arithmetic.
//    For 32 bit arithmetic, this is often 2^31 - 1, or 2147483647.  It is
//    required that 0 < C.
//
//    Input, int N, the "index", or number of times that the
//    LCRG is to be applied.  It is required that 0 <= N.
//
//    Output, int *AN, *BN, the multiplier and added value for
//    the LCRG that represent N applications of the original LCRG.
//
{
  int am1;
  int anm1tb;
  bool ierror;

  if ( n < 0 )
  {
    cerr << "\n";
    cerr << "LCRG_ANBN - Fatal error!\n";
    cerr << "  Illegal input value of N = " << n << "\n";
    exit ( 1 );
  }

  if ( c <= 0 )
  {
    cerr << "\n";
    cerr << "LCRG_ANBN - Fatal error!\n";
    cerr << "  Illegal input value of C = " << c << "\n";
    exit ( 1 );
  }

  if ( n == 0 )
  {
    *an = 1;
    *bn = 0;
  }
  else if ( n == 1 )
  {
    *an = a;
    *bn = b;
  }
  else
  {
//
//  Compute A^N.
//
    *an = power_mod ( a, n, c );
//
//  Solve 
//    ( a - 1 ) * BN = ( a^N - 1 ) mod B
//  for BN.
//
    am1 = a - 1;
    anm1tb = ( *an - 1 ) * b;

    *bn = congruence ( am1, c, anm1tb, &ierror );

    if ( ierror )
    {
      cerr << "\n";
      cerr << "LCRG_ANBN - Fatal error!\n";
      cerr << "  An error occurred in the CONGRUENCE routine.\n";
      exit ( 1 );
    }
  }

  return;
}
//****************************************************************************80

int lcrg_evaluate ( int a, int b, int c, int x )

//****************************************************************************80
//
//  Purpose:
//
//    LCRG_EVALUATE evaluates an LCRG, y = ( A * x + B ) mod C.
//
//  Discussion:
//
//    This routine cannot be recommended for production use.  Because we want
//    to do modular arithmetic, but the base is not a power of 2, we need to
//    use "double precision" integers to keep accuracy.
//
//    If we knew the base C, we could try to avoid overflow while not changing
//    precision.
//
//    If the base C was a power of 2, we could rely on the usual properties of
//    integer arithmetic on computers, in which overflow bits, which are always
//    ignored, don ot actually matter.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license.
//
//  Modified:
//
//    23 April 2008
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    Input, int A, the multiplier for the LCRG.
//
//    Input, int B, the added value for the LCRG.
//
//    Input, int C, the base for the modular arithmetic.
//    For 32 bit arithmetic, this is often 2^31 - 1, or 2147483647.  It is
//    required that 0 < C.
//
//    Input, int X, the value to be processed.
//
//    Output, int LCRG_EVALUATE, the processed value.
//
{
  long long int a8;
  long long int b8;
  long long int c8;
  long long int x8;
  int y;
  long long int y8;
//
//  To avoid roundoff issues, we need to go to "double precision" integers.
//  (Not available on all planets.)
//
  a8 = ( long long int ) a;
  b8 = ( long long int ) b;
  c8 = ( long long int ) c;
  x8 = ( long long int ) x;

  y8 = ( a8 * x8 + b8 ) % c8;

  y = ( int ) ( y8 );

  if ( y < 0 )
  {
    y = y + c;
  }

  return y;
}
//****************************************************************************80

int power_mod ( int a, int n, int m )

//****************************************************************************80
//
//  Purpose:
//
//    POWER_MOD computes ( A^N ) mod M.
//
//  Discussion:
//
//    Some programming tricks are used to speed up the computation, and to
//    allow computations in which A^N is much too large to store in a
//    real word.
//
//    First, for efficiency, the power A**N is computed by determining
//    the binary expansion of N, then computing A, A^2, A^4, and so on
//    by repeated squaring, and multiplying only those factors that
//    contribute to A^N.
//
//    Secondly, the intermediate products are immediately "mod'ed", which
//    keeps them small.
//
//    For instance, to compute mod ( A**13, 11 ), we essentially compute
//
//       13 = 1 + 4 + 8
//
//       A^13 = A * A^4 * A^8
//
//       A^13 mod ( 11 ) = A mod ( 11 ) * A^4 mod ( 11 ) * A^8 mod ( 11 ).
//
//    Fermat's little theorem says that if P is prime, and A is not divisible
//    by P, then ( A^(P-1) - 1 ) is divisible by P.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    12 May 2007
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    Input, int A, the base of the expression to be tested.
//    A should be nonnegative.
//
//    Input, int N, the power to which the base is raised.
//    N should be nonnegative.
//
//    Input, int M, the divisor against which the expression is tested.
//    M should be positive.
//
//    Output, int POWER_MOD, the remainder when A^N is divided by M.
//
{
  long long int a_square2;
  int d;
  long long int m2;
  int x;
  long long int x2;

  if ( a < 0 )
  {
    return -1;
  }

  if ( m <= 0 )
  {
    return -1;
  }

  if ( n < 0 )
  {
    return -1;
  }
//
//  A_SQUARE contains the successive squares of A.
//
  a_square2 = ( long long int ) a;
  m2 = ( long long int ) m;
  x2 = ( long long int ) 1;

  while ( 0 < n )
  {
    d = n % 2;

    if ( d == 1 )
    {
      x2 = ( x2 * a_square2 ) % m2;
    }

    a_square2 = ( a_square2 * a_square2 ) % m2;
    n = ( n - d ) / 2;
  }
//
//  Ensure that 0 <= X.
//
  while ( x2 < 0 )
  {
    x2 = x2 + m2;
  }

  x = ( int ) x2;

  return x;
}
//****************************************************************************80

void timestamp ( )

//****************************************************************************80
//
//  Purpose:
//
//    TIMESTAMP prints the current YMDHMS date as a time stamp.
//
//  Example:
//
//    May 31 2001 09:45:54 AM
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license. 
//
//  Modified:
//
//    02 October 2003
//
//  Author:
//
//    John Burkardt
//
//  Parameters:
//
//    None
//
{
# define TIME_SIZE 40

  static char time_buffer[TIME_SIZE];
  const struct tm *tm;
  size_t len;
  time_t now;

  now = time ( NULL );
  tm = localtime ( &now );

  len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );

  cout << time_buffer << "\n";

  return;
# undef TIME_SIZE
}