Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
fbaru-dev authored Sep 13, 2017
1 parent 97b2048 commit 3db707f
Show file tree
Hide file tree
Showing 7 changed files with 597 additions and 0 deletions.
298 changes: 298 additions & 0 deletions ver6/GSimulation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
/*
This file is part of the example codes which have been used
for the "Code Optmization Workshop".
Copyright (C) 2016 Fabio Baruffa <[email protected]>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "GSimulation.hpp"
#include "cpu_time.hpp"

GSimulation :: GSimulation()
{
std::cout << "===============================" << std::endl;
std::cout << " Initialize Gravity Simulation" << std::endl;
set_npart(2000);
set_nsteps(500);
set_tstep(0.1);
set_sfreq(50);
}

void GSimulation :: set_number_of_particles(int N)
{
set_npart(N);
}

void GSimulation :: set_number_of_steps(int N)
{
set_nsteps(N);
}

void GSimulation :: init_pos()
{
int gen = 42;
srand(gen);
real_type max = static_cast<real_type> ( R_MAX );

for(int i=0; i<get_npart(); ++i)
{
real_type r = static_cast<real_type>(rand()) / static_cast<real_type>(RAND_MAX);
r = (max - 1.0f) * r + 1.0f;
particles->pos_x[i] = -1.0f + 2.0f * r / max;
particles->pos_y[i] = -1.0f + 2.0f * r / max;
particles->pos_z[i] = -1.0f + 2.0f * r / max;
}
}

void GSimulation :: init_vel()
{
int gen = 42;
srand(gen);
real_type max = static_cast<real_type> (RAND_MAX);

for(int i=0; i<get_npart(); ++i)
{
real_type r = static_cast<real_type>(rand()) / static_cast<real_type>(RAND_MAX);
r = (max - 1.0f) * r + 1.0f;
particles->vel_x[i] = -1.0e-4f + 2.0f * r / max * 1.0e-4f;
particles->vel_y[i] = -1.0e-4f + 2.0f * r / max * 1.0e-4f;
particles->vel_z[i] = -1.0e-4f + 2.0f * r / max * 1.0e-4f;
}
}

void GSimulation :: init_acc()
{
for(int i=0; i<get_npart(); ++i)
{
particles->acc_x[i] = 0.f;
particles->acc_y[i] = 0.f;
particles->acc_z[i] = 0.f;
}
}

void GSimulation :: init_mass()
{
int gen = 42;
srand(gen);
real_type n = static_cast<real_type> (get_npart());
real_type max = static_cast<real_type> (RAND_MAX);

for(int i=0; i<get_npart(); ++i)
{
real_type r = static_cast<real_type>(rand()) / static_cast<real_type>(RAND_MAX);
r = (max - 1.0f) * r + 1.0f;
particles->mass[i] = n + n * r / max;
}
}

void GSimulation :: start()
{
real_type energy;
real_type dt = get_tstep();
int n = get_npart();
int i,j;

const int alignment = 64;
particles = (ParticleSoA*) _mm_malloc(sizeof(ParticleSoA),alignment);

particles->pos_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->pos_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->pos_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->vel_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->vel_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->vel_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->acc_x = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->acc_y = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->acc_z = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);
particles->mass = (real_type*) _mm_malloc(n*sizeof(real_type),alignment);

init_pos();
init_vel();
init_acc();
init_mass();

print_header();

_totTime = 0.;

const float softeningSquared = 1.e-3f;
const float G = 6.67259e-11f;

CPUTime time;
double ts0 = 0;
double ts1 = 0;
double nd = double(n);
double gflops = 1e-9 * ( (11. + 18. ) * nd*nd + nd * 19. );
double av=0.0, dev=0.0;
int nf = 0;

const int tileSize = 8;

const double t0 = time.start();
for (int s=1; s<=get_nsteps(); ++s)
{
ts0 += time.start();
for (int ii = 0; ii < n; ii += tileSize )
{
real_type acc_xtile[tileSize];
real_type acc_ytile[tileSize] ;
real_type acc_ztile[tileSize];
#pragma omp simd
for(int s=0; s<tileSize; s++)
{
acc_xtile[s] = 0.0f;
acc_ytile[s] = 0.0f;
acc_ztile[s] = 0.0f;
}
__assume_aligned(particles->pos_x, alignment);
__assume_aligned(particles->pos_y, alignment);
__assume_aligned(particles->pos_z, alignment);
__assume_aligned(particles->acc_x, alignment);
__assume_aligned(particles->acc_y, alignment);
__assume_aligned(particles->acc_z, alignment);
__assume_aligned(particles->mass, alignment);

real_type ax_i = particles->acc_x[i];
real_type ay_i = particles->acc_y[i];
real_type az_i = particles->acc_z[i];
#pragma omp simd
for (j = 0; j < n; j++)
{
for (int i = ii; i < ii + tileSize; i++)
{
real_type dx, dy, dz;
real_type distanceSqr = 0.0f;
real_type distanceInv = 0.0f;

dx = particles->pos_x[j] - particles->pos_x[i]; //1flop
dy = particles->pos_y[j] - particles->pos_y[i]; //1flop
dz = particles->pos_z[j] - particles->pos_z[i]; //1flop

distanceSqr = dx*dx + dy*dy + dz*dz + softeningSquared; //6flops
distanceInv = 1.0f / sqrtf(distanceSqr); //1div+1sqrt

acc_xtile[i-ii] += dx * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
acc_ytile[i-ii] += dy * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
acc_ztile[i-ii] += dz * G * particles->mass[j] * distanceInv * distanceInv * distanceInv; //6flops
}
}
#pragma omp simd
for(int s=0; s<tileSize; s++)
{
particles->acc_x[s+ii] = acc_xtile[s];
particles->acc_y[s+ii] = acc_ytile[s];
particles->acc_z[s+ii] = acc_ztile[s];
}
}
energy = 0;

for (i = 0; i < n; ++i)// update position
{
particles->vel_x[i] += particles->acc_x[i] * dt; //2flops
particles->vel_y[i] += particles->acc_y[i] * dt; //2flops
particles->vel_z[i] += particles->acc_z[i] * dt; //2flops

particles->pos_x[i] += particles->vel_x[i] * dt; //2flops
particles->pos_y[i] += particles->vel_y[i] * dt; //2flops
particles->pos_z[i] += particles->vel_z[i] * dt; //2flops

particles->acc_x[i] = 0.;
particles->acc_y[i] = 0.;
particles->acc_z[i] = 0.;

energy += particles->mass[i] * (
particles->vel_x[i]*particles->vel_x[i] +
particles->vel_y[i]*particles->vel_y[i] +
particles->vel_z[i]*particles->vel_z[i]); //7flops
}

_kenergy = 0.5 * energy;

ts1 += time.stop();
if(!(s%get_sfreq()) )
{
nf += 1;
std::cout << " "
<< std::left << std::setw(8) << s
<< std::left << std::setprecision(5) << std::setw(8) << s*get_tstep()
<< std::left << std::setprecision(5) << std::setw(12) << _kenergy
<< std::left << std::setprecision(5) << std::setw(12) << (ts1 - ts0)
<< std::left << std::setprecision(5) << std::setw(12) << gflops*get_sfreq()/(ts1 - ts0)
<< std::endl;
if(nf > 2)
{
av += gflops*get_sfreq()/(ts1 - ts0);
dev += gflops*get_sfreq()*gflops*get_sfreq()/((ts1-ts0)*(ts1-ts0));
}

ts0 = 0;
ts1 = 0;
}

} //end of the time step loop

const double t1 = time.stop();
_totTime = (t1-t0);
_totFlops = gflops*get_nsteps();

av/=(double)(nf-2);
dev=sqrt(dev/(double)(nf-2)-av*av);

int nthreads=1;

std::cout << std::endl;
std::cout << "# Number Threads : " << nthreads << std::endl;
std::cout << "# Total Time (s) : " << _totTime << std::endl;
std::cout << "# Average Perfomance : " << av << " +- " << dev << std::endl;
std::cout << "===============================" << std::endl;

}


void GSimulation :: print_header()
{

std::cout << " nPart = " << get_npart() << "; "
<< "nSteps = " << get_nsteps() << "; "
<< "dt = " << get_tstep() << std::endl;

std::cout << "------------------------------------------------" << std::endl;
std::cout << " "
<< std::left << std::setw(8) << "s"
<< std::left << std::setw(8) << "dt"
<< std::left << std::setw(12) << "kenergy"
<< std::left << std::setw(12) << "time (s)"
<< std::left << std::setw(12) << "GFlops"
<< std::endl;
std::cout << "------------------------------------------------" << std::endl;


}

GSimulation :: ~GSimulation()
{
_mm_free(particles->pos_x);
_mm_free(particles->pos_y);
_mm_free(particles->pos_z);
_mm_free(particles->vel_x);
_mm_free(particles->vel_y);
_mm_free(particles->vel_z);
_mm_free(particles->acc_x);
_mm_free(particles->acc_y);
_mm_free(particles->acc_z);
_mm_free(particles->mass);
_mm_free(particles);
}
82 changes: 82 additions & 0 deletions ver6/GSimulation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
This file is part of the example codes which have been used
for the "Code Optmization Workshop".
Copyright (C) 2016 Fabio Baruffa <[email protected]>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef _GSIMULATION_HPP
#define _GSIMULATION_HPP

#include <iomanip>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <stdlib.h>

#include <omp.h>

#define R_MAX 1<<16
#include "Particle.hpp"

class GSimulation
{
public:
GSimulation();
~GSimulation();

void init();
void set_number_of_particles(int N);
void set_number_of_steps(int N);
void start();

private:
ParticleSoA *particles;

int _npart; //number of particles
int _nsteps; //number of integration steps
real_type _tstep; //time step of the simulation

int _sfreq; //sample frequency

real_type _kenergy; //kinetic energy

double _totTime; //total time of the simulation
double _totFlops; //total number of flops

void init_pos();
void init_vel();
void init_acc();
void init_mass();

inline void set_npart(const int &N){ _npart = N; }
inline int get_npart() const {return _npart; }

inline void set_tstep(const real_type &dt){ _tstep = dt; }
inline real_type get_tstep() const {return _tstep; }

inline void set_nsteps(const int &n){ _nsteps = n; }
inline int get_nsteps() const {return _nsteps; }

inline void set_sfreq(const int &sf){ _sfreq = sf; }
inline int get_sfreq() const {return _sfreq; }

void print_header();

};

#endif
Loading

0 comments on commit 3db707f

Please sign in to comment.