You are here: TWiki> Cimec Web>GameOfLife (2014-09-05, MarioStorti)
Comentarios: Esta version del codigo es best effort en el sentido de que he usado todas las posibilidades de C/C++ para obtener un codigo lo mas rapido posible. Por un lado esto hace que la comparacion con otros programas pueda ser un poco distorsionada ya que incluso en modo secuencial hay una gran ganancia. En principio el hacer el codigo muy rapido hace mas dificil obtener altas eficiencias en la paralelizacion, ya que el tiempo de comunicacion/sincronizacion pesa mas con respecto al de calculo. De todas formas he obtenido eficiencias del orden del 98% usando balance de carga en tableros de 4000x4000 celdas. (ver ProcessorPerformance)

    1//  Copyright (C) 1999-2002, Mario Alberto Storti, Centro
    2//  Internacional de Metodos Numericos en Ingenieria
    3//  (CIMEC-Argentina), Universidad Nacional del Litoral
    4//  (UNL-Argentina), Consejo Nacional de Investigaciones Cientificas y
    5//  Tecnicas (UNL-Argentina).
    6//  
    7//  This program is  free software; you can redistribute  it and/or modify
    8//  it under the  terms of the GNU General Public  License as published by
    9//  the Free Software Foundation; either  version 2 of the License, or (at
   10//  your option) any later version.
   11//  
   12//  This program  is distributed in the  hope that it will  be useful, but
   13//  WITHOUT	ANY  WARRANTY;	without  even	the  implied	warranty  of
   14//  MERCHANTABILITY  or FITNESS  FOR A  PARTICULAR PURPOSE.	See  the GNU
   15//  General Public License for more details.
   16//  
   17//  You  should have received  a copy  of the  GNU General  Public License
   18//  along  with  this  program;  if	not,  write  to  the  Free  Software
   19//  Foundation, Inc.,  59 Temple Place, Suite 330,  Boston, MA 02111-1307,
   20//  USA.
   21
   22// $Id: GameOfLife.txt,v 1.4 2014/09/05 23:06:01 MarioStorti Exp $
   23#define _GNU_SOURCE
   24#include <mpi.h>
   25#include <cstdio>
   26#include <cassert>
   27#include <unistd.h>
   28#include <stdlib.h>
   29#include <ctype.h>
   30#include <cstring>
   31#include <vector>
   32#include <cmath>
   33#include <time.h>
   34#include <sys/time.h>
   35
   36// Simulates the `Game of Life' in parallel using MPI. 
   37
   38/// This represents the board. 
   39class Board {
   40private:
   41  // N is the size of the board, so that it has N x N cells NN = N+2
   42  // is the size with two rows added at each end for simulating the
   43  // walls of the board.  We use a 0-based (C-style) indices i,j for
   44  // rows and columns.  Each processor owns a range of consecutive
   45  // rowns with indices i1<= i <i2. Currently the number of rows in
   46  // each processor is almost the same (no load balance). For
   47  // instance, if N=20 and size=2, then i1=0, i2=10 in processor 0 and
   48  // i1=10, i2=20 in processor 1.  So that each processor owns i2-i1
   49  // rows. We store a board that has one additional row at each end
   50  // (so called `ghost' rows), so that in fact at each processor we
   51  // store i2-i1+2 rows.  We define I1=i1-1 and I2=i2+1 so that the
   52  // rows stored at this processor are I1 <= i < I2. Ghost rows I1 and
   53  // I2-1 are computed at processors `myrank-1' and `myrank+1'. So
   54  // that they have to be transmitted at each time step with MPI.  The
   55  // flag `periodic' indicates whether we use periodic boundary
   56  // conditions or not (not used currently though!!).
   57  int N,NN,i1,i2,I1,I2,size,myrank,periodic;
   58  // Weights of processors
   59  vector<double> w;
   60  // Number of rows in each processor
   61  vector<int> rows_proc;
   62  // This arraus store the board. `board2' is a copy of the board for
   63  // when updating the board. 
   64  char **board,*row1,*row2;
   65  // returns the position in the board of cell j,k
   66  inline char *pos(int j, int k) { return &board[(k-I1)][j+1]; }
   67  void print_row(int k);
   68public:
   69  // Read weights file from "weights.dat"
   70  void read_weights_file(int flag);
   71  // Initializes the board.
   72  void init(int N_,int periodic=0);
   73  // Sets the the state of a cell. 
   74  void setv(int j,int k,int val) {
   75	 if (k>=i1 && k<i2) *pos(j,k) = val;
   76  }
   77  // Prints the board
   78  void print();
   79  // Advances the board one generation.
   80  void advance();
   81  Board() { board = NULL; }
   82  ~Board();
   83};
   84
   85void Board::read_weights_file(int flag) {
   86  // Read processors speed from file
   87  double r;
   88  char *line=NULL;
   89  size_t n=0;
   90  w.clear();
   91  if (flag) {
   92	 // if (myrank==0) printf("Reading weights file\n");
   93	 FILE *fid = fopen("weights.dat","r");
   94	 assert(fid);
   95	 int p=0;
   96	 while (1) {
   97		int nread = getline(&line,&n,fid);
   98		if (nread == EOF) break;
   99		sscanf(line,"%lf",&r);
  100		// if (myrank==0) printf("[%d] weight %f",++p,r);
  101		w.push_back(r);
  102	 }
  103	 fclose(fid);
  104	 if (line) free(line);
  105  }
  106}
  107
  108void Board::print() {
  109  // This is done in parallel and may not work
  110  // correctly sometimes. 
  111
  112  // Loop over processors. This is synchronized by the barrier below. 
  113  for (int r=0; r<size; r++ ) {
  114	 if (r==myrank) {
  115		for (int k=i1; k<i2; k++) {
  116	char *p = pos(0,k), *pe=p+N;
  117	while (p<pe) {
  118	  putc((*p++ ? '*' : '.'),stdout);
  119	  putc(' ',stdout);
  120	}
  121	putc('\n',stdout);
  122		}
  123		fflush(stdout);
  124	 }
  125	 MPI_Barrier(MPI_COMM_WORLD);
  126	 if (myrank==0) fflush(stdout);
  127  }
  128  if (myrank==0) printf("\n");
  129}
  130
  131void Board::print_row(int k) {
  132  char *p = pos(0,k), *pe = p+N;
  133  printf("row %3d -> ",k);
  134  while (p < pe) {
  135	 putc((*p++ ? '*' : '.'),stdout);
  136	 putc(' ',stdout);
  137  }
  138  putc('\n',stdout);
  139}
  140
  141// Computes next generation 
  142void Board::advance() {
  143
  144#define SEND(row,proc)	 \
  145		MPI_Send(pos(0,(row)),N,MPI_CHAR,(proc),0,MPI_COMM_WORLD)
  146#define RECEIVE(row,proc) \
  147	 MPI_Recv(pos(0,(row)),N,MPI_CHAR,(proc),MPI_ANY_TAG,MPI_COMM_WORLD,&stat)
  148
  149  // First stage 2j -> 2j+1
  150  MPI_Status stat;
  151  if (myrank % 2 == 0 ) {
  152	 if (myrank<size-1) {
  153		SEND(i2-1,myrank+1);
  154		RECEIVE(I2-1,myrank+1);
  155	 }
  156	 if (myrank>0) {
  157		SEND(i1,myrank-1);
  158		RECEIVE(I1,myrank-1);
  159	 }
  160  } else {
  161	 RECEIVE(I1,myrank-1);
  162	 SEND(i1,myrank-1);
  163	 if (myrank<size-1) {
  164		RECEIVE(I2-1,myrank+1);
  165		SEND(i2-1,myrank+1);
  166	 }
  167  }
  168
  169  // Initialize row2
  170  { char *q = row2, *qe = q+NN;
  171  while (q<qe) *q++ = 0;
  172  }
  173
  174  // Loop over rows in this processor (not included the `ghost' ones
  175  // (I1 and I2-1)
  176  int k; char *tmp;
  177  register char SW,S,SE, W,O,E, NW,NN,NE;
  178  for (k=i1; k<i2; k++) {
  179	 char *p = pos(0,k), 
  180		*pe = p+N, 
  181		*pNE = pos(1,k+1), 
  182		*pSE = pos(1,k-1),
  183		*pp = row1+1;
  184	 SW = *(pSE-2);
  185	 S  = *(pSE-1);
  186	 SE = *(pSE  );
  187	 
  188	 W = *(p-1);
  189	 O = *(p  );
  190	 E = *(p+1);
  191
  192	 NW = *(pNE-2);
  193	 NN  = *(pNE-1);
  194	 NE = *(pNE  );
  195	 
  196	 // Make it periodic
  197	 if (periodic) {
  198		*(p-1) = *(pe-1);
  199		*pe = *p;
  200		assert(size==1); // not implemented yet
  201	 }
  202
  203	 // Cells computed in this processor in this row have 
  204	 // p <= address < pe
  205	 int alive;
  206	 while (p < pe) {
  207		// Neighbors at positions one row below
  208		alive = SW + S + SE + W + E + NW + NN + NE;
  209		
  210		if (alive==0) {		// We treat this case before, perhaps we gain
  211	*pp = 0;		// some acceleration. Cell becomes dead
  212		} else if (alive==3) {
  213	*pp = 1;		// Cell becomes alive
  214		} else if (alive!=2) {
  215	*pp = 0;		// Cell becomes dead
  216		} else *pp = *p;		// cell remains in its state
  217		p++; pSE++; pNE++; pp++;	// update positions in board to East 
  218		SW = S; S = SE ; SE = *pSE;
  219		NW = NN; NN = NE ; NE = *pNE;
  220		W = O; O = E; E = *(p+1);
  221	 }
  222	 tmp = board[k-1-I1];
  223	 board[k-1-I1] = row2;
  224	 row2 = row1;
  225	 row1 = tmp;
  226  }
  227  tmp = board[k-1-I1];
  228  board[k-1-I1] = row2;
  229  row2 = row1;
  230  row1 = tmp;
  231
  232#if 0
  233  // Swap boards
  234  char *temp = board;
  235  board = board2;
  236  board2 = temp;
  237#endif
  238}
  239
  240// Initialize board
  241void Board::init(int N_,int per=0) {
  242
  243  // size = number of processors
  244  MPI_Comm_size(MPI_COMM_WORLD,&size);
  245  // my rank in the row of processors
  246  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
  247
  248  // flags whther periodic b.c.'s are used or not
  249  periodic = per;
  250  // size of board
  251  N = N_;
  252
  253  if (w.size()>0 && w.size()<size ) {
  254	 if (myrank==0) printf("number of weights in \"weights\" "
  255			  "file doesn't match number of procs.\n");
  256	 MPI_Finalize();
  257	 exit(1);
  258  } 
  259  if (w.size()==0) w.resize(size,1.);
  260  rows_proc.resize(size);
  261  double sum_w = 0., carry=0., a;
  262  for (int p=0; p<size; p++) sum_w += w[p];
  263  int j = 0;
  264  double tol = 1e-8;
  265  for (int p=0; p<size; p++) {
  266	 w[p] /= sum_w;
  267	 a = w[p]*N + carry + tol;
  268	 rows_proc[p] = int(floor(a));
  269	 carry = a - rows_proc[p] - tol;
  270	 if (p==myrank) {
  271		i1 = j;
  272		i2 = i1 + rows_proc[p];
  273	 }
  274	 j += rows_proc[p];
  275  }
  276  assert(j==N);
  277  assert(fabs(carry) < tol);
  278  
  279  I1=i1-1;
  280  I2=i2+1;
  281
  282  // Alloc memory
  283  NN = N+2;			// number of stored rows (include ghosts)
  284  board  = new (char *)[I2-I1]; // allocate memory for boards
  285  for (int k=0; k<I2-I1; k++) board[k] = new char[NN];
  286  row1 = new char[NN];
  287  row2 = new char[NN];
  288  
  289  // Initialize board
  290  for (int k=I1; k<I2; k++) {
  291	 char *p = pos(-1,k), *pe = p+N+2;
  292	 while (p < pe) *p++ = 0;
  293  }
  294}
  295
  296// Destructor
  297Board::~Board() {
  298  for (int k=0; k<I2-I1; k++) 
  299	 delete[] board[k];
  300  delete[] board;
  301  board=NULL;
  302  delete[] row1;
  303  row1 = NULL;
  304  delete[] row2;
  305  row2 = NULL;
  306}
  307
  308double etime(timeval &x,timeval &y) {
  309  return double(y.tv_sec)-double(x.tv_sec)
  310	 +(double(y.tv_usec)-double(x.tv_usec))/1e6;
  311}
  312
  313// Main program
  314int main(int argc,char **args) {
  315  int size,myrank;
  316  char *pattern="cross";;
  317
  318  // Initializes MPI
  319  MPI_Init(&argc,&args);
  320  // size = number of processors
  321  MPI_Comm_size(MPI_COMM_WORLD,&size);
  322  // my rank in the row of processors
  323  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
  324
  325  int N=20,nstep=30,opt_q=0,opt_v=0,opt_w=0;
  326  char c;
  327
  328  while ((c = getopt(argc, args, "N:s:hqp:vw")) != -1) {
  329	 switch (c) {
  330	 case 'h':
  331		if (myrank==0) {
  332	printf(" usage: $ mpirun [OPTIONS] life\n\n"
  333			 "MPI options: -np <number-of-processors>\n"
  334			 "				 -machinefile <machine-file>\n\n"
  335			 "Other options: -N <number of columns/rows>\n"
  336			 "					-s <numer of time steps>\n"
  337			 "					-q				 # quiet (no output)\n"
  338			 "					-v				 # verbose (print board at each step)\n"
  339			 "					-p <pattern>	# may be ('cross',line10)\n"
  340			 "					-w				 # read weights from \"weigths.dat\"\n"
  341			 "					-h				 # print this help\n"
  342			 );
  343		}
  344		MPI_Finalize();
  345	 case 'N':
  346		sscanf(optarg,"%d",&N);
  347		break;
  348	 case 'w':
  349		opt_w = 1;
  350		break;
  351	 case 's':
  352		sscanf(optarg,"%d",&nstep);
  353		break;
  354	 case 'q':
  355		opt_q = 1;
  356		break;
  357	 case 'v':
  358		opt_v = 1;
  359		break;
  360	 case 'p':
  361		pattern = new char[strlen(optarg+1)];
  362		strcpy(pattern,optarg);
  363		break;
  364	 case '?':
  365		if (isprint (optopt))
  366	fprintf (stderr, "Unknown option `-%c'.\n", optopt);
  367		else
  368	fprintf (stderr,
  369		 "Unknown option character `\\x%x'.\n",
  370		 optopt);
  371		return 1;
  372	 default:
  373		if (isprint (optopt))
  374	fprintf (stderr, "Unknown option `-%c'.\n", optopt);
  375		else
  376	fprintf (stderr,
  377		 "Unknown option character `\\x%x'.\n",
  378		 optopt);
  379		abort ();
  380	 }
  381  }
  382
  383  /// time related quantities
  384  struct timeval start, end;
  385
  386  // Defines and initializes the board
  387  Board board;
  388  board.read_weights_file(opt_w);
  389  board.init(N);
  390
  391  if (!strcmp(pattern,"cross")) {
  392	 for (int k=0; k<N; k++) {
  393		board.setv(N/2,k,1);
  394		board.setv(k,N/2,1);
  395	 }
  396  } else if (!strcmp(pattern,"line10")) {
  397	 assert(N>20);
  398	 for (int k=-5; k<5; k++) board.setv(N/2,N/2+k,1);
  399  } else if (!strcmp(pattern,"block")) {
  400	 assert(N>3);
  401	 board.setv(N/2  ,N/2  ,1);
  402	 board.setv(N/2  ,N/2-1,1);
  403	 board.setv(N/2-1,N/2  ,1);
  404	 board.setv(N/2-1,N/2-1,1);
  405  } else {
  406	 if (myrank==0) 
  407		printf("Not known pattern \"%s\"\n",pattern);
  408	 MPI_Finalize();
  409	 exit(0);
  410  }
  411
  412  if (!opt_q) board.print();
  413  gettimeofday (&start,NULL);
  414  for (int k=0; k<nstep; k++) {
  415	 board.advance();
  416	 if (opt_v) {
  417		if (myrank==0) printf("-- Generation %d:\n",k+1); 
  418		board.print();
  419	 }
  420  }
  421  if (myrank==0) {
  422	 gettimeofday (&end,NULL);
  423	 double elapsed = etime(start,end);
  424	 double rate = double(N)*double(N)*double(nstep)/elapsed/1e6;
  425	 printf("N %d, steps %d, elapsed %f, rate: %f Mcell/sec\n",
  426		N,nstep,elapsed,rate);
  427  }
  428
  429  gettimeofday (&end,NULL);
  430  if (!opt_q && !opt_v) {
  431	 if (myrank==0) printf("-- Final board:\n"); 
  432	 board.print();
  433  }
  434  MPI_Finalize();
  435}

-- MarioStorti - 03 May 2002

Topic revision: r4 - 2014-09-05 - 23:06:01 - MarioStorti
 

TWIKI.NET
This site is powered by the TWiki collaboration platformCopyright � by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback