#include "includes.h" #include //#define COM_BIT 0x40000000 //now defined in macros.h int global_com_bit=COM_BIT; /* Global variables */ int totnodes[4]; /* number of nodes in machine directions */ int Mynode[4], node_parity; int **neighbor; int offnode_even[8]; /* # of even sites that have off-node neighbors in a dir */ int offnode_odd[8]; /* # of odd sites that have off-node neighbors in a dir */ MPI_Comm comm_grid, comm_subgrid[4]; /* grid communicators */ /* print on 0 node only */ void node0_printf( const char *fmt, ... ) { va_list argp; if( this_node == 0 ) { va_start( argp, fmt ); vprintf( fmt, argp ); va_end( argp ); } fflush( 0 ); MPI_Barrier( MPI_COMM_WORLD ); } void node0_fprintf( FILE * file, const char *fmt, ... ) { va_list argp; if( this_node == 0 ) { va_start( argp, fmt ); vfprintf( file, fmt, argp ); va_end( argp ); } fflush( 0 ); MPI_Barrier( MPI_COMM_WORLD ); } void verbose_fprintf( FILE * file, const char *fmt, ... ) { va_list argp; if( verbose && this_node == 0 ) { va_start( argp, fmt ); vfprintf( file, fmt, argp ); va_end( argp ); } fflush( 0 ); MPI_Barrier( MPI_COMM_WORLD ); } /* JuBE no args needed */ void initialize_machine_KE() { int free_coords[4], wrap_around[4]; MPI_Comm_size(MPI_COMM_WORLD, &number_of_nodes); MPI_Comm_rank(MPI_COMM_WORLD, &this_node); /* get the totnodes[dir] from parameters file */ FILE *fpar; /* JuBE set para file to kernel_E.input*/ if( ( fpar = fopen( "kernel_E.input", "r" ) ) == 0 && this_node == 0 ) { printf( "ERROR initialize_machine: missing parameter file\n" ); fflush(0); exit( 1 ); } if (get_totnodes( fpar, "totnodes" ) && this_node==0) { printf( "ERROR initialize_machine: missing totnodes\n" ); fflush(0); exit(1); } if (totnodes[XUP]*totnodes[YUP]*totnodes[ZUP]*totnodes[TUP]!=number_of_nodes && this_node==0) { printf( "ERROR initialize_machine: bad total number of nodes\n" ); fflush(0); exit(1); } fclose(fpar); /* Cartesian grid */ wrap_around[0] = 1; wrap_around[1] = 1; wrap_around[2] = 1; wrap_around[3] = 1; MPI_Cart_create(MPI_COMM_WORLD, 4, totnodes, wrap_around, 1, &comm_grid); /* new coordinates */ MPI_Comm_rank(comm_grid, &this_node); MPI_Cart_coords(comm_grid, this_node, 4,Mynode); node_parity = ( Mynode[XUP] + Mynode[YUP] + Mynode[ZUP] + Mynode[TUP] ) % 2; node0_printf( "initialize_machine: topology: %dx%dx%dx%d, mynode: %d,%d,%d,%d, numnodes: %d\n", totnodes[XUP], totnodes[YUP], totnodes[ZUP], totnodes[TUP], Mynode[XUP], Mynode[YUP], Mynode[ZUP], Mynode[TUP], numnodes_KE( ) ); /* set up communicators */ free_coords[XUP] = 1; free_coords[YUP] = 0; free_coords[ZUP] = 0; free_coords[TUP] = 0; MPI_Cart_sub(comm_grid, free_coords,&comm_subgrid[XUP]); free_coords[XUP] = 0; free_coords[YUP] = 1; free_coords[ZUP] = 0; free_coords[TUP] = 0; MPI_Cart_sub(comm_grid, free_coords,&comm_subgrid[YUP]); free_coords[XUP] = 0; free_coords[YUP] = 0; free_coords[ZUP] = 1; free_coords[TUP] = 0; MPI_Cart_sub(comm_grid, free_coords,&comm_subgrid[ZUP]); free_coords[XUP] = 0; free_coords[YUP] = 0; free_coords[ZUP] = 0; free_coords[TUP] = 1; MPI_Cart_sub(comm_grid, free_coords,&comm_subgrid[TUP]); } static char name[] = "Generic communication"; char *machine_type_KE( ) { return ( name ); } int mynode_KE( ) { return Mynode[TUP]+totnodes[TUP]*Mynode[ZUP]+ totnodes[TUP]*totnodes[ZUP]*Mynode[YUP]+ totnodes[TUP]*totnodes[ZUP]*totnodes[YUP]*Mynode[XUP]; } void mynode4( int *n_x, int *n_y, int *n_z, int *n_t ) { *n_x = Mynode[XUP]; *n_y = Mynode[YUP]; *n_z = Mynode[ZUP]; *n_t = Mynode[TUP]; } int numnodes_KE( ) { return ( totnodes[XUP] * totnodes[YUP] * totnodes[ZUP] * totnodes[TUP] ); } void numnodes4( int *n_x, int *n_y, int *n_z, int *n_t ) { *n_x = totnodes[XUP]; *n_y = totnodes[YUP]; *n_z = totnodes[ZUP]; *n_t = totnodes[TUP]; } void gen_send_recv( int dir, char *sbuf, char *rbuf, int size ) { MPI_Status status; int source; int dest; if (dir<0 || dir>7) { node0_fprintf(file_o1, "ERROR gen_send_recv: Bad direction %d\n",dir); exit(1); } if (dir<4) { /* positive direction */ source=(Mynode[dir]+1)%totnodes[dir]; dest=(Mynode[dir]-1+totnodes[dir])%totnodes[dir]; } else { /* negative direction */ dir=OPP_DIR(dir); dest=(Mynode[dir]+1)%totnodes[dir]; source=(Mynode[dir]-1+totnodes[dir])%totnodes[dir]; } MPI_Sendrecv(sbuf,size,MPI_BYTE,dest,0, rbuf,size,MPI_BYTE,source,0, comm_subgrid[dir],&status); } void make_nn_gathers( ) { int x, y, z, t, xp, yp, zp, tp, xm, ym, zm, tm; int i, ixp, ixm, iyp, iym, izp, izm, itp, itm, p; MEMALIGN(neighbor, int *, 8 ); for ( i = 0; i < 8; i++ ) MEMALIGN(neighbor[i],int, sites_on_node); /* neighbor = malloc( 8 * sizeof( int * ) ); */ /* for ( i = 0; i < 8; i++ ) */ /* { */ /* neighbor[i] = malloc( sites_on_node * sizeof( int ) ); */ /* } */ for ( i = 0; i < 8; i++ ) { offnode_even[i] = offnode_odd[i] = 0; } for ( x = 0; x < nx; x++ ) for ( y = 0; y < ny; y++ ) for ( z = 0; z < nz; z++ ) for ( t = 0; t < nt; t++ ) if( node_number_KE( x, y, z, t ) == mynode_KE( ) ) { i = node_index_KE( x, y, z, t ); p = lattice[i].parity; xp = ( x + 1 ) % nx; xm = ( x - 1 + nx ) % nx; yp = ( y + 1 ) % ny; ym = ( y - 1 + ny ) % ny; zp = ( z + 1 ) % nz; zm = ( z - 1 + nz ) % nz; tp = ( t + 1 ) % nt; tm = ( t - 1 + nt ) % nt; ixp = node_index_KE( xp, y, z, t ); ixm = node_index_KE( xm, y, z, t ); iyp = node_index_KE( x, yp, z, t ); iym = node_index_KE( x, ym, z, t ); izp = node_index_KE( x, y, zp, t ); izm = node_index_KE( x, y, zm, t ); itp = node_index_KE( x, y, z, tp ); itm = node_index_KE( x, y, z, tm ); if( node_number_KE( xp, y, z, t ) == mynode_KE( ) ) { neighbor[XUP][i] = ixp; } else { neighbor[XUP][i] = ixp + COM_BIT; if( p == EVEN ) { offnode_even[XUP]++; } else offnode_odd[XUP]++; } if( node_number_KE( xm, y, z, t ) == mynode_KE( ) ) { neighbor[XDOWN][i] = ixm; } else { neighbor[XDOWN][i] = ixm + COM_BIT; if( p == EVEN ) { offnode_even[XDOWN]++; } else offnode_odd[XDOWN]++; } if( node_number_KE( x, yp, z, t ) == mynode_KE( ) ) { neighbor[YUP][i] = iyp; } else { neighbor[YUP][i] = iyp + COM_BIT; if( p == EVEN ) { offnode_even[YUP]++; } else offnode_odd[YUP]++; } if( node_number_KE( x, ym, z, t ) == mynode_KE( ) ) { neighbor[YDOWN][i] = iym; } else { neighbor[YDOWN][i] = iym + COM_BIT; if( p == EVEN ) { offnode_even[YDOWN]++; } else offnode_odd[YDOWN]++; } if( node_number_KE( x, y, zp, t ) == mynode_KE( ) ) { neighbor[ZUP][i] = izp; } else { neighbor[ZUP][i] = izp + COM_BIT; if( p == EVEN ) { offnode_even[ZUP]++; } else offnode_odd[ZUP]++; } if( node_number_KE( x, y, zm, t ) == mynode_KE( ) ) { neighbor[ZDOWN][i] = izm; } else { neighbor[ZDOWN][i] = izm + COM_BIT; if( p == EVEN ) { offnode_even[ZDOWN]++; } else offnode_odd[ZDOWN]++; } if( node_number_KE( x, y, z, tp ) == mynode_KE( ) ) { neighbor[TUP][i] = itp; } else { neighbor[TUP][i] = itp + COM_BIT; if( p == EVEN ) { offnode_even[TUP]++; } else offnode_odd[TUP]++; } if( node_number_KE( x, y, z, tm ) == mynode_KE( ) ) { neighbor[TDOWN][i] = itm; } else { neighbor[TDOWN][i] = itm + COM_BIT; if( p == EVEN ) { offnode_even[TDOWN]++; } else offnode_odd[TDOWN]++; } } } /*}}} */ msg_tag *start_gather_from_temp( /* arguments */ void *field, /* which field? Pointer to the field array */ int size, /* size in bytes of the field */ int dist, /* distance of elements in the array */ int dir, /* direction to gather from. eg XUP - index into neighbor tables */ int parity, /* parity of sites whose neighbors we gather. one of EVEN, ODD or EVENANDODD. */ char **dest ) /* one of the vectors of pointers */ { int i, n, in, n_off; site *st; msg_tag *mbuf; mbuf = 0; switch ( parity ) { case EVEN: n_off = offnode_even[dir]; break; case ODD: n_off = offnode_odd[dir]; break; case EVENANDODD: n_off = offnode_even[dir] + offnode_odd[dir]; break; default: printf( "ERROR start_gather_from_temp: Wrong parity\n" ); return ( 0 ); } //printf("n_off=%d\n",n_off); if( n_off > 0 ) { // Communication is needed MEMALIGN(mbuf, msg_tag, 1 ); /* mbuf = ( msg_tag * ) malloc( sizeof( msg_tag ) ); */ mbuf->size = size; MEMALIGN(mbuf->rbuf, char, size*n_off); MEMALIGN(mbuf->sbuf, char, size*n_off); MEMALIGN(mbuf->ptr, char *, n_off ); /* mbuf->rbuf = memalign( 16, size * n_off ); */ /* mbuf->sbuf = memalign( 16, size * n_off ); */ /* mbuf->ptr = malloc( sizeof( char * ) * n_off ); */ mbuf->done = 0; mbuf->n = n_off; } n = 0; //printf("dir=%d,ngb=%d \n", dir,neighbor[dir][0]); FORSOMEPARITY( i, st, parity ) { //AG: off node neighbours labeled by an integer greater than COM_BIT (idx + COM_BIT) if( ( in = neighbor[dir][i] ) < COM_BIT ) { //AG: point dest[i] to multi-valued site associated with (on-node) neighbour dest[i] = ( char * ) field + dist * in; } else { //AG: point dest[i] to multi-valued site associated with (off-node) neighbour //AG: which exists in recieve buffer dest[i] = mbuf->rbuf + n * size; //AG prepare to create send buffer. Set pointer to opposite value to be sent other way mbuf->ptr[n] = ( char * ) field + dist * ( in - COM_BIT ); //AG: and copy multi-valued data to send buffer memcpy( mbuf->sbuf + n * size, mbuf->ptr[n], size ); n++; } } /* node0_fprintf(stderr,"No of off points %d\n",n_off); */ if( n_off > 0 ) { if( dir > 7 ) dir -= 8; gen_send_recv( dir, mbuf->sbuf, mbuf->rbuf , ( mbuf->size ) * ( mbuf->n ) ); } return ( mbuf ); } void restart_gather_from_temp( /* arguments */ void *field, /* which field? Some member of structure "site" */ int size, /* size in bytes of the field */ int dist, /* distance of elements in the array */ int dir, /* direction to gather from. eg XUP - index into neighbor tables */ int parity, /* parity of sites whose neighbors we gather. one of EVEN, ODD or EVENANDODD. */ char **dest, /* one of the vectors of pointers */ msg_tag * mbuf ) /* previously returned by start_gather */ { int i; if( mbuf == 0 ) return; if( mbuf->done != 1 ) { node0_printf( "ERROR restart_gather:previos gather was not waited for\n" ); return; } mbuf->done = 0; for ( i = 0; i < mbuf->n; i++ ) { memcpy( mbuf->sbuf + i * ( mbuf->size ), mbuf->ptr[i], mbuf->size ); } if( dir > 7 ) dir -= 8; gen_send_recv( dir, mbuf->sbuf , mbuf->rbuf , ( mbuf->size ) * ( mbuf->n ) ); } msg_tag *start_general_gather_from_temp( /* arguments */ void *field, /* which field? Pointer to the field array */ int size, /* size in bytes of the field (eg sizeof(su3_vector)) */ int dist, /* distance of elements in the array */ int *dst, /* direction to gather from. eg XUP - index into neighbor tables */ int parity, /* parity of sites whose neighbors we gather. one of EVEN, ODD or EVENANDODD. */ char **dest ) /* one of the vectors of pointers */ { char *buf2; char **ptr; int i, d, dir, nd, n; site *st; msg_tag *tg; msg_tag *mbuf; MEMALIGN(mbuf, msg_tag, 1) ; mbuf->size=size; mbuf->n=sites_on_node; MEMALIGN(mbuf->rbuf, char, size*sites_on_node); /* mbuf = ( msg_tag * ) malloc( sizeof( msg_tag ) ); */ /* mbuf->rbuf = memalign( 16, size * sites_on_node ); */ MEMALIGN(ptr, char *, sites_on_node); MEMALIGN(buf2, char, size*sites_on_node); /* ptr = malloc( sites_on_node * sizeof( char * ) ); */ /* buf2 = memalign( 16, size * sites_on_node ); */ FORALLSITES( i, st ) { memcpy( mbuf->rbuf + i * size, ( char * ) field + i * dist, size ); } for ( d = 0; d < 4; d++ ) { dir = d; nd = dst[d]; if( nd < 0 ) { dir = 7 - d; nd = -nd; } for ( n = 0; n < nd; n++ ) { tg = start_gather_from_temp( mbuf->rbuf, size, size, dir, EVENANDODD, ptr ); wait_gather_KE( tg ); // copy to buf2 FORALLSITES( i, st ) { memcpy( buf2 + i * size, ptr[i], size ); } // copy back to buf1 memcpy( mbuf->rbuf, buf2, size * sites_on_node ); cleanup_gather( tg ); } } FORSOMEPARITY( i, st, parity ) { dest[i] = mbuf->rbuf + i * size; } FREE(ptr, char *, sites_on_node); FREE(buf2, char, size*sites_on_node); return mbuf; } void wait_gather_KE( msg_tag * mbuf ) { if( mbuf == 0 ) return; mbuf->done = 1; return; } void wait_general_gather( msg_tag * mbuf ) { } void cleanup_gather( msg_tag * mbuf ) { if( mbuf != 0 ) { FREE(mbuf->rbuf, char, (mbuf->size)*(mbuf->n)); FREE(mbuf->sbuf, char, (mbuf->size)*(mbuf->n)); FREE(mbuf->ptr, char *, mbuf->n ); FREE(mbuf, msg_tag, 1 ); } } void cleanup_general_gather( msg_tag * mbuf ) { if( mbuf != 0 ) { FREE(mbuf->rbuf, char, (mbuf->size)*(mbuf->n)); FREE(mbuf, msg_tag, 1 ); } } /* Synchronize all nodes by sending one double to each directions */ void g_sync_KE( ) { MPI_Barrier( MPI_COMM_WORLD ); } void g_doublesum_KE( double *dpt ) { double work; MPI_Allreduce( dpt, &work, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); *dpt = work; } void g_doublemax_KE( double *dpt ) { double work; MPI_Allreduce( dpt, &work, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD ); *dpt = work; } void broadcast_double_KE( double *dpt ) { MPI_Bcast( dpt, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD ); } void broadcast_char( char *buf, int size ) { MPI_Bcast( buf, size, MPI_BYTE, 0, MPI_COMM_WORLD ); } void broadcast_int_KE( int *buf ) { MPI_Bcast( buf, 1, MPI_INTEGER, 0, MPI_COMM_WORLD ); } /* Double precision time */ double dclock( ) { return MPI_Wtime(); } /* version of exit for multinode processes -- kill all nodes */ void terminate_KE( int status ) { printf( "terminate: node %d, status = %d\n", this_node, status ); exit( status ); }