# Build settings for the MPI / pthread exchange benchmarks (IBM mpcc / mpCC).
# This file only sets variables; Makefile.rules (included at the bottom) is
# expected to supply the actual build rules.
#
# run using
#   poe -procs 4
# can also use options
#   -css_interrupt {yes | no}   (default no)
#   -eager_limit size_limit     (default is $MP_EAGER_LIMIT; messages above
#                                this limit use the rendezvous protocol)
#   -buffer_mem memsize         (default is 2.8megs for IP, 64meg for US)
#   -single_thread {no | yes}   (default is no, which means it assumes
#                                multiple message passing threads and adds
#                                lock overhead.  If no threads, say 'yes')
# can use cpus_per_node and nodes together instead of -procs
#   -cpu_use numcpuspernode     (default is $MP_TASKS_PER_NODE)
#                               NOTE(review): option name copied as recorded;
#                               confirm against the poe documentation.
#   -nodes numnodesused
#   -euidevice {en0 | fi0 | tr0 | css0}   (default in $MP_EUIDEVICE)
#   -euilib {ip | us}                     (default in $MP_EUILIB)

CC = mpcc
CXX = mpCC

# Iteration count compiled into the benchmark via -DNITER.
# Override per build without editing this file, e.g.:  make NITER=10000
# (objects already built with another value must be rebuilt by hand).
NITER = 3000

CFLAGS = -O3 -qarch=pwr3 -qansialias -qlist -qsource -DSP2 -DNITER=$(NITER)

# NOTE(review): MPIINC/MPILIB point at $(MPIHOME) itself rather than its
# include/ and lib/ subdirectories; kept as found -- confirm this is intended.
MPIHOME = /usr/local
MPIINC = -I$(MPIHOME)
MPILIB = -L$(MPIHOME) -lm

FLEXIO = /u4/jshalf
FLEXINC = -I$(FLEXIO)/include
FLEXLIB = -L$(FLEXIO)/lib -lieeeio

# Available programs:
#   SerialTest   : Test the WaveToy stuff
#   ParTest      : Just test the domain decomp machinery
#   MPItest      : test simplest MPI comm
#   PthreadTest  : test cube domain decomposition with pthreads
#   PthreadSlice : test slice domain decomp with pthreads
MAIN = MPIperfExchange
# MAIN = PthreadTest

# NOTE(review): LD holds a library flag, not a linker command; presumably
# Makefile.rules adds it to the link line -- confirm before renaming.
LD = -lpthread

include Makefile.rules

# Tests: (prelim) MPIperf (regular MPI isend)
#####################################################
# Timings are presumably in seconds (only one entry states the unit).
#
# 10k iter
#   8 procs: 46:8203   (sic; presumably 46.8203)
#   2 procs: 36.073
# 3000 iter
#   8 procs: 13.752
#   2 procs: 9.91353
# cssinterrupt
#   8 procs: 15.5324
#   2 procs: 10.0334
# single_thread yes
#   8 procs: 14.1984
#   2 procs: 9.86464
# eagerlimit 0
#   8 procs: 17.8428
# eagerlimit 20000 (couldn't push it further)
#   8 procs: 15.089 seconds
# euilib ip
#   8 procs: 26.5896
#   2 procs: 17.8684
# buffer mem 3000000
#   8 procs: 13.842
# buffer mem 30000000
#   8 procs: 13.4472