[CSC 435] Some Files...

Andrew J. Pounds pounds_aj at mercer.edu
Sat Mar 26 13:29:57 EDT 2016


Guys -- I know I gave you a hardcopy of this in class, but I am 
attaching my OpenMP version of matrix multiplication and the makefile I 
used (so you can see how to compile it).    I will do the same for 
pthreads.   I recommend copying this to your openmp branch of your 
babyblas repository and trying to run it in parallel (if you haven't 
gotten anything to run in parallel yet).

Also, if you haven't figured it out yet, your HPC for S&E book is a 
pretty good OpenMP reference!



-- 
Andrew J. Pounds, Ph.D.  (pounds_aj at mercer.edu)
Professor of Chemistry and Computer Science
Mercer University,  Macon, GA 31207   (478) 301-5627
http://faculty.mercer.edu/pounds_aj

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://theochem.mercer.edu/pipermail/csc435/attachments/20160326/a27ad128/attachment.html>
-------------- next part --------------
/*
 * Prototype for mmm_, the matrix-multiply routine defined further down
 * in this file (one OpenMP and one serial definition, selected by the
 * OPENMP macro).
 *
 * The extern "C" wrapper is only active when this file is compiled as
 * C++; it keeps the symbol name unmangled.  Every argument is passed by
 * pointer, including the two integer scalars.
 *
 * NOTE(review): the trailing underscore and all-pointer argument list
 * look like a Fortran-callable binding -- confirm against the callers.
 */
#ifdef __cplusplus
extern "C" {
#endif
    void mmm_( int *threads, int *len,  double *a, double *b, double*c );
#ifdef __cplusplus
    }
#endif

#ifdef OPENMP 

/*   O P E N M P   S E C T I O N  */

#include <stddef.h>   /* size_t */
#ifdef _OPENMP
#include <omp.h>      /* omp_set_num_threads */
#endif

/*
 * mmm_ -- square matrix-matrix multiply, parallelised over rows with OpenMP.
 *
 * Computes, for every 0 <= i, j < *len:
 *
 *     c[i*len + j] = sum over k of  a[i*len + k] * b[k*len + j]
 *
 *   threads  in   requested number of OpenMP threads; only applied when
 *                 positive (omp_set_num_threads requires a positive value)
 *   len      in   dimension of the matrices; each array holds len*len doubles
 *   a, b     in   operand arrays, read only
 *   c        out  result array, every element is overwritten
 *
 * Nothing is written when *len <= 0.
 *
 * The OpenMP runtime is only touched when the compiler itself has OpenMP
 * enabled (_OPENMP defined); otherwise the pragma is ignored and the loop
 * nest runs serially, so the file still builds and links without -fopenmp.
 */
void mmm_( int *threads, int *len,  double *a, double *b, double *c ){

    const int veclen = *len;
    size_t n;
    int i;

    if (veclen <= 0) return;

    /* Offsets are computed in size_t so i*n cannot overflow int. */
    n = (size_t) veclen;

#ifdef _OPENMP
    if (*threads > 0) omp_set_num_threads(*threads);
#else
    (void) threads;
#endif

    /*
     * Each iteration of the i loop owns one row of c, so no two threads
     * write the same element.  The accumulator and the inner indices are
     * declared inside the loop body, which makes them private per thread
     * without any data-sharing clause (and without a reduction: the sum
     * is per element, not a value combined across threads).
     */
#pragma omp parallel for
    for (i = 0; i < veclen; i++) {
        const double *arow = a + (size_t) i * n;
        double       *crow = c + (size_t) i * n;
        size_t j, k;

        for (j = 0; j < n; j++) {
            double sum = 0.0;
            for (k = 0; k < n; k++) {
                sum += arow[k] * b[k * n + j];
            }
            crow[j] = sum;
        }
    }
}

#else

/*  S E R I A L   C O D E  */

#include <stddef.h>   /* size_t */

/*
 * mmm_ -- serial square matrix-matrix multiply.
 *
 * Computes, for every 0 <= i, j < *len:
 *
 *     c[i*len + j] = sum over k of  a[i*len + k] * b[k*len + j]
 *
 *   threads  in   unused in the serial build; kept so the signature matches
 *                 the prototype shared with the OpenMP build
 *   len      in   dimension of the matrices; each array holds len*len doubles
 *   a, b     in   operand arrays, read only
 *   c        out  result array, every element is overwritten
 *
 * Nothing is written when *len <= 0.
 *
 * Compile-time variants of the k loop:
 *   STRIP8   unrolled by 8
 *   STRIP4   unrolled by 4 (STRIP8 wins if both are defined)
 *   neither  plain triple loop
 * In the unrolled variants the first (len % stride) terms are handled one
 * at a time so that the unrolled loop always sees a multiple of the stride.
 * defined() is used for both macros so they may be defined with or
 * without a value.
 */
void mmm_( int *threads, int *len,  double *a, double *b, double *c ){

    const int veclen = *len;
    size_t n, i, j, k;
#if defined(STRIP8) || defined(STRIP4)
    size_t rem;   /* leading terms that do not fill a whole unrolled step */
#endif

    (void) threads;

    if (veclen <= 0) return;

    /* Offsets are computed in size_t so i*n cannot overflow int. */
    n = (size_t) veclen;

/* One term of the dot product of row i of a with column j of b. */
#define MMM_TERM(kk) (a[i * n + (kk)] * b[(kk) * n + j])

#if defined(STRIP8)

    rem = n % 8;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* Accumulate locally; c is stored once per element. */
            double sum = 0.0;
            for (k = 0; k < rem; k++) {
                sum += MMM_TERM(k);
            }
            for (k = rem; k < n; k += 8) {
                sum += MMM_TERM(k    ) + MMM_TERM(k + 1)
                     + MMM_TERM(k + 2) + MMM_TERM(k + 3)
                     + MMM_TERM(k + 4) + MMM_TERM(k + 5)
                     + MMM_TERM(k + 6) + MMM_TERM(k + 7);
            }
            c[i * n + j] = sum;
        }
    }

#elif defined(STRIP4)

    rem = n % 4;

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* Accumulate locally; c is stored once per element. */
            double sum = 0.0;
            for (k = 0; k < rem; k++) {
                sum += MMM_TERM(k);
            }
            for (k = rem; k < n; k += 4) {
                sum += MMM_TERM(k    ) + MMM_TERM(k + 1)
                     + MMM_TERM(k + 2) + MMM_TERM(k + 3);
            }
            c[i * n + j] = sum;
        }
    }

#else

    /* Normal Matrix Multiplication */

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            /* Accumulate locally; c is stored once per element. */
            double sum = 0.0;
            for (k = 0; k < n; k++) {
                sum += MMM_TERM(k);
            }
            c[i * n + j] = sum;
        }
    }

#endif

#undef MMM_TERM
}
#endif 

-------------- next part --------------
# Makefile to build Program 
#
# Andrew J. Pounds, Ph.D.
# Departments of Chemistry and Computer Science
# Mercer University
# Fall 2011 
#
# Usage:
#   make              build driver and atlasdriver (C sources get -O3)
#   make debug=y      build the C sources with -g -DDEBUG instead of -O3
#   make lib          archive the timing routines into liblbstime.a
#   make clean        remove object files
#   make pristine     remove objects, Fortran modules and both executables
#   make ctags        regenerate the tags file
#
# FFLAGS and COPTFLAGS are not set here; supply them on the command line
# or in the environment if needed.

F95 = gfortran   
CC = gcc 

debug ?= n
ifeq ($(debug), y)
    CFLAGS += -g -DDEBUG
else
    CFLAGS += -O3 
endif

# -DOPENMP selects the OpenMP code path in mmm.c; -fopenmp enables the pragmas.
OPEN_MP_FLAGS = -DOPENMP -fopenmp

ATLASLIBS = -L/usr/lib64/atlas -lblas -llapack -lf77blas -lcblas -latlas
OPEN_MP_LIB = -lgomp

OBJS = array.o zeromat.o walltime.o cputime.o mmm.o  \
       vvm.o 

# These targets name actions, not files; declare them phony so a stray file
# called "clean" (etc.) can never make them look up to date.
.PHONY: all lib clean pristine ctags

all: driver atlasdriver 

atlasdriver : atlasdriver.o $(OBJS)    
	$(F95) -o atlasdriver atlasdriver.o $(OBJS) $(ATLASLIBS) $(OPEN_MP_LIB) 

atlasdriver.o : atlasdriver.f90 array.o   
	$(F95) $(FFLAGS) -c atlasdriver.f90  

driver : driver.o $(OBJS)    
	$(F95) -o driver driver.o $(OBJS)  $(OPEN_MP_LIB) 

driver.o : driver.f90 array.o   
	$(F95) $(FFLAGS) -c driver.f90  

zeromat.o : zeromat.f90    
	$(F95) $(FFLAGS)  -c zeromat.f90  

# Built with the same FFLAGS as the units that depend on it.
array.o : array.f90
	$(F95) $(FFLAGS) -c array.f90

mmm.o : mmm.c
	$(CC) $(CFLAGS) $(COPTFLAGS) $(OPEN_MP_FLAGS)  -c mmm.c

vvm.o : vvm.c
	$(CC) $(CFLAGS) -c vvm.c

# Timing Library targets 

walltime.o : walltime.c
	$(CC) $(CFLAGS) -c walltime.c

cputime.o : cputime.c
	$(CC) $(CFLAGS) -c cputime.c

lib: cputime.o walltime.o
	ar -rc liblbstime.a cputime.o walltime.o
	ranlib liblbstime.a

# Default Targets for Cleaning up the Environment
# rm -f: a missing file is not an error, so these targets succeed on an
# already-clean tree and pristine always runs through to its last step.
clean :
	rm -f *.o

pristine :
	rm -f *.o
	touch *.c *.f90 
	rm -f *.mod
	rm -f driver atlasdriver

ctags :
	ctags *.f90 *.c


More information about the csc435 mailing list