[CSC 435] Some Files...
Andrew J. Pounds
pounds_aj at mercer.edu
Sat Mar 26 13:29:57 EDT 2016
Guys -- I know I gave you a hardcopy of this in class, but I am
attaching my OpenMP version of matrix multiplication and the makefile I
used (so you can see how to compile it). I will do the same for
pthreads. I recommend copying this to your openmp branch of your
babyblas repository and trying to run it in parallel (if you haven't
gotten anything to run in parallel yet).
Also, if you haven't figured it out yet, your HPC for S&E book is a
pretty good OpenMP reference!
--
Andrew J. Pounds, Ph.D. (pounds_aj at mercer.edu)
Professor of Chemistry and Computer Science
Mercer University, Macon, GA 31207 (478) 301-5627
http://faculty.mercer.edu/pounds_aj
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://theochem.mercer.edu/pipermail/csc435/attachments/20160326/a27ad128/attachment.html>
-------------- next part --------------
/*
 * Declaration of the matrix-multiply entry point mmm_.
 *
 * When this file is compiled as C++, the declaration is wrapped in
 * extern "C" so the symbol keeps C linkage (no name mangling).
 * Every argument is passed by pointer.
 */
#if defined(__cplusplus)
extern "C" {
#endif

void mmm_(int *threads,
          int *len,
          double *a,
          double *b,
          double *c);

#if defined(__cplusplus)
} /* extern "C" */
#endif
#ifdef OPENMP
/* O P E N M P S E C T I O N */
#include <stddef.h> /* size_t */
#ifdef _OPENMP
#include <omp.h>    /* omp_set_num_threads(); _OPENMP is set by the compiler under -fopenmp */
#endif

/*
 * mmm_ -- square matrix-matrix multiply, c = a * b (OpenMP variant).
 *
 * threads  requested number of OpenMP threads; values <= 0 are ignored
 *          and the runtime's current setting is used instead.
 * len      matrix order n; a, b and c each hold n*n doubles.
 * a, b     input matrices; element (i,k) is read from a[i*n + k].
 * c        output matrix; element (i,j) is written to c[i*n + j].
 *
 * Every argument is a pointer and the name ends in an underscore,
 * presumably so the routine can be called from the Fortran drivers.
 * c is assumed not to overlap a or b.
 */
void mmm_( int *threads, int *len, double *a, double *b, double *c ){
    int veclen = *len;
    /* Index arithmetic is done in size_t so i*n + k cannot overflow int
       for large matrices. A non-positive order means "nothing to do". */
    size_t n = veclen > 0 ? (size_t)veclen : 0;
    int i;

#ifdef _OPENMP
    /* The OpenMP runtime requires a positive thread count. */
    if (*threads > 0) {
        omp_set_num_threads(*threads);
    }
#else
    (void)threads;
#endif

    /* Rows of c are independent, so the outer loop is split across threads.
       The accumulator is declared inside the loop body, which makes it
       private to each thread; it is a per-element temporary, not a value
       to be reduced across threads. */
    #pragma omp parallel for shared(a, b, c)
    for (i = 0; i < veclen; i++) {
        size_t row = (size_t)i * n;
        size_t j, k;

        for (j = 0; j < n; j++) {
            double sum = 0.0;

            for (k = 0; k < n; k++) {
                sum += a[row + k] * b[k * n + j];
            }
            c[row + j] = sum;
        }
    }
}
#else
/* S E R I A L C O D E */
#include <stddef.h> /* size_t */

/*
 * mmm_ -- square matrix-matrix multiply, c = a * b (serial variant).
 *
 * threads  accepted for interface compatibility; not used by serial code.
 * len      matrix order n; a, b and c each hold n*n doubles.
 * a, b     input matrices; element (i,k) is read from a[i*n + k].
 * c        output matrix; element (i,j) is written to c[i*n + j].
 *
 * Compile-time variants:
 *   STRIP8  inner (k) loop unrolled by 8
 *   STRIP4  inner (k) loop unrolled by 4
 *   neither plain triple loop
 * In the unrolled variants the first (n % stride) terms are handled one
 * at a time so the unrolled loop always sees a multiple of the stride.
 *
 * c is assumed not to overlap a or b: each element is accumulated in a
 * local variable and stored once.
 */
void mmm_( int *threads, int *len, double *a, double *b, double *c ){
    int veclen = *len;
    /* Index arithmetic is done in size_t so i*n + k cannot overflow int
       for large matrices. A non-positive order means "nothing to do". */
    size_t n = veclen > 0 ? (size_t)veclen : 0;
    size_t i, j, k;

    (void)threads;

#if defined(STRIP8)
    const size_t stride = 8;
    const size_t mod = n % stride;

    for (i = 0; i < n; i++) {
        const size_t row = i * n;

        for (j = 0; j < n; j++) {
            double sum = 0.0;

            /* Leftover terms that do not fill a whole stride. */
            for (k = 0; k < mod; k++) {
                sum += a[row + k] * b[k * n + j];
            }
            for (k = mod; k < n; k += stride) {
                sum += a[row + k    ] * b[ k      * n + j]
                     + a[row + k + 1] * b[(k + 1) * n + j]
                     + a[row + k + 2] * b[(k + 2) * n + j]
                     + a[row + k + 3] * b[(k + 3) * n + j]
                     + a[row + k + 4] * b[(k + 4) * n + j]
                     + a[row + k + 5] * b[(k + 5) * n + j]
                     + a[row + k + 6] * b[(k + 6) * n + j]
                     + a[row + k + 7] * b[(k + 7) * n + j];
            }
            c[row + j] = sum;
        }
    }
#elif defined(STRIP4)
    const size_t stride = 4;
    const size_t mod = n % stride;

    for (i = 0; i < n; i++) {
        const size_t row = i * n;

        for (j = 0; j < n; j++) {
            double sum = 0.0;

            /* Leftover terms that do not fill a whole stride. */
            for (k = 0; k < mod; k++) {
                sum += a[row + k] * b[k * n + j];
            }
            for (k = mod; k < n; k += stride) {
                sum += a[row + k    ] * b[ k      * n + j]
                     + a[row + k + 1] * b[(k + 1) * n + j]
                     + a[row + k + 2] * b[(k + 2) * n + j]
                     + a[row + k + 3] * b[(k + 3) * n + j];
            }
            c[row + j] = sum;
        }
    }
#else
    /* Normal matrix multiplication */
    for (i = 0; i < n; i++) {
        const size_t row = i * n;

        for (j = 0; j < n; j++) {
            double sum = 0.0;

            for (k = 0; k < n; k++) {
                sum += a[row + k] * b[k * n + j];
            }
            c[row + j] = sum;
        }
    }
#endif
}
#endif
-------------- next part --------------
# Makefile to build Program
#
# Andrew J. Pounds, Ph.D.
# Departments of Chemistry and Computer Science
# Mercer University
# Fall 2011
#
# Tool selection: F95 compiles the Fortran sources and performs the final
# links; CC compiles the C sources.
F95 = gfortran
CC = gcc
# Build mode: run "make debug=y" for a debug build (-g -DDEBUG); any other
# value gives an optimized build (-O3). Only CFLAGS is affected here;
# FFLAGS is not set in this file and is taken from the environment or the
# command line if given.
debug ?= n
ifeq ($(debug), y)
CFLAGS += -g -DDEBUG
else
CFLAGS += -O3
endif
# Extra compile flags applied to mmm.c only: -DOPENMP selects the OpenMP
# code path in the source, -fopenmp enables OpenMP in the compiler.
OPEN_MP_FLAGS = -DOPENMP -fopenmp
# Libraries linked into atlasdriver.
ATLASLIBS = -L/usr/lib64/atlas -lblas -llapack -lf77blas -lcblas -latlas
# OpenMP runtime library added to both link lines.
OPEN_MP_LIB = -lgomp
# Object files shared by both executables.
OBJS = array.o zeromat.o walltime.o cputime.o mmm.o \
vvm.o
# "all" and "lib" name actions, not files; declare them phony so a stray
# file with one of those names cannot stop the rule from running.
.PHONY: all lib

# Default goal: build both executables.
all: driver atlasdriver

# Executables are linked with the Fortran compiler driver.
atlasdriver : atlasdriver.o $(OBJS)
	$(F95) -o atlasdriver atlasdriver.o $(OBJS) $(ATLASLIBS) $(OPEN_MP_LIB)

# The Fortran drivers list array.o as a prerequisite so that array.f90 is
# compiled before them.
atlasdriver.o : atlasdriver.f90 array.o
	$(F95) $(FFLAGS) -c atlasdriver.f90

driver : driver.o $(OBJS)
	$(F95) -o driver driver.o $(OBJS) $(OPEN_MP_LIB)

driver.o : driver.f90 array.o
	$(F95) $(FFLAGS) -c driver.f90

zeromat.o : zeromat.f90
	$(F95) $(FFLAGS) -c zeromat.f90

# FFLAGS is applied here as well so every Fortran object is built with the
# same options.
array.o : array.f90
	$(F95) $(FFLAGS) -c array.f90

# mmm.c is the only source compiled with the OpenMP flags.
mmm.o : mmm.c
	$(CC) $(CFLAGS) $(COPTFLAGS) $(OPEN_MP_FLAGS) -c mmm.c

vvm.o : vvm.c
	$(CC) $(CFLAGS) -c vvm.c

# Timing Library targets
# CFLAGS is applied so that debug=y / optimization settings reach these
# objects too.
walltime.o : walltime.c
	$(CC) $(CFLAGS) -c walltime.c

cputime.o : cputime.c
	$(CC) $(CFLAGS) -c cputime.c

# Bundle the timing objects into a static library.
lib: cputime.o walltime.o
	ar -rc liblbstime.a cputime.o walltime.o
	ranlib liblbstime.a
# Default Targets for Cleaning up the Environment
# These targets never produce a file of their own name, so they are
# declared phony. This matters most for "ctags", which shares its name
# with the tool it runs.
.PHONY: clean pristine ctags

# rm -f is used throughout so a missing file (for example running "clean"
# twice) is not an error and does not abort the remaining commands.
clean :
	rm -f *.o

# Remove every build product and refresh the source timestamps.
pristine :
	rm -f *.o
	touch *.c *.f90
	rm -f *.mod
	rm -f driver atlasdriver

# Regenerate the tags index for the Fortran and C sources.
ctags :
	ctags *.f90 *.c
More information about the csc435
mailing list