[CSC 435] CUDA Device Checking

Thu Mar 20 11:57:53 EDT 2014

Gentlemen --- here is some code that will check to see if there is 
memory available on the CUDA card before moving data to the device. I had 
to do this last night for my research codes and thought it would be 
beneficial for you guys as well -- and I think it will answer some of 
the questions that Steve and Bryan had on Tuesday...

/* C U D A   B L A S   S E C T I O N  */

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>

/*
 * cudablas_mmm: compute C = alpha * A * B + beta * C for DIM x DIM
 * double-precision matrices with cublasDgemm.
 *
 *   handle       cuBLAS handle the call is issued on
 *   A, B, C      matrix pointers handed straight to cublasDgemm; the caller
 *                in this file passes pointers obtained from cudaMalloc
 *   DIM          used for m, n and k and for all three leading dimensions
 *   alpha, beta  scalars, passed to cuBLAS by address
 *
 * Neither operand is transposed (CUBLAS_OP_N for both).  If cuBLAS reports
 * a failure the status code is printed and the process exits with status 1,
 * the same way device allocation failures are handled in this file.
 */
void cudablas_mmm( cublasHandle_t handle, double *A, double *B, double *C,
                    int DIM, double alpha, double beta){

     cublasStatus_t blasStat;

     // Call the actual double precision matrix multiplication library function.
     // alpha and beta live in this function's own parameters, so their
     // addresses are valid for the duration of the call.
     blasStat = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, DIM, DIM, DIM,
                            &alpha, A, DIM, B, DIM, &beta, C, DIM);

     // Do not let a rejected call pass silently: the caller would otherwise
     // copy back whatever happened to be in C.
     if ( blasStat != CUBLAS_STATUS_SUCCESS ){
           printf("cublasDgemm failed (status %d).\n", (int) blasStat);
           exit(1);
           }

}

void mmm_( int *len,  double *A, double *B, double *C ){

     int DIM = *len;
     double alpha = 1.0;
     double beta = 0.0;
     void *d_A, *d_B, *d_C;

     cudaError_t cudaStat;

     // Create CUDA card device handles
     cublasHandle_t handle;
     cublasCreate(&handle);

     // Allocate memory on the card to store the matrices
     if ( (cudaStat = cudaMalloc(&d_A, DIM*DIM * sizeof(double))) != 
cudaSuccess ){
           printf("Device memory allocation failed.\n");
           exit(1);
           }
     if ( (cudaStat = cudaMalloc(&d_B, DIM*DIM * sizeof(double))) != 
cudaSuccess ){
           printf("Device memory allocation failed.\n");
           exit(1);
           }
     if ( (cudaStat = cudaMalloc(&d_C, DIM*DIM * sizeof(double))) != 
cudaSuccess ){
           printf("Device memory allocation failed.\n");
           exit(1);
           }

     // Copy the matrices to the card remember first to arguments are 
always destination and then source.
     // The last argument determines the type of transfer - not the 
direction of transfer.
     cudaMemcpy(d_A, A, DIM*DIM* sizeof(double), cudaMemcpyHostToDevice);
     cudaMemcpy(d_B, B, DIM*DIM* sizeof(double), cudaMemcpyHostToDevice);

     // Call the matrix multiplication function that is GPU based
     cudablas_mmm(handle, d_A, d_B, d_C, DIM, alpha, beta);

     // Copy the matrix C back from the card to the host computer
cudaMemcpy(C,d_C,DIM*DIM*sizeof(double),cudaMemcpyDeviceToHost);

     // Free the memory on the CUDA card
     cudaFree(d_A);
     cudaFree(d_B);
     cudaFree(d_C);

     // Free the memory used for the devince handle
     cublasDestroy(handle);

}

-- 
Andrew J. Pounds, Ph.D.  (pounds_aj at mercer.edu)
Professor of Chemistry and Computer Science
Mercer University,  Macon, GA 31207   (478) 301-5627
http://faculty.mercer.edu/pounds_aj