// -*-C++-*- #include #include #include #include #include #include #include #include #include namespace WaveToyCUDA { // Check a return value, and if there is an error, output a // human-readable error message void check_error(cudaError_t cerr, char const *msg = "", ...) #ifdef __GNUC__ __attribute__((format (printf, 2, 3))) #endif ; void check_error(cudaError_t cerr, char const *msg, ...) { if (cerr) { if (strcmp(msg, "")) { va_list ap; va_start(ap, msg); char *usermsg; vasprintf(&usermsg, msg, ap); va_end(ap); CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "CUDA Error %d: %s:\n%s", (int)cerr, cudaGetErrorString(cerr), usermsg); free(usermsg); } else { CCTK_VWarn(CCTK_WARN_ABORT, __LINE__, __FILE__, CCTK_THORNSTRING, "CUDA Error %d: %s\n", (int)cerr, cudaGetErrorString(cerr)); } } } // Access a grid function in a kernel __device__ CCTK_REAL& gfelt(cudaPitchedPtr const& u, size_t i, size_t j, size_t k) { return *(CCTK_REAL*)&((char*)u.ptr)[i*sizeof(CCTK_REAL) + j*u.pitch + k*u.pitch*u.ysize]; } // Data living in the device memory namespace dev { cudaExtent ext; cudaPitchedPtr u; } // namespace dev // A simple kernel __global__ void init(size_t const lsh0, size_t const lsh1, size_t const lsh2, cudaPitchedPtr const u) { size_t const i = blockIdx.x * blockDim.x + threadIdx.x; size_t const j = blockIdx.y * blockDim.y + threadIdx.y; size_t const k = blockIdx.z * blockDim.z + threadIdx.z; if (i>>(cctk_lsh[0], cctk_lsh[1], cctk_lsh[2], dev::u); cerr = cudaGetLastError(); check_error(cerr, "Could not call routine \"init\""); // Copy data to host cudaMemcpy3DParms parms = {0}; parms.srcPtr = dev::u; parms.dstPtr = make_cudaPitchedPtr (u, sizeof(CCTK_REAL)*cctk_ash[0], cctk_lsh[0], cctk_lsh[1]); parms.extent = dev::ext; parms.kind = cudaMemcpyDeviceToHost; cerr = cudaMemcpy3D(&parms); check_error(cerr, "Failed to copy [%d,%d,%d] array", cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]); // Output grid function // for (ptrdiff_t k=0; k