Jacobi
|
This contains the device kernels as well as the host wrappers for these kernels. More...
#include "Jacobi.h"
Functions | |
template<> | |
__device__ void | AtomicMax (float *const address, const float value) |
Compute the maximum of 2 single-precision floating point values using an atomic operation. | |
template<> | |
__device__ void | AtomicMax (double *const address, const double value) |
Compute the maximum of 2 double-precision floating point values using an atomic operation. | |
__global__ void | HaloToBufferKernel (real *__restrict__ const block, const real *__restrict__ const haloLeft, const real *__restrict__ const haloRight, int2 size, int hasLeftNeighbor, int hasRightNeighbor) |
The device kernel for copying (unpacking) the values from the halo buffers to the left and right side of the data block. | |
__global__ void | BufferToHaloKernel (const real *__restrict__ const block, real *__restrict__ const leftSideEdge, real *__restrict__ const rightSideEdge, int2 size, int hasLeftNeighbor, int hasRightNeighbor) |
The device kernel for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers. | |
__global__ void | JacobiComputeKernel (const real *__restrict__ const oldBlock, real *__restrict__ const newBlock, int4 bounds, int stride, real *devResidue) |
The device kernel for one Jacobi iteration. | |
__global__ void | CopyBlockKernel (const real *__restrict__ const srcBlock, real *__restrict__ const dstBlock, int4 bounds, int stride) |
The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes. | |
void | CheckCudaCall (cudaError_t command, const char *commandName, const char *fileName, int line) |
The host function for checking the result of a CUDA API call. | |
real | CallJacobiKernel (real *devBlocks[2], real *devResidue, const int4 *bounds, const int2 *size) |
The host wrapper for one Jacobi iteration. | |
void | CopyDeviceBlock (real *devBlocks[2], const int4 *bounds, const int2 *size) |
The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes. | |
void | CopyDevHalosToBlock (real *devBlock, const real *devHaloLineLeft, const real *devHaloLineRight, const int2 *size, const int *neighbors) |
The host wrapper for copying (unpacking) the values from the halo buffers to the left and right side of the data block. | |
void | CopyDevSideEdgesFromBlock (const real *devBlock, real *devSideEdges[2], const int2 *size, const int *neighbors, cudaStream_t copyStream) |
The host wrapper for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers. |
This contains the device kernels as well as the host wrappers for these kernels.
__device__ void AtomicMax | ( | float *const | address, |
const float | value | ||
) |
Compute the maximum of 2 single-precision floating point values using an atomic operation.
[in] | address | The address of the reference value which might get updated with the maximum |
[in] | value | The value that is compared to the reference in order to determine the maximum |
__device__ void AtomicMax | ( | double *const | address, |
const double | value | ||
) |
Compute the maximum of 2 double-precision floating point values using an atomic operation.
[in] | address | The address of the reference value which might get updated with the maximum |
[in] | value | The value that is compared to the reference in order to determine the maximum |
__global__ void BufferToHaloKernel | ( | const real *__restrict__ const | block, |
real *__restrict__ const | leftSideEdge, | ||
real *__restrict__ const | rightSideEdge, | ||
int2 | size, | ||
int | hasLeftNeighbor, | ||
int | hasRightNeighbor | ||
) |
The device kernel for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers.
[in] | block | The 2D device block containing the updated values after a Jacobi run |
[out] | leftSideEdge | The left edge, holding the left-most updated halos |
[out] | rightSideEdge | The right edge, holding the right-most updated halos |
[in] | size | The 2D size of data block, excluding the edges which hold the halo values for the next iteration |
[in] | hasLeftNeighbor | Marks if the calling MPI process has a left neighbor |
[in] | hasRightNeighbor | Marks if the calling MPI process has a right neighbor |
real CallJacobiKernel | ( | real * | devBlocks[2], |
real * | devResidue, | ||
const int4 * | bounds, | ||
const int2 * | size | ||
) |
The host wrapper for one Jacobi iteration.
[in,out] | devBlocks | The 2 blocks involved: the first is the current one, the second is the one to be updated |
[out] | devResidue | The global residue that is to be updated through the Jacobi iteration |
[in] | bounds | The bounds of the rectangular block region that holds only computable values |
[in] | size | The 2D size of data blocks, excluding the edges which hold the halo values |
void CheckCudaCall | ( | cudaError_t | command, |
const char * | commandName, | ||
const char * | fileName, | ||
int | line | ||
) |
The host function for checking the result of a CUDA API call.
[in] | command | The result of the previously-issued CUDA API call |
[in] | commandName | The name of the issued API call |
[in] | fileName | The name of the file where the API call occurred |
[in] | line | The line in the file where the API call occurred |
__global__ void CopyBlockKernel | ( | const real *__restrict__ const | srcBlock, |
real *__restrict__ const | dstBlock, | ||
int4 | bounds, | ||
int | stride | ||
) |
The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes.
[in] | srcBlock | The source block, from which data will be read |
[out] | dstBlock | The destination block, where data will be written |
[in] | bounds | The bounds of the rectangular updated region (holding only computable values) |
[in] | size | The stride of the data blocks, excluding the halo values |
void CopyDevHalosToBlock | ( | real * | devBlock, |
const real * | devHaloLineLeft, | ||
const real * | devHaloLineRight, | ||
const int2 * | size, | ||
const int * | neighbors | ||
) |
The host wrapper for copying (unpacking) the values from the halo buffers to the left and right side of the data block.
[out] | devBlock | The 2D device block that will contain the halo values after unpacking |
[in] | devHaloLineLeft | The halo buffer for the left side of the data block |
[in] | devHaloLineRight | The halo buffer for the right side of the data block |
[in] | size | The 2D size of data block, excluding the edges which hold the halo values |
[in] | neighbors | The ranks of the neighboring MPI processes |
void CopyDeviceBlock | ( | real * | devBlocks[2], |
const int4 * | bounds, | ||
const int2 * | size | ||
) |
The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes.
[in,out] | devBlocks | The 2 blocks involved: the first is the old one, the second is the updated one |
[in] | bounds | The bounds of the rectangular updated region (holding only computable values) |
[in] | size | The 2D size of data blocks, excluding the edges which hold the halo values |
void CopyDevSideEdgesFromBlock | ( | const real * | devBlock, |
real * | devSideEdges[2], | ||
const int2 * | size, | ||
const int * | neighbors, | ||
cudaStream_t | copyStream | ||
) |
The host wrapper for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers.
[in] | devBlock | The 2D device block containing the updated values after a Jacobi run |
[out] | devSideEdges | The buffers where the edge values will be packed in |
[in] | size | The 2D size of data block, excluding the edges which hold the halo values for the next iteration |
[in] | neighbors | The ranks of the neighboring MPI processes |
[in] | copyStream | The stream on which this kernel will be executed |
__global__ void HaloToBufferKernel | ( | real *__restrict__ const | block, |
const real *__restrict__ const | haloLeft, | ||
const real *__restrict__ const | haloRight, | ||
int2 | size, | ||
int | hasLeftNeighbor, | ||
int | hasRightNeighbor | ||
) |
The device kernel for copying (unpacking) the values from the halo buffers to the left and right side of the data block.
[out] | block | The 2D device block that will contain the halo values after unpacking |
[in] | haloLeft | The halo buffer for the left side of the data block |
[in] | haloRight | The halo buffer for the right side of the data block |
[in] | size | The 2D size of data block, excluding the edges which hold the halo values |
[in] | hasLeftNeighbor | Marks if the calling MPI process has a left neighbor |
[in] | hasRightNeighbor | Marks if the calling MPI process has a right neighbor |
__global__ void JacobiComputeKernel | ( | const real *__restrict__ const | oldBlock, |
real *__restrict__ const | newBlock, | ||
int4 | bounds, | ||
int | stride, | ||
real * | devResidue | ||
) |
The device kernel for one Jacobi iteration.
[in] | oldBlock | The current (old) block providing the values at the start of the iteration |
[out] | newBlock | The new block that will hold the updated values after the iteration finishes |
[in] | bounds | The bounds of the rectangular block region that holds only computable values |
[in] | stride | The stride of the data blocks, excluding the edges holding the halo values |
[out] | devResidue | The global residue that is to be updated through the iteration |