|
Jacobi
|
This contains the device kernels as well as the host wrappers for these kernels. More...
#include "Jacobi.h"
Functions | |
| template<> | |
| __device__ void | AtomicMax (float *const address, const float value) |
| Compute the maximum of 2 single-precision floating point values using an atomic operation. | |
| template<> | |
| __device__ void | AtomicMax (double *const address, const double value) |
| Compute the maximum of 2 double-precision floating point values using an atomic operation. | |
| __global__ void | HaloToBufferKernel (real *__restrict__ const block, const real *__restrict__ const haloLeft, const real *__restrict__ const haloRight, int2 size, int hasLeftNeighbor, int hasRightNeighbor) |
| The device kernel for copying (unpacking) the values from the halo buffers to the left and right side of the data block. | |
| __global__ void | BufferToHaloKernel (const real *__restrict__ const block, real *__restrict__ const leftSideEdge, real *__restrict__ const rightSideEdge, int2 size, int hasLeftNeighbor, int hasRightNeighbor) |
| The device kernel for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers. | |
| __global__ void | JacobiComputeKernel (const real *__restrict__ const oldBlock, real *__restrict__ const newBlock, int4 bounds, int stride, real *devResidue) |
| The device kernel for one Jacobi iteration. | |
| __global__ void | CopyBlockKernel (const real *__restrict__ const srcBlock, real *__restrict__ const dstBlock, int4 bounds, int stride) |
| The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes. | |
| void | CheckCudaCall (cudaError_t command, const char *commandName, const char *fileName, int line) |
| The host function for checking the result of a CUDA API call. | |
| real | CallJacobiKernel (real *devBlocks[2], real *devResidue, const int4 *bounds, const int2 *size) |
| The host wrapper for one Jacobi iteration. | |
| void | CopyDeviceBlock (real *devBlocks[2], const int4 *bounds, const int2 *size) |
| The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes. | |
| void | CopyDevHalosToBlock (real *devBlock, const real *devHaloLineLeft, const real *devHaloLineRight, const int2 *size, const int *neighbors) |
| The host wrapper for copying (unpacking) the values from the halo buffers to the left and right side of the data block. | |
| void | CopyDevSideEdgesFromBlock (const real *devBlock, real *devSideEdges[2], const int2 *size, const int *neighbors, cudaStream_t copyStream) |
| The host wrapper for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers. | |
This contains the device kernels as well as the host wrappers for these kernels.
| __device__ void AtomicMax | ( | float *const | address, |
| const float | value | ||
| ) |
Compute the maximum of 2 single-precision floating point values using an atomic operation.
| [in] | address | The address of the reference value which might get updated with the maximum |
| [in] | value | The value that is compared to the reference in order to determine the maximum |
| __device__ void AtomicMax | ( | double *const | address, |
| const double | value | ||
| ) |
Compute the maximum of 2 double-precision floating point values using an atomic operation.
| [in] | address | The address of the reference value which might get updated with the maximum |
| [in] | value | The value that is compared to the reference in order to determine the maximum |
| __global__ void BufferToHaloKernel | ( | const real *__restrict__ const | block, |
| real *__restrict__ const | leftSideEdge, | ||
| real *__restrict__ const | rightSideEdge, | ||
| int2 | size, | ||
| int | hasLeftNeighbor, | ||
| int | hasRightNeighbor | ||
| ) |
The device kernel for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers.
| [in] | block | The 2D device block containing the updated values after a Jacobi run |
| [out] | leftSideEdge | The left edge, holding the left-most updated halos |
| [out] | rightSideEdge | The right edge, holding the right-most updated halos |
| [in] | size | The 2D size of data block, excluding the edges which hold the halo values for the next iteration |
| [in] | hasLeftNeighbor | Marks if the calling MPI process has a left neighbor |
| [in] | hasRightNeighbor | Marks if the calling MPI process has a right neighbor |
| real CallJacobiKernel | ( | real * | devBlocks[2], |
| real * | devResidue, | ||
| const int4 * | bounds, | ||
| const int2 * | size | ||
| ) |
The host wrapper for one Jacobi iteration.
| [in,out] | devBlocks | The 2 blocks involved: the first is the current one, the second is the one to be updated |
| [out] | devResidue | The global residue that is to be updated through the Jacobi iteration |
| [in] | bounds | The bounds of the rectangular block region that holds only computable values |
| [in] | size | The 2D size of data blocks, excluding the edges which hold the halo values |

| void CheckCudaCall | ( | cudaError_t | command, |
| const char * | commandName, | ||
| const char * | fileName, | ||
| int | line | ||
| ) |
The host function for checking the result of a CUDA API call.
| [in] | command | The result of the previously-issued CUDA API call |
| [in] | commandName | The name of the issued API call |
| [in] | fileName | The name of the file where the API call occurred |
| [in] | line | The line in the file where the API call occurred |
| __global__ void CopyBlockKernel | ( | const real *__restrict__ const | srcBlock, |
| real *__restrict__ const | dstBlock, | ||
| int4 | bounds, | ||
| int | stride | ||
| ) |
The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes.
| [in] | srcBlock | The source block, from which data will be read |
| [out] | dstBlock | The destination block, where data will be written |
| [in] | bounds | The bounds of the rectangular updated region (holding only computable values) |
| [in] | size | The stride of the data blocks, excluding the halo values |
| void CopyDevHalosToBlock | ( | real * | devBlock, |
| const real * | devHaloLineLeft, | ||
| const real * | devHaloLineRight, | ||
| const int2 * | size, | ||
| const int * | neighbors | ||
| ) |
The host wrapper for copying (unpacking) the values from the halo buffers to the left and right side of the data block.
| [out] | devBlock | The 2D device block that will contain the halo values after unpacking |
| [in] | devHaloLineLeft | The halo buffer for the left side of the data block |
| [in] | devHaloLineRight | The halo buffer for the right side of the data block |
| [in] | size | The 2D size of data block, excluding the edges which hold the halo values |
| [in] | neighbors | The ranks of the neighboring MPI processes |

| void CopyDeviceBlock | ( | real * | devBlocks[2], |
| const int4 * | bounds, | ||
| const int2 * | size | ||
| ) |
The host wrapper for copying the updated block over the old one, after a Jacobi iteration finishes.
| [in,out] | devBlocks | The 2 blocks involved: the first is the old one, the second is the updated one |
| [in] | bounds | The bounds of the rectangular updated region (holding only computable values) |
| [in] | size | The 2D size of data blocks, excluding the edges which hold the halo values |

| void CopyDevSideEdgesFromBlock | ( | const real * | devBlock, |
| real * | devSideEdges[2], | ||
| const int2 * | size, | ||
| const int * | neighbors, | ||
| cudaStream_t | copyStream | ||
| ) |
The host wrapper for copying (packing) the values on the left and right side of the data block to separate, contiguous buffers.
| [in] | devBlock | The 2D device block containing the updated values after a Jacobi run |
| [out] | devSideEdges | The buffers where the edge values will be packed in |
| [in] | size | The 2D size of data block, excluding the edges which hold the halo values for the next iteration |
| [in] | neighbors | The ranks of the neighboring MPI processes |
| [in] | copyStream | The stream on which this kernel will be executed |

| __global__ void HaloToBufferKernel | ( | real *__restrict__ const | block, |
| const real *__restrict__ const | haloLeft, | ||
| const real *__restrict__ const | haloRight, | ||
| int2 | size, | ||
| int | hasLeftNeighbor, | ||
| int | hasRightNeighbor | ||
| ) |
The device kernel for copying (unpacking) the values from the halo buffers to the left and right side of the data block.
| [out] | block | The 2D device block that will contain the halo values after unpacking |
| [in] | haloLeft | The halo buffer for the left side of the data block |
| [in] | haloRight | The halo buffer for the right side of the data block |
| [in] | size | The 2D size of data block, excluding the edges which hold the halo values |
| [in] | hasLeftNeighbor | Marks if the calling MPI process has a left neighbor |
| [in] | hasRightNeighbor | Marks if the calling MPI process has a right neighbor |
| __global__ void JacobiComputeKernel | ( | const real *__restrict__ const | oldBlock, |
| real *__restrict__ const | newBlock, | ||
| int4 | bounds, | ||
| int | stride, | ||
| real * | devResidue | ||
| ) |
The device kernel for one Jacobi iteration.
| [in] | oldBlock | The current (old) block providing the values at the start of the iteration |
| [out] | newBlock | The new block that will hold the updated values after the iteration finishes |
| [in] | bounds | The bounds of the rectangular block region that holds only computable values |
| [in] | stride | The stride of the data blocks, excluding the edges holding the halo values |
| [out] | devResidue | The global residue that is to be updated through the iteration |
1.7.6.1