Computes a device-wide exclusive prefix scan-by-key using the specified binary scan_op functor. Key equality is defined by equality_op. The init_value is applied as the initial value, and is assigned to the beginning of each segment in d_values_out.
Supports non-commutative scan operators.
Results are not deterministic for pseudo-associative operators (e.g., addition of floating-point types). Results for pseudo-associative operators may vary from run to run. Additional details can be found in the decoupled look-back description.
When d_temp_storage is NULL, no work is done and the required allocation size is returned in temp_storage_bytes.
Snippet: The code snippet below illustrates the exclusive prefix min-scan-by-key of an int device vector.
#include <cub/cub.cuh> // or equivalently <cub/device/device_scan.cuh>
#include <climits> // for INT_MAX
// CustomMin functor: binary scan operator that yields the smaller of its two arguments.
struct CustomMin
{
    // Returns b when b < a, otherwise a (a is kept when the two compare equal).
    // Only operator< on T is required.
    template <typename T>
    CUB_RUNTIME_FUNCTION __forceinline__
    T operator()(const T &a, const T &b) const {
        return (b < a) ? b : a;
    }
};
// CustomEqual functor: key-equality predicate used to decide whether two keys compare equal.
struct CustomEqual
{
    // Returns true when a == b. The result is a bool rather than T so the
    // predicate also works for key types that are not constructible from bool.
    template <typename T>
    CUB_RUNTIME_FUNCTION __forceinline__
    bool operator()(const T &a, const T &b) const {
        return a == b;
    }
};
// Declare, allocate, and initialize device-accessible pointers for input and output
int num_items; // e.g., 7
int *d_keys_in; // e.g., [0, 0, 1, 1, 1, 2, 2]
int *d_values_in; // e.g., [8, 6, 7, 5, 3, 0, 9]
int *d_values_out; // e.g., [ , , , , , , ]
CustomMin min_op;
CustomEqual equality_op;
...
// Determine temporary device storage requirements for exclusive prefix scan.
// d_temp_storage is NULL here, so this first call does no scan work and only
// reports the required allocation size in temp_storage_bytes.
void *d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceScan::ExclusiveScanByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, min_op, (int) INT_MAX, num_items, equality_op);
// Allocate temporary storage for exclusive prefix scan
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Run exclusive prefix min-scan (same arguments as the size query, now with real temporary storage)
cub::DeviceScan::ExclusiveScanByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_values_in, d_values_out, min_op, (int) INT_MAX, num_items, equality_op);
// Each key segment starts with the initial value INT_MAX, followed by the running minimum of the preceding values in that segment:
// d_values_out <-- [INT_MAX, 8, INT_MAX, 7, 5, INT_MAX, 0]