12 host_address[0] = accel_address[0];
19 for(size_t i = 0; i < element_size; i++)
20 accel_address[i] = host_address[i];
31 for(i = 0; i < d1_block_size; i++)
32 host_address[i+d1_offset] = accel_address[i];
43 for(i = 0; i < d1_block_size; i++)
44 accel_address[i] = host_address[i+d1_offset];
51 size_t d1_size,
size_t d2_size,
52 size_t d1_block_size,
size_t d2_block_size,
53 size_t d1_offset,
size_t d2_offset,
54 int host_address[d2_size],
55 int accel_address[d1_block_size][d2_block_size]) {
57 for(i = 0; i < d1_block_size; i++)
58 for(j = 0; j < d2_block_size; j++)
59 host_address[d2_offset+(i+d1_offset)*d2_size+j] =
65 size_t d1_size,
size_t d2_size,
66 size_t d1_block_size,
size_t d2_block_size,
67 size_t d1_offset,
size_t d2_offset,
68 int host_address [d2_size],
69 int accel_address [d1_block_size][d2_block_size]) {
71 for(i = 0; i < d1_block_size; i++)
72 for(j = 0; j < d2_block_size; j++)
74 host_address[d2_offset+(i + d1_offset)*d2_size+j];
78 size_t d1_size,
size_t d2_size,
size_t d3_size,
79 size_t d1_block_size,
size_t d2_block_size,
size_t d3_block_size,
80 size_t d1_offset,
size_t d2_offset,
size_t d3_offset,
81 int host_address [d3_size],
82 int accel_address [d1_block_size][d2_block_size][d3_block_size]) {
84 for(i = 0; i < d1_block_size; i++)
85 for(j = 0; j < d2_block_size; j++)
86 for(k = 0; k < d3_block_size; k++)
87 accel_address[i][j][k] =
88 host_address[d3_offset+( (i + d1_offset)*d2_size + j + d2_offset)*d3_size+k];
void P4A_copy_to_accel_1d(size_t element_size, size_t d1_size, size_t d1_block_size, size_t d1_offset, int *host_address, int *accel_address)
void P4A_copy_from_accel_1d(size_t element_size, size_t d1_size, size_t d1_block_size, size_t d1_offset, int *host_address, int *accel_address)
To copy parts of 1D arrays.
void P4A_copy_to_accel(size_t element_size, int *host_address, int *accel_address)
void P4A_copy_from_accel_2d(size_t element_size, size_t d1_size, size_t d2_size, size_t d1_block_size, size_t d2_block_size, size_t d1_offset, size_t d2_offset, int host_address[d2_size], int accel_address[d1_block_size][d2_block_size])
To copy parts of 2D arrays.
void P4A_copy_to_accel_2d(size_t element_size, size_t d1_size, size_t d2_size, size_t d1_block_size, size_t d2_block_size, size_t d1_offset, size_t d2_offset, int host_address[d2_size], int accel_address[d1_block_size][d2_block_size])
void P4A_accel_free(void *ptr)
Deallocate memory on the accelerator.
void P4A_accel_malloc(void **ptr, size_t n)
Allocate memory on the accelerator.
void P4A_copy_to_accel_3d(size_t element_size, size_t d1_size, size_t d2_size, size_t d3_size, size_t d1_block_size, size_t d2_block_size, size_t d3_block_size, size_t d1_offset, size_t d2_offset, size_t d3_offset, int host_address[d3_size], int accel_address[d1_block_size][d2_block_size][d3_block_size])
void P4A_copy_from_accel(size_t element_size, int *host_address, int *accel_address)
A small implementation of the runtime used by the code generated by the kernel_load_store and isolate...