mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-05 08:00:30 +01:00
radeonsi: add compute_last_block to configure the partial block fields
This commit is contained in:
parent
b443465fb9
commit
8daf5bb209
2 changed files with 49 additions and 5 deletions
|
|
@ -797,11 +797,6 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
|
|||
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
|
||||
compute_resource_limits);
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
|
||||
radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
|
||||
radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
|
||||
|
||||
unsigned dispatch_initiator =
|
||||
S_00B800_COMPUTE_SHADER_EN(1) |
|
||||
S_00B800_FORCE_START_AT_000(1) |
|
||||
|
|
@ -809,6 +804,33 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
|
|||
* allow launching waves out-of-order. (same as Vulkan) */
|
||||
S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
|
||||
|
||||
uint *last_block = sctx->compute_last_block;
|
||||
bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
|
||||
if (partial_block_en) {
|
||||
unsigned partial[3];
|
||||
|
||||
/* An axis with last_block[i] == 0 has no partial block; program the full block size for it, not 0. */
|
||||
partial[0] = last_block[0] ? last_block[0] : info->block[0];
|
||||
partial[1] = last_block[1] ? last_block[1] : info->block[1];
|
||||
partial[2] = last_block[2] ? last_block[2] : info->block[2];
|
||||
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]) |
|
||||
S_00B81C_NUM_THREAD_PARTIAL(partial[0]));
|
||||
radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]) |
|
||||
S_00B820_NUM_THREAD_PARTIAL(partial[1]));
|
||||
radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]) |
|
||||
S_00B824_NUM_THREAD_PARTIAL(partial[2]));
|
||||
|
||||
dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
|
||||
} else {
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
|
||||
radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
|
||||
radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
|
||||
}
|
||||
|
||||
if (info->indirect) {
|
||||
uint64_t base_va = r600_resource(info->indirect)->gpu_address;
|
||||
|
||||
|
|
|
|||
|
|
@ -896,6 +896,28 @@ struct si_context {
|
|||
uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
|
||||
uint32_t cs_user_data[4];
|
||||
|
||||
/**
|
||||
* last_block allows disabling threads at the farthermost grid boundary.
|
||||
* Full blocks as specified by "block" are launched, but the threads
|
||||
* outside of "last_block" dimensions are disabled.
|
||||
*
|
||||
* If a block touches the grid boundary in the i-th axis, threads with
|
||||
* THREAD_ID[i] >= last_block[i] are disabled.
|
||||
*
|
||||
* If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
|
||||
* meaning no effect.
|
||||
*
|
||||
* It's equivalent to doing this at the beginning of the compute shader:
|
||||
*
|
||||
* for (i = 0; i < 3; i++) {
|
||||
* if (block_id[i] == grid[i] - 1 &&
|
||||
* last_block[i] && thread_id[i] >= last_block[i])
|
||||
* return;
|
||||
* }
|
||||
* (this could be moved into pipe_grid_info)
|
||||
*/
|
||||
uint compute_last_block[3];
|
||||
|
||||
/* Vertex and index buffers. */
|
||||
bool vertex_buffers_dirty;
|
||||
bool vertex_buffer_pointer_dirty;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue