mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
turnip,ir3: account for dispatch group offsets
Fixes tests:
dEQP-VK.compute.device_group.dispatch_base

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9339>
This commit is contained in:
parent
68bfb6ca49
commit
2087168a30
6 changed files with 45 additions and 9 deletions
|
|
@ -15,7 +15,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
|
|||
dEQP-VK.api.info.format_properties.g8b8g8r8_422_unorm,Fail
|
||||
dEQP-VK.api.info.get_physical_device_properties2.memory_properties,Fail
|
||||
dEQP-VK.compute.basic.max_local_size_x,Crash
|
||||
dEQP-VK.compute.device_group.dispatch_base,Fail
|
||||
dEQP-VK.draw.shader_viewport_index.fragment_shader_12,Fail
|
||||
dEQP-VK.draw.shader_viewport_index.fragment_shader_13,Fail
|
||||
dEQP-VK.draw.shader_viewport_index.fragment_shader_16,Fail
|
||||
|
|
|
|||
|
|
@ -1913,6 +1913,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
ir3_split_dest(b, dst, ctx->local_invocation_id, 0, 3);
|
||||
break;
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
case nir_intrinsic_load_work_group_id_zero_base:
|
||||
if (!ctx->work_group_id) {
|
||||
ctx->work_group_id =
|
||||
create_sysval_input(ctx, SYSTEM_VALUE_WORK_GROUP_ID, 0x7);
|
||||
|
|
@ -1920,6 +1921,11 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
ir3_split_dest(b, dst, ctx->work_group_id, 0, 3);
|
||||
break;
|
||||
case nir_intrinsic_load_base_work_group_id:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_BASE_GROUP_X + i);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
for (int i = 0; i < dest_components; i++) {
|
||||
dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i);
|
||||
|
|
|
|||
|
|
@ -631,6 +631,10 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
|
|||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_LOCAL_GROUP_SIZE_Z + 1);
|
||||
break;
|
||||
case nir_intrinsic_load_base_work_group_id:
|
||||
layout->num_driver_params =
|
||||
MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,15 +45,18 @@ enum ir3_driver_param {
|
|||
IR3_DP_NUM_WORK_GROUPS_X = 0,
|
||||
IR3_DP_NUM_WORK_GROUPS_Y = 1,
|
||||
IR3_DP_NUM_WORK_GROUPS_Z = 2,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_X = 4,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Y = 5,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Z = 6,
|
||||
IR3_DP_BASE_GROUP_X = 4,
|
||||
IR3_DP_BASE_GROUP_Y = 5,
|
||||
IR3_DP_BASE_GROUP_Z = 6,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_X = 8,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Y = 9,
|
||||
IR3_DP_LOCAL_GROUP_SIZE_Z = 10,
|
||||
/* NOTE: gl_NumWorkGroups should be vec4 aligned because
|
||||
* glDispatchComputeIndirect() needs to load these from
|
||||
* the info->indirect buffer. Keep that in mind when/if
|
||||
* adding any additional CS driver params.
|
||||
*/
|
||||
IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */
|
||||
IR3_DP_CS_COUNT = 12, /* must be aligned to vec4 */
|
||||
|
||||
/* vertex shader driver params: */
|
||||
IR3_DP_DRAWID = 0,
|
||||
|
|
|
|||
|
|
@ -3808,15 +3808,19 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
if (link->constlen <= offset)
|
||||
return;
|
||||
|
||||
uint32_t num_consts = MIN2(const_state->num_driver_params,
|
||||
(link->constlen - offset) * 4);
|
||||
|
||||
if (!info->indirect) {
|
||||
uint32_t driver_params[4] = {
|
||||
uint32_t driver_params[8] = {
|
||||
[IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1],
|
||||
[IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2],
|
||||
[IR3_DP_BASE_GROUP_X] = info->offsets[0],
|
||||
[IR3_DP_BASE_GROUP_Y] = info->offsets[1],
|
||||
[IR3_DP_BASE_GROUP_Z] = info->offsets[2],
|
||||
};
|
||||
|
||||
uint32_t num_consts = MIN2(const_state->num_driver_params,
|
||||
(link->constlen - offset) * 4);
|
||||
assert(num_consts <= ARRAY_SIZE(driver_params));
|
||||
|
||||
/* push constants */
|
||||
|
|
@ -3864,6 +3868,21 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
|
|||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0]));
|
||||
}
|
||||
|
||||
/* Zeroing of IR3_DP_BASE_GROUP_X/Y/Z for indirect dispatch */
|
||||
if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
|
||||
assert(num_consts == align(IR3_DP_BASE_GROUP_Z, 4));
|
||||
|
||||
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7);
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_BASE_GROUP_X / 4)) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(1));
|
||||
tu_cs_emit_qw(cs, 0);
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
tu_cs_emit(cs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -75,6 +75,11 @@ tu_spirv_to_nir(struct tu_device *dev,
|
|||
.runtime_descriptor_array = true,
|
||||
},
|
||||
};
|
||||
|
||||
const struct nir_lower_compute_system_values_options compute_sysval_options = {
|
||||
.has_base_work_group_id = true,
|
||||
};
|
||||
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
ir3_get_compiler_options(dev->compiler);
|
||||
|
||||
|
|
@ -178,7 +183,7 @@ tu_spirv_to_nir(struct tu_device *dev,
|
|||
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
|
||||
NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue