turnip,ir3: account for dispatch group offsets

Fixes tests:
 dEQP-VK.compute.device_group.dispatch_base

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9339>
This commit is contained in:
Danylo Piliaiev 2021-03-01 17:31:56 +02:00
parent 68bfb6ca49
commit 2087168a30
6 changed files with 45 additions and 9 deletions

View file

@ -15,7 +15,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
dEQP-VK.api.info.format_properties.g8b8g8r8_422_unorm,Fail
dEQP-VK.api.info.get_physical_device_properties2.memory_properties,Fail
dEQP-VK.compute.basic.max_local_size_x,Crash
dEQP-VK.compute.device_group.dispatch_base,Fail
dEQP-VK.draw.shader_viewport_index.fragment_shader_12,Fail
dEQP-VK.draw.shader_viewport_index.fragment_shader_13,Fail
dEQP-VK.draw.shader_viewport_index.fragment_shader_16,Fail

View file

@ -1913,6 +1913,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
ir3_split_dest(b, dst, ctx->local_invocation_id, 0, 3);
break;
case nir_intrinsic_load_work_group_id:
case nir_intrinsic_load_work_group_id_zero_base:
if (!ctx->work_group_id) {
ctx->work_group_id =
create_sysval_input(ctx, SYSTEM_VALUE_WORK_GROUP_ID, 0x7);
@ -1920,6 +1921,11 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
}
ir3_split_dest(b, dst, ctx->work_group_id, 0, 3);
break;
case nir_intrinsic_load_base_work_group_id:
for (int i = 0; i < dest_components; i++) {
dst[i] = create_driver_param(ctx, IR3_DP_BASE_GROUP_X + i);
}
break;
case nir_intrinsic_load_num_work_groups:
for (int i = 0; i < dest_components; i++) {
dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i);

View file

@ -631,6 +631,10 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_LOCAL_GROUP_SIZE_Z + 1);
break;
case nir_intrinsic_load_base_work_group_id:
layout->num_driver_params =
MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1);
break;
default:
break;
}

View file

@ -45,15 +45,18 @@ enum ir3_driver_param {
IR3_DP_NUM_WORK_GROUPS_X = 0,
IR3_DP_NUM_WORK_GROUPS_Y = 1,
IR3_DP_NUM_WORK_GROUPS_Z = 2,
IR3_DP_LOCAL_GROUP_SIZE_X = 4,
IR3_DP_LOCAL_GROUP_SIZE_Y = 5,
IR3_DP_LOCAL_GROUP_SIZE_Z = 6,
IR3_DP_BASE_GROUP_X = 4,
IR3_DP_BASE_GROUP_Y = 5,
IR3_DP_BASE_GROUP_Z = 6,
IR3_DP_LOCAL_GROUP_SIZE_X = 8,
IR3_DP_LOCAL_GROUP_SIZE_Y = 9,
IR3_DP_LOCAL_GROUP_SIZE_Z = 10,
/* NOTE: gl_NumWorkGroups should be vec4 aligned because
* glDispatchComputeIndirect() needs to load these from
* the info->indirect buffer. Keep that in mind when/if
* adding any additional CS driver params.
*/
IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */
IR3_DP_CS_COUNT = 12, /* must be aligned to vec4 */
/* vertex shader driver params: */
IR3_DP_DRAWID = 0,

View file

@ -3808,15 +3808,19 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
if (link->constlen <= offset)
return;
uint32_t num_consts = MIN2(const_state->num_driver_params,
(link->constlen - offset) * 4);
if (!info->indirect) {
uint32_t driver_params[4] = {
uint32_t driver_params[8] = {
[IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0],
[IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1],
[IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2],
[IR3_DP_BASE_GROUP_X] = info->offsets[0],
[IR3_DP_BASE_GROUP_Y] = info->offsets[1],
[IR3_DP_BASE_GROUP_Z] = info->offsets[2],
};
uint32_t num_consts = MIN2(const_state->num_driver_params,
(link->constlen - offset) * 4);
assert(num_consts <= ARRAY_SIZE(driver_params));
/* push constants */
@ -3864,6 +3868,21 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
CP_LOAD_STATE6_0_NUM_UNIT(1));
tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0]));
}
/* Zeroing of IR3_DP_BASE_GROUP_X/Y/Z for indirect dispatch */
if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) {
assert(num_consts == align(IR3_DP_BASE_GROUP_Z, 4));
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_BASE_GROUP_X / 4)) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
CP_LOAD_STATE6_0_NUM_UNIT(1));
tu_cs_emit_qw(cs, 0);
for (uint32_t i = 0; i < 4; i++)
tu_cs_emit(cs, 0);
}
}
static void

View file

@ -75,6 +75,11 @@ tu_spirv_to_nir(struct tu_device *dev,
.runtime_descriptor_array = true,
},
};
const struct nir_lower_compute_system_values_options compute_sysval_options = {
.has_base_work_group_id = true,
};
const nir_shader_compiler_options *nir_options =
ir3_get_compiler_options(dev->compiler);
@ -178,7 +183,7 @@ tu_spirv_to_nir(struct tu_device *dev,
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);