From 2087168a30f5a5472c8f31161da929f273908cf7 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Mon, 1 Mar 2021 17:31:56 +0200 Subject: [PATCH] turnip,ir3: account for dispatch group offsets Fixes tests: dEQP-VK.compute.device_group.dispatch_base Signed-off-by: Danylo Piliaiev Part-of: --- .../ci/deqp-freedreno-a630-fails.txt | 1 - src/freedreno/ir3/ir3_compiler_nir.c | 6 +++++ src/freedreno/ir3/ir3_nir.c | 4 +++ src/freedreno/ir3/ir3_shader.h | 11 +++++--- src/freedreno/vulkan/tu_cmd_buffer.c | 25 ++++++++++++++++--- src/freedreno/vulkan/tu_shader.c | 7 +++++- 6 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 15577411122..59c666602db 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -15,7 +15,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail dEQP-VK.api.info.format_properties.g8b8g8r8_422_unorm,Fail dEQP-VK.api.info.get_physical_device_properties2.memory_properties,Fail dEQP-VK.compute.basic.max_local_size_x,Crash -dEQP-VK.compute.device_group.dispatch_base,Fail dEQP-VK.draw.shader_viewport_index.fragment_shader_12,Fail dEQP-VK.draw.shader_viewport_index.fragment_shader_13,Fail dEQP-VK.draw.shader_viewport_index.fragment_shader_16,Fail diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 02a9992c973..20029fc8791 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1913,6 +1913,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) ir3_split_dest(b, dst, ctx->local_invocation_id, 0, 3); break; case nir_intrinsic_load_work_group_id: + case nir_intrinsic_load_work_group_id_zero_base: if (!ctx->work_group_id) { ctx->work_group_id = create_sysval_input(ctx, SYSTEM_VALUE_WORK_GROUP_ID, 0x7); @@ -1920,6 +1921,11 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) } ir3_split_dest(b, dst, ctx->work_group_id, 0, 3); break; + case nir_intrinsic_load_base_work_group_id: + for (int i = 0; i < dest_components; i++) { + dst[i] = create_driver_param(ctx, IR3_DP_BASE_GROUP_X + i); + } + break; case nir_intrinsic_load_num_work_groups: for (int i = 0; i < dest_components; i++) { dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 40fbf1d53bf..ead0d1a85a9 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -631,6 +631,10 @@ ir3_nir_scan_driver_consts(nir_shader *shader, layout->num_driver_params = MAX2(layout->num_driver_params, IR3_DP_LOCAL_GROUP_SIZE_Z + 1); break; + case nir_intrinsic_load_base_work_group_id: + layout->num_driver_params = + MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1); + break; default: break; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 05491f71fab..0be0ab18b1b 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -45,15 +45,18 @@ enum ir3_driver_param { IR3_DP_NUM_WORK_GROUPS_X = 0, IR3_DP_NUM_WORK_GROUPS_Y = 1, IR3_DP_NUM_WORK_GROUPS_Z = 2, - IR3_DP_LOCAL_GROUP_SIZE_X = 4, - IR3_DP_LOCAL_GROUP_SIZE_Y = 5, - IR3_DP_LOCAL_GROUP_SIZE_Z = 6, + IR3_DP_BASE_GROUP_X = 4, + IR3_DP_BASE_GROUP_Y = 5, + IR3_DP_BASE_GROUP_Z = 6, + IR3_DP_LOCAL_GROUP_SIZE_X = 8, + IR3_DP_LOCAL_GROUP_SIZE_Y = 9, + IR3_DP_LOCAL_GROUP_SIZE_Z = 10, /* NOTE: gl_NumWorkGroups should be vec4 aligned because * glDispatchComputeIndirect() needs to load these from * the info->indirect buffer. Keep that in mind when/if * adding any addition CS driver params. */ - IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */ + IR3_DP_CS_COUNT = 12, /* must be aligned to vec4 */ /* vertex shader driver params: */ IR3_DP_DRAWID = 0, diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 1ba541d9cdc..17c75b760e7 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -3808,15 +3808,19 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, if (link->constlen <= offset) return; + uint32_t num_consts = MIN2(const_state->num_driver_params, + (link->constlen - offset) * 4); + if (!info->indirect) { - uint32_t driver_params[4] = { + uint32_t driver_params[8] = { [IR3_DP_NUM_WORK_GROUPS_X] = info->blocks[0], [IR3_DP_NUM_WORK_GROUPS_Y] = info->blocks[1], [IR3_DP_NUM_WORK_GROUPS_Z] = info->blocks[2], + [IR3_DP_BASE_GROUP_X] = info->offsets[0], + [IR3_DP_BASE_GROUP_Y] = info->offsets[1], + [IR3_DP_BASE_GROUP_Z] = info->offsets[2], }; - uint32_t num_consts = MIN2(const_state->num_driver_params, - (link->constlen - offset) * 4); assert(num_consts <= ARRAY_SIZE(driver_params)); /* push constants */ @@ -3864,6 +3868,21 @@ tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd, CP_LOAD_STATE6_0_NUM_UNIT(1)); tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0])); } + + /* Zeroing of IR3_DP_BASE_GROUP_X/Y/Z for indirect dispatch */ + if (info->indirect && num_consts > IR3_DP_BASE_GROUP_X) { + assert(num_consts == align(IR3_DP_BASE_GROUP_Z, 4)); + + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 7); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset + (IR3_DP_BASE_GROUP_X / 4)) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + tu_cs_emit_qw(cs, 0); + for (uint32_t i = 0; i < 4; i++) + tu_cs_emit(cs, 0); + } } static void diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 172c0c9965e..d5170112674 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -75,6 +75,11 @@ tu_spirv_to_nir(struct tu_device *dev, .runtime_descriptor_array = true, }, }; + + const struct nir_lower_compute_system_values_options compute_sysval_options = { + .has_base_work_group_id = true, + }; + const nir_shader_compiler_options *nir_options = ir3_get_compiler_options(dev->compiler); @@ -178,7 +183,7 @@ tu_spirv_to_nir(struct tu_device *dev, NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); + NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options); NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);