diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index bb4e7ac6f15..8426c0c0140 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1867,6 +1867,13 @@ intrinsic("load_local_shared_r600", src_comp=[0], dest_comp=0, indices = [], fla
 store("local_shared_r600", [1], [WRITE_MASK])
 store("tf_r600", [])
 
+# r600 gl_PrimitiveID lowering:
+# primitive_id_raw_r600 is the unmodified primitive id of the stage,
+# primitive_id_modulo_r600 is a (modulo, inverse) pair read from the
+# LDS info constant buffer.
+system_value("primitive_id_raw_r600", 1)
+system_value("primitive_id_modulo_r600", 2)
+
 # these two definitions are aimed at r600 indirect per_vertex_input accesses
 intrinsic("r600_indirect_vertex_at_index", dest_comp=1, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
 load("r600_per_vertex_input", [1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
diff --git a/src/gallium/drivers/r600/ci/r600-turks-fails.txt b/src/gallium/drivers/r600/ci/r600-turks-fails.txt
index 34bb9ff9eb7..c67f0fa9334 100644
--- a/src/gallium/drivers/r600/ci/r600-turks-fails.txt
+++ b/src/gallium/drivers/r600/ci/r600-turks-fails.txt
@@ -27,7 +27,6 @@ KHR-GLES31.core.shader_storage_buffer_object.advanced-readWrite-case1-cs,Fail
 KHR-GLES31.core.tessellation_shader.single.max_patch_vertices,Crash
 
 KHR-GLES31.core.tessellation_shader.tessellation_shader_tessellation.TCS_TES,Fail
-KHR-GLES31.core.tessellation_shader.tessellation_shader_tessellation.gl_InvocationID_PatchVerticesIn_PrimitiveID,Fail
 KHR-GLES31.core.tessellation_shader.tessellation_shader_tessellation.max_in_out_attributes,Fail
 
 KHR-GLES31.core.texture_border_clamp.Texture2DArrayR32I,Fail
@@ -784,9 +783,6 @@ spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -s
 
 spec@arb_tessellation_shader@execution@fs-primitiveid-instanced,Fail
 spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail
-spec@arb_tessellation_shader@execution@tcs-primitiveid-instanced,Fail
-spec@arb_tessellation_shader@execution@tes-no-tcs-primitiveid-instanced,Fail
-spec@arb_tessellation_shader@execution@tes-primitiveid-instanced,Fail
 spec@arb_tessellation_shader@execution@trivial-tess-gs_no-gs-inputs,Fail
 
 
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 599f62e93b6..334f4b8b649 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -4970,7 +4970,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 void evergreen_setup_tess_constants(struct r600_context *rctx,
 				    const struct pipe_draw_info *info,
 				    unsigned *num_patches,
-				    const bool vertexid)
+				    const bool vertexid,
+				    const uint32_t primitiveid_modulo)
 {
 	struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
 	struct r600_pipe_shader_selector *ls = rctx->vs_shader;
@@ -5049,6 +5050,20 @@ void evergreen_setup_tess_constants(struct r600_context *rctx,
 	rctx->lds_constant_buffer.output_patch0_offset = output_patch0_offset;
 	rctx->lds_constant_buffer.perpatch_output_offset = perpatch_output_offset;
 
+	/* gl_PrimitiveID instanced compatibility: the tessellation shaders
+	 * compute raw_id % primitiveid_modulo as
+	 *    raw_id - umul_high(raw_id, primitiveid_inverse) * primitiveid_modulo
+	 * An inverse of 0 turns this into a pass-through. It is used when no
+	 * wrapping is requested (~0), for a modulo of 0 (which would divide by
+	 * zero below) and for a modulo of 1 (whose inverse does not fit in
+	 * 32 bits; the shader handles that value explicitly). */
+	rctx->lds_constant_buffer.primitiveid_modulo = primitiveid_modulo;
+	rctx->lds_constant_buffer.primitiveid_inverse =
+		(primitiveid_modulo <= 1 ||
+		 primitiveid_modulo == (uint32_t)(~0)) ?
+		0 :
+		(((uint64_t)1) << 32) / primitiveid_modulo + 1;
+
 	/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
 	   LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
 	num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 71a0aae1bea..5d054861adc 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -487,6 +487,11 @@ struct r600_lds_constant_buffer {
 	uint32_t instance_base;
 	uint32_t vertex_base;
 	uint32_t draw_id;
+
+	/* gl_PrimitiveID instanced compatibility; the shader fetches both
+	 * values with a single 32_32 load, so they must stay adjacent. */
+	uint32_t primitiveid_modulo;
+	uint32_t primitiveid_inverse;
 };
 
 struct r600_context {
@@ -827,7 +832,8 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 void evergreen_setup_tess_constants(struct r600_context *rctx,
 				    const struct pipe_draw_info *info,
 				    unsigned *num_patches,
-				    const bool vertexid);
+				    const bool vertexid,
+				    const uint32_t primitiveid_modulo);
 uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
 				    const struct pipe_draw_info *info,
 				    unsigned num_patches);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9e6c279d9a5..43beeea2cbf 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2169,7 +2169,8 @@ r600_draw_parameters(struct r600_context *rctx,
 		     const bool is_mapped,
 		     const uint8_t **indirect_ptr,
 		     unsigned *num_patches,
-		     unsigned *cs_space)
+		     unsigned *cs_space,
+		     const uint32_t primitiveid_modulo)
 {
 	const bool draw_parameters_enabled =
 		rctx->vs_shader->current->shader.vs_draw_parameters_enabled;
@@ -2214,7 +2215,7 @@ r600_draw_parameters(struct r600_context *rctx,
 		*cs_space += R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS * indirect->draw_count;
 	}
 
-	evergreen_setup_tess_constants(rctx, info, num_patches, draw_parameters_enabled);
+	evergreen_setup_tess_constants(rctx, info, num_patches, draw_parameters_enabled, primitiveid_modulo);
 
 	return unlikely(indirect) ?
 		indirect->draw_count :
@@ -2728,7 +2729,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 					    false,
 					    &indirect_ptr,
 					    &num_patches,
-					    &cs_space);
+					    &cs_space,
+					    unlikely(info->instance_count > 1 && rctx->patch_vertices) ?
+					    draws[0].count / rctx->patch_vertices :
+					    ~0);
 	r600_indirect_parameters_init(rctx,
 				      cs,
 				      indirect,
@@ -2988,7 +2992,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 					    true,
 					    &indirect_ptr,
 					    &num_patches,
-					    &cs_space);
+					    &cs_space,
+					    ~0);
 
 	assert(radeon_check_cs(rctx, cs) || true);
 
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index b831f54ca88..613204f4356 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -737,6 +737,70 @@ r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level)
       ;
 }
 
+/* Lower load_primitive_id for gl_PrimitiveID instanced compatibility.
+ *
+ * The id is replaced by raw_id % modulo, where raw_id comes from
+ * load_primitive_id_raw_r600 and (modulo, inverse) are the two channels of
+ * load_primitive_id_modulo_r600. The remainder is computed without an
+ * integer division as
+ *    raw_id - umul_high(raw_id, inverse) * modulo
+ * which, for inverse == 2^32 / modulo + 1, is exact as long as
+ * raw_id * modulo < 2^32.
+ * NOTE(review): presumably no draw exceeds that bound - confirm.
+ *
+ * A modulo of 1 has no 32-bit inverse (2^32 / 1 + 1 overflows), so that case
+ * is resolved to 0 explicitly. With inverse == 0 the raw id passes through
+ * unchanged.
+ */
+static bool
+r600_lower_primitive_id_pass(nir_builder *b,
+                             nir_intrinsic_instr *intrin,
+                             UNUSED void *cb_data)
+{
+   if (intrin->intrinsic != nir_intrinsic_load_primitive_id)
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   /* Both values are fetched as one vec2, so they must be adjacent. */
+   assert(offsetof(struct r600_lds_constant_buffer, primitiveid_inverse) -
+             offsetof(struct r600_lds_constant_buffer, primitiveid_modulo) ==
+          sizeof(uint32_t));
+
+   nir_def *const primitive_id_raw = nir_load_primitive_id_raw_r600(b);
+   nir_def *const primitive_id_vec2 = nir_load_primitive_id_modulo_r600(b);
+   nir_def *const primitive_id_modulo = nir_channel(b, primitive_id_vec2, 0);
+   nir_def *const primitive_id_inverse = nir_channel(b, primitive_id_vec2, 1);
+
+   /* This transformation is equivalent to:
+    * nir_umod(b, primitive_id_raw, primitive_id_modulo); */
+   nir_def *const primitive_id_remainder =
+      nir_isub(b,
+               primitive_id_raw,
+               nir_imul(b,
+                        nir_umul_high(b, primitive_id_raw, primitive_id_inverse),
+                        primitive_id_modulo));
+
+   /* modulo == 1: the remainder is always 0. */
+   nir_def_rewrite_uses(&intrin->def,
+                        nir_bcsel(b,
+                                  nir_ieq_imm(b, primitive_id_modulo, 1),
+                                  nir_imm_int(b, 0),
+                                  primitive_id_remainder));
+
+   return true;
+}
+
+/* Run the gl_PrimitiveID lowering on every intrinsic of the shader. */
+static bool
+r600_lower_primitive_id(nir_shader *nir)
+{
+   return nir_shader_intrinsics_pass(nir,
+                                     r600_lower_primitive_id_pass,
+                                     nir_metadata_control_flow,
+                                     nullptr);
+}
+
 DEBUG_GET_ONCE_NUM_OPTION(skip_opt_start, "R600_SFN_SKIP_OPT_START", -1);
 DEBUG_GET_ONCE_NUM_OPTION(skip_opt_end, "R600_SFN_SKIP_OPT_END", -1);
 DEBUG_GET_ONCE_NUM_OPTION(skip_ra_start, "R600_SFN_SKIP_RA_START", -1);
@@ -766,6 +830,12 @@ r600_lower_and_optimize_nir(nir_shader *sh,
       NIR_PASS(_, sh, nir_remove_dead_variables, nir_var_shader_out, 0);
       r600::sort_fsoutput(sh);
    }
+
+   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
+       sh->info.stage == MESA_SHADER_TESS_EVAL) {
+      NIR_PASS(_, sh, r600_lower_primitive_id);
+   }
+
    nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in | nir_var_shader_out;
 
    NIR_PASS(_, sh, nir_opt_combine_stores, nir_var_shader_out);
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
index 314ffc6a3ea..3d31835e45e 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp
@@ -930,6 +930,10 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
    case nir_intrinsic_load_draw_id:
       return emit_get_lds_info_uint(intr,
                                     offsetof(struct r600_lds_constant_buffer, draw_id));
+   case nir_intrinsic_load_primitive_id_modulo_r600:
+      return emit_get_lds_info_uint2(intr,
+                                     offsetof(struct r600_lds_constant_buffer,
+                                              primitiveid_modulo));
    case nir_intrinsic_barrier:
       return emit_barrier(intr);
    case nir_intrinsic_shared_atomic:
@@ -1544,6 +1548,27 @@ Shader::emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset)
    return true;
 }
 
+/* Load two consecutive 32-bit values from the LDS info constant buffer at
+ * 'offset' into the first two channels of the destination. */
+bool
+Shader::emit_get_lds_info_uint2(nir_intrinsic_instr *instr, int offset)
+{
+   auto src = value_factory().temp_register();
+   emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::write));
+
+   auto dest = value_factory().dest_vec4(instr->def, pin_group);
+   auto fetch = new LoadFromBuffer(dest,
+                                   {0, 1, 7, 7},
+                                   src,
+                                   offset,
+                                   R600_LDS_INFO_CONST_BUFFER,
+                                   nullptr,
+                                   fmt_32_32);
+   emit_instruction(fetch);
+
+   return true;
+}
+
 bool
 Shader::emit_shader_clock(nir_intrinsic_instr *instr)
 {
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h
index 5905d7a7be2..7e6b9a0c1d2 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader.h
+++ b/src/gallium/drivers/r600/sfn/sfn_shader.h
@@ -317,6 +317,7 @@ private:
    bool emit_local_load(nir_intrinsic_instr *instr);
    bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset);
    bool emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset);
+   bool emit_get_lds_info_uint2(nir_intrinsic_instr *instr, int offset);
    bool emit_group_barrier(nir_intrinsic_instr *intr);
    bool emit_shader_clock(nir_intrinsic_instr *instr);
    bool emit_wait_ack();
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp
index 1880b5629d4..16c7d0ccb0c 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_tess.cpp
@@ -30,7 +30,7 @@ TCSShader::do_scan_instruction(nir_instr *instr)
    nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
 
    switch (ii->intrinsic) {
-   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_primitive_id_raw_r600:
       m_sv_values.set(es_primitive_id);
       break;
    case nir_intrinsic_load_invocation_id:
@@ -80,7 +80,7 @@ TCSShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
       return emit_simple_mov(instr->def, 0, m_rel_patch_id);
    case nir_intrinsic_load_invocation_id:
       return emit_simple_mov(instr->def, 0, m_invocation_id);
-   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_primitive_id_raw_r600:
       return emit_simple_mov(instr->def, 0, m_primitive_id);
    case nir_intrinsic_load_tcs_tess_factor_base_r600:
       return emit_simple_mov(instr->def, 0, m_tess_factor_base);
@@ -158,7 +158,7 @@ TESShader::do_scan_instruction(nir_instr *instr)
    case nir_intrinsic_load_tess_coord_xy:
       m_sv_values.set(es_tess_coord);
       break;
-   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_primitive_id_raw_r600:
       m_sv_values.set(es_primitive_id);
       break;
    case nir_intrinsic_load_tcs_rel_patch_id_r600:
@@ -208,7 +208,7 @@ TESShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
    case nir_intrinsic_load_tess_coord_xy:
       return emit_simple_mov(intr->def, 0, m_tess_coord[0], pin_none) &&
              emit_simple_mov(intr->def, 1, m_tess_coord[1], pin_none);
-   case nir_intrinsic_load_primitive_id:
+   case nir_intrinsic_load_primitive_id_raw_r600:
       return emit_simple_mov(intr->def, 0, m_primitive_id);
    case nir_intrinsic_load_tcs_rel_patch_id_r600:
       return emit_simple_mov(intr->def, 0, m_rel_patch_id);