diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index e008c356fe7..255f6c8f379 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -21,11 +21,12 @@ unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader) } else if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) return 2; else { - /* We always build up all three indices for the prim export - * independent of the primitive type. The additional garbage - * data shouldn't hurt. This is used by exports and streamout. + /* The shader compiler replaces 0 with 3. The generated code will be correct regardless + * of the draw primitive type, but it's less efficient. + * + * Computing prim export values for non-existent vertices has no effect. */ - return 3; + return 0; /* unknown */ } } else { assert(shader->selector->stage == MESA_SHADER_TESS_EVAL); diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index cf1f846842f..ad57e18faa4 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -32,38 +32,6 @@ nir_def *si_nir_load_internal_binding(nir_builder *b, struct si_shader_args *arg return nir_load_smem_amd(b, num_components, addr, nir_imm_int(b, slot * 16)); } -static nir_def *get_num_vert_per_prim(nir_builder *b, struct si_shader *shader, - struct si_shader_args *args) -{ - const struct si_shader_info *info = &shader->selector->info; - gl_shader_stage stage = shader->selector->stage; - - unsigned num_vertices; - if (stage == MESA_SHADER_GEOMETRY) { - num_vertices = mesa_vertices_per_prim(info->base.gs.output_primitive); - } else if (stage == MESA_SHADER_VERTEX) { - if (info->base.vs.blit_sgprs_amd) - num_vertices = 3; - else if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) - num_vertices = 2; - else { - /* Extract OUTPRIM field. */ - nir_def *num = GET_FIELD_NIR(GS_STATE_OUTPRIM); - return nir_iadd_imm(b, num, 1); - } - } else { - assert(stage == MESA_SHADER_TESS_EVAL); - - if (info->base.tess.point_mode) - num_vertices = 1; - else if (info->base.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) - num_vertices = 2; - else - num_vertices = 3; - } - return nir_imm_int(b, num_vertices); -} - static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader, struct si_shader_args *args) { @@ -411,9 +379,14 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s replacement = nir_load_smem_amd(b, 4, addr, nir_imm_int(b, offset)); break; } - case nir_intrinsic_load_num_vertices_per_primitive_amd: - replacement = get_num_vert_per_prim(b, shader, args); + case nir_intrinsic_load_num_vertices_per_primitive_amd: { + unsigned num_vertices = gfx10_ngg_get_vertices_per_prim(shader); + if (num_vertices) + replacement = nir_imm_int(b, num_vertices); + else + replacement = nir_iadd_imm(b, GET_FIELD_NIR(GS_STATE_OUTPRIM), 1); break; + } case nir_intrinsic_load_cull_ccw_amd: /* radeonsi embed cw/ccw info into front/back face enabled */ replacement = nir_imm_false(b); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1aa1b037687..06d22a6cd78 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1966,8 +1966,9 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir) unsigned clip_plane_enable = SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(key->ge.opt.ngg_culling); + unsigned num_vertices = gfx10_ngg_get_vertices_per_prim(shader); - options.num_vertices_per_primitive = gfx10_ngg_get_vertices_per_prim(shader); + options.num_vertices_per_primitive = num_vertices ? num_vertices : 3; options.early_prim_export = gfx10_ngg_export_prim_early(shader); options.passthrough = gfx10_is_ngg_passthrough(shader); options.use_edgeflags = gfx10_edgeflags_have_effect(shader);