mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-11 21:00:17 +01:00
r600: implement gs indirect load_per_vertex_input
This functionality is useful with the software fp64 implementation; it allows running the remaining tests. Note: the same tests do not generate this indirect access on Cayman, which has the hardware fp64 implementation enabled. This change was tested on Cypress, Palm, and Barts. Here are the tests fixed (each going from fail to pass): spec/arb_gpu_shader_fp64/execution/gs-isnan-dvec: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-array-copy: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-dmat4: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-dmat4-row-major: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-double-array-const-index: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-double-array-variable-index: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-double-bool-double: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-double-uniform-array-direct-indirect: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-doubles-float-mixed: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-dvec4-uniform-array-direct-indirect: fail → pass; spec/arb_gpu_shader_fp64/uniform_buffers/gs-nested-struct: fail → pass. Signed-off-by: Patrick Lerda <patrick9876@free.fr> Reviewed-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34926>
This commit is contained in:
parent
8df9e3b2d0
commit
1186c73c6b
4 changed files with 209 additions and 13 deletions
|
|
@ -1622,6 +1622,10 @@ intrinsic("load_local_shared_r600", src_comp=[0], dest_comp=0, indices = [], fla
|
|||
store("local_shared_r600", [1], [WRITE_MASK])
|
||||
store("tf_r600", [])
|
||||
|
||||
# these two definitions are aimed at r600 indirect per_vertex_input accesses
|
||||
intrinsic("r600_indirect_vertex_at_index", dest_comp=1, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
load("r600_indirect_per_vertex_input", [1, 1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# AMD GCN/RDNA specific intrinsics
|
||||
|
||||
# This barrier is a hint that prevents moving the instruction that computes
|
||||
|
|
|
|||
|
|
@ -697,11 +697,164 @@ r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
|
|||
}
|
||||
}
|
||||
|
||||
struct indirect_per_vertex {
|
||||
nir_deref_instr *array_indirect_deref;
|
||||
uint32_t mask;
|
||||
nir_instr *saved_for_removal[R600_GS_VERTEX_INDIRECT_TOTAL][4];
|
||||
unsigned obsolete_deref_count;
|
||||
nir_instr *obsolete_deref[32];
|
||||
};
|
||||
|
||||
static bool
|
||||
r600_nir_gs_load_deref_io_to_indirect_per_vertex_input(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *cb_data)
|
||||
{
|
||||
struct indirect_per_vertex *indirect_per_vertex =
|
||||
(struct indirect_per_vertex *)cb_data;
|
||||
unsigned j;
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
|
||||
if (!nir_deref_mode_is_one_of(deref, nir_var_shader_in))
|
||||
return false;
|
||||
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
|
||||
|
||||
if (!is_arrayed || var->data.location != VARYING_SLOT_POS)
|
||||
return false;
|
||||
|
||||
nir_def *array_index = deref->arr.index.ssa;
|
||||
|
||||
if (!array_index)
|
||||
return false;
|
||||
|
||||
assert(intrin->def.num_components == 4);
|
||||
|
||||
nir_deref_instr *original_array = nir_instr_as_deref(
|
||||
nir_instr_as_deref(intrin->src[0].ssa->parent_instr)->parent.ssa->parent_instr);
|
||||
|
||||
if (!original_array || original_array->deref_type != nir_deref_type_var ||
|
||||
!glsl_type_is_array(original_array->type))
|
||||
return false;
|
||||
|
||||
auto element_type = glsl_without_array(original_array->type);
|
||||
|
||||
if (element_type != &glsl_type_builtin_vec4)
|
||||
return false;
|
||||
|
||||
const unsigned array_length = glsl_get_length(original_array->type);
|
||||
|
||||
assert(array_length <= R600_GS_VERTEX_INDIRECT_TOTAL);
|
||||
|
||||
for (j = 0; j < indirect_per_vertex->obsolete_deref_count &&
|
||||
j < ARRAY_SIZE(indirect_per_vertex->obsolete_deref);
|
||||
j++)
|
||||
if (intrin->src[0].ssa->parent_instr == indirect_per_vertex->obsolete_deref[j])
|
||||
break;
|
||||
|
||||
if (j == indirect_per_vertex->obsolete_deref_count &&
|
||||
j != ARRAY_SIZE(indirect_per_vertex->obsolete_deref)) {
|
||||
indirect_per_vertex->obsolete_deref[j] = intrin->src[0].ssa->parent_instr;
|
||||
indirect_per_vertex->obsolete_deref_count++;
|
||||
}
|
||||
|
||||
/* The next block generates a global array which is required
|
||||
* for the indirect access. This array is located at the
|
||||
* beginning. All the possible elements are generated. At the
|
||||
* end, the elements which are not referenced are removed. */
|
||||
if (!indirect_per_vertex->array_indirect_deref) {
|
||||
static const char array_indirect_name[] = "r600_indirect_vertex_at_index";
|
||||
|
||||
b->cursor = nir_before_block(nir_start_block(b->impl));
|
||||
|
||||
nir_variable *array_indirect_var = nir_local_variable_create(
|
||||
b->impl,
|
||||
glsl_array_type(glsl_int_type(), R600_GS_VERTEX_INDIRECT_TOTAL, 0),
|
||||
array_indirect_name);
|
||||
indirect_per_vertex->array_indirect_deref =
|
||||
nir_build_deref_var(b, array_indirect_var);
|
||||
|
||||
for (unsigned k = 0; k < R600_GS_VERTEX_INDIRECT_TOTAL; k++) {
|
||||
nir_def *build_count = nir_imm_int(b, k);
|
||||
nir_deref_instr *build_array =
|
||||
nir_build_deref_array(b,
|
||||
indirect_per_vertex->array_indirect_deref,
|
||||
build_count);
|
||||
nir_def *build_store =
|
||||
nir_r600_indirect_vertex_at_index(b, intrin->def.bit_size, build_count);
|
||||
nir_store_deref(b, build_array, build_store, 1);
|
||||
indirect_per_vertex->saved_for_removal[k][0] = build_count->parent_instr;
|
||||
indirect_per_vertex->saved_for_removal[k][1] = &build_array->instr;
|
||||
indirect_per_vertex->saved_for_removal[k][2] = build_store->parent_instr;
|
||||
indirect_per_vertex->saved_for_removal[k][3] =
|
||||
nir_instr_next(build_store->parent_instr); // nir_store_deref
|
||||
}
|
||||
}
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
for (unsigned k = 0; k < array_length; k++)
|
||||
indirect_per_vertex->mask |= (1 << k);
|
||||
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
nir_def *array_indirect_def = nir_load_deref(
|
||||
b,
|
||||
nir_build_deref_array(b, indirect_per_vertex->array_indirect_deref, array_index));
|
||||
nir_def *load = nir_load_r600_indirect_per_vertex_input(b,
|
||||
intrin->def.num_components,
|
||||
intrin->def.bit_size,
|
||||
array_indirect_def,
|
||||
zero);
|
||||
|
||||
nir_intrinsic_set_base(nir_instr_as_intrinsic(load->parent_instr),
|
||||
var->data.driver_location);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, load);
|
||||
nir_instr_remove(&intrin->instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
r600_gs_load_deref_io_to_indirect_per_vertex_input(nir_shader *shader)
|
||||
{
|
||||
struct indirect_per_vertex indirect_per_vertex = {nullptr};
|
||||
bool ret =
|
||||
nir_shader_intrinsics_pass(shader,
|
||||
r600_nir_gs_load_deref_io_to_indirect_per_vertex_input,
|
||||
nir_metadata_control_flow,
|
||||
&indirect_per_vertex);
|
||||
|
||||
if (indirect_per_vertex.array_indirect_deref) {
|
||||
for (unsigned k = 0; k < R600_GS_VERTEX_INDIRECT_TOTAL; k++)
|
||||
if ((indirect_per_vertex.mask & (1 << k)) == 0) {
|
||||
nir_instr_remove(indirect_per_vertex.saved_for_removal[k][3]);
|
||||
nir_instr_remove(indirect_per_vertex.saved_for_removal[k][2]);
|
||||
nir_instr_remove(indirect_per_vertex.saved_for_removal[k][1]);
|
||||
nir_instr_remove(indirect_per_vertex.saved_for_removal[k][0]);
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < indirect_per_vertex.obsolete_deref_count; k++)
|
||||
nir_instr_remove(indirect_per_vertex.obsolete_deref[k]);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level)
|
||||
{
|
||||
const int nir_lower_flrp_mask = 16 | 32 | 64;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
NIR_PASS(_, nir, r600_gs_load_deref_io_to_indirect_per_vertex_input);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_flrp, nir_lower_flrp_mask, false);
|
||||
|
||||
nir_lower_idiv_options idiv_options = {0};
|
||||
|
|
|
|||
|
|
@ -29,7 +29,10 @@ GeometryShader::do_scan_instruction(nir_instr *instr)
|
|||
case nir_intrinsic_store_output:
|
||||
return process_store_output(ii);
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
case nir_intrinsic_load_r600_indirect_per_vertex_input:
|
||||
return process_load_input(ii);
|
||||
case nir_intrinsic_r600_indirect_vertex_at_index:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -165,7 +168,11 @@ GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_load_invocation_id:
|
||||
return emit_simple_mov(intr->def, 0, m_invocation_id);
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
return emit_load_per_vertex_input(intr);
|
||||
return emit_load_per_vertex_input_direct(intr);
|
||||
case nir_intrinsic_load_r600_indirect_per_vertex_input:
|
||||
return emit_load_per_vertex_input_indirect(intr);
|
||||
case nir_intrinsic_r600_indirect_vertex_at_index:
|
||||
return emit_indirect_vertex_at_index(intr);
|
||||
default:;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -294,7 +301,43 @@ GeometryShader::store_output(nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
bool
|
||||
GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr *instr)
|
||||
GeometryShader::emit_indirect_vertex_at_index(nir_intrinsic_instr *instr)
|
||||
{
|
||||
auto dest = value_factory().dest(instr->def, 0, pin_free);
|
||||
auto literal_index = nir_src_as_const_value(instr->src[0]);
|
||||
|
||||
assert(literal_index);
|
||||
assert(literal_index->u32 < R600_GS_VERTEX_INDIRECT_TOTAL);
|
||||
|
||||
auto addr = m_per_vertex_offsets[literal_index->u32];
|
||||
|
||||
auto ir = new AluInstr(op1_mov, dest, addr, AluInstr::write);
|
||||
emit_instruction(ir);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GeometryShader::emit_load_per_vertex_input_direct(nir_intrinsic_instr *instr)
|
||||
{
|
||||
auto literal_index = nir_src_as_const_value(instr->src[0]);
|
||||
assert(literal_index);
|
||||
assert(literal_index->u32 < R600_GS_VERTEX_INDIRECT_TOTAL);
|
||||
assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
|
||||
|
||||
return load_per_vertex_input_at_addr(instr, m_per_vertex_offsets[literal_index->u32]);
|
||||
}
|
||||
|
||||
bool
|
||||
GeometryShader::emit_load_per_vertex_input_indirect(nir_intrinsic_instr *instr)
|
||||
{
|
||||
return load_per_vertex_input_at_addr(
|
||||
instr,
|
||||
value_factory().src(instr->src[0], 0)->as_register());
|
||||
}
|
||||
|
||||
bool
|
||||
GeometryShader::load_per_vertex_input_at_addr(nir_intrinsic_instr *instr, PRegister addr)
|
||||
{
|
||||
auto dest = value_factory().dest_vec4(instr->def, pin_group);
|
||||
|
||||
|
|
@ -303,19 +346,9 @@ GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr *instr)
|
|||
dest_swz[i] = i + nir_intrinsic_component(instr);
|
||||
}
|
||||
|
||||
auto literal_index = nir_src_as_const_value(instr->src[0]);
|
||||
|
||||
if (!literal_index) {
|
||||
sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
|
||||
return false;
|
||||
}
|
||||
assert(literal_index->u32 < R600_GS_VERTEX_INDIRECT_TOTAL);
|
||||
assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
|
||||
|
||||
EVTXDataFormat fmt =
|
||||
chip_class() >= ISA_CC_EVERGREEN ? fmt_invalid : fmt_32_32_32_32_float;
|
||||
|
||||
auto addr = m_per_vertex_offsets[literal_index->u32];
|
||||
auto fetch = new LoadFromBuffer(dest,
|
||||
dest_swz,
|
||||
addr,
|
||||
|
|
|
|||
|
|
@ -34,7 +34,13 @@ private:
|
|||
|
||||
void emit_adj_fix();
|
||||
|
||||
bool emit_load_per_vertex_input(nir_intrinsic_instr *instr);
|
||||
bool emit_indirect_vertex_at_index(nir_intrinsic_instr *instr);
|
||||
|
||||
bool emit_load_per_vertex_input_direct(nir_intrinsic_instr *instr);
|
||||
|
||||
bool emit_load_per_vertex_input_indirect(nir_intrinsic_instr *instr);
|
||||
|
||||
bool load_per_vertex_input_at_addr(nir_intrinsic_instr *instr, PRegister addr);
|
||||
|
||||
bool load_input(UNUSED nir_intrinsic_instr *intr) override
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue