mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 09:20:12 +01:00
brw: enable A64 loads source rematerialization
This allows avoiding Wa_1407528679 on A64 loads.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29663>
This commit is contained in:
parent
f482fc33cf
commit
339630ab05
1 changed file with 118 additions and 20 deletions
|
|
@ -57,7 +57,7 @@ struct nir_to_brw_state {
|
||||||
fs_reg *ssa_values;
|
fs_reg *ssa_values;
|
||||||
fs_inst **resource_insts;
|
fs_inst **resource_insts;
|
||||||
struct brw_fs_bind_info *ssa_bind_infos;
|
struct brw_fs_bind_info *ssa_bind_infos;
|
||||||
fs_reg *resource_values;
|
fs_reg *uniform_values;
|
||||||
fs_reg *system_values;
|
fs_reg *system_values;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -395,7 +395,7 @@ fs_nir_emit_impl(nir_to_brw_state &ntb, nir_function_impl *impl)
|
||||||
ntb.ssa_values = rzalloc_array(ntb.mem_ctx, fs_reg, impl->ssa_alloc);
|
ntb.ssa_values = rzalloc_array(ntb.mem_ctx, fs_reg, impl->ssa_alloc);
|
||||||
ntb.resource_insts = rzalloc_array(ntb.mem_ctx, fs_inst *, impl->ssa_alloc);
|
ntb.resource_insts = rzalloc_array(ntb.mem_ctx, fs_inst *, impl->ssa_alloc);
|
||||||
ntb.ssa_bind_infos = rzalloc_array(ntb.mem_ctx, struct brw_fs_bind_info, impl->ssa_alloc);
|
ntb.ssa_bind_infos = rzalloc_array(ntb.mem_ctx, struct brw_fs_bind_info, impl->ssa_alloc);
|
||||||
ntb.resource_values = rzalloc_array(ntb.mem_ctx, fs_reg, impl->ssa_alloc);
|
ntb.uniform_values = rzalloc_array(ntb.mem_ctx, fs_reg, impl->ssa_alloc);
|
||||||
|
|
||||||
fs_nir_emit_cf_list(ntb, &impl->body);
|
fs_nir_emit_cf_list(ntb, &impl->body);
|
||||||
}
|
}
|
||||||
|
|
@ -1863,7 +1863,7 @@ get_resource_nir_src(nir_to_brw_state &ntb, const nir_src &src)
|
||||||
{
|
{
|
||||||
if (!is_resource_src(src))
|
if (!is_resource_src(src))
|
||||||
return fs_reg();
|
return fs_reg();
|
||||||
return ntb.resource_values[src.ssa->index];
|
return ntb.uniform_values[src.ssa->index];
|
||||||
}
|
}
|
||||||
|
|
||||||
static fs_reg
|
static fs_reg
|
||||||
|
|
@ -4578,7 +4578,8 @@ add_rebuild_src(nir_src *src, void *state)
|
||||||
}
|
}
|
||||||
|
|
||||||
static fs_reg
|
static fs_reg
|
||||||
try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def *resource_def)
|
try_rebuild_source(nir_to_brw_state &ntb, const brw::fs_builder &bld,
|
||||||
|
nir_def *resource_def, bool a64 = false)
|
||||||
{
|
{
|
||||||
/* Create a build at the location of the resource_intel intrinsic */
|
/* Create a build at the location of the resource_intel intrinsic */
|
||||||
fs_builder ubld8 = bld.exec_all().group(8, 0);
|
fs_builder ubld8 = bld.exec_all().group(8, 0);
|
||||||
|
|
@ -4605,11 +4606,32 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
unsigned base_offset = nir_intrinsic_base(intrin);
|
unsigned base_offset = nir_intrinsic_base(intrin);
|
||||||
unsigned load_offset = nir_src_as_uint(intrin->src[0]);
|
unsigned load_offset = nir_src_as_uint(intrin->src[0]);
|
||||||
fs_reg src(UNIFORM, base_offset / 4,
|
fs_reg src(UNIFORM, base_offset / 4,
|
||||||
brw_type_with_size(BRW_TYPE_UD, intrin->def.bit_size));
|
brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size));
|
||||||
src.offset = load_offset + base_offset % 4;
|
src.offset = load_offset + base_offset % 4;
|
||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_mesh_inline_data_intel: {
|
||||||
|
assert(ntb.s.stage == MESA_SHADER_MESH ||
|
||||||
|
ntb.s.stage == MESA_SHADER_TASK);
|
||||||
|
const task_mesh_thread_payload &payload = ntb.s.task_mesh_payload();
|
||||||
|
fs_reg data = offset(payload.inline_parameter, 1,
|
||||||
|
nir_intrinsic_align_offset(intrin));
|
||||||
|
return retype(data, brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size));
|
||||||
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_local_arg_addr_intel: {
|
||||||
|
assert(brw_shader_stage_is_bindless(ntb.s.stage));
|
||||||
|
const bs_thread_payload &payload = ntb.s.bs_payload();
|
||||||
|
return retype(payload.local_arg_ptr, BRW_TYPE_Q);
|
||||||
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_global_arg_addr_intel: {
|
||||||
|
assert(brw_shader_stage_is_bindless(ntb.s.stage));
|
||||||
|
const bs_thread_payload &payload = ntb.s.bs_payload();
|
||||||
|
return retype(payload.global_arg_ptr, BRW_TYPE_Q);
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
/* Execute the code below, since we have to generate new
|
/* Execute the code below, since we have to generate new
|
||||||
* instructions.
|
* instructions.
|
||||||
|
|
@ -4620,7 +4642,7 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr, "Trying remat : ");
|
fprintf(stderr, "Trying remat :\n");
|
||||||
for (unsigned i = 0; i < resources.array.size(); i++) {
|
for (unsigned i = 0; i < resources.array.size(); i++) {
|
||||||
fprintf(stderr, " ");
|
fprintf(stderr, " ");
|
||||||
nir_print_instr(resources.array[i]->parent_instr, stderr);
|
nir_print_instr(resources.array[i]->parent_instr, stderr);
|
||||||
|
|
@ -4636,7 +4658,7 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
case nir_instr_type_load_const: {
|
case nir_instr_type_load_const: {
|
||||||
nir_load_const_instr *load_const =
|
nir_load_const_instr *load_const =
|
||||||
nir_instr_as_load_const(instr);
|
nir_instr_as_load_const(instr);
|
||||||
ubld8.MOV(brw_imm_ud(load_const->value[0].i32),
|
ubld8.MOV(brw_imm_d(load_const->value[0].i32),
|
||||||
&ntb.resource_insts[def->index]);
|
&ntb.resource_insts[def->index]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -4680,11 +4702,45 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
&ntb.resource_insts[def->index]);
|
&ntb.resource_insts[def->index]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case nir_op_iand:
|
||||||
|
ubld8.AND(srcs[0], srcs[1], &ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
case nir_op_ishl:
|
case nir_op_ishl:
|
||||||
ubld8.SHL(srcs[0], srcs[1], &ntb.resource_insts[def->index]);
|
ubld8.SHL(srcs[0], srcs[1], &ntb.resource_insts[def->index]);
|
||||||
break;
|
break;
|
||||||
case nir_op_mov:
|
case nir_op_mov:
|
||||||
break;
|
break;
|
||||||
|
case nir_op_ult32: {
|
||||||
|
if (brw_type_size_bits(srcs[0].type) != 32)
|
||||||
|
break;
|
||||||
|
fs_reg dst = ubld8.vgrf(srcs[0].type);
|
||||||
|
enum brw_reg_type utype =
|
||||||
|
brw_type_with_size(srcs[0].type,
|
||||||
|
brw_type_size_bits(srcs[0].type));
|
||||||
|
ntb.resource_insts[def->index] =
|
||||||
|
ubld8.CMP(dst,
|
||||||
|
retype(srcs[0], utype),
|
||||||
|
retype(srcs[1], utype),
|
||||||
|
brw_cmod_for_nir_comparison(alu->op));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case nir_op_b2i32:
|
||||||
|
ubld8.MOV(negate(retype(srcs[0], BRW_TYPE_D)),
|
||||||
|
&ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
|
case nir_op_unpack_64_2x32_split_x:
|
||||||
|
ubld8.MOV(subscript(srcs[0], BRW_TYPE_D, 0),
|
||||||
|
&ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
|
case nir_op_unpack_64_2x32_split_y:
|
||||||
|
ubld8.MOV(subscript(srcs[0], BRW_TYPE_D, 1),
|
||||||
|
&ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
|
case nir_op_pack_64_2x32_split: {
|
||||||
|
fs_reg dst = ubld8.vgrf(BRW_TYPE_Q);
|
||||||
|
ntb.resource_insts[def->index] =
|
||||||
|
ubld8.emit(FS_OPCODE_PACK, dst, srcs[0], srcs[1]);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -4712,9 +4768,37 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_mesh_inline_data_intel: {
|
||||||
|
assert(ntb.s.stage == MESA_SHADER_MESH ||
|
||||||
|
ntb.s.stage == MESA_SHADER_TASK);
|
||||||
|
const task_mesh_thread_payload &payload = ntb.s.task_mesh_payload();
|
||||||
|
fs_reg data = retype(
|
||||||
|
offset(payload.inline_parameter, 1,
|
||||||
|
nir_intrinsic_align_offset(intrin)),
|
||||||
|
brw_type_with_size(BRW_TYPE_D, intrin->def.bit_size));
|
||||||
|
ubld8.MOV(data, &ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_local_arg_addr_intel: {
|
||||||
|
assert(brw_shader_stage_is_bindless(ntb.s.stage));
|
||||||
|
const bs_thread_payload &payload = ntb.s.bs_payload();
|
||||||
|
ubld8.MOV(retype(payload.local_arg_ptr, BRW_TYPE_Q),
|
||||||
|
&ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case nir_intrinsic_load_btd_global_arg_addr_intel: {
|
||||||
|
assert(brw_shader_stage_is_bindless(ntb.s.stage));
|
||||||
|
const bs_thread_payload &payload = ntb.s.bs_payload();
|
||||||
|
ubld8.MOV(retype(payload.global_arg_ptr, BRW_TYPE_Q),
|
||||||
|
&ntb.resource_insts[def->index]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_reloc_const_intel: {
|
case nir_intrinsic_load_reloc_const_intel: {
|
||||||
uint32_t id = nir_intrinsic_param_idx(intrin);
|
uint32_t id = nir_intrinsic_param_idx(intrin);
|
||||||
fs_reg dst = ubld8.vgrf(BRW_TYPE_UD);
|
fs_reg dst = ubld8.vgrf(BRW_TYPE_D);
|
||||||
ntb.resource_insts[def->index] =
|
ntb.resource_insts[def->index] =
|
||||||
ubld8.emit(SHADER_OPCODE_MOV_RELOC_IMM, dst,
|
ubld8.emit(SHADER_OPCODE_MOV_RELOC_IMM, dst,
|
||||||
brw_imm_ud(id), brw_imm_ud(0));
|
brw_imm_ud(id), brw_imm_ud(0));
|
||||||
|
|
@ -4749,7 +4833,8 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
|
|
||||||
if (ntb.resource_insts[def->index] == NULL) {
|
if (ntb.resource_insts[def->index] == NULL) {
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr, "Tried remat : ");
|
if (a64) {
|
||||||
|
fprintf(stderr, "Tried remat :\n");
|
||||||
for (unsigned i = 0; i < resources.array.size(); i++) {
|
for (unsigned i = 0; i < resources.array.size(); i++) {
|
||||||
fprintf(stderr, " ");
|
fprintf(stderr, " ");
|
||||||
nir_print_instr(resources.array[i]->parent_instr, stderr);
|
nir_print_instr(resources.array[i]->parent_instr, stderr);
|
||||||
|
|
@ -4758,6 +4843,7 @@ try_rebuild_resource(nir_to_brw_state &ntb, const brw::fs_builder &bld, nir_def
|
||||||
fprintf(stderr, "failed at! : ");
|
fprintf(stderr, "failed at! : ");
|
||||||
nir_print_instr(instr, stderr);
|
nir_print_instr(instr, stderr);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return fs_reg();
|
return fs_reg();
|
||||||
}
|
}
|
||||||
|
|
@ -4907,8 +4993,10 @@ choose_oword_block_size_dwords(const struct intel_device_info *devinfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
static fs_reg
|
static fs_reg
|
||||||
increment_a64_address(const fs_builder &bld, fs_reg address, uint32_t v)
|
increment_a64_address(const fs_builder &_bld, fs_reg address, uint32_t v, bool use_no_mask)
|
||||||
{
|
{
|
||||||
|
const fs_builder bld = use_no_mask ? _bld.exec_all().group(8, 0) : _bld;
|
||||||
|
|
||||||
if (bld.shader->devinfo->has_64bit_int) {
|
if (bld.shader->devinfo->has_64bit_int) {
|
||||||
struct brw_reg imm = brw_imm_reg(address.type);
|
struct brw_reg imm = brw_imm_reg(address.type);
|
||||||
imm.u64 = v;
|
imm.u64 = v;
|
||||||
|
|
@ -5724,10 +5812,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||||
|
|
||||||
if (nir_intrinsic_resource_access_intel(instr) &
|
if (nir_intrinsic_resource_access_intel(instr) &
|
||||||
nir_resource_intel_non_uniform) {
|
nir_resource_intel_non_uniform) {
|
||||||
ntb.resource_values[instr->def.index] = fs_reg();
|
ntb.uniform_values[instr->def.index] = fs_reg();
|
||||||
} else {
|
} else {
|
||||||
ntb.resource_values[instr->def.index] =
|
ntb.uniform_values[instr->def.index] =
|
||||||
try_rebuild_resource(ntb, bld, instr->src[1].ssa);
|
try_rebuild_source(ntb, bld, instr->src[1].ssa);
|
||||||
}
|
}
|
||||||
ntb.ssa_values[instr->def.index] =
|
ntb.ssa_values[instr->def.index] =
|
||||||
ntb.ssa_values[instr->src[1].ssa->index];
|
ntb.ssa_values[instr->src[1].ssa->index];
|
||||||
|
|
@ -6529,9 +6617,16 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||||
const fs_builder ubld8 = bld.exec_all().group(8, 0);
|
const fs_builder ubld8 = bld.exec_all().group(8, 0);
|
||||||
const fs_builder ubld16 = bld.exec_all().group(16, 0);
|
const fs_builder ubld16 = bld.exec_all().group(16, 0);
|
||||||
|
|
||||||
|
ntb.uniform_values[instr->src[0].ssa->index] =
|
||||||
|
try_rebuild_source(ntb, bld, instr->src[0].ssa, true);
|
||||||
|
bool no_mask = ntb.uniform_values[instr->src[0].ssa->index].file != BAD_FILE;
|
||||||
|
fs_reg address =
|
||||||
|
ntb.uniform_values[instr->src[0].ssa->index].file != BAD_FILE ?
|
||||||
|
ntb.uniform_values[instr->src[0].ssa->index] :
|
||||||
|
bld.emit_uniformize(get_nir_src(ntb, instr->src[0]));
|
||||||
|
|
||||||
const fs_reg packed_consts =
|
const fs_reg packed_consts =
|
||||||
ubld1.vgrf(BRW_TYPE_UD, total_dwords);
|
ubld1.vgrf(BRW_TYPE_UD, total_dwords);
|
||||||
fs_reg address = bld.emit_uniformize(get_nir_src(ntb, instr->src[0]));
|
|
||||||
|
|
||||||
while (loaded_dwords < total_dwords) {
|
while (loaded_dwords < total_dwords) {
|
||||||
const unsigned block =
|
const unsigned block =
|
||||||
|
|
@ -6546,12 +6641,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||||
srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */
|
srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */
|
||||||
srcs[A64_LOGICAL_ARG] = brw_imm_ud(block);
|
srcs[A64_LOGICAL_ARG] = brw_imm_ud(block);
|
||||||
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0);
|
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0);
|
||||||
ubld.emit(SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
fs_inst *inst =
|
||||||
retype(byte_offset(packed_consts, loaded_dwords * 4), BRW_TYPE_UD),
|
ubld.emit(SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
||||||
srcs, A64_LOGICAL_NUM_SRCS)->size_written =
|
retype(byte_offset(packed_consts, loaded_dwords * 4), BRW_TYPE_UD),
|
||||||
|
srcs, A64_LOGICAL_NUM_SRCS);
|
||||||
|
inst->size_written =
|
||||||
align(block_bytes, REG_SIZE * reg_unit(devinfo));
|
align(block_bytes, REG_SIZE * reg_unit(devinfo));
|
||||||
|
inst->has_no_mask_send_params = no_mask;
|
||||||
|
|
||||||
address = increment_a64_address(ubld1, address, block_bytes);
|
address = increment_a64_address(ubld1, address, block_bytes, no_mask);
|
||||||
loaded_dwords += block;
|
loaded_dwords += block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -7397,7 +7495,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||||
retype(byte_offset(dest, loaded * 4), BRW_TYPE_UD),
|
retype(byte_offset(dest, loaded * 4), BRW_TYPE_UD),
|
||||||
srcs, A64_LOGICAL_NUM_SRCS)->size_written = block_bytes;
|
srcs, A64_LOGICAL_NUM_SRCS)->size_written = block_bytes;
|
||||||
|
|
||||||
address = increment_a64_address(ubld1, address, block_bytes);
|
address = increment_a64_address(ubld1, address, block_bytes, false);
|
||||||
loaded += block;
|
loaded += block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -7434,7 +7532,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||||
srcs, A64_LOGICAL_NUM_SRCS);
|
srcs, A64_LOGICAL_NUM_SRCS);
|
||||||
|
|
||||||
const unsigned block_bytes = block * 4;
|
const unsigned block_bytes = block * 4;
|
||||||
address = increment_a64_address(ubld1, address, block_bytes);
|
address = increment_a64_address(ubld1, address, block_bytes, false);
|
||||||
written += block;
|
written += block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue