From 79af0ac29a18046a4d02a7f5259e1fa005bb098a Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Thu, 16 Feb 2023 20:30:30 -0800 Subject: [PATCH] intel/compiler: Add gather4_i/l/[_c]/b sampler message v2: (Ian) - Format comment Reviewed-by: Ian Romanick Signed-off-by: Sagar Ghuge Part-of: --- src/intel/compiler/brw_disasm.c | 5 +++ src/intel/compiler/brw_eu_defines.h | 11 ++++++ src/intel/compiler/brw_fs.cpp | 18 ++++++++++ .../compiler/brw_fs_copy_propagation.cpp | 3 ++ src/intel/compiler/brw_fs_cse.cpp | 3 ++ src/intel/compiler/brw_fs_generator.cpp | 22 ++++++++++++ .../compiler/brw_fs_lower_simd_width.cpp | 3 ++ src/intel/compiler/brw_fs_nir.cpp | 24 +++++++++++-- src/intel/compiler/brw_ir_performance.cpp | 3 ++ .../compiler/brw_lower_logical_sends.cpp | 36 ++++++++++++++++++- src/intel/compiler/brw_shader.cpp | 12 +++++++ 11 files changed, 136 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index b70ee663a9a..76349a8d540 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -641,6 +641,11 @@ static const char *const xe2_sampler_msg_type[] = { [GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po", [XE2_SAMPLER_MESSAGE_SAMPLE_MLOD] = "sample_mlod", [XE2_SAMPLER_MESSAGE_SAMPLE_COMPARE_MLOD] = "sample_c_mlod", + [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I] = "gather4_i", + [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L] = "gather4_l", + [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B] = "gather4_b", + [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C] = "gather4_i_c", + [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C] = "gather4_l_c", [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c", [GFX9_SAMPLER_MESSAGE_SAMPLE_LZ] = "sample_lz", [GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ] = "sample_c_lz", diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 0302334014d..41432028743 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -343,6 +343,12 @@ enum opcode { SHADER_OPCODE_LOD_LOGICAL, SHADER_OPCODE_TG4, SHADER_OPCODE_TG4_LOGICAL, + SHADER_OPCODE_TG4_IMPLICIT_LOD, + SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL, + SHADER_OPCODE_TG4_EXPLICIT_LOD, + SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL, + SHADER_OPCODE_TG4_BIAS, + SHADER_OPCODE_TG4_BIAS_LOGICAL, SHADER_OPCODE_TG4_OFFSET, SHADER_OPCODE_TG4_OFFSET_LOGICAL, SHADER_OPCODE_SAMPLEINFO, @@ -1480,12 +1486,17 @@ enum brw_message_target { #define GFX5_SAMPLER_MESSAGE_LOD 9 #define GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 #define GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO 11 +#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L 13 +#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B 14 +#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I 15 #define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C 16 #define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17 #define GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18 #define XE2_SAMPLER_MESSAGE_SAMPLE_MLOD 18 #define XE2_SAMPLER_MESSAGE_SAMPLE_COMPARE_MLOD 19 #define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20 +#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C 21 +#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C 23 #define GFX9_SAMPLER_MESSAGE_SAMPLE_LZ 24 #define GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ 25 #define GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ 26 diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 567e03eda02..1ce40c5d161 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -274,6 +274,9 @@ fs_inst::is_control_source(unsigned arg) const case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_SAMPLEINFO: return arg == 1 || arg == 2; @@ -315,6 +318,9 @@ fs_inst::is_payload(unsigned arg) const case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_SAMPLEINFO: return arg == 0; @@ -719,6 +725,9 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_LOD_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: case SHADER_OPCODE_SAMPLEINFO_LOGICAL: assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM && src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM && @@ -950,6 +959,9 @@ fs_inst::size_read(int arg) const case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_SAMPLEINFO: if (arg == 0 && src[0].file == VGRF) return mlen * REG_SIZE; @@ -1080,6 +1092,9 @@ fs_inst::implied_mrf_writes() const case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_LOD: @@ -1115,6 +1130,9 @@ fs_inst::has_sampler_residency() const case SHADER_OPCODE_TXS_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: assert(src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM); return src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0; default: diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 61522d70cd2..15208766835 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -1221,6 +1221,9 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst, case SHADER_OPCODE_TXF_UMS_LOGICAL: case SHADER_OPCODE_TXF_MCS_LOGICAL: case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: case SHADER_OPCODE_SAMPLEINFO_LOGICAL: diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 460d97c5770..78c36cb5b63 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -93,6 +93,9 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case SHADER_OPCODE_TXF_MCS_LOGICAL: case SHADER_OPCODE_LOD_LOGICAL: case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: case FS_OPCODE_PACK: return true; diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 2525c415ce5..c8867bcf06d 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1064,6 +1064,25 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, assert(!inst->shadow_compare); msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4; break; + case SHADER_OPCODE_TG4_BIAS: + assert(devinfo->ver >= 20); + assert(!inst->shadow_compare); + msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B; + break; + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + assert(devinfo->ver >= 20); + if (inst->shadow_compare) + msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C; + else + msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L; + break; + case SHADER_OPCODE_TG4_IMPLICIT_LOD: + assert(devinfo->ver >= 20); + if (inst->shadow_compare) + msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C; + else + msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I; + break; case SHADER_OPCODE_SAMPLEINFO: msg_type = GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; break; @@ -2108,6 +2127,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, case SHADER_OPCODE_TXS: case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_SAMPLEINFO: assert(inst->src[0].file == BAD_FILE); generate_tex(inst, dst, src[1], src[2]); diff --git a/src/intel/compiler/brw_fs_lower_simd_width.cpp b/src/intel/compiler/brw_fs_lower_simd_width.cpp index 2a895bfb42a..76bf5efb166 100644 --- a/src/intel/compiler/brw_fs_lower_simd_width.cpp +++ b/src/intel/compiler/brw_fs_lower_simd_width.cpp @@ -550,6 +550,9 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst) case SHADER_OPCODE_SAMPLEINFO_LOGICAL: case SHADER_OPCODE_TXF_CMS_W_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: return get_sampler_lowered_simd_width(devinfo, inst); /* On gfx12 parameters are fixed to 16-bit values and therefore they all diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 67c38603aa3..1ea0359ba44 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -8303,12 +8303,30 @@ fs_nir_emit_texture(nir_to_brw_state &ntb, case nir_texop_lod: opcode = SHADER_OPCODE_LOD_LOGICAL; break; - case nir_texop_tg4: - if (srcs[TEX_LOGICAL_SRC_TG4_OFFSET].file != BAD_FILE) + case nir_texop_tg4: { + if (srcs[TEX_LOGICAL_SRC_TG4_OFFSET].file != BAD_FILE) { opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL; - else + } else { opcode = SHADER_OPCODE_TG4_LOGICAL; + if (devinfo->ver >= 20) { + /* If SPV_AMD_texture_gather_bias_lod extension is enabled, all + * texture gather functions (ie. the ones which do not take the + * extra bias argument and the ones that do) fetch texels from + * implicit LOD in fragment shader stage. In all other shader + * stages, base level is used instead. + */ + if (instr->is_gather_implicit_lod) + opcode = SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL; + + if (got_bias) + opcode = SHADER_OPCODE_TG4_BIAS_LOGICAL; + + if (got_lod) + opcode = SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL; + } + } break; + } case nir_texop_texture_samples: opcode = SHADER_OPCODE_SAMPLEINFO_LOGICAL; break; diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index d50e63bfdb1..f6403df7c5b 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -945,6 +945,9 @@ namespace { case SHADER_OPCODE_LOD: case SHADER_OPCODE_GET_BUFFER_SIZE: case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 5415566ebeb..0db64708f1c 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -905,7 +905,21 @@ sampler_msg_type(const intel_device_info *devinfo, assert(devinfo->ver >= 7); return shadow_compare ? GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C : GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; - case SHADER_OPCODE_SAMPLEINFO: + case SHADER_OPCODE_TG4_BIAS: + assert(!has_min_lod); + assert(devinfo->ver >= 20); + return XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B; + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + assert(!has_min_lod); + assert(devinfo->ver >= 20); + return shadow_compare ? XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C : + XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L; + case SHADER_OPCODE_TG4_IMPLICIT_LOD: + assert(!has_min_lod); + assert(devinfo->ver >= 20); + return shadow_compare ? XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C : + XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I; + case SHADER_OPCODE_SAMPLEINFO: assert(!has_min_lod); return GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; default: @@ -962,6 +976,9 @@ shader_opcode_needs_header(opcode op) switch (op) { case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + case SHADER_OPCODE_TG4_IMPLICIT_LOD: case SHADER_OPCODE_SAMPLEINFO: return true; default: @@ -1145,6 +1162,8 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, /* Set up the LOD info */ switch (op) { case FS_OPCODE_TXB: + case SHADER_OPCODE_TG4_BIAS: + case SHADER_OPCODE_TG4_EXPLICIT_LOD: case SHADER_OPCODE_TXL: bld.MOV(sources[length], lod); length++; @@ -3183,6 +3202,21 @@ brw_fs_lower_logical_sends(fs_visitor &s) lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4); break; + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + assert(devinfo->ver >= 20); + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_BIAS); + break; + + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + assert(devinfo->ver >= 20); + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_EXPLICIT_LOD); + break; + + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: + assert(devinfo->ver >= 20); + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_IMPLICIT_LOD); + break; + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET); break; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 7b38b6f4235..21318f2c667 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -270,6 +270,18 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) return "tg4_offset"; case SHADER_OPCODE_TG4_OFFSET_LOGICAL: return "tg4_offset_logical"; + case SHADER_OPCODE_TG4_BIAS: + return "tg4_b"; + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + return "tg4_b_logical"; + case SHADER_OPCODE_TG4_EXPLICIT_LOD: + return "tg4_l"; + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + return "tg4_l_logical"; + case SHADER_OPCODE_TG4_IMPLICIT_LOD: + return "tg4_i"; + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: + return "tg4_i_logical"; case SHADER_OPCODE_SAMPLEINFO: return "sampleinfo"; case SHADER_OPCODE_SAMPLEINFO_LOGICAL: