From 73920b7e2f0ffff436fbb513ddbb70f75488eda9 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Fri, 19 Aug 2022 12:40:20 -0700 Subject: [PATCH] intel/compiler: Use FS thread payload only for FS Move the setup into the FS thread payload constructor. Consolidate payload setup for that in brw_fs_thread_payload.cpp file. Reviewed-by: Francisco Jerez Part-of: --- src/intel/compiler/brw_fs.cpp | 115 ++------ src/intel/compiler/brw_fs.h | 19 +- src/intel/compiler/brw_fs_thread_payload.cpp | 264 ++++++++++++++++++ src/intel/compiler/brw_fs_visitor.cpp | 18 +- .../compiler/brw_lower_logical_sends.cpp | 2 +- src/intel/compiler/brw_mesh.cpp | 4 +- src/intel/compiler/brw_shader.cpp | 2 +- src/intel/compiler/brw_vec4.cpp | 2 +- src/intel/compiler/brw_vec4_gs_visitor.cpp | 2 +- src/intel/compiler/brw_vec4_tcs.cpp | 2 +- src/intel/compiler/brw_wm_iz.cpp | 169 ----------- src/intel/compiler/meson.build | 2 +- 12 files changed, 322 insertions(+), 279 deletions(-) create mode 100644 src/intel/compiler/brw_fs_thread_payload.cpp delete mode 100644 src/intel/compiler/brw_wm_iz.cpp diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index d99044ab15a..10eff271c08 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1299,7 +1299,7 @@ fs_visitor::emit_samplepos_setup() * the positions using vstride=16, width=8, hstride=2. 
*/ const fs_reg sample_pos_reg = - fetch_payload_reg(abld, payload.sample_pos_reg, BRW_REGISTER_TYPE_W); + fetch_payload_reg(abld, fs_payload().sample_pos_reg, BRW_REGISTER_TYPE_W); for (unsigned i = 0; i < 2; i++) { fs_reg tmp_d = bld.vgrf(BRW_REGISTER_TYPE_D); @@ -1430,7 +1430,7 @@ fs_visitor::emit_samplemaskin_setup() assert(!wm_prog_data->per_coarse_pixel_dispatch); fs_reg coverage_mask = - fetch_payload_reg(bld, payload.sample_mask_in_reg, BRW_REGISTER_TYPE_D); + fetch_payload_reg(bld, fs_payload().sample_mask_in_reg, BRW_REGISTER_TYPE_D); if (wm_prog_data->persample_dispatch) { /* gl_SampleMaskIn[] comes from two sources: the input coverage mask, @@ -1616,7 +1616,7 @@ fs_visitor::assign_curb_setup() fs_reg(), /* payload2 */ }; - fs_reg dest = retype(brw_vec8_grf(payload.num_regs + i, 0), + fs_reg dest = retype(brw_vec8_grf(payload().num_regs + i, 0), BRW_REGISTER_TYPE_UD); fs_inst *send = ubld.emit(SHADER_OPCODE_SEND, dest, srcs, 4); @@ -1667,7 +1667,7 @@ fs_visitor::assign_curb_setup() assert(constant_nr / 8 < 64); used |= BITFIELD64_BIT(constant_nr / 8); - struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs + + struct brw_reg brw_reg = brw_vec1_grf(payload().num_regs + constant_nr / 8, constant_nr % 8); brw_reg.abs = inst->src[i].abs; @@ -1688,8 +1688,8 @@ fs_visitor::assign_curb_setup() /* push_reg_mask_param is in 32-bit units */ unsigned mask_param = stage_prog_data->push_reg_mask_param; - struct brw_reg mask = brw_vec1_grf(payload.num_regs + mask_param / 8, - mask_param % 8); + struct brw_reg mask = brw_vec1_grf(payload().num_regs + mask_param / 8, + mask_param % 8); fs_reg b32; for (unsigned i = 0; i < 64; i++) { @@ -1708,7 +1708,7 @@ fs_visitor::assign_curb_setup() if (want_zero & BITFIELD64_BIT(i)) { assert(i < prog_data->curb_read_length); struct brw_reg push_reg = - retype(brw_vec8_grf(payload.num_regs + i, 0), + retype(brw_vec8_grf(payload().num_regs + i, 0), BRW_REGISTER_TYPE_D); ubld.AND(push_reg, push_reg, component(b32, i % 16)); @@ -1719,7 
+1719,7 @@ fs_visitor::assign_curb_setup() } /* This may be updated in assign_urb_setup or assign_vs_urb_setup. */ - this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length; + this->first_non_payload_grf = payload().num_regs + prog_data->curb_read_length; } /* @@ -1956,7 +1956,7 @@ fs_visitor::assign_urb_setup() assert(stage == MESA_SHADER_FRAGMENT); struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); - int urb_start = payload.num_regs + prog_data->base.curb_read_length; + int urb_start = payload().num_regs + prog_data->base.curb_read_length; /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. @@ -2000,7 +2000,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst) { for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == ATTR) { - int grf = payload.num_regs + + int grf = payload().num_regs + prog_data->curb_read_length + inst->src[i].nr + inst->src[i].offset / REG_SIZE; @@ -5852,78 +5852,11 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con fprintf(file, "\n"); } -void -fs_visitor::setup_fs_payload_gfx6() -{ - assert(stage == MESA_SHADER_FRAGMENT); - struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); - const unsigned payload_width = MIN2(16, dispatch_width); - assert(dispatch_width % payload_width == 0); - assert(devinfo->ver >= 6); - - /* R0: PS thread payload header. */ - payload.num_regs++; - - for (unsigned j = 0; j < dispatch_width / payload_width; j++) { - /* R1: masks, pixel X/Y coordinates. */ - payload.subspan_coord_reg[j] = payload.num_regs++; - } - - for (unsigned j = 0; j < dispatch_width / payload_width; j++) { - /* R3-26: barycentric interpolation coordinates. These appear in the - * same order that they appear in the brw_barycentric_mode enum. 
Each - * set of coordinates occupies 2 registers if dispatch width == 8 and 4 - * registers if dispatch width == 16. Coordinates only appear if they - * were enabled using the "Barycentric Interpolation Mode" bits in - * WM_STATE. - */ - for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { - if (prog_data->barycentric_interp_modes & (1 << i)) { - payload.barycentric_coord_reg[i][j] = payload.num_regs; - payload.num_regs += payload_width / 4; - } - } - - /* R27-28: interpolated depth if uses source depth */ - if (prog_data->uses_src_depth) { - payload.source_depth_reg[j] = payload.num_regs; - payload.num_regs += payload_width / 8; - } - - /* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */ - if (prog_data->uses_src_w) { - payload.source_w_reg[j] = payload.num_regs; - payload.num_regs += payload_width / 8; - } - - /* R31: MSAA position offsets. */ - if (prog_data->uses_pos_offset) { - payload.sample_pos_reg[j] = payload.num_regs; - payload.num_regs++; - } - - /* R32-33: MSAA input coverage mask */ - if (prog_data->uses_sample_mask) { - assert(devinfo->ver >= 7); - payload.sample_mask_in_reg[j] = payload.num_regs; - payload.num_regs += payload_width / 8; - } - - /* R66: Source Depth and/or W Attribute Vertex Deltas */ - if (prog_data->uses_depth_w_coefficients) { - payload.depth_w_coef_reg[j] = payload.num_regs; - payload.num_regs++; - } - } - - if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - source_depth_to_render_target = true; - } -} - void fs_visitor::setup_vs_payload() { + thread_payload &payload = this->payload(); + /* R0: thread header, R1: urb handles */ payload.num_regs = 2; } @@ -5932,6 +5865,7 @@ void fs_visitor::setup_gs_payload() { assert(stage == MESA_SHADER_GEOMETRY); + thread_payload &payload = this->payload(); struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); @@ -5974,6 +5908,8 @@ fs_visitor::setup_gs_payload() void 
fs_visitor::setup_cs_payload() { + thread_payload &payload = this->payload(); + assert(devinfo->ver >= 7); /* TODO: Fill out uses_btd_stack_ids automatically */ payload.num_regs = 1 + brw_cs_prog_data(prog_data)->uses_btd_stack_ids; @@ -6687,6 +6623,7 @@ bool fs_visitor::run_tcs() { assert(stage == MESA_SHADER_TESS_CTRL); + thread_payload &payload = this->payload(); struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); @@ -6761,7 +6698,7 @@ fs_visitor::run_tes() assert(stage == MESA_SHADER_TESS_EVAL); /* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */ - payload.num_regs = 5; + payload().num_regs = 5; emit_nir_code(); @@ -6863,10 +6800,8 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) assert(stage == MESA_SHADER_FRAGMENT); - if (devinfo->ver >= 6) - setup_fs_payload_gfx6(); - else - setup_fs_payload_gfx4(); + delete payload_; /* free the generic payload allocated by init() before replacing it */ + payload_ = new fs_thread_payload(*this, source_depth_to_render_target, runtime_check_aads_emit); if (0) { emit_dummy_fs(); @@ -6971,7 +6906,7 @@ fs_visitor::run_bs(bool allow_spilling) assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE); /* R0: thread header, R1: stack IDs, R2: argument addresses */ - payload.num_regs = 3; + payload().num_regs = 3; emit_nir_code(); @@ -7017,7 +6952,7 @@ fs_visitor::run_task(bool allow_spilling) * Inline parameter is optional but always present since we use it to pass * the address to descriptors. */ - payload.num_regs = dispatch_width == 32 ? 4 : 3; + payload().num_regs = dispatch_width == 32 ? 4 : 3; emit_nir_code(); @@ -7064,7 +6999,7 @@ fs_visitor::run_mesh(bool allow_spilling) * Inline parameter is optional but always present since we use it to pass * the address to descriptors. */ - payload.num_regs = dispatch_width == 32 ? 4 : 3; + payload().num_regs = dispatch_width == 32 ? 
4 : 3; emit_nir_code(); @@ -7437,7 +7372,7 @@ brw_compile_fs(const struct brw_compiler *compiler, return NULL; } else if (!INTEL_DEBUG(DEBUG_NO8)) { simd8_cfg = v8->cfg; - prog_data->base.dispatch_grf_start_reg = v8->payload.num_regs; + prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs; prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used); const performance &perf = v8->performance_analysis.require(); throughput = MAX2(throughput, perf.throughput); @@ -7481,7 +7416,7 @@ brw_compile_fs(const struct brw_compiler *compiler, v16->fail_msg); } else { simd16_cfg = v16->cfg; - prog_data->dispatch_grf_start_reg_16 = v16->payload.num_regs; + prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs; prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used); const performance &perf = v16->performance_analysis.require(); throughput = MAX2(throughput, perf.throughput); @@ -7514,7 +7449,7 @@ brw_compile_fs(const struct brw_compiler *compiler, "SIMD32 shader inefficient\n"); } else { simd32_cfg = v32->cfg; - prog_data->dispatch_grf_start_reg_32 = v32->payload.num_regs; + prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs; prog_data->reg_blocks_32 = brw_register_blocks(v32->grf_used); throughput = MAX2(throughput, perf.throughput); } diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index c22ce4566c9..728ecc81d4c 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -89,9 +89,15 @@ struct shader_stats { struct thread_payload { /** The number of thread payload registers the hardware will supply. 
*/ uint8_t num_regs; + + virtual ~thread_payload() = default; }; struct fs_thread_payload : public thread_payload { + fs_thread_payload(const fs_visitor &v, + bool &source_depth_to_render_target, + bool &runtime_check_aads_emit); + uint8_t subspan_coord_reg[2]; uint8_t source_depth_reg[2]; uint8_t source_w_reg[2]; @@ -150,8 +156,6 @@ public: bool run_mesh(bool allow_spilling); void optimize(); void allocate_registers(bool allow_spilling); - void setup_fs_payload_gfx4(); - void setup_fs_payload_gfx6(); void setup_vs_payload(); void setup_gs_payload(); void setup_cs_payload(); @@ -411,7 +415,16 @@ public: bool failed; char *fail_msg; - fs_thread_payload payload; + thread_payload *payload_; + + thread_payload &payload() { + return *this->payload_; + } + + fs_thread_payload &fs_payload() { + assert(stage == MESA_SHADER_FRAGMENT); /* only FS installs an fs_thread_payload */ + return *static_cast<fs_thread_payload *>(this->payload_); + } bool source_depth_to_render_target; bool runtime_check_aads_emit; diff --git a/src/intel/compiler/brw_fs_thread_payload.cpp b/src/intel/compiler/brw_fs_thread_payload.cpp new file mode 100644 index 00000000000..f1429963e96 --- /dev/null +++ b/src/intel/compiler/brw_fs_thread_payload.cpp @@ -0,0 +1,264 @@ +/* + * Copyright © 2006-2022 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_fs.h" + +using namespace brw; + +static inline void +setup_fs_payload_gfx6(fs_thread_payload &payload, + const fs_visitor &v, + bool &source_depth_to_render_target) +{ + struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data); + + const unsigned payload_width = MIN2(16, v.dispatch_width); + assert(v.dispatch_width % payload_width == 0); + assert(v.devinfo->ver >= 6); + + payload.num_regs = 0; + + /* R0: PS thread payload header. */ + payload.num_regs++; + + for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) { + /* R1: masks, pixel X/Y coordinates. */ + payload.subspan_coord_reg[j] = payload.num_regs++; + } + + for (unsigned j = 0; j < v.dispatch_width / payload_width; j++) { + /* R3-26: barycentric interpolation coordinates. These appear in the + * same order that they appear in the brw_barycentric_mode enum. Each + * set of coordinates occupies 2 registers if dispatch width == 8 and 4 + * registers if dispatch width == 16. Coordinates only appear if they + * were enabled using the "Barycentric Interpolation Mode" bits in + * WM_STATE. 
+ */ + for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { + if (prog_data->barycentric_interp_modes & (1 << i)) { + payload.barycentric_coord_reg[i][j] = payload.num_regs; + payload.num_regs += payload_width / 4; + } + } + + /* R27-28: interpolated depth if uses source depth */ + if (prog_data->uses_src_depth) { + payload.source_depth_reg[j] = payload.num_regs; + payload.num_regs += payload_width / 8; + } + + /* R29-30: interpolated W set if GFX6_WM_USES_SOURCE_W. */ + if (prog_data->uses_src_w) { + payload.source_w_reg[j] = payload.num_regs; + payload.num_regs += payload_width / 8; + } + + /* R31: MSAA position offsets. */ + if (prog_data->uses_pos_offset) { + payload.sample_pos_reg[j] = payload.num_regs; + payload.num_regs++; + } + + /* R32-33: MSAA input coverage mask */ + if (prog_data->uses_sample_mask) { + assert(v.devinfo->ver >= 7); + payload.sample_mask_in_reg[j] = payload.num_regs; + payload.num_regs += payload_width / 8; + } + + /* R66: Source Depth and/or W Attribute Vertex Deltas */ + if (prog_data->uses_depth_w_coefficients) { + payload.depth_w_coef_reg[j] = payload.num_regs; + payload.num_regs++; + } + } + + if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + source_depth_to_render_target = true; + } +} + +#undef P /* promoted depth */ +#undef C /* computed */ +#undef N /* non-promoted? 
*/ + +#define P 0 +#define C 1 +#define N 2 + +static const struct { + GLuint mode:2; + GLuint sd_present:1; + GLuint sd_to_rt:1; + GLuint dd_present:1; + GLuint ds_present:1; +} wm_iz_table[BRW_WM_IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +/** + * key->line_aa is BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES; + * key->iz_lookup is a bitmask of BRW_WM_IZ_* flags indexing wm_iz_table. + */ +static inline void +setup_fs_payload_gfx4(fs_thread_payload &payload, + const fs_visitor &v, + bool &source_depth_to_render_target, + bool &runtime_check_aads_emit) +{ + assert(v.dispatch_width <= 16); + + struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data); + brw_wm_prog_key *key = 
(brw_wm_prog_key *) v.key; + + GLuint reg = 1; + bool kill_stats_promoted_workaround = false; + int lookup = key->iz_lookup; + + assert(lookup < BRW_WM_IZ_BIT_MAX); + + /* Crazy workaround in the windowizer, which we need to track in + * our register allocation and render target writes. See the "If + * statistics are enabled..." paragraph of 11.5.3.2: Early Depth + * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. + */ + if (key->stats_wm && + (lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) && + wm_iz_table[lookup].mode == P) { + kill_stats_promoted_workaround = true; + } + + payload.subspan_coord_reg[0] = reg++; + + if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth || + kill_stats_promoted_workaround) { + payload.source_depth_reg[0] = reg; + reg += 2; + } + + if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) + source_depth_to_render_target = true; + + if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) { + payload.aa_dest_stencil_reg[0] = reg; + runtime_check_aads_emit = + !wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES; + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + payload.dest_depth_reg[0] = reg; + reg+=2; + } + + payload.num_regs = reg; +} + +#undef P /* promoted depth */ +#undef C /* computed */ +#undef N /* non-promoted? 
*/ + +fs_thread_payload::fs_thread_payload(const fs_visitor &v, + bool &source_depth_to_render_target, + bool &runtime_check_aads_emit) + : subspan_coord_reg(), + source_depth_reg(), + source_w_reg(), + aa_dest_stencil_reg(), + dest_depth_reg(), + sample_pos_reg(), + sample_mask_in_reg(), + depth_w_coef_reg(), + barycentric_coord_reg(), + local_invocation_id_reg() +{ + if (v.devinfo->ver >= 6) + setup_fs_payload_gfx6(*this, v, source_depth_to_render_target); + else + setup_fs_payload_gfx4(*this, v, source_depth_to_render_target, + runtime_check_aads_emit); +} diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index d5fe7df9211..12e25bc8919 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -194,7 +194,7 @@ fs_visitor::emit_interpolation_setup_gfx4() abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart); } - this->pixel_z = fetch_payload_reg(bld, payload.source_depth_reg); + this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg); /* The SF program automatically handles doing the perspective correction or * not based on wm_prog_data::interp_mode[] so we can use the same pixel @@ -469,7 +469,7 @@ fs_visitor::emit_interpolation_setup_gfx6() * pixels locations, here we recompute the Z value with 2 coefficients * in X & Y axis. 
*/ - fs_reg coef_payload = fetch_payload_reg(abld, payload.depth_w_coef_reg, BRW_REGISTER_TYPE_F); + fs_reg coef_payload = fetch_payload_reg(abld, fs_payload().depth_w_coef_reg, BRW_REGISTER_TYPE_F); const fs_reg x_start = brw_vec1_grf(coef_payload.nr, 2); const fs_reg y_start = brw_vec1_grf(coef_payload.nr, 6); const fs_reg z_cx = brw_vec1_grf(coef_payload.nr, 1); @@ -507,19 +507,19 @@ fs_visitor::emit_interpolation_setup_gfx6() if (wm_prog_data->uses_src_depth) { assert(!wm_prog_data->uses_depth_w_coefficients); - this->pixel_z = fetch_payload_reg(bld, payload.source_depth_reg); + this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg); } if (wm_prog_data->uses_src_w) { abld = bld.annotate("compute pos.w"); - this->pixel_w = fetch_payload_reg(abld, payload.source_w_reg); + this->pixel_w = fetch_payload_reg(abld, fs_payload().source_w_reg); this->wpos_w = vgrf(glsl_type::float_type); abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); } for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { this->delta_xy[i] = fetch_barycentric_reg( - bld, payload.barycentric_coord_reg[i]); + bld, fs_payload().barycentric_coord_reg[i]); } uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes & @@ -622,7 +622,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); /* Hand over gl_FragDepth or the payload depth. */ - const fs_reg dst_depth = fetch_payload_reg(bld, payload.dest_depth_reg); + const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg); fs_reg src_depth, src_stencil; if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { @@ -636,7 +636,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, * explicitly the pass-through case. 
*/ assert(devinfo->ver <= 5); - src_depth = fetch_payload_reg(bld, payload.source_depth_reg); + src_depth = fetch_payload_reg(bld, fs_payload().source_depth_reg); } if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) @@ -1214,7 +1214,6 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, init(); } - void fs_visitor::init() { @@ -1233,7 +1232,7 @@ fs_visitor::init() this->nir_ssa_values = NULL; this->nir_system_values = NULL; - memset(&this->payload, 0, sizeof(this->payload)); + this->payload_ = new thread_payload(); this->source_depth_to_render_target = false; this->runtime_check_aads_emit = false; this->first_non_payload_grf = 0; @@ -1254,4 +1253,5 @@ fs_visitor::init() fs_visitor::~fs_visitor() { + delete this->payload_; } diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 17bb2cedb38..f469ee01865 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -2681,7 +2681,7 @@ fs_visitor::lower_logical_sends() lower_fb_write_logical_send(ibld, inst, brw_wm_prog_data(prog_data), (const brw_wm_prog_key *)key, - payload); + fs_payload()); break; case FS_OPCODE_FB_READ_LOGICAL: diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 3156938b6ab..1ac7cb3965a 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -1211,9 +1211,9 @@ fs_visitor::nir_emit_task_mesh_intrinsic(const fs_builder &bld, switch (instr->intrinsic) { case nir_intrinsic_load_mesh_inline_data_intel: - assert(payload.num_regs == 3 || payload.num_regs == 4); + assert(payload().num_regs == 3 || payload().num_regs == 4); /* Inline Parameter is the last element of the payload. 
*/ - bld.MOV(dest, retype(brw_vec1_grf(payload.num_regs - 1, + bld.MOV(dest, retype(brw_vec1_grf(payload().num_regs - 1, nir_intrinsic_align_offset(instr)), dest.type)); break; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 8f78b5629cd..e0521d1647f 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1420,7 +1420,7 @@ brw_compile_tes(const struct brw_compiler *compiler, return NULL; } - prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; + prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs; prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, params->log_data, mem_ctx, diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 884b3c8cc52..6d767254221 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2642,7 +2642,7 @@ brw_compile_vs(const struct brw_compiler *compiler, return NULL; } - prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; + prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs; fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base.base, v.runtime_check_aads_emit, diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index 0c69c214887..ac998586125 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -823,7 +823,7 @@ brw_compile_gs(const struct brw_compiler *compiler, debug_enabled); if (v.run_gs()) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; - prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; + prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs; fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base.base, false, MESA_SHADER_GEOMETRY); diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index e4a5d1e66bc..ecb4775ab18 100644 
--- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -453,7 +453,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, return NULL; } - prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; + prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs; fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base.base, false, MESA_SHADER_TESS_CTRL); diff --git a/src/intel/compiler/brw_wm_iz.cpp b/src/intel/compiler/brw_wm_iz.cpp deleted file mode 100644 index d4ef05a4a8d..00000000000 --- a/src/intel/compiler/brw_wm_iz.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_fs.h" - - -#undef P /* prompted depth */ -#undef C /* computed */ -#undef N /* non-promoted? */ - -#define P 0 -#define C 1 -#define N 2 - -static const struct { - GLuint mode:2; - GLuint sd_present:1; - GLuint sd_to_rt:1; - GLuint dd_present:1; - GLuint ds_present:1; -} wm_iz_table[BRW_WM_IZ_BIT_MAX] = -{ - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { N, 0, 1, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 0 }, - { C, 0, 1, 1, 0 }, - { C, 0, 1, 1, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 1 }, - { N, 0, 1, 0, 1 }, - { N, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { N, 1, 1, 0, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { C, 0, 0, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { C, 0, 1, 0, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { P, 0, 0, 0, 0 }, - { C, 1, 1, 1, 1 }, - { C, 0, 1, 1, 1 }, - { C, 0, 1, 1, 1 } -}; - -/** - * \param line_aa BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES - * \param lookup bitmask of BRW_WM_IZ_* flags - */ -void fs_visitor::setup_fs_payload_gfx4() -{ - assert(stage == 
MESA_SHADER_FRAGMENT); - assert(dispatch_width <= 16); - struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - GLuint reg = 1; - bool kill_stats_promoted_workaround = false; - int lookup = key->iz_lookup; - - assert(lookup < BRW_WM_IZ_BIT_MAX); - - /* Crazy workaround in the windowizer, which we need to track in - * our register allocation and render target writes. See the "If - * statistics are enabled..." paragraph of 11.5.3.2: Early Depth - * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. - */ - if (key->stats_wm && - (lookup & BRW_WM_IZ_PS_KILL_ALPHATEST_BIT) && - wm_iz_table[lookup].mode == P) { - kill_stats_promoted_workaround = true; - } - - payload.subspan_coord_reg[0] = reg++; - - if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth || - kill_stats_promoted_workaround) { - payload.source_depth_reg[0] = reg; - reg += 2; - } - - if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) - source_depth_to_render_target = true; - - if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) { - payload.aa_dest_stencil_reg[0] = reg; - runtime_check_aads_emit = - !wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES; - reg++; - } - - if (wm_iz_table[lookup].dd_present) { - payload.dest_depth_reg[0] = reg; - reg+=2; - } - - payload.num_regs = reg; -} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index 5fc53b82c73..bc01df07ecc 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -65,6 +65,7 @@ libintel_compiler_files = files( 'brw_fs_saturate_propagation.cpp', 'brw_fs_scoreboard.cpp', 'brw_fs_sel_peephole.cpp', + 'brw_fs_thread_payload.cpp', 'brw_fs_validate.cpp', 'brw_fs_visitor.cpp', 'brw_inst.h', @@ -139,7 +140,6 @@ libintel_compiler_files = files( 'brw_vec4_vs_visitor.cpp', 'brw_vec4_vs.h', 'brw_vue_map.c', - 'brw_wm_iz.cpp', 'gfx6_gs_visitor.cpp', 
'gfx6_gs_visitor.h', )