diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 57ab81e0552..03eb7e1a7d7 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -479,6 +479,7 @@ struct brw_wm_prog_key { bool force_dual_color_blend:1; bool coherent_fb_fetch:1; bool ignore_sample_mask_out:1; + bool coarse_pixel:1; uint8_t color_outputs_valid; uint64_t input_slots_valid; @@ -852,6 +853,11 @@ struct brw_wm_prog_data { bool contains_flat_varying; bool contains_noperspective_varying; + /** + * Shader is run at the coarse pixel shading dispatch rate (3DSTATE_CPS). + */ + bool per_coarse_pixel_dispatch; + /** * Mask of which interpolation modes are required by the fragment shader. * Used in hardware setup on gfx6+. diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 6e3312ed5d7..4dbaeabce7e 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -1972,6 +1972,9 @@ brw_disassemble_inst(FILE *file, const struct intel_device_info *devinfo, string(file, " Hi"); if (brw_fb_write_desc_last_render_target(devinfo, imm_desc)) string(file, " LastRT"); + if (devinfo->ver >= 10 && + brw_fb_write_desc_coarse_write(devinfo, imm_desc)) + string(file, " CoarseWrite"); if (devinfo->ver < 7 && brw_fb_write_desc_write_commit(devinfo, imm_desc)) string(file, " WriteCommit"); diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 4d2197f1885..d84af2b73b4 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1078,16 +1078,20 @@ static inline uint32_t brw_fb_write_desc(const struct intel_device_info *devinfo, unsigned binding_table_index, unsigned msg_control, - bool last_render_target) + bool last_render_target, + bool coarse_write) { const unsigned msg_type = devinfo->ver >= 6 ? 
GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE : BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + assert(devinfo->ver >= 10 || !coarse_write); + if (devinfo->ver >= 6) { return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) | - SET_BITS(last_render_target, 12, 12); + SET_BITS(last_render_target, 12, 12) | + SET_BITS(coarse_write, 18, 18); } else { return (SET_BITS(binding_table_index, 7, 0) | SET_BITS(msg_control, 11, 8) | @@ -1137,6 +1141,14 @@ brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo, return GET_BITS(desc, 15, 15); } +static inline bool +brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->ver >= 10); + return GET_BITS(desc, 18, 18); +} + static inline uint32_t brw_mdc_sm2(unsigned exec_size) { @@ -1202,12 +1214,15 @@ static inline uint32_t brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo, unsigned msg_type, bool noperspective, + bool coarse_pixel_rate, unsigned simd_mode, unsigned slot_group) { + assert(devinfo->ver >= 10 || !coarse_pixel_rate); return (SET_BITS(slot_group, 11, 11) | SET_BITS(msg_type, 13, 12) | SET_BITS(!!noperspective, 14, 14) | + SET_BITS(coarse_pixel_rate, 15, 15) | SET_BITS(simd_mode, 16, 16)); } @@ -1453,6 +1468,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, bool noperspective, + bool coarse_pixel_rate, unsigned mode, struct brw_reg data, unsigned msg_length, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index fbe6c31168a..138c48ed251 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2463,7 +2463,8 @@ brw_fb_WRITE(struct brw_codegen *p, brw_message_desc(devinfo, msg_length, response_length, header_present) | brw_fb_write_desc(devinfo, binding_table_index, msg_control, - last_render_target)); + last_render_target, + false /* coarse_write */)); brw_inst_set_eot(devinfo, insn, eot); return insn; 
@@ -3247,6 +3248,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, bool noperspective, + bool coarse_pixel_rate, unsigned mode, struct brw_reg data, unsigned msg_length, @@ -3258,8 +3260,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, const unsigned simd_mode = (exec_size == BRW_EXECUTE_16); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, false) | - brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode, - slot_group); + brw_pixel_interp_desc(devinfo, mode, noperspective, coarse_pixel_rate, + simd_mode, slot_group); /* brw_send_indirect_message will automatically use a direct send message * if data is actually immediate. diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 0a4dfa90112..5e9a4a5a330 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4663,7 +4663,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, inst->desc = (inst->group / 16) << 11 | /* rt slot group */ - brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt); + brw_fb_write_desc(devinfo, inst->target, msg_ctl, inst->last_rt, + prog_data->per_coarse_pixel_dispatch); uint32_t ex_desc = 0; if (devinfo->ver >= 11) { @@ -5340,7 +5341,7 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, simd_mode, 0 /* return_format unused on gfx7+ */); inst->src[0] = brw_imm_ud(0); - inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[1] = brw_imm_ud(0); } else if (surface_handle.file != BAD_FILE) { /* Bindless surface */ assert(devinfo->ver >= 9); @@ -5398,6 +5399,8 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, inst->src[1] = brw_imm_ud(0); /* ex_desc */ } + inst->ex_desc = 0; + inst->src[2] = src_payload; inst->resize_sources(3); @@ -9103,6 +9106,13 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader, prog_data->barycentric_interp_modes = 
brw_compute_barycentric_interp_modes(devinfo, shader); + prog_data->per_coarse_pixel_dispatch = + key->coarse_pixel && + !prog_data->persample_dispatch && + !prog_data->uses_sample_mask && + (prog_data->computed_depth_mode == BRW_PSCDEPTH_OFF) && + !prog_data->computed_stencil; + calculate_urb_setup(devinfo, key, prog_data, shader); brw_compute_flat_inputs(prog_data, shader); } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 89c5147ebe2..cdd361e138c 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1760,11 +1760,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, assert(msg_data.type == BRW_REGISTER_TYPE_UD); assert(inst->size_written % REG_SIZE == 0); + struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); + brw_pixel_interpolator_query(p, retype(dst, BRW_REGISTER_TYPE_UW), /* If we don't have a payload, what we send doesn't matter */ has_payload ? src : brw_vec8_grf(0, 0), inst->pi_noperspective, + prog_data->per_coarse_pixel_dispatch, msg_type, msg_data, has_payload ? 2 * inst->exec_size / 8 : 1,