diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 81ec18ddfa4..78f7d40a84e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4048,6 +4048,16 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, header_size = 1; sources[0] = fs_reg(); length++; + + /* If we're requesting fewer than four channels worth of response, + * and we have an explicit header, we need to set up the sampler + * writemask. It's reversed from normal: 1 means "don't write". + */ + if (inst->regs_written != 4 * reg_width) { + assert((inst->regs_written % reg_width) == 0); + unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf; + inst->offset |= mask << 12; + } } if (shadow_c.file != BAD_FILE) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index bcd2e3e9b12..a5c3297e5a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -213,7 +213,8 @@ public: uint32_t surface, fs_reg surface_reg, uint32_t sampler, - fs_reg sampler_reg); + fs_reg sampler_reg, + unsigned return_channels); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, const fs_reg &sampler); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 43d37453425..3d7013f1bdd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3224,14 +3224,25 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) unreachable("unknown texture opcode"); } + unsigned num_components = nir_tex_instr_dest_size(instr); + + if (instr->dest.is_ssa) { + uint8_t write_mask = nir_ssa_def_components_read(&instr->dest.ssa); + assert(write_mask != 0); /* dead code should have been eliminated */ + num_components = _mesa_fls(write_mask); + } + + const bool can_reduce_return_length = devinfo->gen >= 9 && + instr->op != nir_texop_tg4 && instr->op != nir_texop_query_levels; + emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, is_cube_array, - texture, texture_reg, sampler, sampler_reg); + texture, texture_reg, sampler, sampler_reg, + can_reduce_return_length ? num_components : 4); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; - unsigned num_components = nir_tex_instr_dest_size(instr); emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), dest, this->result), (1 << num_components) - 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index daabf708b06..da29f0be8b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -115,7 +115,8 @@ fs_visitor::emit_texture(ir_texture_opcode op, uint32_t surface, fs_reg surface_reg, uint32_t sampler, - fs_reg sampler_reg) + fs_reg sampler_reg, + unsigned return_channels) { fs_inst *inst = NULL; @@ -204,7 +205,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, } inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); - inst->regs_written = 4 * dispatch_width / 8; + inst->regs_written = return_channels * dispatch_width / 8; if (shadow_c.file != BAD_FILE) inst->shadow_compare = true;