brw: Add brw_tex_inst

Incorporate some "control sources" directly into the instruction.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
Authored by Caio Oliveira on 2025-08-21 00:02:14 -07:00; committed by Marge Bot
parent 0fcce2722f
commit f0f1e63f99
11 changed files with 188 additions and 175 deletions
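To make the change concrete, here is a minimal sketch of the emission pattern before and after this commit, using only names that appear in the hunks below. The locals (bld, dst, coordinate, surface) are stand-ins for whatever the caller has in scope and are assumptions for illustration, not code from the tree.

   /* Before: component counts and the residency request ride along as
    * immediate "control sources" in the logical source array. */
   brw_reg srcs[TEX_LOGICAL_NUM_SRCS] = {};
   srcs[TEX_LOGICAL_SRC_COORDINATE]       = coordinate;
   srcs[TEX_LOGICAL_SRC_SURFACE]          = surface;
   srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
   srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS]  = brw_imm_ud(0);
   srcs[TEX_LOGICAL_SRC_RESIDENCY]        = brw_imm_ud(0);
   brw_inst *inst = bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst, srcs,
                             TEX_LOGICAL_NUM_SRCS);
   inst->size_written = 4 * REG_SIZE;

   /* After: the same information lives in fields of the new brw_tex_inst
    * subclass, reached through the as_tex() downcast helper. */
   brw_reg srcs2[TEX_LOGICAL_NUM_SRCS] = {};
   srcs2[TEX_LOGICAL_SRC_COORDINATE] = coordinate;
   srcs2[TEX_LOGICAL_SRC_SURFACE]    = surface;
   brw_tex_inst *tex = bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst, srcs2,
                                TEX_LOGICAL_NUM_SRCS)->as_tex();
   tex->size_written     = 4 * REG_SIZE;
   tex->coord_components = 2;
   tex->grad_components  = 0;
   tex->residency        = false;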

View file

@@ -630,12 +630,6 @@ enum tex_logical_srcs {
TEX_LOGICAL_SRC_SAMPLER_HANDLE,
/** Texel offset for gathers */
TEX_LOGICAL_SRC_TG4_OFFSET,
/** REQUIRED: Number of coordinate components (as UD immediate) */
TEX_LOGICAL_SRC_COORD_COMPONENTS,
/** REQUIRED: Number of derivative components (as UD immediate) */
TEX_LOGICAL_SRC_GRAD_COMPONENTS,
/** REQUIRED: request residency (as UD immediate) */
TEX_LOGICAL_SRC_RESIDENCY,
TEX_LOGICAL_NUM_SRCS,
};

View file

@@ -3632,17 +3632,15 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon
srcs[TEX_LOGICAL_SRC_SURFACE] = texture;
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle;
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0);
brw_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs));
brw_tex_inst *tex = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
ARRAY_SIZE(srcs))->as_tex();
tex->coord_components = components;
/* We only care about one or two regs of response, but the sampler always
* writes 4/8.
*/
inst->size_written = 4 * dest.component_size(inst->exec_size);
tex->size_written = 4 * dest.component_size(tex->exec_size);
return dest;
}
@@ -3713,14 +3711,12 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const b
srcs[TEX_LOGICAL_SRC_MCS] = mcs;
srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target);
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(3);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
brw_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
tex->size_written = 4 * tex->dst.component_size(tex->exec_size);
tex->coord_components = 3;
return inst;
return tex;
}
/**
@@ -6021,9 +6017,6 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
else
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = image;
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0);
/* Since the image size is always uniform, we can just emit a SIMD8
* query instruction and splat the result out.
@@ -6031,8 +6024,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
const brw_builder ubld = bld.scalar_group();
brw_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4);
brw_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
tmp, srcs, ARRAY_SIZE(srcs));
brw_tex_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
tmp, srcs, ARRAY_SIZE(srcs))->as_tex();
inst->size_written = 4 * REG_SIZE * reg_unit(devinfo);
for (unsigned c = 0; c < instr->def.num_components; ++c) {
@@ -7430,8 +7423,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
*/
assert(!instr->is_sparse || srcs[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(instr->is_sparse);
int lod_components = 0;
/* The hardware requires a LOD for buffer textures */
@@ -7612,9 +7603,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]);
}
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
enum opcode opcode;
switch (instr->op) {
case nir_texop_tex:
@@ -7741,9 +7729,12 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
brw_allocate_vgrf_units(*bld.shader, total_regs * reg_unit(devinfo)),
dst_type);
brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
inst->offset = header_bits;
inst->size_written = total_regs * grf_size;
brw_tex_inst *tex = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
tex->offset = header_bits;
tex->size_written = total_regs * grf_size;
tex->residency = instr->is_sparse;
tex->coord_components = instr->coord_components;
tex->grad_components = lod_components;
/* Wa_14012688258:
*
@@ -7758,7 +7749,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
assert(instr->coord_components >= 3u);
/* See opt_zero_samples(). */
inst->keep_payload_trailing_zeros = true;
tex->keep_payload_trailing_zeros = true;
}
/* With half-floats returns, the stride into a GRF allocation for each
@@ -7781,7 +7772,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
if (instr->op != nir_texop_query_levels && !instr->is_sparse &&
!non_aligned_component_stride) {
/* In most cases we can write directly to the result. */
inst->dst = nir_def_reg;
tex->dst = nir_def_reg;
} else {
/* In other cases, we have to reorganize the sampler message's results
* a bit to match the NIR intrinsic's expectations.

View file

@@ -14,10 +14,13 @@
static inline unsigned
brw_inst_kind_size(brw_inst_kind kind)
{
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_tex_inst));
/* TODO: Temporarily here to ensure all instructions can be converted to
* SEND. Once all new kinds are added, change this so that BASE allocates only
* sizeof(brw_inst).
*/
return sizeof(brw_send_inst);
}
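The value returned here appears to be used as the per-instruction allocation size (the TODO above hints at it), so every instruction gets a slot large enough for the largest subclass; that is what lets lower_sampler_logical_send() further down convert a brw_tex_inst into a brw_send_inst in place via brw_transform_inst_to_send(). A rough sketch of that sizing relationship follows; the rzalloc-based call site is illustrative only, not the allocator actually used in the tree:

   /* Hypothetical allocation site: reserve the kind-dependent size so a later
    * in-place rewrite (e.g. logical texture op -> SEND) cannot overflow. */
   const brw_inst_kind kind = brw_inst_kind_for_opcode(opcode);
   brw_inst *inst = (brw_inst *) rzalloc_size(mem_ctx, brw_inst_kind_size(kind));
   assert(brw_inst_kind_size(kind) >= sizeof(brw_inst));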
@@ -154,6 +157,27 @@ brw_inst_kind_for_opcode(enum opcode opcode)
case SHADER_OPCODE_INTERLOCK:
return BRW_KIND_SEND;
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXL_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
case SHADER_OPCODE_TXF_MCS_LOGICAL:
case SHADER_OPCODE_LOD_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
return BRW_KIND_TEX;
default:
return BRW_KIND_BASE;
}
@@ -436,17 +460,15 @@ brw_inst::components_read(unsigned i) const
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM &&
src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM &&
src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
case SHADER_OPCODE_SAMPLEINFO_LOGICAL: {
const brw_tex_inst *tex = as_tex();
/* Texture coordinates. */
if (i == TEX_LOGICAL_SRC_COORDINATE)
return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
return tex->coord_components;
/* Texture derivatives. */
else if ((i == TEX_LOGICAL_SRC_LOD || i == TEX_LOGICAL_SRC_LOD2) &&
opcode == SHADER_OPCODE_TXD_LOGICAL)
return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
return tex->grad_components;
/* Texture offset. */
else if (i == TEX_LOGICAL_SRC_TG4_OFFSET)
return 2;
@@ -460,6 +482,7 @@ brw_inst::components_read(unsigned i) const
return 1;
} else
return 1;
}
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
if (i == MEMORY_LOGICAL_DATA0)
@@ -663,27 +686,8 @@ brw_inst::flags_written(const intel_device_info *devinfo) const
bool
brw_inst::has_sampler_residency() const
{
switch (opcode) {
case SHADER_OPCODE_TEX_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXL_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL:
case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_BIAS_LOGICAL:
case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
assert(src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
return src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
default:
return false;
}
const brw_tex_inst *tex = as_tex();
return tex && tex->residency;
}
/* \sa inst_is_raw_move in brw_eu_validate. */

View file

@@ -42,6 +42,7 @@ struct brw_shader;
enum ENUM_PACKED brw_inst_kind {
BRW_KIND_BASE,
BRW_KIND_SEND,
BRW_KIND_TEX,
};
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
@@ -68,6 +69,7 @@ struct brw_inst : brw_exec_node {
}
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX);
#undef KIND_HELPERS
@@ -270,6 +272,12 @@ struct brw_send_inst : brw_inst {
};
};
struct brw_tex_inst : brw_inst {
uint8_t coord_components;
uint8_t grad_components;
bool residency;
};
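For reference, the KIND_HELPERS invocation above presumably expands to checked downcast helpers along these lines; this is a sketch inferred from how callers such as has_sampler_residency() null-check the result, not the macro's literal expansion:

   /* Presumed shape of the as_tex() helpers: typed pointer when the kind
    * matches, NULL otherwise. */
   brw_tex_inst *as_tex() {
      return kind == BRW_KIND_TEX ? static_cast<brw_tex_inst *>(this) : NULL;
   }
   const brw_tex_inst *as_tex() const {
      return kind == BRW_KIND_TEX ? static_cast<const brw_tex_inst *>(this) : NULL;
   }

Because as_tex() returns NULL for every other kind, passes can branch on it directly (as the printer and has_sampler_residency() do) instead of maintaining per-opcode switch statements.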
/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.

View file

@@ -841,38 +841,32 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo,
}
static void
lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex)
{
const intel_device_info *devinfo = bld.shader->devinfo;
const brw_compiler *compiler = bld.shader->compiler;
const brw_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
const brw_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
const brw_reg lod = inst->src[TEX_LOGICAL_SRC_LOD];
const brw_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
const brw_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD];
const brw_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
const brw_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS];
const brw_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
const brw_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM);
const bool residency = inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0;
const brw_reg coordinate = tex->src[TEX_LOGICAL_SRC_COORDINATE];
const brw_reg shadow_c = tex->src[TEX_LOGICAL_SRC_SHADOW_C];
const brw_reg lod = tex->src[TEX_LOGICAL_SRC_LOD];
const brw_reg lod2 = tex->src[TEX_LOGICAL_SRC_LOD2];
const brw_reg min_lod = tex->src[TEX_LOGICAL_SRC_MIN_LOD];
const brw_reg sample_index = tex->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
const brw_reg mcs = tex->src[TEX_LOGICAL_SRC_MCS];
const brw_reg surface = tex->src[TEX_LOGICAL_SRC_SURFACE];
const brw_reg sampler = tex->src[TEX_LOGICAL_SRC_SAMPLER];
const brw_reg surface_handle = tex->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
const brw_reg sampler_handle = tex->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
const brw_reg tg4_offset = tex->src[TEX_LOGICAL_SRC_TG4_OFFSET];
const unsigned payload_type_bit_size =
get_sampler_msg_payload_type_bit_size(devinfo, inst);
get_sampler_msg_payload_type_bit_size(devinfo, tex);
/* 16-bit payloads are available only on gfx11+ */
assert(payload_type_bit_size != 16 || devinfo->ver >= 11);
/* We never generate EOT sampler messages */
assert(!inst->eot);
assert(!tex->eot);
const enum brw_reg_type payload_type =
brw_type_with_size(BRW_TYPE_F, payload_type_bit_size);
@@ -881,7 +875,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
const enum brw_reg_type payload_signed_type =
brw_type_with_size(BRW_TYPE_D, payload_type_bit_size);
unsigned header_size = 0, length = 0;
opcode op = inst->opcode;
opcode op = tex->opcode;
brw_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
sources[i] = bld.vgrf(payload_type);
@@ -890,10 +884,10 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 ||
if (shader_opcode_needs_header(op, devinfo) || tex->offset != 0 ||
sampler_handle.file != BAD_FILE ||
is_high_sampler(devinfo, sampler) ||
residency) {
tex->residency) {
/* For general texture offsets (no txf workaround), we need a header to
* put them in.
*
@@ -911,19 +905,19 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
* writemask. It's reversed from normal: 1 means "don't write".
*/
unsigned comps_regs =
DIV_ROUND_UP(regs_written(inst) - reg_unit(devinfo) * residency,
DIV_ROUND_UP(regs_written(tex) - reg_unit(devinfo) * tex->residency,
reg_unit(devinfo));
unsigned comp_regs =
DIV_ROUND_UP(inst->dst.component_size(inst->exec_size),
DIV_ROUND_UP(tex->dst.component_size(tex->exec_size),
reg_unit(devinfo) * REG_SIZE);
if (comps_regs < 4 * comp_regs) {
assert(comps_regs % comp_regs == 0);
unsigned mask = ~((1 << (comps_regs / comp_regs)) - 1) & 0xf;
inst->offset |= mask << 12;
tex->offset |= mask << 12;
}
if (residency)
inst->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */
if (tex->residency)
tex->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */
/* Build the actual header */
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
@@ -932,8 +926,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
ubld.MOV(header, brw_imm_ud(0));
else
ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD));
if (inst->offset) {
ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
if (tex->offset) {
ubld1.MOV(component(header, 2), brw_imm_ud(tex->offset));
} else if (devinfo->ver < 11 &&
bld.shader->stage != MESA_SHADER_VERTEX &&
bld.shader->stage != MESA_SHADER_FRAGMENT) {
@@ -1049,13 +1043,13 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
/* Load dPdx and the coordinate together:
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (unsigned i = 0; i < coord_components; i++) {
for (unsigned i = 0; i < tex->coord_components; i++) {
bld.MOV(sources[length++], offset(coordinate, bld, i));
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < grad_components) {
if (i < tex->grad_components) {
bld.MOV(sources[length++], offset(lod, bld, i));
bld.MOV(sources[length++], offset(lod2, bld, i));
}
@@ -1077,7 +1071,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length++], offset(coordinate, bld, 0));
if (coord_components >= 2) {
if (tex->coord_components >= 2) {
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length], offset(coordinate, bld, 1));
} else {
@@ -1090,7 +1084,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
bld.MOV(sources[length++], lod);
}
for (unsigned i = 2; i < coord_components; i++) {
for (unsigned i = 2; i < tex->coord_components; i++) {
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length++], offset(coordinate, bld, i));
}
@@ -1138,7 +1132,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
/* There is no offsetting for this message; just copy in the integer
* texture coordinates.
*/
for (unsigned i = 0; i < coord_components; i++) {
for (unsigned i = 0; i < tex->coord_components; i++) {
sources[length] = retype(sources[length], payload_signed_type);
bld.MOV(sources[length++], offset(coordinate, bld, i));
}
@@ -1155,7 +1149,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
bld.MOV(sources[length++], offset(tg4_offset, bld, i));
}
if (coord_components == 3) /* r if present */
if (tex->coord_components == 3) /* r if present */
bld.MOV(sources[length++], offset(coordinate, bld, 2));
coordinate_done = true;
@@ -1166,7 +1160,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
/* Set up the coordinate (except for cases where it was done above) */
if (!coordinate_done) {
for (unsigned i = 0; i < coord_components; i++)
for (unsigned i = 0; i < tex->coord_components; i++)
bld.MOV(retype(sources[length++], payload_type),
offset(coordinate, bld, i));
}
@@ -1186,7 +1180,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
* Param Number 0 1 2 3 4
* Param BIAS_AI U V R MLOD
*/
length += 3 - coord_components;
length += 3 - tex->coord_components;
} else if (op == SHADER_OPCODE_TXD_LOGICAL && devinfo->verx10 >= 125) {
/* On DG2 and newer platforms, sample_d can only be used with 1D and
* 2D surfaces, so the maximum number of gradient components is 2.
@@ -1196,12 +1190,12 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
*
* See bspec 45942, "Enable new message layout for cube array"
*/
length += 3 - coord_components;
length += (2 - grad_components) * 2;
length += 3 - tex->coord_components;
length += (2 - tex->grad_components) * 2;
} else {
length += 4 - coord_components;
length += 4 - tex->coord_components;
if (op == SHADER_OPCODE_TXD_LOGICAL)
length += (3 - grad_components) * 2;
length += (3 - tex->grad_components) * 2;
}
bld.MOV(sources[length++], min_lod);
@@ -1228,24 +1222,24 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst)
if (devinfo->ver < 20) {
if (payload_type_bit_size == 16) {
assert(devinfo->ver >= 11);
simd_mode = inst->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H :
simd_mode = tex->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H :
GFX10_SAMPLER_SIMD_MODE_SIMD16H;
} else {
simd_mode = inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
simd_mode = tex->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 :
BRW_SAMPLER_SIMD_MODE_SIMD16;
}
} else {
if (payload_type_bit_size == 16) {
simd_mode = inst->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16H :
simd_mode = tex->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16H :
XE2_SAMPLER_SIMD_MODE_SIMD32H;
} else {
simd_mode = inst->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16 :
simd_mode = tex->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16 :
XE2_SAMPLER_SIMD_MODE_SIMD32;
}
}
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
inst = NULL;
brw_send_inst *send = brw_transform_inst_to_send(bld, tex);
tex = NULL;
send->mlen = mlen;
send->header_size = header_size;
@@ -2679,7 +2673,7 @@ brw_lower_logical_sends(brw_shader &s)
case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
lower_sampler_logical_send(ibld, inst);
lower_sampler_logical_send(ibld, inst->as_tex());
break;
case SHADER_OPCODE_GET_BUFFER_SIZE:

View file

@@ -160,55 +160,48 @@ get_fpu_lowered_simd_width(const brw_shader *shader,
*/
static unsigned
get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
const brw_inst *inst)
const brw_tex_inst *tex)
{
/* If we have a min_lod parameter on anything other than a simple sample
* message, it will push it over 5 arguments and we have to fall back to
* SIMD8.
*/
if (inst->opcode != SHADER_OPCODE_TEX_LOGICAL &&
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD))
if (tex->opcode != SHADER_OPCODE_TEX_LOGICAL &&
tex->components_read(TEX_LOGICAL_SRC_MIN_LOD))
return devinfo->ver < 20 ? 8 : 16;
/* On Gfx9+ the LOD argument is for free if we're able to use the LZ
* variant of the TXL or TXF message.
*/
const bool implicit_lod = (inst->opcode == SHADER_OPCODE_TXL_LOGICAL ||
inst->opcode == SHADER_OPCODE_TXF_LOGICAL) &&
inst->src[TEX_LOGICAL_SRC_LOD].is_zero();
const bool implicit_lod = (tex->opcode == SHADER_OPCODE_TXL_LOGICAL ||
tex->opcode == SHADER_OPCODE_TXF_LOGICAL) &&
tex->src[TEX_LOGICAL_SRC_LOD].is_zero();
/* Calculate the total number of argument components that need to be passed
* to the sampler unit.
*/
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
const unsigned grad_components =
inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
const unsigned coord_components =
inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
unsigned num_payload_components =
coord_components +
inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
(implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) +
inst->components_read(TEX_LOGICAL_SRC_LOD2) +
inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
(inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
inst->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) +
inst->components_read(TEX_LOGICAL_SRC_MCS) +
inst->components_read(TEX_LOGICAL_SRC_MIN_LOD);
tex->coord_components +
tex->components_read(TEX_LOGICAL_SRC_SHADOW_C) +
(implicit_lod ? 0 : tex->components_read(TEX_LOGICAL_SRC_LOD)) +
tex->components_read(TEX_LOGICAL_SRC_LOD2) +
tex->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
(tex->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
tex->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) +
tex->components_read(TEX_LOGICAL_SRC_MCS) +
tex->components_read(TEX_LOGICAL_SRC_MIN_LOD);
if (inst->opcode == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20) {
num_payload_components += 3 - coord_components;
} else if (inst->opcode == SHADER_OPCODE_TXD_LOGICAL &&
if (tex->opcode == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20) {
num_payload_components += 3 - tex->coord_components;
} else if (tex->opcode == SHADER_OPCODE_TXD_LOGICAL &&
devinfo->verx10 >= 125 && devinfo->ver < 20) {
num_payload_components +=
3 - coord_components + (2 - grad_components) * 2;
3 - tex->coord_components + (2 - tex->grad_components) * 2;
} else {
num_payload_components += 4 - coord_components;
if (inst->opcode == SHADER_OPCODE_TXD_LOGICAL)
num_payload_components += (3 - grad_components) * 2;
num_payload_components += 4 - tex->coord_components;
if (tex->opcode == SHADER_OPCODE_TXD_LOGICAL)
num_payload_components += (3 - tex->grad_components) * 2;
}
@@ -219,7 +212,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo,
* maximum message size supported by the sampler, regardless of whether a
* header is provided or not.
*/
return MIN2(inst->exec_size, simd_limit);
return MIN2(tex->exec_size, simd_limit);
}
static bool
@@ -379,7 +372,7 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
case SHADER_OPCODE_TXS_LOGICAL:
return get_sampler_lowered_simd_width(devinfo, inst);
return get_sampler_lowered_simd_width(devinfo, inst->as_tex());
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:

View file

@@ -251,6 +251,14 @@ send_inst_match(brw_send_inst *a, brw_send_inst *b)
a->send_bits == b->send_bits;
}
static bool
tex_inst_match(brw_tex_inst *a, brw_tex_inst *b)
{
return a->coord_components == b->coord_components &&
a->grad_components == b->grad_components &&
a->residency == b->residency;
}
static bool
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
{
@@ -258,6 +266,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate)
return a->opcode == b->opcode &&
/* `kind` is derived from opcode, so skipped. */
(a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) &&
(a->kind != BRW_KIND_TEX || tex_inst_match(a->as_tex(), b->as_tex())) &&
a->exec_size == b->exec_size &&
a->group == b->group &&
a->predicate == b->predicate &&
@@ -342,6 +351,17 @@ hash_inst(const void *v)
break;
}
case BRW_KIND_TEX: {
const brw_tex_inst *tex = inst->as_tex();
const uint8_t tex_u8data[] = {
tex->coord_components,
tex->grad_components,
tex->residency,
};
hash = HASH(hash, tex_u8data);
break;
}
case BRW_KIND_BASE:
/* Nothing else to do. */
break;

View file

@@ -97,46 +97,47 @@ brw_opt_combine_convergent_txf(brw_shader &s)
if (inst->opcode != SHADER_OPCODE_TXF_LOGICAL)
continue;
brw_tex_inst *tex = inst->as_tex();
/* Only handle buffers or single miplevel 1D images for now */
if (inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud > 1)
if (tex->coord_components > 1)
continue;
if (inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0)
if (tex->residency)
continue;
if (inst->predicate || inst->force_writemask_all)
if (tex->predicate || tex->force_writemask_all)
continue;
if (!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_LOD]) ||
!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_SURFACE]) ||
!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]))
if (!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_LOD]) ||
!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_SURFACE]) ||
!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]))
continue;
/* Only handle immediates for now: we could check is_uniform(),
* but we'd need to ensure the coordinate's definition reaches
* txfs[0] which is where we'll insert the combined coordinate.
*/
if (inst->src[TEX_LOGICAL_SRC_COORDINATE].file != IMM)
if (tex->src[TEX_LOGICAL_SRC_COORDINATE].file != IMM)
continue;
/* texelFetch from 1D buffers shouldn't have any of these */
assert(inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_LOD2].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_MIN_LOD].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_MCS].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_TG4_OFFSET].file == BAD_FILE);
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM &&
inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud == 0);
assert(tex->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_LOD2].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_MIN_LOD].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_SAMPLE_INDEX].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_MCS].file == BAD_FILE);
assert(tex->src[TEX_LOGICAL_SRC_TG4_OFFSET].file == BAD_FILE);
assert(tex->grad_components == 0);
if (count > 0 &&
(!sources_match(defs, inst, txfs[0], TEX_LOGICAL_SRC_LOD) ||
!sources_match(defs, inst, txfs[0], TEX_LOGICAL_SRC_SURFACE) ||
!sources_match(defs, inst, txfs[0],
(!sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_LOD) ||
!sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_SURFACE) ||
!sources_match(defs, tex, txfs[0],
TEX_LOGICAL_SRC_SURFACE_HANDLE)))
continue;
txfs[count++] = inst;
txfs[count++] = tex;
if (count == ARRAY_SIZE(txfs))
break;
@@ -179,9 +180,6 @@ brw_opt_combine_convergent_txf(brw_shader &s)
srcs[TEX_LOGICAL_SRC_SAMPLER] = txfs[0]->src[TEX_LOGICAL_SRC_SAMPLER];
srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] =
txfs[0]->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(1);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
/* Each of our txf may have a reduced response length if some
* components are never read. Use the maximum of the sizes.
@@ -194,9 +192,12 @@ brw_opt_combine_convergent_txf(brw_shader &s)
/* Emit the new divergent TXF */
brw_reg div = ubld.vgrf(BRW_TYPE_UD, new_dest_comps);
brw_inst *div_txf =
brw_tex_inst *div_txf =
ubld.emit(SHADER_OPCODE_TXF_LOGICAL, div, srcs,
TEX_LOGICAL_NUM_SRCS);
TEX_LOGICAL_NUM_SRCS)->as_tex();
div_txf->coord_components = 1;
div_txf->grad_components = 0;
div_txf->residency = false;
/* Update it to also use response length reduction */
const unsigned per_component_regs =

View file

@@ -656,6 +656,12 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
}
}
if (const brw_tex_inst *tex = inst->as_tex()) {
fprintf(file, ", coord_comps: %uu", tex->coord_components);
fprintf(file, ", grad_comps: %uu", tex->grad_components);
fprintf(file, ", residency: %s", tex->residency ? "true" : "false");
}
fprintf(file, " ");
if (inst->force_writemask_all)

View file

@@ -212,13 +212,14 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS];
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
tex_srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
bld.ADD(offset(dest, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dest, tex_srcs, TEX_LOGICAL_NUM_SRCS)
->size_written = 4 * REG_SIZE;
brw_tex_inst *tex =
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dest, tex_srcs, TEX_LOGICAL_NUM_SRCS)->as_tex();
tex->size_written = 4 * REG_SIZE;
tex->coord_components = 2;
bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);

View file

@@ -277,13 +277,14 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS] = {};
tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2;
tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2);
tex_srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0);
tex_srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0);
bld.ADD(offset(dst0, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst0, tex_srcs, TEX_LOGICAL_NUM_SRCS)
->size_written = 8 * REG_SIZE;
brw_tex_inst *tex =
bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst0, tex_srcs, TEX_LOGICAL_NUM_SRCS)->as_tex();
tex->size_written = 8 * REG_SIZE;
tex->coord_components = 2;
bld.MOV(dst1, offset(dst0, bld, 2))->saturate = true;
EXPECT_NO_PROGRESS(brw_opt_saturate_propagation, bld);