diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 42ed9650f55..7168ffe60e7 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -630,12 +630,6 @@ enum tex_logical_srcs { TEX_LOGICAL_SRC_SAMPLER_HANDLE, /** Texel offset for gathers */ TEX_LOGICAL_SRC_TG4_OFFSET, - /** REQUIRED: Number of coordinate components (as UD immediate) */ - TEX_LOGICAL_SRC_COORD_COMPONENTS, - /** REQUIRED: Number of derivative components (as UD immediate) */ - TEX_LOGICAL_SRC_GRAD_COMPONENTS, - /** REQUIRED: request residency (as UD immediate) */ - TEX_LOGICAL_SRC_RESIDENCY, TEX_LOGICAL_NUM_SRCS, }; diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index b1e2ca43670..98a49cbe059 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -3632,17 +3632,15 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon srcs[TEX_LOGICAL_SRC_SURFACE] = texture; srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0); srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle; - srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components); - srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0); - srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0); - brw_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, - ARRAY_SIZE(srcs)); + brw_tex_inst *tex = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, + ARRAY_SIZE(srcs))->as_tex(); + tex->coord_components = components; /* We only care about one or two regs of response, but the sampler always * writes 4/8. */ - inst->size_written = 4 * dest.component_size(inst->exec_size); + tex->size_written = 4 * dest.component_size(tex->exec_size); return dest; } @@ -3713,14 +3711,12 @@ emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const b srcs[TEX_LOGICAL_SRC_MCS] = mcs; srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(target); srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0); - srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(3); - srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0); - srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0); - brw_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs)); - inst->size_written = 4 * inst->dst.component_size(inst->exec_size); + brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex(); + tex->size_written = 4 * tex->dst.component_size(tex->exec_size); + tex->coord_components = 3; - return inst; + return tex; } /** @@ -6021,9 +6017,6 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, else srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = image; srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0); - srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0); - srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0); - srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_d(0); /* Since the image size is always uniform, we can just emit a SIMD8 * query instruction and splat the result out. @@ -6031,8 +6024,8 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, const brw_builder ubld = bld.scalar_group(); brw_reg tmp = ubld.vgrf(BRW_TYPE_UD, 4); - brw_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL, - tmp, srcs, ARRAY_SIZE(srcs)); + brw_tex_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL, + tmp, srcs, ARRAY_SIZE(srcs))->as_tex(); inst->size_written = 4 * REG_SIZE * reg_unit(devinfo); for (unsigned c = 0; c < instr->def.num_components; ++c) { @@ -7430,8 +7423,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, */ assert(!instr->is_sparse || srcs[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE); - srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(instr->is_sparse); - int lod_components = 0; /* The hardware requires a LOD for buffer textures */ @@ -7612,9 +7603,6 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]); } - srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components); - srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components); - enum opcode opcode; switch (instr->op) { case nir_texop_tex: @@ -7741,9 +7729,12 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, brw_allocate_vgrf_units(*bld.shader, total_regs * reg_unit(devinfo)), dst_type); - brw_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); - inst->offset = header_bits; - inst->size_written = total_regs * grf_size; + brw_tex_inst *tex = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs))->as_tex(); + tex->offset = header_bits; + tex->size_written = total_regs * grf_size; + tex->residency = instr->is_sparse; + tex->coord_components = instr->coord_components; + tex->grad_components = lod_components; /* Wa_14012688258: * @@ -7758,7 +7749,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, assert(instr->coord_components >= 3u); /* See opt_zero_samples(). */ - inst->keep_payload_trailing_zeros = true; + tex->keep_payload_trailing_zeros = true; } /* With half-floats returns, the stride into a GRF allocation for each @@ -7781,7 +7772,7 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb, if (instr->op != nir_texop_query_levels && !instr->is_sparse && !non_aligned_component_stride) { /* In most cases we can write directly to the result. */ - inst->dst = nir_def_reg; + tex->dst = nir_def_reg; } else { /* In other cases, we have to reorganize the sampler message's results * a bit to match the NIR intrinsic's expectations. diff --git a/src/intel/compiler/brw_inst.cpp b/src/intel/compiler/brw_inst.cpp index b47f9579f3c..e481a21bd05 100644 --- a/src/intel/compiler/brw_inst.cpp +++ b/src/intel/compiler/brw_inst.cpp @@ -14,10 +14,13 @@ static inline unsigned brw_inst_kind_size(brw_inst_kind kind) { + STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_tex_inst)); + /* TODO: Temporarily here to ensure all instructions can be converted to * SEND. Once all new kinds are added, change so that BASE allocate only * sizeof(brw_inst). */ + return sizeof(brw_send_inst); } @@ -154,6 +157,27 @@ brw_inst_kind_for_opcode(enum opcode opcode) case SHADER_OPCODE_INTERLOCK: return BRW_KIND_SEND; + case SHADER_OPCODE_TEX_LOGICAL: + case SHADER_OPCODE_TXD_LOGICAL: + case SHADER_OPCODE_TXF_LOGICAL: + case SHADER_OPCODE_TXL_LOGICAL: + case SHADER_OPCODE_TXS_LOGICAL: + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + case FS_OPCODE_TXB_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: + case SHADER_OPCODE_TXF_MCS_LOGICAL: + case SHADER_OPCODE_LOD_LOGICAL: + case SHADER_OPCODE_TG4_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOGICAL: + case SHADER_OPCODE_TG4_BIAS_LOGICAL: + case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: + case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: + case SHADER_OPCODE_SAMPLEINFO_LOGICAL: + return BRW_KIND_TEX; + default: return BRW_KIND_BASE; } @@ -436,17 +460,15 @@ brw_inst::components_read(unsigned i) const case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: - case SHADER_OPCODE_SAMPLEINFO_LOGICAL: - assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM && - src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM && - src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM); + case SHADER_OPCODE_SAMPLEINFO_LOGICAL: { + const brw_tex_inst *tex = as_tex(); /* Texture coordinates. */ if (i == TEX_LOGICAL_SRC_COORDINATE) - return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; + return tex->coord_components; /* Texture derivatives. */ else if ((i == TEX_LOGICAL_SRC_LOD || i == TEX_LOGICAL_SRC_LOD2) && opcode == SHADER_OPCODE_TXD_LOGICAL) - return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud; + return tex->grad_components; /* Texture offset. */ else if (i == TEX_LOGICAL_SRC_TG4_OFFSET) return 2; @@ -460,6 +482,7 @@ brw_inst::components_read(unsigned i) const return 1; } else return 1; + } case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: if (i == MEMORY_LOGICAL_DATA0) @@ -663,27 +686,8 @@ brw_inst::flags_written(const intel_device_info *devinfo) const bool brw_inst::has_sampler_residency() const { - switch (opcode) { - case SHADER_OPCODE_TEX_LOGICAL: - case FS_OPCODE_TXB_LOGICAL: - case SHADER_OPCODE_TXL_LOGICAL: - case SHADER_OPCODE_TXD_LOGICAL: - case SHADER_OPCODE_TXF_LOGICAL: - case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: - case SHADER_OPCODE_TXF_CMS_W_LOGICAL: - case SHADER_OPCODE_TXS_LOGICAL: - case SHADER_OPCODE_TG4_OFFSET_LOGICAL: - case SHADER_OPCODE_TG4_LOGICAL: - case SHADER_OPCODE_TG4_BIAS_LOGICAL: - case SHADER_OPCODE_TG4_EXPLICIT_LOD_LOGICAL: - case SHADER_OPCODE_TG4_IMPLICIT_LOD_LOGICAL: - case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: - case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: - assert(src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM); - return src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0; - default: - return false; - } + const brw_tex_inst *tex = as_tex(); + return tex && tex->residency; } /* \sa inst_is_raw_move in brw_eu_validate. */ diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 151439b3e22..4d6655de7f7 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -42,6 +42,7 @@ struct brw_shader; enum ENUM_PACKED brw_inst_kind { BRW_KIND_BASE, BRW_KIND_SEND, + BRW_KIND_TEX, }; brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode); @@ -68,6 +69,7 @@ struct brw_inst : brw_exec_node { } KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND); + KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX); #undef KIND_HELPERS @@ -270,6 +272,12 @@ struct brw_send_inst : brw_inst { }; }; +struct brw_tex_inst : brw_inst { + uint8_t coord_components; + uint8_t grad_components; + bool residency; +}; + /** * Make the execution of \p inst dependent on the evaluation of a possibly * inverted predicate. diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index f8d16a919a8..163977bd9b4 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -841,38 +841,32 @@ get_sampler_msg_payload_type_bit_size(const intel_device_info *devinfo, } static void -lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) +lower_sampler_logical_send(const brw_builder &bld, brw_tex_inst *tex) { const intel_device_info *devinfo = bld.shader->devinfo; const brw_compiler *compiler = bld.shader->compiler; - const brw_reg coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE]; - const brw_reg shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C]; - const brw_reg lod = inst->src[TEX_LOGICAL_SRC_LOD]; - const brw_reg lod2 = inst->src[TEX_LOGICAL_SRC_LOD2]; - const brw_reg min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD]; - const brw_reg sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX]; - const brw_reg mcs = inst->src[TEX_LOGICAL_SRC_MCS]; - const brw_reg surface = inst->src[TEX_LOGICAL_SRC_SURFACE]; - const brw_reg sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER]; - const brw_reg surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]; - const brw_reg sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE]; - const brw_reg tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET]; - assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM); - const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; - assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM); - const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud; - assert(inst->src[TEX_LOGICAL_SRC_RESIDENCY].file == IMM); - const bool residency = inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0; + const brw_reg coordinate = tex->src[TEX_LOGICAL_SRC_COORDINATE]; + const brw_reg shadow_c = tex->src[TEX_LOGICAL_SRC_SHADOW_C]; + const brw_reg lod = tex->src[TEX_LOGICAL_SRC_LOD]; + const brw_reg lod2 = tex->src[TEX_LOGICAL_SRC_LOD2]; + const brw_reg min_lod = tex->src[TEX_LOGICAL_SRC_MIN_LOD]; + const brw_reg sample_index = tex->src[TEX_LOGICAL_SRC_SAMPLE_INDEX]; + const brw_reg mcs = tex->src[TEX_LOGICAL_SRC_MCS]; + const brw_reg surface = tex->src[TEX_LOGICAL_SRC_SURFACE]; + const brw_reg sampler = tex->src[TEX_LOGICAL_SRC_SAMPLER]; + const brw_reg surface_handle = tex->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]; + const brw_reg sampler_handle = tex->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE]; + const brw_reg tg4_offset = tex->src[TEX_LOGICAL_SRC_TG4_OFFSET]; const unsigned payload_type_bit_size = - get_sampler_msg_payload_type_bit_size(devinfo, inst); + get_sampler_msg_payload_type_bit_size(devinfo, tex); /* 16-bit payloads are available only on gfx11+ */ assert(payload_type_bit_size != 16 || devinfo->ver >= 11); /* We never generate EOT sampler messages */ - assert(!inst->eot); + assert(!tex->eot); const enum brw_reg_type payload_type = brw_type_with_size(BRW_TYPE_F, payload_type_bit_size); @@ -881,7 +875,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) const enum brw_reg_type payload_signed_type = brw_type_with_size(BRW_TYPE_D, payload_type_bit_size); unsigned header_size = 0, length = 0; - opcode op = inst->opcode; + opcode op = tex->opcode; brw_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE]; for (unsigned i = 0; i < ARRAY_SIZE(sources); i++) sources[i] = bld.vgrf(payload_type); @@ -890,10 +884,10 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE)); assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE)); - if (shader_opcode_needs_header(op, devinfo) || inst->offset != 0 || + if (shader_opcode_needs_header(op, devinfo) || tex->offset != 0 || sampler_handle.file != BAD_FILE || is_high_sampler(devinfo, sampler) || - residency) { + tex->residency) { /* For general texture offsets (no txf workaround), we need a header to * put them in. * @@ -911,19 +905,19 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) * writemask. It's reversed from normal: 1 means "don't write". */ unsigned comps_regs = - DIV_ROUND_UP(regs_written(inst) - reg_unit(devinfo) * residency, + DIV_ROUND_UP(regs_written(tex) - reg_unit(devinfo) * tex->residency, reg_unit(devinfo)); unsigned comp_regs = - DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), + DIV_ROUND_UP(tex->dst.component_size(tex->exec_size), reg_unit(devinfo) * REG_SIZE); if (comps_regs < 4 * comp_regs) { assert(comps_regs % comp_regs == 0); unsigned mask = ~((1 << (comps_regs / comp_regs)) - 1) & 0xf; - inst->offset |= mask << 12; + tex->offset |= mask << 12; } - if (residency) - inst->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */ + if (tex->residency) + tex->offset |= 1 << 23; /* g0.2 bit23 : Pixel Null Mask Enable */ /* Build the actual header */ const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0); @@ -932,8 +926,8 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) ubld.MOV(header, brw_imm_ud(0)); else ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_TYPE_UD)); - if (inst->offset) { - ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); + if (tex->offset) { + ubld1.MOV(component(header, 2), brw_imm_ud(tex->offset)); } else if (devinfo->ver < 11 && bld.shader->stage != MESA_SHADER_VERTEX && bld.shader->stage != MESA_SHADER_FRAGMENT) { @@ -1049,13 +1043,13 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) /* Load dPdx and the coordinate together: * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z */ - for (unsigned i = 0; i < coord_components; i++) { + for (unsigned i = 0; i < tex->coord_components; i++) { bld.MOV(sources[length++], offset(coordinate, bld, i)); /* For cube map array, the coordinate is (u,v,r,ai) but there are * only derivatives for (u, v, r). */ - if (i < grad_components) { + if (i < tex->grad_components) { bld.MOV(sources[length++], offset(lod, bld, i)); bld.MOV(sources[length++], offset(lod2, bld, i)); } @@ -1077,7 +1071,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) sources[length] = retype(sources[length], payload_signed_type); bld.MOV(sources[length++], offset(coordinate, bld, 0)); - if (coord_components >= 2) { + if (tex->coord_components >= 2) { sources[length] = retype(sources[length], payload_signed_type); bld.MOV(sources[length], offset(coordinate, bld, 1)); } else { @@ -1090,7 +1084,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) bld.MOV(sources[length++], lod); } - for (unsigned i = 2; i < coord_components; i++) { + for (unsigned i = 2; i < tex->coord_components; i++) { sources[length] = retype(sources[length], payload_signed_type); bld.MOV(sources[length++], offset(coordinate, bld, i)); } @@ -1138,7 +1132,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) /* There is no offsetting for this message; just copy in the integer * texture coordinates. */ - for (unsigned i = 0; i < coord_components; i++) { + for (unsigned i = 0; i < tex->coord_components; i++) { sources[length] = retype(sources[length], payload_signed_type); bld.MOV(sources[length++], offset(coordinate, bld, i)); } @@ -1155,7 +1149,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) bld.MOV(sources[length++], offset(tg4_offset, bld, i)); } - if (coord_components == 3) /* r if present */ + if (tex->coord_components == 3) /* r if present */ bld.MOV(sources[length++], offset(coordinate, bld, 2)); coordinate_done = true; @@ -1166,7 +1160,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) /* Set up the coordinate (except for cases where it was done above) */ if (!coordinate_done) { - for (unsigned i = 0; i < coord_components; i++) + for (unsigned i = 0; i < tex->coord_components; i++) bld.MOV(retype(sources[length++], payload_type), offset(coordinate, bld, i)); } @@ -1186,7 +1180,7 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) * Param Number 0 1 2 3 4 * Param BIAS_AI U V R MLOD */ - length += 3 - coord_components; + length += 3 - tex->coord_components; } else if (op == SHADER_OPCODE_TXD_LOGICAL && devinfo->verx10 >= 125) { /* On DG2 and newer platforms, sample_d can only be used with 1D and * 2D surfaces, so the maximum number of gradient components is 2. @@ -1196,12 +1190,12 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) * * See bspec 45942, "Enable new message layout for cube array" */ - length += 3 - coord_components; - length += (2 - grad_components) * 2; + length += 3 - tex->coord_components; + length += (2 - tex->grad_components) * 2; } else { - length += 4 - coord_components; + length += 4 - tex->coord_components; if (op == SHADER_OPCODE_TXD_LOGICAL) - length += (3 - grad_components) * 2; + length += (3 - tex->grad_components) * 2; } bld.MOV(sources[length++], min_lod); @@ -1228,24 +1222,24 @@ lower_sampler_logical_send(const brw_builder &bld, brw_inst *inst) if (devinfo->ver < 20) { if (payload_type_bit_size == 16) { assert(devinfo->ver >= 11); - simd_mode = inst->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H : + simd_mode = tex->exec_size <= 8 ? GFX10_SAMPLER_SIMD_MODE_SIMD8H : GFX10_SAMPLER_SIMD_MODE_SIMD16H; } else { - simd_mode = inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + simd_mode = tex->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : BRW_SAMPLER_SIMD_MODE_SIMD16; } } else { if (payload_type_bit_size == 16) { - simd_mode = inst->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16H : + simd_mode = tex->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16H : XE2_SAMPLER_SIMD_MODE_SIMD32H; } else { - simd_mode = inst->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16 : + simd_mode = tex->exec_size <= 16 ? XE2_SAMPLER_SIMD_MODE_SIMD16 : XE2_SAMPLER_SIMD_MODE_SIMD32; } } - brw_send_inst *send = brw_transform_inst_to_send(bld, inst); - inst = NULL; + brw_send_inst *send = brw_transform_inst_to_send(bld, tex); + tex = NULL; send->mlen = mlen; send->header_size = header_size; @@ -2679,7 +2673,7 @@ brw_lower_logical_sends(brw_shader &s) case SHADER_OPCODE_TG4_OFFSET_LOD_LOGICAL: case SHADER_OPCODE_TG4_OFFSET_BIAS_LOGICAL: case SHADER_OPCODE_SAMPLEINFO_LOGICAL: - lower_sampler_logical_send(ibld, inst); + lower_sampler_logical_send(ibld, inst->as_tex()); break; case SHADER_OPCODE_GET_BUFFER_SIZE: diff --git a/src/intel/compiler/brw_lower_simd_width.cpp b/src/intel/compiler/brw_lower_simd_width.cpp index ece13998b56..c9e0594ebd0 100644 --- a/src/intel/compiler/brw_lower_simd_width.cpp +++ b/src/intel/compiler/brw_lower_simd_width.cpp @@ -160,55 +160,48 @@ get_fpu_lowered_simd_width(const brw_shader *shader, */ static unsigned get_sampler_lowered_simd_width(const struct intel_device_info *devinfo, - const brw_inst *inst) + const brw_tex_inst *tex) { /* If we have a min_lod parameter on anything other than a simple sample * message, it will push it over 5 arguments and we have to fall back to * SIMD8. */ - if (inst->opcode != SHADER_OPCODE_TEX_LOGICAL && - inst->components_read(TEX_LOGICAL_SRC_MIN_LOD)) + if (tex->opcode != SHADER_OPCODE_TEX_LOGICAL && + tex->components_read(TEX_LOGICAL_SRC_MIN_LOD)) return devinfo->ver < 20 ? 8 : 16; /* On Gfx9+ the LOD argument is for free if we're able to use the LZ * variant of the TXL or TXF message. */ - const bool implicit_lod = (inst->opcode == SHADER_OPCODE_TXL_LOGICAL || - inst->opcode == SHADER_OPCODE_TXF_LOGICAL) && - inst->src[TEX_LOGICAL_SRC_LOD].is_zero(); + const bool implicit_lod = (tex->opcode == SHADER_OPCODE_TXL_LOGICAL || + tex->opcode == SHADER_OPCODE_TXF_LOGICAL) && + tex->src[TEX_LOGICAL_SRC_LOD].is_zero(); /* Calculate the total number of argument components that need to be passed * to the sampler unit. */ - assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM); - const unsigned grad_components = - inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud; - assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM); - const unsigned coord_components = - inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; - unsigned num_payload_components = - coord_components + - inst->components_read(TEX_LOGICAL_SRC_SHADOW_C) + - (implicit_lod ? 0 : inst->components_read(TEX_LOGICAL_SRC_LOD)) + - inst->components_read(TEX_LOGICAL_SRC_LOD2) + - inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) + - (inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ? - inst->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) + - inst->components_read(TEX_LOGICAL_SRC_MCS) + - inst->components_read(TEX_LOGICAL_SRC_MIN_LOD); + tex->coord_components + + tex->components_read(TEX_LOGICAL_SRC_SHADOW_C) + + (implicit_lod ? 0 : tex->components_read(TEX_LOGICAL_SRC_LOD)) + + tex->components_read(TEX_LOGICAL_SRC_LOD2) + + tex->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) + + (tex->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ? + tex->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) + + tex->components_read(TEX_LOGICAL_SRC_MCS) + + tex->components_read(TEX_LOGICAL_SRC_MIN_LOD); - if (inst->opcode == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20) { - num_payload_components += 3 - coord_components; - } else if (inst->opcode == SHADER_OPCODE_TXD_LOGICAL && + if (tex->opcode == FS_OPCODE_TXB_LOGICAL && devinfo->ver >= 20) { + num_payload_components += 3 - tex->coord_components; + } else if (tex->opcode == SHADER_OPCODE_TXD_LOGICAL && devinfo->verx10 >= 125 && devinfo->ver < 20) { num_payload_components += - 3 - coord_components + (2 - grad_components) * 2; + 3 - tex->coord_components + (2 - tex->grad_components) * 2; } else { - num_payload_components += 4 - coord_components; - if (inst->opcode == SHADER_OPCODE_TXD_LOGICAL) - num_payload_components += (3 - grad_components) * 2; + num_payload_components += 4 - tex->coord_components; + if (tex->opcode == SHADER_OPCODE_TXD_LOGICAL) + num_payload_components += (3 - tex->grad_components) * 2; } @@ -219,7 +212,7 @@ get_sampler_lowered_simd_width(const struct intel_device_info *devinfo, * maximum message size supported by the sampler, regardless of whether a * header is provided or not. */ - return MIN2(inst->exec_size, simd_limit); + return MIN2(tex->exec_size, simd_limit); } static bool @@ -379,7 +372,7 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst) case FS_OPCODE_TXB_LOGICAL: case SHADER_OPCODE_TXF_LOGICAL: case SHADER_OPCODE_TXS_LOGICAL: - return get_sampler_lowered_simd_width(devinfo, inst); + return get_sampler_lowered_simd_width(devinfo, inst->as_tex()); case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: case SHADER_OPCODE_MEMORY_STORE_LOGICAL: diff --git a/src/intel/compiler/brw_opt_cse.cpp b/src/intel/compiler/brw_opt_cse.cpp index 4da3f074bf0..0947b8e20b6 100644 --- a/src/intel/compiler/brw_opt_cse.cpp +++ b/src/intel/compiler/brw_opt_cse.cpp @@ -251,6 +251,14 @@ send_inst_match(brw_send_inst *a, brw_send_inst *b) a->send_bits == b->send_bits; } +static bool +tex_inst_match(brw_tex_inst *a, brw_tex_inst *b) +{ + return a->coord_components == b->coord_components && + a->grad_components == b->grad_components && + a->residency == b->residency; +} + static bool instructions_match(brw_inst *a, brw_inst *b, bool *negate) { @@ -258,6 +266,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate) return a->opcode == b->opcode && /* `kind` is derived from opcode, so skipped. */ (a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) && + (a->kind != BRW_KIND_TEX || tex_inst_match(a->as_tex(), b->as_tex())) && a->exec_size == b->exec_size && a->group == b->group && a->predicate == b->predicate && @@ -342,6 +351,17 @@ hash_inst(const void *v) break; } + case BRW_KIND_TEX: { + const brw_tex_inst *tex = inst->as_tex(); + const uint8_t tex_u8data[] = { + tex->coord_components, + tex->grad_components, + tex->residency, + }; + hash = HASH(hash, tex_u8data); + break; + } + case BRW_KIND_BASE: /* Nothing else to do. */ break; diff --git a/src/intel/compiler/brw_opt_txf_combiner.cpp b/src/intel/compiler/brw_opt_txf_combiner.cpp index 4d73ad9c3e1..623474f7188 100644 --- a/src/intel/compiler/brw_opt_txf_combiner.cpp +++ b/src/intel/compiler/brw_opt_txf_combiner.cpp @@ -97,46 +97,47 @@ brw_opt_combine_convergent_txf(brw_shader &s) if (inst->opcode != SHADER_OPCODE_TXF_LOGICAL) continue; + brw_tex_inst *tex = inst->as_tex(); + /* Only handle buffers or single miplevel 1D images for now */ - if (inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud > 1) + if (tex->coord_components > 1) continue; - if (inst->src[TEX_LOGICAL_SRC_RESIDENCY].ud != 0) + if (tex->residency) continue; - if (inst->predicate || inst->force_writemask_all) + if (tex->predicate || tex->force_writemask_all) continue; - if (!is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_LOD]) || - !is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_SURFACE]) || - !is_uniform_def(defs, inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE])) + if (!is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_LOD]) || + !is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_SURFACE]) || + !is_uniform_def(defs, tex->src[TEX_LOGICAL_SRC_SURFACE_HANDLE])) continue; /* Only handle immediates for now: we could check is_uniform(), * but we'd need to ensure the coordinate's definition reaches * txfs[0] which is where we'll insert the combined coordinate. */ - if (inst->src[TEX_LOGICAL_SRC_COORDINATE].file != IMM) + if (tex->src[TEX_LOGICAL_SRC_COORDINATE].file != IMM) continue; /* texelFetch from 1D buffers shouldn't have any of these */ - assert(inst->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE); - assert(inst->src[TEX_LOGICAL_SRC_LOD2].file == BAD_FILE); - assert(inst->src[TEX_LOGICAL_SRC_MIN_LOD].file == BAD_FILE); - assert(inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX].file == BAD_FILE); - assert(inst->src[TEX_LOGICAL_SRC_MCS].file == BAD_FILE); - assert(inst->src[TEX_LOGICAL_SRC_TG4_OFFSET].file == BAD_FILE); - assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM && - inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud == 0); + assert(tex->src[TEX_LOGICAL_SRC_SHADOW_C].file == BAD_FILE); + assert(tex->src[TEX_LOGICAL_SRC_LOD2].file == BAD_FILE); + assert(tex->src[TEX_LOGICAL_SRC_MIN_LOD].file == BAD_FILE); + assert(tex->src[TEX_LOGICAL_SRC_SAMPLE_INDEX].file == BAD_FILE); + assert(tex->src[TEX_LOGICAL_SRC_MCS].file == BAD_FILE); + assert(tex->src[TEX_LOGICAL_SRC_TG4_OFFSET].file == BAD_FILE); + assert(tex->grad_components == 0); if (count > 0 && - (!sources_match(defs, inst, txfs[0], TEX_LOGICAL_SRC_LOD) || - !sources_match(defs, inst, txfs[0], TEX_LOGICAL_SRC_SURFACE) || - !sources_match(defs, inst, txfs[0], + (!sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_LOD) || + !sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_SURFACE) || + !sources_match(defs, tex, txfs[0], TEX_LOGICAL_SRC_SURFACE_HANDLE))) continue; - txfs[count++] = inst; + txfs[count++] = tex; if (count == ARRAY_SIZE(txfs)) break; @@ -179,9 +180,6 @@ brw_opt_combine_convergent_txf(brw_shader &s) srcs[TEX_LOGICAL_SRC_SAMPLER] = txfs[0]->src[TEX_LOGICAL_SRC_SAMPLER]; srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] = txfs[0]->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE]; - srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(1); - srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0); - srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0); /* Each of our txf may have a reduced response length if some * components are never read. Use the maximum of the sizes. @@ -194,9 +192,12 @@ brw_opt_combine_convergent_txf(brw_shader &s) /* Emit the new divergent TXF */ brw_reg div = ubld.vgrf(BRW_TYPE_UD, new_dest_comps); - brw_inst *div_txf = + brw_tex_inst *div_txf = ubld.emit(SHADER_OPCODE_TXF_LOGICAL, div, srcs, - TEX_LOGICAL_NUM_SRCS); + TEX_LOGICAL_NUM_SRCS)->as_tex(); + div_txf->coord_components = 1; + div_txf->grad_components = 0; + div_txf->residency = false; /* Update it to also use response length reduction */ const unsigned per_component_regs = diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index 1ce6d02b48c..f7a56deb080 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -656,6 +656,12 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con } } + if (const brw_tex_inst *tex = inst->as_tex()) { + fprintf(file, ", coord_comps: %uu", tex->coord_components); + fprintf(file, ", grad_comps: %uu", tex->grad_components); + fprintf(file, ", residency: %s", tex->residency ? "true" : "false"); + } + fprintf(file, " "); if (inst->force_writemask_all) diff --git a/src/intel/compiler/test_opt_cmod_propagation.cpp b/src/intel/compiler/test_opt_cmod_propagation.cpp index f99e614dc04..db8def0a5d6 100644 --- a/src/intel/compiler/test_opt_cmod_propagation.cpp +++ b/src/intel/compiler/test_opt_cmod_propagation.cpp @@ -212,13 +212,14 @@ TEST_F(cmod_propagation_test, intervening_dest_write) brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS]; tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2; tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0); - tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2); - tex_srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0); - tex_srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0); bld.ADD(offset(dest, bld, 2), src0, src1); - bld.emit(SHADER_OPCODE_TEX_LOGICAL, dest, tex_srcs, TEX_LOGICAL_NUM_SRCS) - ->size_written = 4 * REG_SIZE; + + brw_tex_inst *tex = + bld.emit(SHADER_OPCODE_TEX_LOGICAL, dest, tex_srcs, TEX_LOGICAL_NUM_SRCS)->as_tex(); + tex->size_written = 4 * REG_SIZE; + tex->coord_components = 2; + bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE); EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld); diff --git a/src/intel/compiler/test_opt_saturate_propagation.cpp b/src/intel/compiler/test_opt_saturate_propagation.cpp index 058682d2bed..7267cdaaebc 100644 --- a/src/intel/compiler/test_opt_saturate_propagation.cpp +++ b/src/intel/compiler/test_opt_saturate_propagation.cpp @@ -277,13 +277,14 @@ TEST_F(saturate_propagation_test, intervening_dest_write) brw_reg tex_srcs[TEX_LOGICAL_NUM_SRCS] = {}; tex_srcs[TEX_LOGICAL_SRC_COORDINATE] = src2; tex_srcs[TEX_LOGICAL_SRC_SURFACE] = brw_imm_ud(0); - tex_srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_ud(2); - tex_srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_ud(0); - tex_srcs[TEX_LOGICAL_SRC_RESIDENCY] = brw_imm_ud(0); bld.ADD(offset(dst0, bld, 2), src0, src1); - bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst0, tex_srcs, TEX_LOGICAL_NUM_SRCS) - ->size_written = 8 * REG_SIZE; + + brw_tex_inst *tex = + bld.emit(SHADER_OPCODE_TEX_LOGICAL, dst0, tex_srcs, TEX_LOGICAL_NUM_SRCS)->as_tex(); + tex->size_written = 8 * REG_SIZE; + tex->coord_components = 2; + bld.MOV(dst1, offset(dst0, bld, 2))->saturate = true; EXPECT_NO_PROGRESS(brw_opt_saturate_propagation, bld);