mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 09:10:11 +01:00
We had twice surface/sampler sources for no good reason, just add a boolean to tell whether they are bindless or not. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37527>
506 lines
15 KiB
C++
506 lines
15 KiB
C++
/* -*- c++ -*- */
|
|
/*
|
|
* Copyright © 2010-2016 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <assert.h>
|
|
#include "brw_reg.h"
|
|
#include "brw_list.h"
|
|
|
|
#define MAX_SAMPLER_MESSAGE_SIZE 11
|
|
|
|
/* The sampler can return a vec5 when sampling with sparse residency. In
|
|
* SIMD32, each component takes up 4 GRFs, so we need to allow up to size-20
|
|
* VGRFs to hold the result.
|
|
*/
|
|
#define MAX_VGRF_SIZE(devinfo) ((devinfo)->ver >= 20 ? 40 : 20)
|
|
|
|
struct bblock_t;
|
|
struct brw_shader;
|
|
|
|
enum ENUM_PACKED brw_inst_kind {
|
|
BRW_KIND_BASE,
|
|
BRW_KIND_SEND,
|
|
BRW_KIND_LOGICAL,
|
|
BRW_KIND_TEX,
|
|
BRW_KIND_MEM,
|
|
BRW_KIND_DPAS,
|
|
BRW_KIND_LOAD_PAYLOAD,
|
|
BRW_KIND_URB,
|
|
BRW_KIND_FB_WRITE,
|
|
};
|
|
|
|
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
|
|
|
|
struct brw_inst : brw_exec_node {
|
|
brw_inst() = delete;
|
|
brw_inst(const brw_inst&) = delete;
|
|
|
|
/* Enable usage of placement new. */
|
|
static void* operator new(size_t size, void *ptr) { return ptr; }
|
|
static void operator delete(void *p) {}
|
|
|
|
/* Prefer macro here instead of templates to get nicer
|
|
* helper names.
|
|
*/
|
|
#define KIND_HELPERS(HELPER_NAME, TYPE_NAME, ENUM_NAME) \
|
|
struct TYPE_NAME *HELPER_NAME() { \
|
|
return kind == ENUM_NAME ? (struct TYPE_NAME *)this \
|
|
: nullptr; \
|
|
} \
|
|
const struct TYPE_NAME *HELPER_NAME() const { \
|
|
return kind == ENUM_NAME ? (const struct TYPE_NAME *)this \
|
|
: nullptr; \
|
|
}
|
|
|
|
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
|
|
KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX);
|
|
KIND_HELPERS(as_mem, brw_mem_inst, BRW_KIND_MEM);
|
|
KIND_HELPERS(as_dpas, brw_dpas_inst, BRW_KIND_DPAS);
|
|
KIND_HELPERS(as_load_payload, brw_load_payload_inst, BRW_KIND_LOAD_PAYLOAD);
|
|
KIND_HELPERS(as_urb, brw_urb_inst, BRW_KIND_URB);
|
|
KIND_HELPERS(as_fb_write, brw_fb_write_inst, BRW_KIND_FB_WRITE);
|
|
|
|
#undef KIND_HELPERS
|
|
|
|
bool is_send() const;
|
|
bool is_payload(unsigned arg) const;
|
|
bool is_partial_write(unsigned grf_size = REG_SIZE) const;
|
|
unsigned components_read(unsigned i) const;
|
|
unsigned size_read(const struct intel_device_info *devinfo, int arg) const;
|
|
bool can_do_source_mods(const struct intel_device_info *devinfo) const;
|
|
bool can_do_cmod() const;
|
|
bool can_change_types() const;
|
|
bool has_source_and_destination_hazard() const;
|
|
|
|
bool is_3src(const struct brw_compiler *compiler) const;
|
|
bool is_math() const;
|
|
bool is_control_flow_begin() const;
|
|
bool is_control_flow_end() const;
|
|
bool is_control_flow() const;
|
|
bool is_commutative() const;
|
|
bool is_raw_move() const;
|
|
bool can_do_saturate() const;
|
|
bool reads_accumulator_implicitly() const;
|
|
bool writes_accumulator_implicitly(const struct intel_device_info *devinfo) const;
|
|
|
|
/**
|
|
* Instructions that use indirect addressing have additional register
|
|
* regioning restrictions.
|
|
*/
|
|
bool uses_indirect_addressing() const;
|
|
|
|
void remove();
|
|
|
|
/**
|
|
* True if the instruction has side effects other than writing to
|
|
* its destination registers. You are expected not to reorder or
|
|
* optimize these out unless you know what you are doing.
|
|
*/
|
|
bool has_side_effects() const;
|
|
|
|
/**
|
|
* True if the instruction might be affected by side effects of other
|
|
* instructions.
|
|
*/
|
|
bool is_volatile() const;
|
|
|
|
/**
|
|
* Return whether \p arg is a control source of a virtual instruction which
|
|
* shouldn't contribute to the execution type and usual regioning
|
|
* restriction calculations of arithmetic instructions.
|
|
*/
|
|
bool is_control_source(unsigned arg) const;
|
|
|
|
/**
|
|
* Return the subset of flag registers read by the instruction as a bitset
|
|
* with byte granularity.
|
|
*/
|
|
unsigned flags_read(const intel_device_info *devinfo) const;
|
|
|
|
/**
|
|
* Return the subset of flag registers updated by the instruction (either
|
|
* partially or fully) as a bitset with byte granularity.
|
|
*/
|
|
unsigned flags_written(const intel_device_info *devinfo) const;
|
|
|
|
/**
|
|
* Return true if this instruction is a sampler message gathering residency
|
|
* data.
|
|
*/
|
|
bool has_sampler_residency() const;
|
|
|
|
/**
|
|
* Return true if this instruction is using the address register
|
|
* implicitly.
|
|
*/
|
|
bool uses_address_register_implicitly() const;
|
|
|
|
enum opcode opcode;
|
|
brw_inst_kind kind;
|
|
|
|
/**
|
|
* Execution size of the instruction. This is used by the generator to
|
|
* generate the correct binary for the given instruction. Current valid
|
|
* values are 1, 4, 8, 16, 32.
|
|
*/
|
|
uint8_t exec_size;
|
|
|
|
/**
|
|
* Channel group from the hardware execution and predication mask that
|
|
* should be applied to the instruction. The subset of channel enable
|
|
* signals (calculated from the EU control flow and predication state)
|
|
* given by [group, group + exec_size) will be used to mask GRF writes and
|
|
* any other side effects of the instruction.
|
|
*/
|
|
uint8_t group;
|
|
|
|
uint8_t sources; /**< Number of brw_reg sources. */
|
|
|
|
enum brw_predicate predicate;
|
|
enum brw_conditional_mod conditional_mod; /**< BRW_CONDITIONAL_* */
|
|
|
|
uint16_t size_written; /**< Data written to the destination register in bytes. */
|
|
|
|
union {
|
|
struct {
|
|
/* Chooses which flag subregister (f0.0 to f3.1) is used for
|
|
* conditional mod and predication.
|
|
*/
|
|
uint8_t flag_subreg:3;
|
|
|
|
bool predicate_inverse:1;
|
|
bool writes_accumulator:1; /**< instruction implicitly writes accumulator */
|
|
bool force_writemask_all:1;
|
|
bool saturate:1;
|
|
|
|
/**
|
|
* The predication mask applied to this instruction is guaranteed to
|
|
* be uniform and a superset of the execution mask of the present block.
|
|
* No currently enabled channel will be disabled by the predicate.
|
|
*/
|
|
bool predicate_trivial:1;
|
|
bool eot:1;
|
|
bool keep_payload_trailing_zeros:1;
|
|
/**
|
|
* Whether the parameters of the SEND instructions are build with
|
|
* NoMask (for A32 messages this covers only the surface handle, for
|
|
* A64 messages this covers the load address).
|
|
*
|
|
* Also used to signal a dummy render target SEND message that is
|
|
* never executed.
|
|
*/
|
|
bool has_no_mask_send_params:1;
|
|
|
|
uint8_t pad:5;
|
|
};
|
|
uint16_t bits;
|
|
};
|
|
|
|
tgl_swsb sched; /**< Scheduling info. */
|
|
|
|
bblock_t *block;
|
|
|
|
brw_reg dst;
|
|
brw_reg *src;
|
|
|
|
#ifndef NDEBUG
|
|
/** @{
|
|
* Annotation for the generated IR.
|
|
*/
|
|
const char *annotation;
|
|
/** @} */
|
|
#endif
|
|
};
|
|
|
|
struct brw_send_inst : brw_inst {
|
|
uint32_t desc;
|
|
uint32_t ex_desc;
|
|
uint32_t offset;
|
|
|
|
uint8_t mlen;
|
|
uint8_t ex_mlen;
|
|
uint8_t sfid;
|
|
|
|
/** The number of hardware registers used for a message header. */
|
|
uint8_t header_size;
|
|
|
|
union {
|
|
struct {
|
|
/**
|
|
* Turns it into a SENDC.
|
|
*/
|
|
bool check_tdr:1;
|
|
|
|
bool has_side_effects:1;
|
|
bool is_volatile:1;
|
|
|
|
/**
|
|
* Use extended bindless surface offset (26bits instead of 20bits)
|
|
*/
|
|
bool ex_bso:1;
|
|
|
|
/**
|
|
* Only for SHADER_OPCODE_SEND, @offset field contains an immediate
|
|
* part of the extended descriptor that must be encoded in the
|
|
* instruction.
|
|
*/
|
|
bool ex_desc_imm:1;
|
|
|
|
uint8_t pad:3;
|
|
};
|
|
uint8_t send_bits;
|
|
};
|
|
};
|
|
|
|
struct brw_tex_inst : brw_inst {
|
|
enum sampler_opcode sampler_opcode;
|
|
uint32_t offset;
|
|
uint8_t coord_components;
|
|
uint8_t grad_components;
|
|
bool residency:1;
|
|
bool surface_bindless:1;
|
|
bool sampler_bindless:1;
|
|
};
|
|
|
|
struct brw_mem_inst : brw_inst {
|
|
enum lsc_opcode lsc_op;
|
|
enum memory_logical_mode mode;
|
|
enum lsc_addr_surface_type binding_type;
|
|
enum lsc_data_size data_size;
|
|
|
|
uint8_t coord_components;
|
|
uint8_t components;
|
|
uint8_t flags;
|
|
|
|
/** Required alignment of address in bytes; 0 for natural alignment */
|
|
uint32_t alignment;
|
|
|
|
int32_t address_offset;
|
|
};
|
|
|
|
struct brw_dpas_inst : brw_inst {
|
|
/** Systolic depth. */
|
|
uint8_t sdepth;
|
|
|
|
/** Repeat count. */
|
|
uint8_t rcount;
|
|
};
|
|
|
|
struct brw_load_payload_inst : brw_inst {
|
|
/** The number of hardware registers used for a message header. */
|
|
uint8_t header_size;
|
|
};
|
|
|
|
struct brw_urb_inst : brw_inst {
|
|
uint32_t offset;
|
|
uint8_t components;
|
|
};
|
|
|
|
struct brw_fb_write_inst : brw_inst {
|
|
uint8_t components;
|
|
uint8_t target;
|
|
bool null_rt;
|
|
bool last_rt;
|
|
};
|
|
|
|
/**
|
|
* Make the execution of \p inst dependent on the evaluation of a possibly
|
|
* inverted predicate.
|
|
*/
|
|
static inline brw_inst *
|
|
set_predicate_inv(enum brw_predicate pred, bool inverse,
|
|
brw_inst *inst)
|
|
{
|
|
inst->predicate = pred;
|
|
inst->predicate_inverse = inverse;
|
|
return inst;
|
|
}
|
|
|
|
/**
|
|
* Make the execution of \p inst dependent on the evaluation of a predicate.
|
|
*/
|
|
static inline brw_inst *
|
|
set_predicate(enum brw_predicate pred, brw_inst *inst)
|
|
{
|
|
return set_predicate_inv(pred, false, inst);
|
|
}
|
|
|
|
/**
|
|
* Write the result of evaluating the condition given by \p mod to a flag
|
|
* register.
|
|
*/
|
|
static inline brw_inst *
|
|
set_condmod(enum brw_conditional_mod mod, brw_inst *inst)
|
|
{
|
|
inst->conditional_mod = mod;
|
|
return inst;
|
|
}
|
|
|
|
/**
|
|
* Clamp the result of \p inst to the saturation range of its destination
|
|
* datatype.
|
|
*/
|
|
static inline brw_inst *
|
|
set_saturate(bool saturate, brw_inst *inst)
|
|
{
|
|
inst->saturate = saturate;
|
|
return inst;
|
|
}
|
|
|
|
/**
|
|
* Return the number of dataflow registers written by the instruction (either
|
|
* fully or partially) counted from 'floor(reg_offset(inst->dst) /
|
|
* register_size)'. The somewhat arbitrary register size unit is 4B for the
|
|
* UNIFORM and IMM files and 32B for all other files.
|
|
*/
|
|
inline unsigned
|
|
regs_written(const brw_inst *inst)
|
|
{
|
|
assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
|
|
return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
|
|
inst->size_written -
|
|
MIN2(inst->size_written, reg_padding(inst->dst)),
|
|
REG_SIZE);
|
|
}
|
|
|
|
/**
|
|
* Return the number of dataflow registers read by the instruction (either
|
|
* fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
|
|
* register_size)'. The somewhat arbitrary register size unit is 4B for the
|
|
* UNIFORM files and 32B for all other files.
|
|
*/
|
|
inline unsigned
|
|
regs_read(const struct intel_device_info *devinfo, const brw_inst *inst, unsigned i)
|
|
{
|
|
if (inst->src[i].file == IMM)
|
|
return 1;
|
|
|
|
const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE;
|
|
return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
|
|
inst->size_read(devinfo, i) -
|
|
MIN2(inst->size_read(devinfo, i), reg_padding(inst->src[i])),
|
|
reg_size);
|
|
}
|
|
|
|
enum brw_reg_type get_exec_type(const brw_inst *inst);
|
|
|
|
static inline unsigned
|
|
get_exec_type_size(const brw_inst *inst)
|
|
{
|
|
return brw_type_size_bytes(get_exec_type(inst));
|
|
}
|
|
|
|
/**
|
|
* Return whether the instruction isn't an ALU instruction and cannot be
|
|
* assumed to complete in-order.
|
|
*/
|
|
static inline bool
|
|
is_unordered(const intel_device_info *devinfo, const brw_inst *inst)
|
|
{
|
|
return inst->is_send() || (devinfo->ver < 20 && inst->is_math()) ||
|
|
inst->opcode == BRW_OPCODE_DPAS ||
|
|
(devinfo->has_64bit_float_via_math_pipe &&
|
|
(get_exec_type(inst) == BRW_TYPE_DF ||
|
|
inst->dst.type == BRW_TYPE_DF));
|
|
}
|
|
|
|
static inline bool
|
|
has_bfloat_operands(const brw_inst *inst)
|
|
{
|
|
if (brw_type_is_bfloat(inst->dst.type))
|
|
return true;
|
|
|
|
for (int i = 0; i < inst->sources; i++) {
|
|
if (brw_type_is_bfloat(inst->src[i].type))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool has_dst_aligned_region_restriction(const intel_device_info *devinfo,
|
|
const brw_inst *inst,
|
|
brw_reg_type dst_type);
|
|
|
|
static inline bool
|
|
has_dst_aligned_region_restriction(const intel_device_info *devinfo,
|
|
const brw_inst *inst)
|
|
{
|
|
return has_dst_aligned_region_restriction(devinfo, inst, inst->dst.type);
|
|
}
|
|
|
|
bool has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
|
const brw_inst *inst,
|
|
const brw_reg *srcs, unsigned num_srcs);
|
|
|
|
static inline bool
|
|
has_subdword_integer_region_restriction(const intel_device_info *devinfo,
|
|
const brw_inst *inst)
|
|
{
|
|
return has_subdword_integer_region_restriction(devinfo, inst,
|
|
inst->src, inst->sources);
|
|
}
|
|
|
|
bool is_identity_payload(const struct intel_device_info *devinfo,
|
|
brw_reg_file file, const brw_inst *inst);
|
|
|
|
bool is_multi_copy_payload(const struct intel_device_info *devinfo,
|
|
const brw_inst *inst);
|
|
|
|
bool is_coalescing_payload(const struct brw_shader &s, const brw_inst *inst);
|
|
|
|
bool has_bank_conflict(const struct brw_isa_info *isa, const brw_inst *inst);
|
|
|
|
/* Return the subset of flag registers that an instruction could
|
|
* potentially read or write based on the execution controls and flag
|
|
* subregister number of the instruction.
|
|
*/
|
|
static inline unsigned
|
|
brw_flag_mask(const brw_inst *inst, unsigned width)
|
|
{
|
|
assert(util_is_power_of_two_nonzero(width));
|
|
const unsigned start = (inst->flag_subreg * 16 + inst->group) &
|
|
~(width - 1);
|
|
const unsigned end = start + ALIGN(inst->exec_size, width);
|
|
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
|
|
}
|
|
|
|
static inline unsigned
|
|
brw_bit_mask(unsigned n)
|
|
{
|
|
return (n >= CHAR_BIT * sizeof(brw_bit_mask(n)) ? ~0u : (1u << n) - 1);
|
|
}
|
|
|
|
static inline unsigned
|
|
brw_flag_mask(const brw_reg &r, unsigned sz)
|
|
{
|
|
if (r.file == ARF) {
|
|
const unsigned start = (r.nr - BRW_ARF_FLAG) * 4 + r.subnr;
|
|
const unsigned end = start + sz;
|
|
return brw_bit_mask(end) & ~brw_bit_mask(start);
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|