intel/brw: Remove Gfx8- remaining opcodes

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
Caio Oliveira 2024-02-19 19:41:48 -08:00 committed by Marge Bot
parent 3ef1ed73d3
commit 5c93a0e125
14 changed files with 63 additions and 727 deletions

View file

@ -588,14 +588,14 @@ brw_disassemble(const struct brw_isa_info *isa,
}
static const struct opcode_desc opcode_descs[] = {
/* IR, HW, name, nsrc, ndst, gfx_vers */
/* IR, HW, name, nsrc, ndst, gfx_vers assuming Gfx9+ */
{ BRW_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },
{ BRW_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },
{ BRW_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_SEL, 98, "sel", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_GE(GFX45) & GFX_LT(GFX12) },
{ BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_MOVI, 99, "movi", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_NOT, 4, "not", 1, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_NOT, 100, "not", 1, 1, GFX_GE(GFX12) },
@ -609,8 +609,7 @@ static const struct opcode_desc opcode_descs[] = {
{ BRW_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_SHL, 105, "shl", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_DIM, 10, "dim", 1, 1, GFX75 },
{ BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_GE(GFX8) & GFX_LT(GFX12) },
{ BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_LT(GFX12) },
{ BRW_OPCODE_SMOV, 106, "smov", 0, 0, GFX_GE(GFX12) },
{ BRW_OPCODE_ASR, 12, "asr", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_ASR, 108, "asr", 2, 1, GFX_GE(GFX12) },
@ -622,46 +621,39 @@ static const struct opcode_desc opcode_descs[] = {
{ BRW_OPCODE_CMP, 112, "cmp", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_CMPN, 17, "cmpn", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_CMPN, 113, "cmpn", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_GE(GFX8) & GFX_LT(GFX12) },
{ BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_CSEL, 114, "csel", 3, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
{ BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_BFREV, 119, "bfrev", 1, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
{ BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_BFE, 120, "bfe", 3, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
{ BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_BFI1, 121, "bfi1", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
{ BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_BFI2, 122, "bfi2", 3, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_JMPI, 32, "jmpi", 0, 0, GFX_ALL },
{ BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_GE(GFX7) },
{ BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_ALL },
{ BRW_OPCODE_IF, 34, "if", 0, 0, GFX_ALL },
{ BRW_OPCODE_IFF, 35, "iff", 0, 0, GFX_LE(GFX5) },
{ BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_GE(GFX7) },
{ BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_ALL },
{ BRW_OPCODE_ELSE, 36, "else", 0, 0, GFX_ALL },
{ BRW_OPCODE_ENDIF, 37, "endif", 0, 0, GFX_ALL },
{ BRW_OPCODE_DO, 38, "do", 0, 0, GFX_LE(GFX5) },
{ BRW_OPCODE_CASE, 38, "case", 0, 0, GFX6 },
{ BRW_OPCODE_DO, 38, "do", 0, 0, 0 }, /* Pseudo opcode. */
{ BRW_OPCODE_WHILE, 39, "while", 0, 0, GFX_ALL },
{ BRW_OPCODE_BREAK, 40, "break", 0, 0, GFX_ALL },
{ BRW_OPCODE_CONTINUE, 41, "cont", 0, 0, GFX_ALL },
{ BRW_OPCODE_HALT, 42, "halt", 0, 0, GFX_ALL },
{ BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_GE(GFX75) },
{ BRW_OPCODE_MSAVE, 44, "msave", 0, 0, GFX_LE(GFX5) },
{ BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_GE(GFX6) },
{ BRW_OPCODE_MREST, 45, "mrest", 0, 0, GFX_LE(GFX5) },
{ BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_GE(GFX6) },
{ BRW_OPCODE_PUSH, 46, "push", 0, 0, GFX_LE(GFX5) },
{ BRW_OPCODE_FORK, 46, "fork", 0, 0, GFX6 },
{ BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_GE(GFX8) },
{ BRW_OPCODE_POP, 47, "pop", 2, 0, GFX_LE(GFX5) },
{ BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_ALL },
{ BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_ALL },
{ BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_ALL },
{ BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_ALL },
{ BRW_OPCODE_WAIT, 48, "wait", 0, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_SEND, 49, "send", 1, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_SENDC, 50, "sendc", 1, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_SEND, 49, "send", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_SENDC, 50, "sendc", 2, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },
{ BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },
{ BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_GE(GFX6) },
{ BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_LT(GFX12) },
{ BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_ALL },
{ BRW_OPCODE_ADD, 64, "add", 2, 1, GFX_ALL },
{ BRW_OPCODE_MUL, 65, "mul", 2, 1, GFX_ALL },
{ BRW_OPCODE_AVG, 66, "avg", 2, 1, GFX_ALL },
@ -673,11 +665,11 @@ static const struct opcode_desc opcode_descs[] = {
{ BRW_OPCODE_MAC, 72, "mac", 2, 1, GFX_ALL },
{ BRW_OPCODE_MACH, 73, "mach", 2, 1, GFX_ALL },
{ BRW_OPCODE_LZD, 74, "lzd", 1, 1, GFX_ALL },
{ BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_GE(GFX7) },
{ BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_GE(GFX7) },
{ BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_GE(GFX7) },
{ BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_GE(GFX7) },
{ BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_GE(GFX7) },
{ BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_ALL },
{ BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_ALL },
{ BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_ALL },
{ BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_ALL },
{ BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_ALL },
{ BRW_OPCODE_SAD2, 80, "sad2", 2, 1, GFX_ALL },
{ BRW_OPCODE_SADA2, 81, "sada2", 2, 1, GFX_ALL },
{ BRW_OPCODE_ADD3, 82, "add3", 3, 1, GFX_GE(GFX125) },
@ -686,13 +678,12 @@ static const struct opcode_desc opcode_descs[] = {
{ BRW_OPCODE_DP3, 86, "dp3", 2, 1, GFX_LT(GFX11) },
{ BRW_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) },
{ BRW_OPCODE_DP4A, 88, "dp4a", 3, 1, GFX_GE(GFX12) },
{ BRW_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) },
{ BRW_OPCODE_LINE, 89, "line", 2, 1, GFX9 },
{ BRW_OPCODE_DPAS, 89, "dpas", 3, 1, GFX_GE(GFX125) },
{ BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) },
{ BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) },
{ BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) },
{ BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_GE(GFX8) },
{ BRW_OPCODE_NENOP, 125, "nenop", 0, 0, GFX45 },
{ BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX9 },
{ BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_ALL },
{ BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX9 },
{ BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_ALL },
{ BRW_OPCODE_NOP, 126, "nop", 0, 0, GFX_LT(GFX12) },
{ BRW_OPCODE_NOP, 96, "nop", 0, 0, GFX_GE(GFX12) }
};

View file

@ -1518,17 +1518,6 @@ void brw_oword_block_read(struct brw_codegen *p,
unsigned brw_scratch_surface_idx(const struct brw_codegen *p);
void brw_oword_block_read_scratch(struct brw_codegen *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset);
void brw_oword_block_write_scratch(struct brw_codegen *p,
struct brw_reg mrf,
int num_regs,
unsigned offset);
void gfx7_block_read_scratch(struct brw_codegen *p,
struct brw_reg dest,
int num_regs,

View file

@ -2407,7 +2407,6 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
break;
case BRW_OPCODE_IF:
case BRW_OPCODE_IFF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_WHILE:

View file

@ -180,53 +180,45 @@ enum opcode {
BRW_OPCODE_SYNC,
BRW_OPCODE_MOV,
BRW_OPCODE_SEL,
BRW_OPCODE_MOVI, /**< G45+ */
BRW_OPCODE_MOVI,
BRW_OPCODE_NOT,
BRW_OPCODE_AND,
BRW_OPCODE_OR,
BRW_OPCODE_XOR,
BRW_OPCODE_SHR,
BRW_OPCODE_SHL,
BRW_OPCODE_DIM, /**< Gfx7.5 only */
BRW_OPCODE_SMOV, /**< Gfx8+ */
BRW_OPCODE_SMOV,
BRW_OPCODE_ASR,
BRW_OPCODE_ROR, /**< Gfx11+ */
BRW_OPCODE_ROL, /**< Gfx11+ */
BRW_OPCODE_CMP,
BRW_OPCODE_CMPN,
BRW_OPCODE_CSEL, /**< Gfx8+ */
BRW_OPCODE_BFREV, /**< Gfx7+ */
BRW_OPCODE_BFE, /**< Gfx7+ */
BRW_OPCODE_BFI1, /**< Gfx7+ */
BRW_OPCODE_BFI2, /**< Gfx7+ */
BRW_OPCODE_CSEL,
BRW_OPCODE_BFREV,
BRW_OPCODE_BFE,
BRW_OPCODE_BFI1,
BRW_OPCODE_BFI2,
BRW_OPCODE_JMPI,
BRW_OPCODE_BRD, /**< Gfx7+ */
BRW_OPCODE_BRD,
BRW_OPCODE_IF,
BRW_OPCODE_IFF, /**< Pre-Gfx6 */
BRW_OPCODE_BRC, /**< Gfx7+ */
BRW_OPCODE_BRC,
BRW_OPCODE_ELSE,
BRW_OPCODE_ENDIF,
BRW_OPCODE_DO, /**< Pre-Gfx6 */
BRW_OPCODE_CASE, /**< Gfx6 only */
BRW_OPCODE_DO, /**< Used as pseudo opcode, will be moved later. */
BRW_OPCODE_WHILE,
BRW_OPCODE_BREAK,
BRW_OPCODE_CONTINUE,
BRW_OPCODE_HALT,
BRW_OPCODE_CALLA, /**< Gfx7.5+ */
BRW_OPCODE_MSAVE, /**< Pre-Gfx6 */
BRW_OPCODE_CALL, /**< Gfx6+ */
BRW_OPCODE_MREST, /**< Pre-Gfx6 */
BRW_OPCODE_RET, /**< Gfx6+ */
BRW_OPCODE_PUSH, /**< Pre-Gfx6 */
BRW_OPCODE_FORK, /**< Gfx6 only */
BRW_OPCODE_GOTO, /**< Gfx8+ */
BRW_OPCODE_POP, /**< Pre-Gfx6 */
BRW_OPCODE_CALLA,
BRW_OPCODE_CALL,
BRW_OPCODE_RET,
BRW_OPCODE_GOTO,
BRW_OPCODE_WAIT,
BRW_OPCODE_SEND,
BRW_OPCODE_SENDC,
BRW_OPCODE_SENDS, /**< Gfx9+ */
BRW_OPCODE_SENDSC, /**< Gfx9+ */
BRW_OPCODE_MATH, /**< Gfx6+ */
BRW_OPCODE_SENDS,
BRW_OPCODE_SENDSC,
BRW_OPCODE_MATH,
BRW_OPCODE_ADD,
BRW_OPCODE_MUL,
BRW_OPCODE_AVG,
@ -238,11 +230,11 @@ enum opcode {
BRW_OPCODE_MAC,
BRW_OPCODE_MACH,
BRW_OPCODE_LZD,
BRW_OPCODE_FBH, /**< Gfx7+ */
BRW_OPCODE_FBL, /**< Gfx7+ */
BRW_OPCODE_CBIT, /**< Gfx7+ */
BRW_OPCODE_ADDC, /**< Gfx7+ */
BRW_OPCODE_SUBB, /**< Gfx7+ */
BRW_OPCODE_FBH,
BRW_OPCODE_FBL,
BRW_OPCODE_CBIT,
BRW_OPCODE_ADDC,
BRW_OPCODE_SUBB,
BRW_OPCODE_SAD2,
BRW_OPCODE_SADA2,
BRW_OPCODE_ADD3, /* Gen12+ only */
@ -253,29 +245,19 @@ enum opcode {
BRW_OPCODE_DP4A, /**< Gfx12+ */
BRW_OPCODE_LINE,
BRW_OPCODE_DPAS, /**< Gfx12.5+ */
BRW_OPCODE_PLN, /**< G45+ */
BRW_OPCODE_MAD, /**< Gfx6+ */
BRW_OPCODE_LRP, /**< Gfx6+ */
BRW_OPCODE_MADM, /**< Gfx8+ */
BRW_OPCODE_NENOP, /**< G45 only */
BRW_OPCODE_PLN, /**< Up until Gfx9 */
BRW_OPCODE_MAD,
BRW_OPCODE_LRP,
BRW_OPCODE_MADM,
BRW_OPCODE_NOP,
NUM_BRW_OPCODES,
/* These are compiler backend opcodes that get translated into other
* instructions.
*/
FS_OPCODE_FB_WRITE = NUM_BRW_OPCODES,
/**
* Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
* individual sources instead of as a single payload blob. The
* position/ordering of the arguments are defined by the enum
* fb_write_logical_srcs.
* The position/ordering of the arguments are defined
* by the enum fb_write_logical_srcs.
*/
FS_OPCODE_FB_WRITE_LOGICAL,
FS_OPCODE_REP_FB_WRITE,
FS_OPCODE_FB_WRITE_LOGICAL = NUM_BRW_OPCODES,
FS_OPCODE_FB_READ,
FS_OPCODE_FB_READ_LOGICAL,
@ -459,10 +441,6 @@ enum opcode {
*/
FS_OPCODE_SCHEDULING_FENCE,
SHADER_OPCODE_GFX4_SCRATCH_READ,
SHADER_OPCODE_GFX4_SCRATCH_WRITE,
SHADER_OPCODE_GFX7_SCRATCH_READ,
SHADER_OPCODE_SCRATCH_HEADER,
/**
@ -559,155 +537,12 @@ enum opcode {
FS_OPCODE_PIXEL_X,
FS_OPCODE_PIXEL_Y,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GFX7,
VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
/**
* Terminate the geometry shader thread by doing an empty URB write.
*
* This opcode doesn't do an implied move from R0 to the first MRF. This
* allows the geometry shader to override the "GS Number of Output Vertices
* for Slot {0,1}" fields in the message header.
*/
GS_OPCODE_THREAD_END,
/**
* Set the "Slot {0,1} Offset" fields of a URB_WRITE message header.
*
* - dst is the MRF containing the message header.
*
* - src0.x indicates which portion of the URB should be written to (e.g. a
* vertex number)
*
* - src1 is an immediate multiplier which will be applied to src0
* (e.g. the size of a single vertex in the URB).
*
* Note: the hardware will apply this offset *in addition to* the offset in
* vec4_instruction::offset.
*/
GS_OPCODE_SET_WRITE_OFFSET,
/**
* Set the "GS Number of Output Vertices for Slot {0,1}" fields of a
* URB_WRITE message header.
*
* - dst is the MRF containing the message header.
*
* - src0.x is the vertex count. The upper 16 bits will be ignored.
*/
GS_OPCODE_SET_VERTEX_COUNT,
/**
* Set DWORD 2 of dst to the value in src.
*/
GS_OPCODE_SET_DWORD_2,
/**
* Prepare the dst register for storage in the "Channel Mask" fields of a
* URB_WRITE message header.
*
* DWORD 4 of dst is shifted left by 4 bits, so that later,
* GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
* final channel mask.
*
* Note: since GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to
* form the final channel mask, DWORDs 0 and 4 of the dst register must not
* have any extraneous bits set prior to execution of this opcode (that is,
* they should be in the range 0x0 to 0xf).
*/
GS_OPCODE_PREPARE_CHANNEL_MASKS,
/**
* Set the "Channel Mask" fields of a URB_WRITE message header.
*
* - dst is the MRF containing the message header.
*
* - src.x is the channel mask, as prepared by
* GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to
* form the final channel mask.
*/
GS_OPCODE_SET_CHANNEL_MASKS,
/**
* Get the "Instance ID" fields from the payload.
*
* - dst is the GRF for gl_InvocationID.
*/
GS_OPCODE_GET_INSTANCE_ID,
/**
* Send a FF_SYNC message to allocate initial URB handles (gfx6).
*
* - dst will be used as the writeback register for the FF_SYNC operation.
*
* - src0 is the number of primitives written.
*
* - src1 is the value to hold in M0.0: number of SO vertices to write
* and number of SO primitives needed. Its value will be overwritten
* with the SVBI values if transform feedback is enabled.
*
* Note: This opcode uses an implicit MRF register for the ff_sync message
* header, so the caller is expected to set inst->base_mrf and initialize
* that MRF register to r0. This opcode will also write to this MRF register
* to include the allocated URB handle so it can then be reused directly as
* the header in the URB write operation we are allocating the handle for.
*/
GS_OPCODE_FF_SYNC,
/**
* Move r0.1 (which holds PrimitiveID information in gfx6) to a separate
* register.
*
* - dst is the GRF where PrimitiveID information will be moved.
*/
GS_OPCODE_SET_PRIMITIVE_ID,
/**
* Write transform feedback data to the SVB by sending a SVB WRITE message.
* Used in gfx6.
*
* - dst is the MRF register containing the message header.
*
* - src0 is the register where the vertex data is going to be copied from.
*
* - src1 is the destination register when write commit occurs.
*/
GS_OPCODE_SVB_WRITE,
/**
* Set destination index in the SVB write message payload (M0.5). Used
* in gfx6 for transform feedback.
*
* - dst is the header to save the destination indices for SVB WRITE.
* - src is the register that holds the destination indices value.
*/
GS_OPCODE_SVB_SET_DST_INDEX,
/**
* Prepare Mx.0 subregister for being used in the FF_SYNC message header.
* Used in gfx6 for transform feedback.
*
* - dst will hold the register with the final Mx.0 value.
*
* - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
*
* - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
*
* - src2 is the value to hold in M0: number of SO vertices to write
* and number of SO primitives needed.
*/
GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
/**
* Terminate the compute shader.
*/
@ -742,17 +577,6 @@ enum opcode {
/** Fills out a relocatable immediate */
SHADER_OPCODE_MOV_RELOC_IMM,
TCS_OPCODE_GET_INSTANCE_ID,
TCS_OPCODE_GET_PRIMITIVE_ID,
TCS_OPCODE_CREATE_BARRIER_HEADER,
TCS_OPCODE_SRC0_010_IS_ZERO,
TCS_OPCODE_RELEASE_INPUT,
TCS_OPCODE_THREAD_END,
TES_OPCODE_GET_PRIMITIVE_ID,
TES_OPCODE_CREATE_INPUT_READ_HEADER,
TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
SHADER_OPCODE_BTD_SPAWN_LOGICAL,
SHADER_OPCODE_BTD_RETIRE_LOGICAL,

View file

@ -224,8 +224,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
if (reg.file == BRW_IMMEDIATE_VALUE) {
if (reg.type == BRW_REGISTER_TYPE_DF ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_DIM)
if (reg.type == BRW_REGISTER_TYPE_DF)
brw_inst_set_imm_df(devinfo, inst, reg.df);
else if (reg.type == BRW_REGISTER_TYPE_UQ ||
reg.type == BRW_REGISTER_TYPE_Q)
@ -963,7 +962,6 @@ ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
@ -1520,165 +1518,6 @@ brw_scratch_surface_idx(const struct brw_codegen *p)
return GFX8_BTI_STATELESS_NON_COHERENT;
}
/**
* Write a block of OWORDs (half a GRF each) to the scratch buffer,
* using a constant offset per channel.
*
* The offset must be aligned to oword size (16 bytes). Used for
* register spilling.
*
* p is the codegen context the instructions are emitted into, mrf is the
* message register that receives the message header (and is used as src0
* of the SEND), num_regs is the number of registers of data to write and
* offset is the byte offset into the scratch buffer.
*/
void brw_oword_block_write_scratch(struct brw_codegen *p,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
const struct intel_device_info *devinfo = p->devinfo;
const unsigned target_cache = GFX7_SFID_DATAPORT_DATA_CACHE;
const struct tgl_swsb swsb = brw_get_default_swsb(p);
uint32_t msg_type;
/* Convert the byte offset into OWORD (16-byte) units. */
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
/* One header register plus num_regs registers of data. */
const unsigned mlen = 1 + num_regs;
/* Set up the message header. This is g0, with g0.2 filled with
* the offset. We don't want to leave our offset around in g0 or
* it'll screw up texture samples, so set it up inside the message
* reg.
*/
{
brw_push_insn_state(p);
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Copy g0 into the message register to form the header. */
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
brw_pop_insn_state(p);
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
/* Emit the SEND that performs the OWORD block write. */
{
struct brw_reg dest;
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
int send_commit_msg;
/* NOTE(review): src_header is computed here but is not referenced by
* the rest of this function as shown; src0 of the SEND is mrf.
*/
struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW);
brw_inst_set_sfid(devinfo, insn, target_cache);
if (brw_inst_exec_size(devinfo, insn) >= 16)
src_header = vec16(src_header);
assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
/* Until gfx6, writes followed by reads from the same location
* are not guaranteed to be ordered unless write_commit is set.
* If set, then a no-op write is issued to the destination
* register to set a dependency, and a read from the destination
* can be used to ensure the ordering.
*
* For gfx6, only writes between different threads need ordering
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
/* No write commit is requested, so the destination is the null register. */
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
send_commit_msg = 0;
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, mrf);
msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
brw_set_desc(p, insn,
brw_message_desc(devinfo, mlen, send_commit_msg, true) |
brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
msg_type, send_commit_msg));
}
}
/**
* Read a block of owords (half a GRF each) from the scratch buffer
* using a constant index per channel.
*
* Offset must be aligned to oword size (16 bytes). Used for register
* spilling.
*
* p is the codegen context the instructions are emitted into, dest is the
* register that receives the data, num_regs is the number of registers to
* read and offset is the byte offset into the scratch buffer. The value
* passed in mrf is not used: it is overwritten with a retyped copy of dest
* before being read (see below).
*/
void
brw_oword_block_read_scratch(struct brw_codegen *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
const struct intel_device_info *devinfo = p->devinfo;
const struct tgl_swsb swsb = brw_get_default_swsb(p);
/* Convert the byte offset into OWORD (16-byte) units. */
offset /= 16;
/* On gen 7 and above, we no longer have message registers and we can
* send from any register we want. By using the destination register
* for the message, we guarantee that the implied message write won't
* accidentally overwrite anything. This has been a problem because
* the MRF registers and source for the final FB write are both fixed
* and may overlap.
*/
mrf = retype(dest, BRW_REGISTER_TYPE_UD);
dest = retype(dest, BRW_REGISTER_TYPE_UW);
const unsigned rlen = num_regs;
const unsigned target_cache = GFX7_SFID_DATAPORT_DATA_CACHE;
/* Build the message header: a copy of g0 with element 2 set to the offset. */
{
brw_push_insn_state(p);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
brw_pop_insn_state(p);
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
/* Emit the SEND that performs the OWORD block read into dest. */
{
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
brw_inst_set_sfid(devinfo, insn, target_cache);
assert(brw_inst_pred_control(devinfo, insn) == 0);
brw_set_dest(p, insn, dest); /* UW? */
brw_set_src0(p, insn, mrf);
/* The arguments 1 and rlen are presumably the message length (header
* only) and the response length -- confirm against brw_message_desc().
*/
brw_set_desc(p, insn,
brw_message_desc(devinfo, 1, rlen, true) |
brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
}
}
void
gfx7_block_read_scratch(struct brw_codegen *p,
struct brw_reg dest,

View file

@ -216,7 +216,6 @@ fs_inst::is_send_from_grf() const
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == VGRF;
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_READ:
return src[0].file == VGRF;
default:
@ -229,7 +228,6 @@ fs_inst::is_control_source(unsigned arg) const
{
switch (opcode) {
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
return arg == 0;
case SHADER_OPCODE_BROADCAST:
@ -277,7 +275,6 @@ bool
fs_inst::is_payload(unsigned arg) const
{
switch (opcode) {
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_READ:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
@ -864,16 +861,6 @@ fs_inst::size_read(int arg) const
}
break;
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_REP_FB_WRITE:
if (arg == 0) {
if (base_mrf >= 0)
return src[0].file == BAD_FILE ? 0 : 2 * REG_SIZE;
else
return mlen * REG_SIZE;
}
break;
case FS_OPCODE_FB_READ:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
@ -881,11 +868,6 @@ fs_inst::size_read(int arg) const
return mlen * REG_SIZE;
break;
case FS_OPCODE_SET_SAMPLE_ID:
if (arg == 1)
return 1;
break;
case FS_OPCODE_LINTERP:
if (arg == 1)
return 16;
@ -1022,11 +1004,10 @@ fs_inst::flags_read(const intel_device_info *devinfo) const
unsigned
fs_inst::flags_written(const intel_device_info *devinfo) const
{
if ((conditional_mod && (opcode != BRW_OPCODE_SEL &&
opcode != BRW_OPCODE_CSEL &&
opcode != BRW_OPCODE_IF &&
opcode != BRW_OPCODE_WHILE)) ||
opcode == FS_OPCODE_FB_WRITE) {
if (conditional_mod && (opcode != BRW_OPCODE_SEL &&
opcode != BRW_OPCODE_CSEL &&
opcode != BRW_OPCODE_IF &&
opcode != BRW_OPCODE_WHILE)) {
return brw_fs_flag_mask(this, 1);
} else if (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
@ -1083,15 +1064,7 @@ fs_inst::implied_mrf_writes() const
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_SAMPLEINFO:
return 1;
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_REP_FB_WRITE:
return src[0].file == BAD_FILE ? 0 : 2;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_GFX4_SCRATCH_READ:
return 1;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
return mlen;
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
return mlen;
default:
unreachable("not reached");
@ -2160,10 +2133,7 @@ brw_fb_write_msg_control(const fs_inst *inst,
{
uint32_t mctl;
if (inst->opcode == FS_OPCODE_REP_FB_WRITE) {
assert(inst->group == 0 && inst->exec_size == 16);
mctl = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
} else if (prog_data->dual_src_blend) {
if (prog_data->dual_src_blend) {
assert(inst->exec_size == 8);
if (inst->group % 16 == 0)

View file

@ -461,19 +461,11 @@ private:
struct brw_reg dst, struct brw_reg src);
void generate_ddy(const fs_inst *inst,
struct brw_reg dst, struct brw_reg src);
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst);
void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_set_sample_id(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_halt(fs_inst *inst);
void generate_mov_indirect(fs_inst *inst,

View file

@ -603,7 +603,6 @@ namespace brw {
ALU3(BFI2)
ALU1(BFREV)
ALU1(CBIT)
ALU1(DIM)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)

View file

@ -740,10 +740,6 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
return false;
}
if (has_source_modifiers &&
inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_WRITE)
return false;
/* Some instructions implemented in the generator backend, such as
* derivatives, assume that their operands are packed so we can't
* generally propagate strided regions to them.

View file

@ -795,63 +795,6 @@ fs_generator::generate_halt(fs_inst *)
brw_HALT(p);
}
/* Emit code for a scratch write of src: the write is split into chunks of
 * lower_size channels, and each chunk is copied into the MRF following
 * inst->base_mrf and then written out with brw_oword_block_write_scratch().
 */
void
fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
{
/* The 32-wide messages only respect the first 16-wide half of the channel
* enable signals which are replicated identically for the second group of
* 16 channels, so we cannot use them unless the write is marked
* force_writemask_all.
*/
const unsigned lower_size = inst->force_writemask_all ? inst->exec_size :
MIN2(16, inst->exec_size);
/* Presumably the number of registers occupied by lower_size 4-byte
* channels (assumes REG_SIZE is the register size in bytes).
*/
const unsigned block_size = 4 * lower_size / REG_SIZE;
const tgl_swsb swsb = brw_get_default_swsb(p);
assert(inst->mlen != 0);
brw_push_insn_state(p);
brw_set_default_exec_size(p, cvt(lower_size) - 1);
/* One MOV + block write per lower_size-wide chunk of the instruction. */
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
brw_set_default_group(p, inst->group + lower_size * i);
/* Chunks after the first depend on the SBID of the incoming swsb
* annotation; the first chunk keeps its source dependency.
*/
if (i > 0) {
assert(swsb.mode & TGL_SBID_SET);
brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_SRC, swsb.sbid));
} else {
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
}
/* Copy this chunk of src into the register after base_mrf. */
brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
/* Each chunk is written block_size * REG_SIZE bytes past the previous one. */
brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf),
block_size,
inst->offset + block_size * REG_SIZE * i);
}
brw_pop_insn_state(p);
}
/* Emit code for a scratch read into dst by calling
 * brw_oword_block_read_scratch() with inst->exec_size / 8 registers at
 * inst->offset, passing the message register at inst->base_mrf.
 */
void
fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst)
{
/* Wider than 16 channels is only accepted with force_writemask_all. */
assert(inst->exec_size <= 16 || inst->force_writemask_all);
assert(inst->mlen != 0);
brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf),
inst->exec_size / 8, inst->offset);
}
/* Emit code for a scratch read into dst by calling
 * gfx7_block_read_scratch() with inst->exec_size / 8 registers at
 * inst->offset. No message register is passed to that helper.
 */
void
fs_generator::generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst)
{
/* Wider than 16 channels is only accepted with force_writemask_all. */
assert(inst->exec_size <= 16 || inst->force_writemask_all);
gfx7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset);
}
/* The A32 messages take a buffer base address in header.5:[31:0] (See
* MH1_A32_PSM for typed messages or MH_A32_GO for byte/dword scattered
* and OWord block messages in the SKL PRM Vol. 2d for more details.)
@ -943,35 +886,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
read_offset, surf_index);
}
/* Sets vstride=1, width=4, hstride=0 of register src1 during
* the ADD instruction.
*
* Emits one ADD of src0 and the restrided src1 into dst per chunk of at
* most 16 channels; dst and src0 must be of type D or UD.
*/
void
fs_generator::generate_set_sample_id(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
assert(dst.type == BRW_REGISTER_TYPE_D ||
dst.type == BRW_REGISTER_TYPE_UD);
assert(src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD);
/* src1 with the region described in the header comment applied. */
const struct brw_reg reg = stride(src1, 1, 4, 0);
const unsigned lower_size = MIN2(inst->exec_size, 16);
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
/* dst advances by lower_size / 8 per chunk; the src0 offset is derived
* from its vstride/width region (zero when vstride == 0), and src1
* advances by lower_size / 4 sub-elements per chunk.
*/
brw_inst *insn = brw_ADD(p, offset(dst, i * lower_size / 8),
offset(src0, (src0.vstride == 0 ? 0 : (1 << (src0.vstride - 1)) *
(i * lower_size / (1 << src0.width))) *
type_sz(src0.type) / REG_SIZE),
suboffset(reg, i * lower_size / 4));
/* Execution size and channel group are set directly on the emitted ADD. */
brw_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1);
brw_inst_set_group(devinfo, insn, inst->group + lower_size * i);
brw_set_default_swsb(p, tgl_swsb_null());
}
}
void
fs_generator::enable_debug(const char *shader_name)
{
@ -1411,21 +1325,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
generate_ddy(inst, dst, src[0]);
break;
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
generate_scratch_write(inst, src[0]);
send_count++;
break;
case SHADER_OPCODE_GFX4_SCRATCH_READ:
generate_scratch_read(inst, dst);
send_count++;
break;
case SHADER_OPCODE_GFX7_SCRATCH_READ:
generate_scratch_read_gfx7(inst, dst);
send_count++;
break;
case SHADER_OPCODE_SCRATCH_HEADER:
generate_scratch_header(inst, dst);
break;
@ -1571,10 +1470,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
break;
}
case FS_OPCODE_SET_SAMPLE_ID:
generate_set_sample_id(inst, dst, src[0], src[1]);
break;
case SHADER_OPCODE_HALT_TARGET:
/* This is the place where the final HALT needs to be inserted if
* we've emitted any discards. If not, this will emit no code.

View file

@ -505,18 +505,6 @@ fs_reg_alloc::setup_inst_interference(const fs_inst *inst)
inst->dst.file == VGRF)
ra_add_node_interference(g, first_vgrf_node + inst->dst.nr,
grf127_send_hack_node);
/* Spilling instructions are generated as SEND messages from MRF, but as
* Gfx7+ supports sending from GRF, the driver will assign these MRF
* registers to a GRF. The implementation reuses the dest of the send
* message as its source. Since the two are therefore certain to overlap,
* we create an interference between the destination and grf127.
*/
if ((inst->opcode == SHADER_OPCODE_GFX7_SCRATCH_READ ||
inst->opcode == SHADER_OPCODE_GFX4_SCRATCH_READ) &&
inst->dst.file == VGRF)
ra_add_node_interference(g, first_vgrf_node + inst->dst.nr,
grf127_send_hack_node);
}
/* From the Skylake PRM Vol. 2a docs for sends:
@ -918,7 +906,6 @@ fs_reg_alloc::set_spill_costs()
break;
case BRW_OPCODE_IF:
case BRW_OPCODE_IFF:
block_scale *= 0.5;
break;

View file

@ -283,7 +283,6 @@ namespace {
case BRW_OPCODE_XOR:
case BRW_OPCODE_SHR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_DIM:
case BRW_OPCODE_ASR:
case BRW_OPCODE_CMPN:
case BRW_OPCODE_BFREV:
@ -315,18 +314,6 @@ namespace {
case FS_OPCODE_DDY_COARSE:
case FS_OPCODE_PIXEL_X:
case FS_OPCODE_PIXEL_Y:
case FS_OPCODE_SET_SAMPLE_ID:
case GS_OPCODE_SET_DWORD_2:
case GS_OPCODE_SET_WRITE_OFFSET:
case GS_OPCODE_SET_VERTEX_COUNT:
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
case GS_OPCODE_SET_CHANNEL_MASKS:
case GS_OPCODE_GET_INSTANCE_ID:
case GS_OPCODE_SET_PRIMITIVE_ID:
case GS_OPCODE_SVB_SET_DST_INDEX:
case TCS_OPCODE_SRC0_010_IS_ZERO:
case TCS_OPCODE_GET_PRIMITIVE_ID:
case TES_OPCODE_GET_PRIMITIVE_ID:
case SHADER_OPCODE_READ_SR_REG:
if (devinfo->ver >= 11) {
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,
@ -565,27 +552,6 @@ namespace {
0, 2 /* XXX */,
0, 0, 0, 8 /* XXX */, 0, 0);
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
case TCS_OPCODE_GET_INSTANCE_ID:
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
return calculate_desc(info, EU_UNIT_FPU, 22 /* XXX */, 0, 0,
6 /* XXX */, 0,
0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */,
0, 0);
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
case TCS_OPCODE_CREATE_BARRIER_HEADER:
return calculate_desc(info, EU_UNIT_FPU, 32 /* XXX */, 0, 0,
8 /* XXX */, 0,
0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */,
0, 0);
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
return calculate_desc(info, EU_UNIT_FPU, 12 /* XXX */, 0, 0,
4 /* XXX */, 0,
0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */,
0, 0);
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
@ -608,18 +574,10 @@ namespace {
case SHADER_OPCODE_TG4_OFFSET_LOD:
case SHADER_OPCODE_TG4_OFFSET_BIAS:
case SHADER_OPCODE_SAMPLEINFO:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16 /* XXX */,
8 /* XXX */, 750 /* XXX */, 0, 0,
2 /* XXX */, 0);
case GS_OPCODE_THREAD_END:
case GS_OPCODE_FF_SYNC:
case TCS_OPCODE_RELEASE_INPUT:
case TCS_OPCODE_THREAD_END:
return calculate_desc(info, EU_UNIT_URB, 2, 0, 0, 0, 6 /* XXX */,
32 /* XXX */, 200 /* XXX */, 0, 0, 0, 0);
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_INTERLOCK:
switch (info.sfid) {
@ -640,33 +598,14 @@ namespace {
abort();
}
case SHADER_OPCODE_GFX4_SCRATCH_READ:
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
case SHADER_OPCODE_GFX7_SCRATCH_READ:
return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 0, 8 /* XXX */,
10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0);
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_READ:
case FS_OPCODE_REP_FB_WRITE:
return calculate_desc(info, EU_UNIT_DP_RC, 2, 0, 0, 0, 450 /* XXX */,
10 /* XXX */, 300 /* XXX */, 0, 0, 0, 0);
case GS_OPCODE_SVB_WRITE:
return calculate_desc(info, EU_UNIT_DP_RC, 2 /* XXX */, 0, 0,
0, 450 /* XXX */,
10 /* XXX */, 300 /* XXX */, 0, 0,
0, 0);
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return calculate_desc(info, EU_UNIT_DP_CC, 2, 0, 0, 0, 16 /* XXX */,
10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0);
case VS_OPCODE_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16,
8, 750, 0, 0, 2, 0);
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:

View file

@ -317,9 +317,7 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
latency = 100;
break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD:
/* testing using varying-index pull constants:
*
* 16 cycles:
@ -349,18 +347,6 @@ schedule_node::set_latency(const struct brw_isa_info *isa)
latency = 200;
break;
case SHADER_OPCODE_GFX7_SCRATCH_READ:
/* Testing a load from offset 0, that had been previously written:
*
* send(8) g114<1>UW g0<8,8,1>F data (0, 0, 0) mlen 1 rlen 1 { align1 WE_normal 1Q };
* mov(8) null g114<8,8,1>F { align1 WE_normal 1Q };
*
* The cycles spent seemed to be grouped around 40-50 (as low as 38),
* then around 140. Presumably this is cache hit vs miss.
*/
latency = 50;
break;
case SHADER_OPCODE_SEND:
switch (inst->sfid) {
case BRW_SFID_SAMPLER: {

View file

@ -164,12 +164,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
assert(brw_opcode_desc(isa, op)->name);
return brw_opcode_desc(isa, op)->name;
case FS_OPCODE_FB_WRITE:
return "fb_write";
case FS_OPCODE_FB_WRITE_LOGICAL:
return "fb_write_logical";
case FS_OPCODE_REP_FB_WRITE:
return "rep_fb_write";
case FS_OPCODE_FB_READ:
return "fb_read";
case FS_OPCODE_FB_READ_LOGICAL:
@ -342,12 +338,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
case FS_OPCODE_PACK:
return "pack";
case SHADER_OPCODE_GFX4_SCRATCH_READ:
return "gfx4_scratch_read";
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
return "gfx4_scratch_write";
case SHADER_OPCODE_GFX7_SCRATCH_READ:
return "gfx7_scratch_read";
case SHADER_OPCODE_SCRATCH_HEADER:
return "scratch_header";
@ -398,14 +388,9 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return "uniform_pull_const";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4:
return "varying_pull_const_gfx4";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
return "varying_pull_const_logical";
case FS_OPCODE_SET_SAMPLE_ID:
return "set_sample_id";
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
return "pack_half_2x16_split";
@ -419,38 +404,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return "interp_per_slot_offset";
case VS_OPCODE_PULL_CONSTANT_LOAD:
return "pull_constant_load";
case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7:
return "pull_constant_load_gfx7";
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
return "unpack_flags_simd4x2";
case GS_OPCODE_THREAD_END:
return "gs_thread_end";
case GS_OPCODE_SET_WRITE_OFFSET:
return "set_write_offset";
case GS_OPCODE_SET_VERTEX_COUNT:
return "set_vertex_count";
case GS_OPCODE_SET_DWORD_2:
return "set_dword_2";
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
return "prepare_channel_masks";
case GS_OPCODE_SET_CHANNEL_MASKS:
return "set_channel_masks";
case GS_OPCODE_GET_INSTANCE_ID:
return "get_instance_id";
case GS_OPCODE_FF_SYNC:
return "ff_sync";
case GS_OPCODE_SET_PRIMITIVE_ID:
return "set_primitive_id";
case GS_OPCODE_SVB_WRITE:
return "gs_svb_write";
case GS_OPCODE_SVB_SET_DST_INDEX:
return "gs_svb_set_dst_index";
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
return "gs_ff_sync_set_primitives";
case CS_OPCODE_CS_TERMINATE:
return "cs_terminate";
case SHADER_OPCODE_BARRIER:
@ -466,25 +419,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
case SHADER_OPCODE_MOV_RELOC_IMM:
return "mov_reloc_imm";
case TCS_OPCODE_GET_INSTANCE_ID:
return "tcs_get_instance_id";
case TCS_OPCODE_GET_PRIMITIVE_ID:
return "tcs_get_primitive_id";
case TCS_OPCODE_CREATE_BARRIER_HEADER:
return "tcs_create_barrier_header";
case TCS_OPCODE_SRC0_010_IS_ZERO:
return "tcs_src0<0,1,0>_is_zero";
case TCS_OPCODE_RELEASE_INPUT:
return "tcs_release_input";
case TCS_OPCODE_THREAD_END:
return "tcs_thread_end";
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
return "tes_create_input_read_header";
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
return "tes_add_indirect_urb_offset";
case TES_OPCODE_GET_PRIMITIVE_ID:
return "tes_get_primitive_id";
case RT_OPCODE_TRACE_RAY_LOGICAL:
return "rt_trace_ray_logical";
@ -1042,7 +976,6 @@ backend_instruction::has_side_effects() const
case BRW_OPCODE_SYNC:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
@ -1054,11 +987,8 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_URB_WRITE_LOGICAL:
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_WRITE_LOGICAL:
case FS_OPCODE_REP_FB_WRITE:
case SHADER_OPCODE_BARRIER:
case TCS_OPCODE_RELEASE_INPUT:
case SHADER_OPCODE_RND_MODE:
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
case FS_OPCODE_SCHEDULING_FENCE: