aco: Add missing GFX10 specific fields and some README notes.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
This commit is contained in:
Timur Kristóf 2019-09-26 17:45:13 +02:00
parent a01d796de4
commit fd1d947457
3 changed files with 33 additions and 2 deletions

View file

@ -69,12 +69,38 @@ situations.
The `image_atomic_{swap,cmpswap,add,sub}` opcodes in the GCN3 ISA reference
guide are incorrect. The Vega ISA reference guide has the correct ones.
## VINTRP encoding
The Vega ISA doc says the encoding should be `110010`, but `110101` is what works.
## VOP1 instructions encoded as VOP3
The RDNA ISA doc says that `0x140` should be added to the opcode, but that doesn't
work. What works is adding `0x180`, which LLVM also does.
## FLAT, Scratch, Global instructions
The NV bit was removed in RDNA, but some parts of the doc still mention it.
## Legacy instructions
Some instructions have a `_LEGACY` variant which implements "DX9 rules", in which
zero "wins" in multiplications, i.e. `0.0*x` is always `0.0`. The Vega ISA doc
mentions `V_MAC_LEGACY_F32`, but this instruction is not actually available on Vega.
## RDNA L0, L1 cache and DLC, GLC bits
The old L1 cache was renamed to L0, and a new L1 cache was added to RDNA. There
is one L1 cache per shader array. Some instruction encodings have DLC and
GLC bits that interact with these caches.
* DLC ("device level coherent") bit: controls the L1 cache
* GLC ("globally coherent") bit: controls the L0 cache
The recommendation from AMD devs is to always set these two bits at the same time,
as it rarely makes sense to set them independently, aside from some special
circumstances (e.g. we needn't set DLC when only one shader array is used).
# Hardware Bugs
## SMEM corrupts VCCZ on SI/CI

View file

@ -16,6 +16,8 @@ struct asm_context {
/* Captures the program and its chip class, then points `opcode` at the
 * opcode table of instr_info that matches that chip class. */
asm_context(Program* program) : program(program), chip_class(program->chip_class) {
/* GFX9 and every older chip class use the GFX9 opcode table. */
if (chip_class <= GFX9)
opcode = &instr_info.opcode_gfx9[0];
/* GFX10 has its own opcode table. */
else if (chip_class == GFX10)
opcode = &instr_info.opcode_gfx10[0];
/* NOTE(review): no branch assigns `opcode` for chip classes above GFX10;
 * whether it has a default initializer is not visible here — confirm. */
}
};

View file

@ -221,6 +221,7 @@ struct PhysReg {
/* helper expressions for special registers */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
@ -765,6 +766,7 @@ struct MTBUF_instruction : public Instruction {
*/
struct MIMG_instruction : public Instruction {
unsigned dmask; /* Data VGPR enable mask */
unsigned dim; /* NAVI: dimensionality */
bool unrm; /* Force address to be un-normalized */
bool dlc; /* NAVI: device level coherent */
bool glc; /* globally coherent */
@ -789,8 +791,9 @@ struct MIMG_instruction : public Instruction {
*/
struct FLAT_instruction : public Instruction {
uint16_t offset; /* Vega only */
bool slc;
bool glc;
bool slc; /* system level coherent */
bool glc; /* globally coherent */
bool dlc; /* NAVI: device level coherent */
bool lds;
bool nv; /* NOTE(review): the NV bit was reportedly removed in RDNA — confirm it is unused on NAVI */
};