diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index ce5306a7a0f..0840d8af410 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -451,11 +451,11 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding = (0xFF & instr->operands[2].physReg()); /* VADDR */ if (!instr->definitions.empty()) { encoding |= (0xFF & instr->definitions[0].physReg()) << 8; /* VDATA */ - } else if (instr->operands[1].regClass().type() == RegType::vgpr) { - encoding |= (0xFF & instr->operands[1].physReg()) << 8; /* VDATA */ + } else if (instr->operands.size() >= 4) { + encoding |= (0xFF & instr->operands[3].physReg()) << 8; /* VDATA */ } encoding |= (0x1F & (instr->operands[0].physReg() >> 2)) << 16; /* T# (resource) */ - if (instr->operands[1].regClass().type() == RegType::sgpr) + if (!instr->operands[1].isUndefined()) encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */ assert(!mimg->d16 || ctx.chip_class >= GFX9); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5f1b57a8365..0b539692618 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6021,10 +6021,11 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr) bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0; aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip; - aco_ptr store{create_instruction(opcode, Format::MIMG, 3, 0)}; + aco_ptr store{create_instruction(opcode, Format::MIMG, 4, 0)}; store->operands[0] = Operand(resource); - store->operands[1] = Operand(data); + store->operands[1] = Operand(s4); /* no sampler */ store->operands[2] = Operand(coords); + store->operands[3] = Operand(data); store->glc = glc; store->dlc = false; store->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array); @@ -6148,10 +6149,11 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr) Temp coords = get_image_coords(ctx, instr, type); Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), ACO_DESC_IMAGE, nullptr, true, true); - aco_ptr mimg{create_instruction(image_op, Format::MIMG, 3, return_previous ? 1 : 0)}; + aco_ptr mimg{create_instruction(image_op, Format::MIMG, 4, return_previous ? 1 : 0)}; mimg->operands[0] = Operand(resource); - mimg->operands[1] = Operand(data); + mimg->operands[1] = Operand(s4); /* no sampler */ mimg->operands[2] = Operand(coords); + mimg->operands[3] = Operand(data); if (return_previous) mimg->definitions[0] = Definition(dst); mimg->glc = return_previous; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 4a9b6060cec..9345321c1dd 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1279,8 +1279,8 @@ static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected * Vector Memory Image Instructions * Operand(0) SRSRC - Scalar GPR that specifies the resource constant. * Operand(1): SSAMP - Scalar GPR that specifies sampler constant. - * or VDATA - Vector GPR for write data. * Operand(2): VADDR - Address source. Can carry an offset or an index. + * Operand(3): VDATA - Vector GPR for write data or zero if TFE/LWE=1. * Definition(0): VDATA - Vector GPR for read result. * */ diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index cbf1aa9edef..3f0f44b6bf6 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2124,8 +2124,8 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc instr->definitions[0].setFixed(instr->operands[3].physReg()); } else if (instr->format == Format::MIMG && instr->definitions.size() == 1 && - instr->operands[1].regClass().type() == RegType::vgpr) { - instr->definitions[0].setFixed(instr->operands[1].physReg()); + instr->operands.size() >= 4) { + instr->definitions[0].setFixed(instr->operands[3].physReg()); } ctx.defs_done.reset(); diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index e4ed3f0704c..0f19a1752b5 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -435,15 +435,18 @@ bool validate_ir(Program* program) break; } case Format::MIMG: { - check(instr->operands.size() == 3, "MIMG instructions must have exactly 3 operands", instr.get()); + check(instr->operands.size() >= 3, "MIMG instructions must have 3 or 4 operands", instr.get()); + check(instr->operands.size() <= 4, "MIMG instructions must have 3 or 4 operands", instr.get()); check(instr->operands[0].hasRegClass() && (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8), "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get()); - if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::sgpr) + if (instr->operands[1].hasRegClass()) check(instr->operands[1].regClass() == s4, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get()); - else if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr) - check((instr->definitions.empty() || instr->definitions[0].regClass() == instr->operands[1].regClass() || - instr->opcode == aco_opcode::image_atomic_cmpswap || instr->opcode == aco_opcode::image_atomic_fcmpswap), - "MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr.get()); + if (instr->operands.size() >= 4) { + bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap || + instr->opcode == aco_opcode::image_atomic_fcmpswap; + check(instr->definitions.empty() || (instr->definitions[0].regClass() == instr->operands[3].regClass() || is_cmpswap), + "MIMG operands[3] (VDATA) must be the same as definitions[0] for atomics and TFE/LWE loads", instr.get()); + } check(instr->operands[2].hasRegClass() && instr->operands[2].regClass().type() == RegType::vgpr, "MIMG operands[2] (VADDR) must be VGPR", instr.get()); check(instr->definitions.empty() || (instr->definitions[0].isTemp() && instr->definitions[0].regClass().type() == RegType::vgpr),