diff --git a/src/intel/compiler/brw_analysis_performance.cpp b/src/intel/compiler/brw_analysis_performance.cpp index 6c5583fe413..5c11a63878c 100644 --- a/src/intel/compiler/brw_analysis_performance.cpp +++ b/src/intel/compiler/brw_analysis_performance.cpp @@ -650,6 +650,8 @@ namespace { case LSC_OP_STORE: case LSC_OP_LOAD_CMASK: case LSC_OP_STORE_CMASK: + case LSC_OP_LOAD_CMASK_MSRT: + case LSC_OP_STORE_CMASK_MSRT: return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 0, 20 /* XXX */, 10 /* XXX */, 100 /* XXX */, 0, 0, diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 63d006fb48e..301d686ff2c 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -588,6 +588,8 @@ static const char *const lsc_operation[] = { [LSC_OP_ATOMIC_AND] = "atomic_and", [LSC_OP_ATOMIC_OR] = "atomic_or", [LSC_OP_ATOMIC_XOR] = "atomic_xor", + [LSC_OP_LOAD_CMASK_MSRT] = "load_cmask_msrt", + [LSC_OP_STORE_CMASK_MSRT] = "store_cmask_msrt", }; const char * @@ -2279,6 +2281,7 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, switch(op) { case LSC_OP_LOAD_CMASK: case LSC_OP_LOAD: + case LSC_OP_LOAD_CMASK_MSRT: format(file, ","); err |= control(file, "cache_load", devinfo->ver >= 20 ? diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index db423b6f23f..3a467ec0236 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -930,7 +930,9 @@ brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo, static inline bool lsc_opcode_has_cmask(enum lsc_opcode opcode) { - return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK; + return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK || + opcode == LSC_OP_LOAD_CMASK_MSRT || + opcode == LSC_OP_STORE_CMASK_MSRT; } static inline bool @@ -943,7 +945,8 @@ static inline bool lsc_opcode_is_store(enum lsc_opcode opcode) { return opcode == LSC_OP_STORE || - opcode == LSC_OP_STORE_CMASK; + opcode == LSC_OP_STORE_CMASK || + opcode == LSC_OP_STORE_CMASK_MSRT; } static inline bool @@ -1006,6 +1009,7 @@ lsc_op_num_data_values(unsigned _op) case LSC_OP_LOAD: case LSC_OP_LOAD_CMASK: case LSC_OP_FENCE: + case LSC_OP_LOAD_CMASK_MSRT: /* XXX: actually check docs */ return 0; default: @@ -1062,6 +1066,8 @@ lsc_op_to_legacy_atomic(unsigned _op) case LSC_OP_STORE: case LSC_OP_STORE_CMASK: case LSC_OP_FENCE: + case LSC_OP_LOAD_CMASK_MSRT: + case LSC_OP_STORE_CMASK_MSRT: unreachable("not an atomic op"); } diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index f1f2bc92a0b..9a65670d824 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -1575,7 +1575,9 @@ enum lsc_opcode { LSC_OP_ATOMIC_AND = 24, LSC_OP_ATOMIC_OR = 25, LSC_OP_ATOMIC_XOR = 26, - LSC_OP_FENCE = 31 + LSC_OP_FENCE = 31, + LSC_OP_LOAD_CMASK_MSRT = 49, + LSC_OP_STORE_CMASK_MSRT = 50 }; /* diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index b95009e56b5..11d25dce605 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -6920,6 +6920,24 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, case nir_intrinsic_image_store: case nir_intrinsic_image_atomic: case nir_intrinsic_image_atomic_swap: + /* Bspec 73734 (r50040): + * + * Instruction_StoreCmaskMSRT::Src0 Length: + * + * "num_coordinates is the number of address coordinates used in + * message. For TGM it will be 4 (U, V, R, SAMPLE_INDEX)." + * + */ + srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud( + (devinfo->ver >= 30 && + nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS) ? 4 : + nir_image_intrinsic_coord_components(instr)); + + /* MSAA image atomic accesses not supported, must be lowered to UGM */ + assert((instr->intrinsic != nir_intrinsic_bindless_image_atomic && + instr->intrinsic != nir_intrinsic_bindless_image_atomic_swap) || + nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_MS); + srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_TYPED); srcs[MEMORY_LOGICAL_BINDING] = get_nir_image_intrinsic_image(ntb, bld, instr); @@ -6928,8 +6946,6 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI); srcs[MEMORY_LOGICAL_ADDRESS] = get_nir_src(ntb, instr->src[1]); - srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = - brw_imm_ud(nir_image_intrinsic_coord_components(instr)); data_src = 3; break; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index f6a140719e5..81f7b81b3d0 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -2170,11 +2170,15 @@ lsc_op_for_nir_intrinsic(const nir_intrinsic_instr *intrin) case nir_intrinsic_image_load: case nir_intrinsic_bindless_image_load: - return LSC_OP_LOAD_CMASK; + return nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS ? + LSC_OP_LOAD_CMASK_MSRT : + LSC_OP_LOAD_CMASK; case nir_intrinsic_image_store: case nir_intrinsic_bindless_image_store: - return LSC_OP_STORE_CMASK; + return nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS ? + LSC_OP_STORE_CMASK_MSRT : + LSC_OP_STORE_CMASK; default: assert(nir_intrinsic_has_atomic_op(intrin)); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 34362cb1612..394821826c2 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -504,6 +504,8 @@ schedule_node::set_latency(const struct brw_isa_info *isa) case LSC_OP_STORE: case LSC_OP_LOAD_CMASK: case LSC_OP_STORE_CMASK: + case LSC_OP_LOAD_CMASK_MSRT: + case LSC_OP_STORE_CMASK_MSRT: latency = 300; break; case LSC_OP_FENCE: diff --git a/src/intel/compiler/brw_validate.cpp b/src/intel/compiler/brw_validate.cpp index b4e3c1f96a7..fac775af657 100644 --- a/src/intel/compiler/brw_validate.cpp +++ b/src/intel/compiler/brw_validate.cpp @@ -159,7 +159,8 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst) switch (inst->opcode) { case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: - fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK); + fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK || + op == LSC_OP_LOAD_CMASK_MSRT); fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE); fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE); break;