diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index 0e802a3b546..c85f511a5aa 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -110,7 +110,7 @@ blorp_blit_get_cs_dst_coords(nir_builder *b,
    coord = nir_isub(b, coord, nir_load_var(b, v->v_dst_offset));
 
    assert(!key->persample_msaa_dispatch);
-   return nir_trim_vector(b, coord, 2);
+   return nir_trim_vector(b, coord, key->dst_samples > 1 ? 3 : 2);
 }
 
 /**
@@ -1183,6 +1183,11 @@ blorp_build_nir_shader(struct blorp_context *blorp,
                        const struct blorp_blit_prog_key *key)
 {
    const struct intel_device_info *devinfo = blorp->isl_dev->info;
+
+   /* Compute MSAA is only available on Gfx30+ */
+   if (key->base.shader_pipeline == BLORP_SHADER_PIPELINE_COMPUTE)
+      assert(key->dst_samples == 1 || devinfo->ver >= 30);
+
    nir_def *src_pos, *dst_pos, *color;
 
    /* Sanity checks */
@@ -1479,12 +1484,30 @@ blorp_build_nir_shader(struct blorp_context *blorp,
 
    if (compute) {
       nir_def *store_pos = nir_load_global_invocation_id(&b, 32);
+
+      /* Load sample index for MSAA image store */
+      nir_def *sample_idx = nir_imm_int(&b, 0);
+
+      if (key->dst_samples > 1) {
+         nir_def *num_layers_data =
+            nir_load_inline_data_intel(&b, 1, 32,
+               .base = BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION);
+
+         nir_def *z_pos = nir_umod(&b, nir_channel(&b, store_pos, 2),
+                                   num_layers_data);
+         sample_idx = nir_udiv(&b, nir_channel(&b, store_pos, 2),
+                               num_layers_data);
+
+         store_pos = nir_vector_insert_imm(&b, store_pos, z_pos, 2);
+      }
       nir_image_store(&b, nir_imm_int(&b, 0),
                       nir_pad_vector_imm_int(&b, store_pos, 0, 4),
-                      nir_imm_int(&b, 0),
+                      sample_idx,
                       nir_pad_vector_imm_int(&b, color, 0, 4),
                       nir_imm_int(&b, 0),
-                      .image_dim = GLSL_SAMPLER_DIM_2D,
+                      .image_dim = key->dst_samples > 1 ?
+                                   GLSL_SAMPLER_DIM_MS :
+                                   GLSL_SAMPLER_DIM_2D,
                       .image_array = true,
                       .access = ACCESS_NON_READABLE);
    } else if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
@@ -1566,7 +1589,7 @@ blorp_get_blit_kernel_cs(struct blorp_batch *batch,
    nir->info.name = ralloc_strdup(nir, "BLORP-gpgpu-blit");
    blorp_set_cs_dims(nir, prog_key->local_y);
 
-   assert(prog_key->rt_samples == 1);
+   assert(batch->blorp->isl_dev->info->ver >= 30 || prog_key->rt_samples == 1);
 
    const struct blorp_program p =
       blorp_compile_cs(blorp, mem_ctx, nir);
@@ -2436,10 +2459,8 @@ blorp_blit_supports_compute(struct blorp_context *blorp,
                             const struct isl_surf *dst_surf,
                             enum isl_aux_usage dst_aux_usage)
 {
-   /* Our compiler doesn't currently support typed image writes with MSAA.
-    * Also, our BLORP compute shaders don't handle multisampling cases.
-    */
-   if (dst_surf->samples > 1 || src_surf->samples > 1)
+   /* Platforms < Xe3 don't support typed image writes with MSAA. */
+   if (blorp->isl_dev->info->ver < 30 && dst_surf->samples > 1)
       return false;
 
    if (blorp->isl_dev->info->ver >= 12) {
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index dd4d6e7af21..0313c1b717d 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -169,12 +169,33 @@ blorp_params_get_clear_kernel_cs(struct blorp_batch *batch,
 
    nir_push_if(&b, in_bounds);
 
+   nir_def *sample_idx = nir_imm_int(&b, 0);
+
+   /* For MSAA destinations the dispatch Z dimension covers
+    * num_samples * num_layers invocations.  Split Z into the array layer
+    * (kept in the coordinate) and the sample index, which is sent
+    * separately and included later in coord as 4th component in the
+    * lower_image_sample_index_in_coord lowering pass.
+    */
+   if (blorp->isl_dev->info->ver >= 30 && params->num_samples > 1) {
+      nir_def *num_layers =
+         nir_load_inline_data_intel(&b, 1, 32,
+            .base = BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION);
+      nir_def *z = nir_channel(&b, dst_pos, 2);
+
+      sample_idx = nir_udiv(&b, z, num_layers);
+      dst_pos = nir_vector_insert_imm(&b, dst_pos,
+                                      nir_umod(&b, z, num_layers), 2);
+   }
+
    nir_image_store(&b, nir_imm_int(&b, 0),
                    nir_pad_vector_imm_int(&b, dst_pos, 0, 4),
-                   nir_imm_int(&b, 0),
+                   sample_idx,
                    nir_pad_vector_imm_int(&b, color, 0, 4),
                    nir_imm_int(&b, 0),
-                   .image_dim = GLSL_SAMPLER_DIM_2D,
+                   .image_dim = params->num_samples > 1 ?
+                                GLSL_SAMPLER_DIM_MS :
+                                GLSL_SAMPLER_DIM_2D,
                    .image_array = true,
                    .access = ACCESS_NON_READABLE);
 
diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h
index 70b1fc4c6e5..ccdfbfd16a9 100644
--- a/src/intel/blorp/blorp_genX_exec_brw.h
+++ b/src/intel/blorp/blorp_genX_exec_brw.h
@@ -1727,7 +1727,13 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
    uint32_t group_x1 = DIV_ROUND_UP(params->x1, cs_prog_data->local_size[0]);
    uint32_t group_y1 = DIV_ROUND_UP(params->y1, cs_prog_data->local_size[1]);
    assert(params->num_layers >= 1);
-   uint32_t group_z1 = params->dst.z_offset + params->num_layers;
+   /* For MSAA destinations the Z dimension also enumerates samples; the
+    * shader splits it back into (layer, sample) using the layer count
+    * passed as inline data.  dst.z_offset is kept so that single-sampled
+    * dispatches keep their previous upper Z bound.
+    */
+   uint32_t group_z1 =
+      params->dst.z_offset + params->num_samples * params->num_layers;
    assert(cs_prog_data->local_size[2] == 1);
 
 #if GFX_VERx10 >= 125
@@ -1791,6 +1797,14 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
       .IndirectDataStartAddress = push_const_offset,
       .IndirectDataLength = push_const_size,
 
+      /* Send number of layers as inline register parameter to copy 2D MSAA
+       * array image/texture properly.
+       */
+      .EmitInlineParameter = true,
+      .InlineData = {
+         [BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION / 4 + 0] = params->num_layers,
+      },
+
 #if GFX_VERx10 >= 125
       .GenerateLocalID = cs_prog_data->generate_local_id != 0,
       .EmitLocal = cs_prog_data->generate_local_id,
diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h
index e8446721ddd..8689b81999e 100644
--- a/src/intel/blorp/blorp_priv.h
+++ b/src/intel/blorp/blorp_priv.h
@@ -35,6 +35,13 @@
 extern "C" {
 #endif
 
+
+/* Offset of the layer count (params->num_layers) within the compute walker
+ * inline data.  The dispatch code indexes InlineData with this value / 4 and
+ * compute shaders read it back to split the Z invocation id for MSAA.
+ */
+#define BLORP_INLINE_PARAM_THREAD_GROUP_ID_Z_DIMENSION (0)
+
 void blorp_init(struct blorp_context *blorp, void *driver_ctx,
                 struct isl_device *isl_dev,
                 const struct blorp_config *config);