diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 6d9d1660335..df0b4cedd6e 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2741,7 +2741,7 @@ system_value("front_face_op_pco", 1, bit_sizes=[32]) system_value("fs_meta_pco", 1, bit_sizes=[32]) -intrinsic("flush_tile_buffer_pco", src_comp=[1, 1]) +intrinsic("flush_tile_buffer_pco", src_comp=[1, 1], indices=[BASE, RANGE]) intrinsic("dummy_load_store_pco", indices=[BASE]) diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h index 02e8999fae7..3b3bcf95dd6 100644 --- a/src/imagination/include/hwdef/rogue_hw_utils.h +++ b/src/imagination/include/hwdef/rogue_hw_utils.h @@ -482,4 +482,23 @@ rogue_max_wg_temps(const struct pvr_device_info *dev_info, return temps; } + /* Returns 1 when the device has the tile_per_usc or pbe2_in_xe feature, otherwise 4; the end-of-tile shader divides its per-tile data size by this value. */ +static inline uint32_t +rogue_num_uscs_per_tile(const struct pvr_device_info *dev_info) +{ + if (PVR_HAS_FEATURE(dev_info, tile_per_usc) || + PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) + return 1; + + return 4; +} + /* Returns 4 when the device has the tile_per_usc feature, otherwise 1; the end-of-tile shader divides a dword offset by this value to get the flush intrinsic's base index. */ +static inline uint32_t rogue_usc_indexed_pixel_output_index_scale( + const struct pvr_device_info *dev_info) +{ + if (PVR_HAS_FEATURE(dev_info, tile_per_usc)) + return 4; + + return 1; +} #endif /* ROGUE_HW_UTILS_H */ diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index df069f73467..b0b5bbedf15 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -506,7 +506,8 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src) pco_ref_drc(PCO_DRC_0), pco_ref_imm8(chans), addr_data, - cov_mask); + cov_mask, + .olchk = true); } static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx, @@ -537,12 +538,21 @@ static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx, pco_ref idx_reg = pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX); - pco_mbyp(&tctx->b, idx_reg, pco_zero); + unsigned base = 
nir_intrinsic_base(intr); + pco_movi32(&tctx->b, idx_reg, pco_ref_imm32(base)); + + pco_ref burst_len = pco_ref_new_ssa32(tctx->func); + unsigned range = nir_intrinsic_range(intr); + assert(range <= 1024); + if (range == 1024) + range = 0; + + pco_movi32(&tctx->b, burst_len, pco_ref_imm32(range)); pco_ref dest = pco_ref_hwreg(0, PCO_REG_CLASS_PIXOUT); dest = pco_ref_hwreg_idx_from(idx_reg_num, dest); - return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), pco_zero, addr); + return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), burst_len, addr); } static unsigned fetch_resource_base_reg(const pco_common_data *common, diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index f2c84416190..766b60495b9 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -490,6 +490,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( const uint32_t emit_count, const uint32_t *pbe_cs_words, const unsigned *tile_buffer_ids, + unsigned pixel_output_width, struct pvr_pds_upload *const pds_upload_out) { struct pvr_pds_event_program pixel_event_program = { @@ -513,6 +514,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( pco_shader *eot; VkResult result; + bool has_tile_buffers = false; for (unsigned u = 0; u < emit_count; ++u) { unsigned tile_buffer_id = tile_buffer_ids[u]; if (tile_buffer_id == ~0) @@ -521,9 +523,20 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( assert(tile_buffer_id < tile_buffer_state->buffer_count); props.tile_buffer_addrs[u] = tile_buffer_state->buffers[tile_buffer_id]->vma->dev_addr.addr; + has_tile_buffers = true; } - eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props); + if (has_tile_buffers) { + props.num_output_regs = pixel_output_width; + props.msaa_samples = + cmd_buffer->vk.dynamic_graphics_state.ms.rasterization_samples; + + if (!props.msaa_samples) + 
props.msaa_samples = 1; + } + + eot = + pvr_usc_eot(device->pdevice->pco_ctx, &props, &device->pdevice->dev_info); usc_temp_count = pco_shader_data(eot)->common.temps; result = pvr_cmd_buffer_upload_usc(cmd_buffer, @@ -1501,11 +1514,17 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, emit_state.pbe_reg_words, sizeof(job->pbe_reg_words)); + unsigned pixel_output_width = + pvr_pass_get_pixel_output_width(render_pass_info->pass, + sub_cmd->hw_render_idx, + dev_info); + result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload( cmd_buffer, emit_state.emit_count, emit_state.pbe_cs_words[0], emit_state.tile_buffer_ids, + pixel_output_width, &pds_pixel_event_program); if (result != VK_SUCCESS) return result; diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c index ae7156c49d9..c0c150ed53c 100644 --- a/src/imagination/vulkan/pvr_job_context.c +++ b/src/imagination/vulkan/pvr_job_context.c @@ -1243,7 +1243,9 @@ static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device, pco_shader *eot; VkResult result; - eot = pvr_usc_eot(device->pdevice->pco_ctx, &props); + eot = pvr_usc_eot(device->pdevice->pco_ctx, + &props, + &device->pdevice->dev_info); result = pvr_gpu_upload_usc(device, pco_shader_binary_data(eot), diff --git a/src/imagination/vulkan/pvr_spm.c b/src/imagination/vulkan/pvr_spm.c index ab9a0abc5d4..4d90a6cfa23 100644 --- a/src/imagination/vulkan/pvr_spm.c +++ b/src/imagination/vulkan/pvr_spm.c @@ -735,7 +735,7 @@ pvr_spm_init_eot_state(struct pvr_device *device, .state_words = pbe_state_words[0], }; - eot = pvr_usc_eot(device->pdevice->pco_ctx, &props); + eot = pvr_usc_eot(device->pdevice->pco_ctx, &props, dev_info); usc_temp_count = pco_shader_data(eot)->common.temps; /* TODO: Create a #define in the compiler code to replace the 16. 
*/ diff --git a/src/imagination/vulkan/pvr_usc.c b/src/imagination/vulkan/pvr_usc.c index 5393a92536b..6a6228061ce 100644 --- a/src/imagination/vulkan/pvr_usc.c +++ b/src/imagination/vulkan/pvr_usc.c @@ -10,6 +10,7 @@ * \brief USC internal shader generation. */ +#include "hwdef/rogue_hw_utils.h" #include "nir/nir.h" #include "nir/nir_builder.h" #include "nir/nir_format_convert.h" @@ -75,7 +76,9 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage) * \param props End of tile shader properties. * \return The end-of-tile shader. */ -pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props) +pco_shader *pvr_usc_eot(pco_ctx *ctx, + struct pvr_eot_props *props, + const struct pvr_device_info *dev_info) { nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, @@ -92,14 +95,44 @@ pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props) nir_wop_pco(&b); if (props->tile_buffer_addrs[u]) { - nir_def *tile_buffer_addr_lo = - nir_imm_int(&b, props->tile_buffer_addrs[u] & 0xffffffff); - nir_def *tile_buffer_addr_hi = - nir_imm_int(&b, props->tile_buffer_addrs[u] >> 32); + uint64_t tile_buffer_addr = props->tile_buffer_addrs[u]; - nir_flush_tile_buffer_pco(&b, - tile_buffer_addr_lo, - tile_buffer_addr_hi); + unsigned data_size = + (PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U) * + PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U) * + props->num_output_regs) / + rogue_num_uscs_per_tile(dev_info); + assert(data_size); + + assert(props->msaa_samples); + if (props->msaa_samples > 1) { + if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe) && + PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U) == + 4) { + data_size *= props->msaa_samples; + } else { + data_size *= 2; + } + } + + /* We can burst up to 1024 dwords at a time. 
*/ + unsigned num_loads = DIV_ROUND_UP(data_size, 1024); + unsigned scale = rogue_usc_indexed_pixel_output_index_scale(dev_info); + for (unsigned l = 0; l < num_loads; ++l) { + unsigned offset = l * 1024; + unsigned idx_offset = offset / scale; + bool last_load = l == (num_loads - 1); + unsigned range = last_load ? data_size - offset : 1024; + + nir_flush_tile_buffer_pco( + &b, + nir_imm_int(&b, tile_buffer_addr & 0xffffffff), + nir_imm_int(&b, tile_buffer_addr >> 32), + .base = idx_offset, + .range = range); + + tile_buffer_addr += 1024 * sizeof(uint32_t); + } } nir_def *state0; diff --git a/src/imagination/vulkan/pvr_usc.h b/src/imagination/vulkan/pvr_usc.h index ffd0102f36b..9844f6a6a59 100644 --- a/src/imagination/vulkan/pvr_usc.h +++ b/src/imagination/vulkan/pvr_usc.h @@ -31,10 +31,15 @@ struct pvr_eot_props { const unsigned *state_regs; }; + unsigned msaa_samples; + unsigned num_output_regs; + uint64_t tile_buffer_addrs[PVR_MAX_COLOR_ATTACHMENTS]; }; -pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props); +pco_shader *pvr_usc_eot(pco_ctx *ctx, + struct pvr_eot_props *props, + const struct pvr_device_info *dev_info); /* Transfer queue shader generation. */ struct pvr_tq_props {