pvr, pco: full support for tile buffer eot handling

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-05-28 18:45:56 +01:00 committed by Marge Bot
parent 6d051ce480
commit a67120cda3
8 changed files with 104 additions and 16 deletions

View file

@ -2741,7 +2741,7 @@ system_value("front_face_op_pco", 1, bit_sizes=[32])
system_value("fs_meta_pco", 1, bit_sizes=[32])
intrinsic("flush_tile_buffer_pco", src_comp=[1, 1])
intrinsic("flush_tile_buffer_pco", src_comp=[1, 1], indices=[BASE, RANGE])
intrinsic("dummy_load_store_pco", indices=[BASE])

View file

@ -482,4 +482,23 @@ rogue_max_wg_temps(const struct pvr_device_info *dev_info,
return temps;
}
/**
 * \brief Returns the USCs-per-tile count selected by the device features.
 *
 * \param dev_info Device information whose feature flags are queried.
 * \return 1 if the device has either the tile_per_usc or the pbe2_in_xe
 *         feature, 4 otherwise.
 */
static inline uint32_t
rogue_num_uscs_per_tile(const struct pvr_device_info *dev_info)
{
   /* Either feature on its own is enough to select the single-USC value. */
   return (PVR_HAS_FEATURE(dev_info, tile_per_usc) ||
           PVR_HAS_FEATURE(dev_info, pbe2_in_xe))
             ? 1
             : 4;
}
/**
 * \brief Returns the scale factor for indexed pixel output register indices.
 *
 * \param dev_info Device information whose feature flags are queried.
 * \return 4 if the device has the tile_per_usc feature, 1 otherwise.
 */
static inline uint32_t rogue_usc_indexed_pixel_output_index_scale(
   const struct pvr_device_info *dev_info)
{
   return PVR_HAS_FEATURE(dev_info, tile_per_usc) ? 4 : 1;
}
#endif /* ROGUE_HW_UTILS_H */

View file

@ -506,7 +506,8 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
pco_ref_drc(PCO_DRC_0),
pco_ref_imm8(chans),
addr_data,
cov_mask);
cov_mask,
.olchk = true);
}
static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx,
@ -537,12 +538,21 @@ static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx,
pco_ref idx_reg =
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
pco_mbyp(&tctx->b, idx_reg, pco_zero);
unsigned base = nir_intrinsic_base(intr);
pco_movi32(&tctx->b, idx_reg, pco_ref_imm32(base));
pco_ref burst_len = pco_ref_new_ssa32(tctx->func);
unsigned range = nir_intrinsic_range(intr);
assert(range <= 1024);
if (range == 1024)
range = 0;
pco_movi32(&tctx->b, burst_len, pco_ref_imm32(range));
pco_ref dest = pco_ref_hwreg(0, PCO_REG_CLASS_PIXOUT);
dest = pco_ref_hwreg_idx_from(idx_reg_num, dest);
return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), pco_zero, addr);
return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), burst_len, addr);
}
static unsigned fetch_resource_base_reg(const pco_common_data *common,

View file

@ -490,6 +490,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
const uint32_t emit_count,
const uint32_t *pbe_cs_words,
const unsigned *tile_buffer_ids,
unsigned pixel_output_width,
struct pvr_pds_upload *const pds_upload_out)
{
struct pvr_pds_event_program pixel_event_program = {
@ -513,6 +514,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
pco_shader *eot;
VkResult result;
bool has_tile_buffers = false;
for (unsigned u = 0; u < emit_count; ++u) {
unsigned tile_buffer_id = tile_buffer_ids[u];
if (tile_buffer_id == ~0)
@ -521,9 +523,20 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
assert(tile_buffer_id < tile_buffer_state->buffer_count);
props.tile_buffer_addrs[u] =
tile_buffer_state->buffers[tile_buffer_id]->vma->dev_addr.addr;
has_tile_buffers = true;
}
eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props);
if (has_tile_buffers) {
props.num_output_regs = pixel_output_width;
props.msaa_samples =
cmd_buffer->vk.dynamic_graphics_state.ms.rasterization_samples;
if (!props.msaa_samples)
props.msaa_samples = 1;
}
eot =
pvr_usc_eot(device->pdevice->pco_ctx, &props, &device->pdevice->dev_info);
usc_temp_count = pco_shader_data(eot)->common.temps;
result = pvr_cmd_buffer_upload_usc(cmd_buffer,
@ -1501,11 +1514,17 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
emit_state.pbe_reg_words,
sizeof(job->pbe_reg_words));
unsigned pixel_output_width =
pvr_pass_get_pixel_output_width(render_pass_info->pass,
sub_cmd->hw_render_idx,
dev_info);
result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
cmd_buffer,
emit_state.emit_count,
emit_state.pbe_cs_words[0],
emit_state.tile_buffer_ids,
pixel_output_width,
&pds_pixel_event_program);
if (result != VK_SUCCESS)
return result;

View file

@ -1243,7 +1243,9 @@ static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
pco_shader *eot;
VkResult result;
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
eot = pvr_usc_eot(device->pdevice->pco_ctx,
&props,
&device->pdevice->dev_info);
result = pvr_gpu_upload_usc(device,
pco_shader_binary_data(eot),

View file

@ -735,7 +735,7 @@ pvr_spm_init_eot_state(struct pvr_device *device,
.state_words = pbe_state_words[0],
};
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props, dev_info);
usc_temp_count = pco_shader_data(eot)->common.temps;
/* TODO: Create a #define in the compiler code to replace the 16. */

View file

@ -10,6 +10,7 @@
* \brief USC internal shader generation.
*/
#include "hwdef/rogue_hw_utils.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_format_convert.h"
@ -75,7 +76,9 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage)
* \param props End of tile shader properties.
* \return The end-of-tile shader.
*/
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
pco_shader *pvr_usc_eot(pco_ctx *ctx,
struct pvr_eot_props *props,
const struct pvr_device_info *dev_info)
{
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
@ -92,14 +95,44 @@ pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
nir_wop_pco(&b);
if (props->tile_buffer_addrs[u]) {
nir_def *tile_buffer_addr_lo =
nir_imm_int(&b, props->tile_buffer_addrs[u] & 0xffffffff);
nir_def *tile_buffer_addr_hi =
nir_imm_int(&b, props->tile_buffer_addrs[u] >> 32);
uint64_t tile_buffer_addr = props->tile_buffer_addrs[u];
nir_flush_tile_buffer_pco(&b,
tile_buffer_addr_lo,
tile_buffer_addr_hi);
unsigned data_size =
(PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U) *
PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U) *
props->num_output_regs) /
rogue_num_uscs_per_tile(dev_info);
assert(data_size);
assert(props->msaa_samples);
if (props->msaa_samples > 1) {
if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe) &&
PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U) ==
4) {
data_size *= props->msaa_samples;
} else {
data_size *= 2;
}
}
/* We can burst up to 1024 dwords at a time. */
unsigned num_loads = DIV_ROUND_UP(data_size, 1024);
unsigned scale = rogue_usc_indexed_pixel_output_index_scale(dev_info);
for (unsigned l = 0; l < num_loads; ++l) {
unsigned offset = l * 1024;
unsigned idx_offset = offset / scale;
bool last_load = l == (num_loads - 1);
unsigned range = last_load ? data_size - offset : 1024;
nir_flush_tile_buffer_pco(
&b,
nir_imm_int(&b, tile_buffer_addr & 0xffffffff),
nir_imm_int(&b, tile_buffer_addr >> 32),
.base = idx_offset,
.range = range);
tile_buffer_addr += 1024 * sizeof(uint32_t);
}
}
nir_def *state0;

View file

@ -31,10 +31,15 @@ struct pvr_eot_props {
const unsigned *state_regs;
};
unsigned msaa_samples;
unsigned num_output_regs;
uint64_t tile_buffer_addrs[PVR_MAX_COLOR_ATTACHMENTS];
};
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props);
pco_shader *pvr_usc_eot(pco_ctx *ctx,
struct pvr_eot_props *props,
const struct pvr_device_info *dev_info);
/* Transfer queue shader generation. */
struct pvr_tq_props {