mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 08:50:13 +01:00
pvr, pco: full support for tile buffer eot handling
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
parent
6d051ce480
commit
a67120cda3
8 changed files with 104 additions and 16 deletions
|
|
@ -2741,7 +2741,7 @@ system_value("front_face_op_pco", 1, bit_sizes=[32])
|
|||
|
||||
system_value("fs_meta_pco", 1, bit_sizes=[32])
|
||||
|
||||
intrinsic("flush_tile_buffer_pco", src_comp=[1, 1])
|
||||
intrinsic("flush_tile_buffer_pco", src_comp=[1, 1], indices=[BASE, RANGE])
|
||||
|
||||
intrinsic("dummy_load_store_pco", indices=[BASE])
|
||||
|
||||
|
|
|
|||
|
|
@ -482,4 +482,23 @@ rogue_max_wg_temps(const struct pvr_device_info *dev_info,
|
|||
|
||||
return temps;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
rogue_num_uscs_per_tile(const struct pvr_device_info *dev_info)
|
||||
{
|
||||
if (PVR_HAS_FEATURE(dev_info, tile_per_usc) ||
|
||||
PVR_HAS_FEATURE(dev_info, pbe2_in_xe))
|
||||
return 1;
|
||||
|
||||
return 4;
|
||||
}
|
||||
|
||||
static inline uint32_t rogue_usc_indexed_pixel_output_index_scale(
|
||||
const struct pvr_device_info *dev_info)
|
||||
{
|
||||
if (PVR_HAS_FEATURE(dev_info, tile_per_usc))
|
||||
return 4;
|
||||
|
||||
return 1;
|
||||
}
|
||||
#endif /* ROGUE_HW_UTILS_H */
|
||||
|
|
|
|||
|
|
@ -506,7 +506,8 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
|
|||
pco_ref_drc(PCO_DRC_0),
|
||||
pco_ref_imm8(chans),
|
||||
addr_data,
|
||||
cov_mask);
|
||||
cov_mask,
|
||||
.olchk = true);
|
||||
}
|
||||
|
||||
static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx,
|
||||
|
|
@ -537,12 +538,21 @@ static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx,
|
|||
pco_ref idx_reg =
|
||||
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
|
||||
|
||||
pco_mbyp(&tctx->b, idx_reg, pco_zero);
|
||||
unsigned base = nir_intrinsic_base(intr);
|
||||
pco_movi32(&tctx->b, idx_reg, pco_ref_imm32(base));
|
||||
|
||||
pco_ref burst_len = pco_ref_new_ssa32(tctx->func);
|
||||
unsigned range = nir_intrinsic_range(intr);
|
||||
assert(range <= 1024);
|
||||
if (range == 1024)
|
||||
range = 0;
|
||||
|
||||
pco_movi32(&tctx->b, burst_len, pco_ref_imm32(range));
|
||||
|
||||
pco_ref dest = pco_ref_hwreg(0, PCO_REG_CLASS_PIXOUT);
|
||||
dest = pco_ref_hwreg_idx_from(idx_reg_num, dest);
|
||||
|
||||
return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), pco_zero, addr);
|
||||
return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), burst_len, addr);
|
||||
}
|
||||
|
||||
static unsigned fetch_resource_base_reg(const pco_common_data *common,
|
||||
|
|
|
|||
|
|
@ -490,6 +490,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
|||
const uint32_t emit_count,
|
||||
const uint32_t *pbe_cs_words,
|
||||
const unsigned *tile_buffer_ids,
|
||||
unsigned pixel_output_width,
|
||||
struct pvr_pds_upload *const pds_upload_out)
|
||||
{
|
||||
struct pvr_pds_event_program pixel_event_program = {
|
||||
|
|
@ -513,6 +514,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
|||
pco_shader *eot;
|
||||
VkResult result;
|
||||
|
||||
bool has_tile_buffers = false;
|
||||
for (unsigned u = 0; u < emit_count; ++u) {
|
||||
unsigned tile_buffer_id = tile_buffer_ids[u];
|
||||
if (tile_buffer_id == ~0)
|
||||
|
|
@ -521,9 +523,20 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
|||
assert(tile_buffer_id < tile_buffer_state->buffer_count);
|
||||
props.tile_buffer_addrs[u] =
|
||||
tile_buffer_state->buffers[tile_buffer_id]->vma->dev_addr.addr;
|
||||
has_tile_buffers = true;
|
||||
}
|
||||
|
||||
eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props);
|
||||
if (has_tile_buffers) {
|
||||
props.num_output_regs = pixel_output_width;
|
||||
props.msaa_samples =
|
||||
cmd_buffer->vk.dynamic_graphics_state.ms.rasterization_samples;
|
||||
|
||||
if (!props.msaa_samples)
|
||||
props.msaa_samples = 1;
|
||||
}
|
||||
|
||||
eot =
|
||||
pvr_usc_eot(device->pdevice->pco_ctx, &props, &device->pdevice->dev_info);
|
||||
usc_temp_count = pco_shader_data(eot)->common.temps;
|
||||
|
||||
result = pvr_cmd_buffer_upload_usc(cmd_buffer,
|
||||
|
|
@ -1501,11 +1514,17 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
emit_state.pbe_reg_words,
|
||||
sizeof(job->pbe_reg_words));
|
||||
|
||||
unsigned pixel_output_width =
|
||||
pvr_pass_get_pixel_output_width(render_pass_info->pass,
|
||||
sub_cmd->hw_render_idx,
|
||||
dev_info);
|
||||
|
||||
result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
||||
cmd_buffer,
|
||||
emit_state.emit_count,
|
||||
emit_state.pbe_cs_words[0],
|
||||
emit_state.tile_buffer_ids,
|
||||
pixel_output_width,
|
||||
&pds_pixel_event_program);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -1243,7 +1243,9 @@ static VkResult pvr_transfer_eot_shaders_init(struct pvr_device *device,
|
|||
pco_shader *eot;
|
||||
VkResult result;
|
||||
|
||||
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
|
||||
eot = pvr_usc_eot(device->pdevice->pco_ctx,
|
||||
&props,
|
||||
&device->pdevice->dev_info);
|
||||
|
||||
result = pvr_gpu_upload_usc(device,
|
||||
pco_shader_binary_data(eot),
|
||||
|
|
|
|||
|
|
@ -735,7 +735,7 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
.state_words = pbe_state_words[0],
|
||||
};
|
||||
|
||||
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props);
|
||||
eot = pvr_usc_eot(device->pdevice->pco_ctx, &props, dev_info);
|
||||
usc_temp_count = pco_shader_data(eot)->common.temps;
|
||||
|
||||
/* TODO: Create a #define in the compiler code to replace the 16. */
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
* \brief USC internal shader generation.
|
||||
*/
|
||||
|
||||
#include "hwdef/rogue_hw_utils.h"
|
||||
#include "nir/nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
#include "nir/nir_format_convert.h"
|
||||
|
|
@ -75,7 +76,9 @@ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage)
|
|||
* \param props End of tile shader properties.
|
||||
* \return The end-of-tile shader.
|
||||
*/
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx,
|
||||
struct pvr_eot_props *props,
|
||||
const struct pvr_device_info *dev_info)
|
||||
{
|
||||
nir_builder b =
|
||||
nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
|
||||
|
|
@ -92,14 +95,44 @@ pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
|
|||
nir_wop_pco(&b);
|
||||
|
||||
if (props->tile_buffer_addrs[u]) {
|
||||
nir_def *tile_buffer_addr_lo =
|
||||
nir_imm_int(&b, props->tile_buffer_addrs[u] & 0xffffffff);
|
||||
nir_def *tile_buffer_addr_hi =
|
||||
nir_imm_int(&b, props->tile_buffer_addrs[u] >> 32);
|
||||
uint64_t tile_buffer_addr = props->tile_buffer_addrs[u];
|
||||
|
||||
nir_flush_tile_buffer_pco(&b,
|
||||
tile_buffer_addr_lo,
|
||||
tile_buffer_addr_hi);
|
||||
unsigned data_size =
|
||||
(PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U) *
|
||||
PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U) *
|
||||
props->num_output_regs) /
|
||||
rogue_num_uscs_per_tile(dev_info);
|
||||
assert(data_size);
|
||||
|
||||
assert(props->msaa_samples);
|
||||
if (props->msaa_samples > 1) {
|
||||
if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe) &&
|
||||
PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U) ==
|
||||
4) {
|
||||
data_size *= props->msaa_samples;
|
||||
} else {
|
||||
data_size *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* We can burst up to 1024 dwords at a time. */
|
||||
unsigned num_loads = DIV_ROUND_UP(data_size, 1024);
|
||||
unsigned scale = rogue_usc_indexed_pixel_output_index_scale(dev_info);
|
||||
for (unsigned l = 0; l < num_loads; ++l) {
|
||||
unsigned offset = l * 1024;
|
||||
unsigned idx_offset = offset / scale;
|
||||
bool last_load = l == (num_loads - 1);
|
||||
unsigned range = last_load ? data_size - offset : 1024;
|
||||
|
||||
nir_flush_tile_buffer_pco(
|
||||
&b,
|
||||
nir_imm_int(&b, tile_buffer_addr & 0xffffffff),
|
||||
nir_imm_int(&b, tile_buffer_addr >> 32),
|
||||
.base = idx_offset,
|
||||
.range = range);
|
||||
|
||||
tile_buffer_addr += 1024 * sizeof(uint32_t);
|
||||
}
|
||||
}
|
||||
|
||||
nir_def *state0;
|
||||
|
|
|
|||
|
|
@ -31,10 +31,15 @@ struct pvr_eot_props {
|
|||
const unsigned *state_regs;
|
||||
};
|
||||
|
||||
unsigned msaa_samples;
|
||||
unsigned num_output_regs;
|
||||
|
||||
uint64_t tile_buffer_addrs[PVR_MAX_COLOR_ATTACHMENTS];
|
||||
};
|
||||
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props);
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx,
|
||||
struct pvr_eot_props *props,
|
||||
const struct pvr_device_info *dev_info);
|
||||
|
||||
/* Transfer queue shader generation. */
|
||||
struct pvr_tq_props {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue