pvr, pco: tile buffer support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-04-03 14:20:51 +01:00 committed by Marge Bot
parent c31e0a7159
commit 297a0c269a
14 changed files with 499 additions and 65 deletions

View file

@ -2740,3 +2740,5 @@ system_value("face_ccw_pco", 1, bit_sizes=[32])
system_value("front_face_op_pco", 1, bit_sizes=[32])
system_value("fs_meta_pco", 1, bit_sizes=[32])
intrinsic("flush_tile_buffer_pco", src_comp=[1, 1])

View file

@ -30,7 +30,10 @@ typedef struct _pco_shader pco_shader;
typedef struct _pco_range {
unsigned start;
unsigned count;
unsigned stride;
union {
unsigned stride;
unsigned offset;
};
} pco_range;
/** PCO vertex shader-specific data. */
@ -71,17 +74,21 @@ typedef struct _pco_fs_data {
/** Results/output mappings. */
pco_range outputs[FRAG_RESULT_MAX];
/** If outputs are to be placed in pixout regs. */
bool output_reg[FRAG_RESULT_MAX];
/** If outputs are to be placed in tile buffers. */
uint8_t output_tile_buffers;
/** Fragment output formats. */
enum pipe_format output_formats[FRAG_RESULT_MAX];
/** On-chip input attachment mappings. */
pco_range ias_onchip[4];
pco_range ias_onchip[8];
/** On-chip input attachment formats. */
enum pipe_format ia_formats[4];
enum pipe_format ia_formats[8];
uint8_t ia_tile_buffers;
unsigned num_tile_buffers;
pco_range meta;
@ -94,6 +101,8 @@ typedef struct _pco_fs_data {
uint16_t rasterization_samples;
pco_range tile_buffers;
struct {
bool w; /** Whether the shader uses pos.w. */
bool z; /** Whether the shader uses pos.z */

View file

@ -1336,6 +1336,19 @@ encode_map(O_LD,
op_ref_maps=[('backend', ['s3'], ['drc', 'imm', ['s0', 's1', 's2', 's3', 's4', 's5']])]
)
# Encoding map for ld.regbl (load with register-sourced burst length).
# Backend ref map: one dest (s3) <- (drc, burst-length reg, address regs).
encode_map(O_LD_REGBL,
encodings=[
(I_LD_REGBL, [
# drc comes from source 0.
('drc', ('pco_ref_get_drc', SRC(0))),
# Address register select comes from source 2.
('srcseladd', ('pco_ref_srcsel', SRC(2))),
# Burst-length register select comes from source 1.
('srcselbl', ('pco_ref_srcsel', SRC(1))),
('cachemode_ld', OM_MCU_CACHE_MODE_LD)
])
],
op_ref_maps=[('backend', ['s3'], ['drc', ['s0', 's1', 's2', 's3', 's4', 's5'], ['s0', 's1', 's2', 's3', 's4', 's5']])]
)
encode_map(O_ST,
encodings=[
(I_ST_IMMBL, [
@ -1350,6 +1363,21 @@ encode_map(O_ST,
op_ref_maps=[('backend', [], [['s0', 's1', 's2', 's3', 's4', 's5'], 'imm', 'drc', 'imm', ['s0', 's1', 's2', 's3', 's4', 's5'], ['s0', 's1', 's2', 's3', 's4', 's5', '_']])]
)
# Encoding map for st.tiled (tiled store, immediate burst length).
# Source order per the op definition: { data, data size, drc, chans, addr, cov_msk }.
encode_map(O_ST_TILED,
encodings=[
(I_ST_IMMBL_TILED, [
# drc comes from source 2.
('drc', ('pco_ref_get_drc', SRC(2))),
# Address register select comes from source 4.
('srcseladd', ('pco_ref_srcsel', SRC(4))),
# Burst length is an immediate (channel count), source 3.
('burstlen', ('pco_ref_get_imm', SRC(3))),
('cachemode_st', OM_MCU_CACHE_MODE_ST),
# Data register select comes from source 0.
('srcseldata', ('pco_ref_srcsel', SRC(0))),
# Data size immediate, source 1.
('dsize', ('pco_ref_get_imm', SRC(1))),
# Coverage-mask register select comes from source 5.
('srcmask', ('pco_ref_srcsel', SRC(5)))
])
],
op_ref_maps=[('backend', [], [['s0', 's1', 's2', 's3', 's4', 's5'], 'imm', 'drc', 'imm', ['s0', 's1', 's2', 's3', 's4', 's5'], ['s0', 's1', 's2', 's3', 's4', 's5', '_']])]
)
encode_map(O_ATOMIC,
encodings=[
(I_ATOMIC, [
@ -2608,6 +2636,25 @@ group_map(O_ST32,
]
)
# Instruction-group map for st.tiled: a backend-only ('be') group.
group_map(O_ST_TILED,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
('olchk', OM_OLCHK),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', 1)
]),
enc_ops=[('backend', O_ST_TILED)],
# Map op sources onto group source registers:
# s0 <- data (SRC 0), s3 <- address (SRC 4), s4 <- coverage mask (SRC 5).
srcs=[
('s[0]', ('backend', SRC(0)), 's0'),
('s[3]', ('backend', SRC(4)), 's3'),
('s[4]', ('backend', SRC(5)), 's4')
]
)
group_map(O_IADD32_ATOMIC,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1'),
@ -2857,6 +2904,25 @@ group_map(O_LD,
]
)
# Instruction-group map for ld.regbl: a backend-only ('be') group.
group_map(O_LD_REGBL,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
('olchk', OM_OLCHK),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', 1)
]),
enc_ops=[('backend', O_LD_REGBL)],
# Map op refs onto group source registers:
# s0 <- address (SRC 2), s1 <- burst-length reg (SRC 1), s3 <- destination.
srcs=[
('s[0]', ('backend', SRC(2)), 's0'),
('s[1]', ('backend', SRC(1)), 's1'),
('s[3]', ('backend', DEST(0)), 's3')
]
)
group_map(O_ATOMIC,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),

View file

@ -183,7 +183,7 @@ lower_image_derefs(nir_builder *b, nir_intrinsic_instr *intr, pco_data *data)
if (ia) {
unsigned ia_idx = var->data.index;
bool onchip = data->fs.ias_onchip[ia_idx].count > 0;
bool onchip = data->fs.ia_formats[ia_idx] != PIPE_FORMAT_NONE;
if (onchip) {
nir_def *elem = array_elem_from_deref(b, deref);

View file

@ -380,7 +380,13 @@ O_FITR = hw_op('fitr', [OM_EXEC_CND, OM_END, OM_ITR_MODE, OM_SAT], 1, 3)
O_FITRP = hw_op('fitrp', [OM_EXEC_CND, OM_END, OM_ITR_MODE, OM_SAT], 1, 4)
O_LD = hw_op('ld', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_LD], 1, 3)
O_LD_REGBL = hw_op('ld.regbl', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_LD], 1, 3)
O_ST = hw_direct_op('st', [OM_MCU_CACHE_MODE_ST], 0, 6)
# { data, data size, drc, chans, addr, cov_msk }
O_ST_TILED = hw_op('st.tiled', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST], 0, 6)
O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2)
O_SMP = hw_op('smp', OM_ALU_RPT1 + [OM_DIM, OM_PROJ, OM_FCNORM, OM_NNCOORDS,

View file

@ -429,6 +429,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
static pco_instr *
trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
{
pco_fs_data *fs_data = &tctx->shader->data.fs;
ASSERTED unsigned base = nir_intrinsic_base(intr);
assert(!base);
@ -440,15 +441,108 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
gl_frag_result location = nir_intrinsic_io_semantics(intr).location;
const pco_range *range = &tctx->shader->data.fs.outputs[location];
const pco_range *range = &fs_data->outputs[location];
assert(component < range->count);
ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
assert(output_reg);
/* TODO: tile buffer support. */
unsigned idx = location - FRAG_RESULT_DATA0;
bool tile_buffer = fs_data->output_tile_buffers & BITFIELD_BIT(idx);
if (!tile_buffer) {
pco_ref dest =
pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
return pco_mov(&tctx->b, dest, src, .olchk = true);
}
pco_ref dest = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
return pco_mov(&tctx->b, dest, src, .olchk = true);
unsigned tile_buffer_id = range->start;
pco_range *tile_buffers = &fs_data->tile_buffers;
assert(tile_buffer_id < (tile_buffers->count / tile_buffers->stride));
unsigned sh_index =
tile_buffers->start + tile_buffer_id * tile_buffers->stride;
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
pco_ref addr_data_comps[3] = {
[2] = src,
};
pco_ref_new_ssa_addr_comps(tctx->func, addr_data_comps);
component += range->offset;
assert(component < 8);
unsigned sr_index = component < 4 ? component + PCO_SR_TILED_ST_COMP0
: component + PCO_SR_TILED_ST_COMP4 - 4;
pco_ref tiled_offset = pco_ref_hwreg(sr_index, PCO_REG_CLASS_SPEC);
pco_add64_32(&tctx->b,
addr_data_comps[0],
addr_data_comps[1],
base_addr[0],
base_addr[1],
tiled_offset,
pco_ref_null(),
.olchk = true,
.s = true);
unsigned chans = pco_ref_get_chans(src);
pco_ref addr_data = pco_ref_new_ssa_addr_data(tctx->func, chans);
pco_vec(&tctx->b, addr_data, ARRAY_SIZE(addr_data_comps), addr_data_comps);
pco_ref data_comp =
pco_ref_new_ssa(tctx->func, pco_ref_get_bits(src), chans);
pco_comp(&tctx->b, data_comp, addr_data, pco_ref_val16(2));
pco_ref cov_mask = pco_ref_new_ssa32(tctx->func);
pco_ref sample_id = pco_ref_hwreg(PCO_SR_SAMP_NUM, PCO_REG_CLASS_SPEC);
pco_shift(&tctx->b,
cov_mask,
pco_one,
sample_id,
pco_ref_null(),
.shiftop = PCO_SHIFTOP_LSL);
return pco_st_tiled(&tctx->b,
data_comp,
pco_ref_imm8(PCO_DSIZE_32BIT),
pco_ref_drc(PCO_DRC_0),
pco_ref_imm8(chans),
addr_data,
cov_mask);
}
/* Flushes a tile buffer by loading it back through the tiled-load path.
 *
 * Forms the flush address as the 64-bit base in (src_addr_lo, src_addr_hi)
 * plus the hardware tiled-load offset special register, then issues a
 * register-burst-length load into the indexed pixel-output registers.
 *
 * NOTE(review): intr is currently unused in the body — confirm whether any
 * intrinsic operands beyond the two address sources should be consumed.
 */
static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref src_addr_lo,
pco_ref src_addr_hi)
{
/* 64-bit flush address = base address + tiled-load offset (special reg). */
pco_ref addr_comps[2];
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps);
pco_ref tiled_offset =
pco_ref_hwreg(PCO_SR_TILED_LD_COMP0, PCO_REG_CLASS_SPEC);
pco_add64_32(&tctx->b,
addr_comps[0],
addr_comps[1],
src_addr_lo,
src_addr_hi,
tiled_offset,
pco_ref_null(),
.olchk = true,
.s = true);
/* Pack the two address components into a single address vector. */
pco_ref addr = pco_ref_new_ssa_addr(tctx->func);
pco_vec(&tctx->b, addr, ARRAY_SIZE(addr_comps), addr_comps);
/* Zero index register 0, then load into pixout regs addressed through it. */
unsigned idx_reg_num = 0;
pco_ref idx_reg =
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
pco_mbyp(&tctx->b, idx_reg, pco_zero);
pco_ref dest = pco_ref_hwreg(0, PCO_REG_CLASS_PIXOUT);
dest = pco_ref_hwreg_idx_from(idx_reg_num, dest);
/* Burst length is register-sourced (pco_zero here — presumably meaning the
 * hardware derives the count; TODO confirm against the ISA docs). */
return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), pco_zero, addr);
}
static unsigned fetch_resource_base_reg(const pco_common_data *common,
@ -515,7 +609,10 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
static pco_instr *
trans_load_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
{
pco_fs_data *fs_data = &tctx->shader->data.fs;
unsigned base = nir_intrinsic_base(intr);
assert(pco_ref_is_scalar(dest));
unsigned component = nir_intrinsic_component(intr);
ASSERTED const nir_src offset = intr->src[0];
@ -524,26 +621,67 @@ trans_load_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
gl_frag_result location = nir_intrinsic_io_semantics(intr).location;
const pco_range *range;
bool tile_buffer;
if (location >= FRAG_RESULT_DATA0) {
assert(!base);
range = &tctx->shader->data.fs.outputs[location];
ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
assert(output_reg);
/* TODO: tile buffer support. */
unsigned idx = location - FRAG_RESULT_DATA0;
tile_buffer = fs_data->output_tile_buffers & BITFIELD_BIT(idx);
} else if (location == FRAG_RESULT_COLOR) {
/* Special case for on-chip input attachments. */
assert(base < ARRAY_SIZE(tctx->shader->data.fs.ias_onchip));
range = &tctx->shader->data.fs.ias_onchip[base];
tile_buffer = fs_data->ia_tile_buffers & BITFIELD_BIT(base);
} else {
UNREACHABLE("");
}
assert(component < range->count);
pco_ref src = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
return pco_mov(&tctx->b, dest, src, .olchk = true);
if (!tile_buffer) {
pco_ref src =
pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
return pco_mov(&tctx->b, dest, src, .olchk = true);
}
unsigned tile_buffer_id = range->start;
pco_range *tile_buffers = &fs_data->tile_buffers;
assert(tile_buffer_id < (tile_buffers->count / tile_buffers->stride));
unsigned sh_index =
tile_buffers->start + tile_buffer_id * tile_buffers->stride;
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
pco_ref addr_comps[2];
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps);
component += range->offset;
assert(component < 8);
unsigned sr_index = component < 4 ? component + PCO_SR_TILED_LD_COMP0
: component + PCO_SR_TILED_LD_COMP4 - 4;
pco_ref tiled_offset = pco_ref_hwreg(sr_index, PCO_REG_CLASS_SPEC);
pco_add64_32(&tctx->b,
addr_comps[0],
addr_comps[1],
base_addr[0],
base_addr[1],
tiled_offset,
pco_ref_null(),
.olchk = true,
.s = true);
pco_ref addr = pco_ref_new_ssa_addr(tctx->func);
pco_vec(&tctx->b, addr, ARRAY_SIZE(addr_comps), addr_comps);
unsigned chans = pco_ref_get_chans(dest);
return pco_ld(&tctx->b,
dest,
pco_ref_drc(PCO_DRC_0),
pco_ref_imm8(chans),
addr);
}
static pco_instr *trans_load_common_store(trans_ctx *tctx,
@ -1295,6 +1433,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
instr = trans_load_output_fs(tctx, intr, dest);
break;
case nir_intrinsic_flush_tile_buffer_pco:
assert(tctx->stage == MESA_SHADER_FRAGMENT);
instr = trans_flush_tile_buffer(tctx, intr, src[0], src[1]);
break;
case nir_intrinsic_load_preamble:
instr = pco_mov(&tctx->b,
dest,

View file

@ -900,6 +900,7 @@ struct pvr_pds_descriptor_set {
#define PVR_BUFFER_TYPE_IA_SAMPLER (8)
#define PVR_BUFFER_TYPE_FRONT_FACE_OP (9)
#define PVR_BUFFER_TYPE_FS_META (10)
#define PVR_BUFFER_TYPE_TILE_BUFFERS (11)
#define PVR_BUFFER_TYPE_INVALID (~0)
struct pvr_pds_buffer {

View file

@ -1580,7 +1580,8 @@ void pvr_pds_generate_descriptor_upload_program(
case PVR_BUFFER_TYPE_POINT_SAMPLER:
case PVR_BUFFER_TYPE_IA_SAMPLER:
case PVR_BUFFER_TYPE_FRONT_FACE_OP:
case PVR_BUFFER_TYPE_FS_META: {
case PVR_BUFFER_TYPE_FS_META:
case PVR_BUFFER_TYPE_TILE_BUFFERS: {
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
special_buffer_entry =

View file

@ -489,6 +489,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
struct pvr_cmd_buffer *const cmd_buffer,
const uint32_t emit_count,
const uint32_t *pbe_cs_words,
const unsigned *tile_buffer_ids,
struct pvr_pds_upload *const pds_upload_out)
{
struct pvr_pds_event_program pixel_event_program = {
@ -499,6 +500,8 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
PVR_DW_TO_BYTES(cmd_buffer->device->pixel_event_data_size_in_dwords);
const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
struct pvr_device *const device = cmd_buffer->device;
const struct pvr_device_tile_buffer_state *tile_buffer_state =
&device->tile_buffer_state;
struct pvr_suballoc_bo *usc_eot_program = NULL;
struct pvr_eot_props props = {
.emit_count = emit_count,
@ -510,6 +513,16 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
pco_shader *eot;
VkResult result;
for (unsigned u = 0; u < emit_count; ++u) {
unsigned tile_buffer_id = tile_buffer_ids[u];
if (tile_buffer_id == ~0)
continue;
assert(tile_buffer_id < tile_buffer_state->buffer_count);
props.tile_buffer_addrs[u] =
tile_buffer_state->buffers[tile_buffer_id]->vma->dev_addr.addr;
}
eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props);
usc_temp_count = pco_shader_data(eot)->common.temps;
@ -778,6 +791,12 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
buffer_size +=
texture_count * sizeof(struct pvr_combined_image_sampler_descriptor);
unsigned tile_buffer_offset = buffer_size;
buffer_size += load_op->num_tile_buffers * sizeof(uint64_t);
assert(!(buffer_size % sizeof(uint32_t)));
assert(buffer_size / sizeof(uint32_t) == load_op->shareds_count);
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
cmd_buffer->device->heaps.general_heap,
buffer_size,
@ -794,6 +813,20 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
texture_states,
texture_count * sizeof(struct pvr_combined_image_sampler_descriptor));
struct pvr_device *const device = cmd_buffer->device;
const struct pvr_device_tile_buffer_state *tile_buffer_state =
&device->tile_buffer_state;
uint32_t *tile_buffers = (uint32_t *)&buffer[tile_buffer_offset];
for (unsigned u = 0; u < load_op->num_tile_buffers; ++u) {
assert(u < tile_buffer_state->buffer_count);
uint64_t tile_buffer_addr =
tile_buffer_state->buffers[u]->vma->dev_addr.addr;
tile_buffers[2 * u] = tile_buffer_addr & 0xffffffff;
tile_buffers[2 * u + 1] = tile_buffer_addr >> 32;
}
*addr_out = clear_bo->dev_addr;
return VK_SUCCESS;
@ -1313,6 +1346,8 @@ struct pvr_emit_state {
uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
[ROGUE_NUM_PBESTATE_REG_WORDS];
unsigned tile_buffer_ids[PVR_MAX_COLOR_ATTACHMENTS];
uint32_t emit_count;
};
@ -1375,6 +1410,11 @@ pvr_setup_emit_state(const struct pvr_device_info *dev_info,
assert(emit_state->emit_count < ARRAY_SIZE(emit_state->pbe_cs_words));
assert(emit_state->emit_count < ARRAY_SIZE(emit_state->pbe_reg_words));
emit_state->tile_buffer_ids[emit_state->emit_count] =
mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY
? mrt_resource->mem.tile_buffer
: ~0;
pvr_setup_pbe_state(dev_info,
framebuffer,
emit_state->emit_count,
@ -1451,6 +1491,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
.pixel_event_program_data_offset;
} else {
struct pvr_emit_state emit_state = { 0 };
memset(emit_state.tile_buffer_ids,
~0,
sizeof(emit_state.tile_buffer_ids));
pvr_setup_emit_state(dev_info, hw_render, render_pass_info, &emit_state);
@ -1462,6 +1505,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
cmd_buffer,
emit_state.emit_count,
emit_state.pbe_cs_words[0],
emit_state.tile_buffer_ids,
&pds_pixel_event_program);
if (result != VK_SUCCESS)
return result;
@ -3870,6 +3914,44 @@ static VkResult pvr_setup_descriptor_mappings(
break;
}
case PVR_BUFFER_TYPE_TILE_BUFFERS: {
const struct pvr_device_tile_buffer_state *tile_buffer_state =
&cmd_buffer->device->tile_buffer_state;
const struct pvr_graphics_pipeline *const gfx_pipeline =
cmd_buffer->state.gfx_pipeline;
const pco_data *const fs_data = &gfx_pipeline->fs_data;
unsigned num_tile_buffers =
fs_data->fs.tile_buffers.count / fs_data->fs.tile_buffers.stride;
uint32_t tile_buffer_addrs[PVR_MAX_TILE_BUFFER_COUNT * 2];
for (unsigned u = 0; u < num_tile_buffers; ++u) {
assert(u < tile_buffer_state->buffer_count);
uint64_t tile_buffer_addr =
tile_buffer_state->buffers[u]->vma->dev_addr.addr;
tile_buffer_addrs[2 * u] = tile_buffer_addr & 0xffffffff;
tile_buffer_addrs[2 * u + 1] = tile_buffer_addr >> 32;
}
struct pvr_suballoc_bo *tile_buffer_bo;
result = pvr_cmd_buffer_upload_general(cmd_buffer,
&tile_buffer_addrs,
num_tile_buffers *
sizeof(uint64_t),
&tile_buffer_bo);
if (result != VK_SUCCESS)
return result;
PVR_WRITE(qword_buffer,
tile_buffer_bo->dev_addr.addr,
special_buff_entry->const_offset,
pds_info->data_size_in_dwords);
break;
}
default:
UNREACHABLE("Unsupported special buffer type.");
}

View file

@ -621,6 +621,14 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
};
}
if (stage == MESA_SHADER_FRAGMENT && data->fs.tile_buffers.count > 0) {
program.buffers[program.buffer_count++] = (struct pvr_pds_buffer){
.type = PVR_BUFFER_TYPE_TILE_BUFFERS,
.size_in_dwords = data->fs.tile_buffers.count,
.destination = data->fs.tile_buffers.start,
};
}
pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
@ -1844,21 +1852,45 @@ pvr_init_fs_outputs(pco_data *data,
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
{
unsigned u;
pco_fs_data *fs = &data->fs;
for (u = 0; u < subpass->color_count; ++u) {
unsigned idx = subpass->color_attachments[u];
const struct usc_mrt_resource *mrt_resource;
bool tile_buffer;
if (idx == VK_ATTACHMENT_UNUSED)
continue;
gl_frag_result location = FRAG_RESULT_DATA0 + u;
VkFormat vk_format = pass->attachments[idx].vk_format;
data->fs.output_formats[location] = vk_format_to_pipe_format(vk_format);
fs->output_formats[location] = vk_format_to_pipe_format(vk_format);
mrt_resource = &hw_subpass->setup.mrt_resources[u];
tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
if (tile_buffer) {
fs->num_tile_buffers =
MAX2(fs->num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
fs->output_tile_buffers |= BITFIELD_BIT(u);
}
}
data->fs.z_replicate = ~0u;
fs->z_replicate = ~0u;
if (hw_subpass->z_replicate >= 0) {
gl_frag_result location = FRAG_RESULT_DATA0 + u;
data->fs.output_formats[location] = PIPE_FORMAT_R32_FLOAT;
data->fs.z_replicate = location;
const struct usc_mrt_resource *mrt_resource =
&hw_subpass->setup.mrt_resources[u];
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
fs->output_formats[location] = PIPE_FORMAT_R32_FLOAT;
fs->z_replicate = location;
if (tile_buffer) {
fs->num_tile_buffers =
MAX2(fs->num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
fs->output_tile_buffers |= BITFIELD_BIT(u);
}
}
}
@ -1869,13 +1901,14 @@ pvr_setup_fs_outputs(pco_data *data,
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
{
uint64_t outputs_written = nir->info.outputs_written;
pco_fs_data *fs = &data->fs;
unsigned u;
for (u = 0; u < subpass->color_count; ++u) {
gl_frag_result location = FRAG_RESULT_DATA0 + u;
unsigned idx = subpass->color_attachments[u];
const struct usc_mrt_resource *mrt_resource;
ASSERTED bool output_reg;
bool tile_buffer;
nir_variable *var;
if (idx == VK_ATTACHMENT_UNUSED)
@ -1886,16 +1919,16 @@ pvr_setup_fs_outputs(pco_data *data,
continue;
mrt_resource = &hw_subpass->setup.mrt_resources[u];
tile_buffer = fs->output_tile_buffers & BITFIELD_BIT(u);
/* TODO: tile buffer support. */
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(output_reg);
set_var(data->fs.outputs,
mrt_resource->reg.output_reg,
set_var(fs->outputs,
tile_buffer ? mrt_resource->mem.tile_buffer
: mrt_resource->reg.output_reg,
var,
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)));
data->fs.output_reg[location] = output_reg;
if (tile_buffer)
fs->outputs[location].offset = mrt_resource->mem.offset_dw;
outputs_written &= ~BITFIELD64_BIT(location);
}
@ -1904,21 +1937,20 @@ pvr_setup_fs_outputs(pco_data *data,
const struct usc_mrt_resource *mrt_resource =
&hw_subpass->setup.mrt_resources[hw_subpass->z_replicate];
gl_frag_result location = FRAG_RESULT_DATA0 + u;
ASSERTED bool output_reg;
nir_variable *var;
var = nir_find_variable_with_location(nir, nir_var_shader_out, location);
nir_variable *var =
nir_find_variable_with_location(nir, nir_var_shader_out, location);
if (var) {
/* TODO: tile buffer support. */
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(output_reg);
bool tile_buffer = fs->output_tile_buffers & BITFIELD_BIT(u);
set_var(data->fs.outputs,
mrt_resource->reg.output_reg,
set_var(fs->outputs,
tile_buffer ? mrt_resource->mem.tile_buffer
: mrt_resource->reg.output_reg,
var,
DIV_ROUND_UP(mrt_resource->intermediate_size,
sizeof(uint32_t)));
data->fs.output_reg[location] = output_reg;
if (tile_buffer)
fs->outputs[location].offset = mrt_resource->mem.offset_dw;
outputs_written &= ~BITFIELD64_BIT(location);
}
@ -1933,6 +1965,7 @@ static void pvr_init_fs_input_attachments(
const struct pvr_render_subpass *const subpass,
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
{
pco_fs_data *fs = &data->fs;
for (unsigned u = 0; u < subpass->input_count; ++u) {
unsigned idx = subpass->input_attachments[u];
if (idx == VK_ATTACHMENT_UNUSED)
@ -1949,22 +1982,20 @@ static void pvr_init_fs_input_attachments(
vk_format = VK_FORMAT_R32_SFLOAT;
}
data->fs.ia_formats[u] = vk_format_to_pipe_format(vk_format);
fs->ia_formats[u] = vk_format_to_pipe_format(vk_format);
assert(fs->ia_formats[u] != PIPE_FORMAT_NONE);
unsigned mrt_idx = hw_subpass->input_access[u].on_chip_rt;
const struct usc_mrt_resource *mrt_resource =
&hw_subpass->setup.mrt_resources[mrt_idx];
ASSERTED bool output_reg = mrt_resource->type ==
USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(output_reg);
/* TODO: tile buffer support. */
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
data->fs.ias_onchip[u] = (pco_range){
.start = mrt_resource->reg.output_reg,
.count =
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)),
};
if (tile_buffer) {
fs->num_tile_buffers =
MAX2(fs->num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
fs->ia_tile_buffers |= BITFIELD_BIT(u);
}
}
}
@ -2019,7 +2050,33 @@ static void pvr_setup_fs_input_attachments(
const struct pvr_render_subpass *const subpass,
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
{
/* pvr_finishme("pvr_setup_fs_input_attachments"); */
pco_fs_data *fs = &data->fs;
for (unsigned u = 0; u < subpass->input_count; ++u) {
unsigned idx = subpass->input_attachments[u];
if (idx == VK_ATTACHMENT_UNUSED)
continue;
bool onchip = hw_subpass->input_access[u].type !=
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
if (!onchip)
continue;
unsigned mrt_idx = hw_subpass->input_access[u].on_chip_rt;
const struct usc_mrt_resource *mrt_resource =
&hw_subpass->setup.mrt_resources[mrt_idx];
bool tile_buffer = fs->ia_tile_buffers & BITFIELD_BIT(u);
fs->ias_onchip[u] = (pco_range){
.start = tile_buffer ? mrt_resource->mem.tile_buffer
: mrt_resource->reg.output_reg,
.count =
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)),
};
if (tile_buffer)
fs->ias_onchip[u].offset = mrt_resource->mem.offset_dw;
}
}
static void pvr_setup_fs_blend(pco_data *data)
@ -2036,6 +2093,29 @@ static void pvr_setup_fs_blend(pco_data *data)
data->common.shareds += num_blend_consts;
}
/** Size the fragment-shader tile-buffer address range.
 *
 * Reserves two dwords (one 64-bit address) per tile buffer in use; no-op
 * when the shader uses no tile buffers. The start register is assigned
 * later, in the shared-register setup step.
 */
static void pvr_init_fs_tile_buffers(pco_data *data)
{
   pco_fs_data *fs = &data->fs;

   if (fs->num_tile_buffers == 0)
      return;

   /* Each tile buffer address is 64 bits: two 32-bit dwords. */
   const unsigned dwords_per_addr = sizeof(uint64_t) / sizeof(uint32_t);

   fs->tile_buffers = (pco_range){
      .count = fs->num_tile_buffers * dwords_per_addr,
      .stride = dwords_per_addr,
   };
}
/** Allocate shared registers for the fragment-shader tile buffer addresses.
 *
 * Places the previously-sized tile-buffer range at the current shared
 * register watermark and advances it; no-op when no tile buffers are used.
 */
static void pvr_setup_fs_tile_buffers(pco_data *data)
{
   pco_range *range = &data->fs.tile_buffers;

   if (range->count == 0)
      return;

   range->start = data->common.shareds;
   data->common.shareds += range->count;
}
static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir)
{
BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
@ -2317,6 +2397,7 @@ pvr_preprocess_shader_data(pco_data *data,
pvr_init_fs_outputs(data, pass, subpass, hw_subpass);
pvr_init_fs_input_attachments(data, pass, subpass, hw_subpass);
pvr_init_fs_blend(data, state->cb);
pvr_init_fs_tile_buffers(data);
if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_MASK) ||
(state->ms && state->ms->sample_mask != 0xffff)) {
@ -2374,6 +2455,7 @@ static void pvr_postprocess_shader_data(pco_data *data,
pvr_setup_fs_outputs(data, nir, subpass, hw_subpass);
pvr_setup_fs_input_attachments(data, nir, subpass, hw_subpass);
pvr_setup_fs_blend(data);
pvr_setup_fs_tile_buffers(data);
/* TODO: push consts, blend consts, dynamic state, etc. */
break;

View file

@ -1123,6 +1123,7 @@ struct pvr_load_op {
struct pvr_suballoc_bo *usc_frag_prog_bo;
uint32_t const_shareds_count;
uint32_t shareds_count;
uint32_t num_tile_buffers;
struct pvr_pds_upload pds_frag_prog;

View file

@ -707,6 +707,7 @@ pvr_spm_init_eot_state(struct pvr_device *device,
/* Store off-chip tile data (i.e. tile buffers). */
for (uint32_t i = 0; i < hw_render->tile_buffers_count; i++) {
continue;
assert(!"Add support for tile buffers in EOT");
pvr_finishme("Add support for tile buffers in EOT");

View file

@ -91,6 +91,17 @@ pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
if (u > 0)
nir_wop_pco(&b);
if (props->tile_buffer_addrs[u]) {
nir_def *tile_buffer_addr_lo =
nir_imm_int(&b, props->tile_buffer_addrs[u] & 0xffffffff);
nir_def *tile_buffer_addr_hi =
nir_imm_int(&b, props->tile_buffer_addrs[u] >> 32);
nir_flush_tile_buffer_pco(&b,
tile_buffer_addr_lo,
tile_buffer_addr_hi);
}
nir_def *state0;
nir_def *state1;
if (props->shared_words) {
@ -860,7 +871,6 @@ pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
.start = 0,
.count = pixel_size,
};
data.fs.output_reg[FRAG_RESULT_DATA0] = true;
nir_def *loaded_data;
nir_def *coords =
@ -1024,15 +1034,21 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
}
struct usc_mrt_resource *mrt_resource = &mrt_setup->mrt_resources[rt_idx];
/* TODO: tile buffer support */
assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG);
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
data.fs.outputs[FRAG_RESULT_DATA0 + rt_idx] = (pco_range){
.start = mrt_resource->reg.output_reg,
.start = tile_buffer ? mrt_resource->mem.tile_buffer
: mrt_resource->reg.output_reg,
.count = accum_size_dwords,
};
data.fs.output_reg[FRAG_RESULT_DATA0 + rt_idx] = true;
if (tile_buffer) {
data.fs.num_tile_buffers =
MAX2(data.fs.num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
data.fs.output_tile_buffers |= BITFIELD_BIT(rt_idx);
data.fs.outputs[FRAG_RESULT_DATA0 + rt_idx].offset =
mrt_resource->mem.offset_dw;
}
nir_create_variable_with_location(b.shader,
nir_var_shader_out,
@ -1064,11 +1080,11 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
struct usc_mrt_resource *mrt_resource =
&mrt_setup->mrt_resources[depth_idx];
/* TODO: tile buffer support */
assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG);
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
assert(DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)) ==
1);
unsigned accum_size_dwords =
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t));
assert(accum_size_dwords == 1);
data.fs.output_formats[FRAG_RESULT_DATA0 + depth_idx] =
PIPE_FORMAT_R32_FLOAT;
@ -1076,11 +1092,18 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
const glsl_type *type = glsl_float_type();
data.fs.outputs[FRAG_RESULT_DATA0 + depth_idx] = (pco_range){
.start = mrt_resource->reg.output_reg,
.count = 1,
.start = tile_buffer ? mrt_resource->mem.tile_buffer
: mrt_resource->reg.output_reg,
.count = accum_size_dwords,
};
data.fs.output_reg[FRAG_RESULT_DATA0 + depth_idx] = true;
if (tile_buffer) {
data.fs.num_tile_buffers =
MAX2(data.fs.num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
data.fs.output_tile_buffers |= BITFIELD_BIT(depth_idx);
data.fs.outputs[FRAG_RESULT_DATA0 + depth_idx].offset =
mrt_resource->mem.offset_dw;
}
nir_create_variable_with_location(b.shader,
nir_var_shader_out,
@ -1159,6 +1182,21 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
}
}
if (data.fs.num_tile_buffers > 0) {
unsigned tile_buffer_addr_dwords =
data.fs.num_tile_buffers * (sizeof(uint64_t) / sizeof(uint32_t));
data.fs.tile_buffers = (pco_range){
.start = shared_regs,
.count = tile_buffer_addr_dwords,
.stride = sizeof(uint64_t) / sizeof(uint32_t),
};
shared_regs += tile_buffer_addr_dwords;
load_op->num_tile_buffers = data.fs.num_tile_buffers;
}
nir_jump(&b, nir_jump_return);
load_op->const_shareds_count = shared_regs;

View file

@ -30,6 +30,8 @@ struct pvr_eot_props {
const uint32_t *state_words;
const unsigned *state_regs;
};
uint64_t tile_buffer_addrs[PVR_MAX_COLOR_ATTACHMENTS];
};
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props);