mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
pvr, pco: tile buffer support
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
parent
c31e0a7159
commit
297a0c269a
14 changed files with 499 additions and 65 deletions
|
|
@ -2740,3 +2740,5 @@ system_value("face_ccw_pco", 1, bit_sizes=[32])
|
|||
system_value("front_face_op_pco", 1, bit_sizes=[32])
|
||||
|
||||
system_value("fs_meta_pco", 1, bit_sizes=[32])
|
||||
|
||||
# PCO tile buffer flush. Takes two 1-component sources; the caller visible in
# this change (the PowerVR EOT shader generator) passes the low and then the
# high 32 bits of a tile buffer address. The PCO backend translates it into a
# ld.regbl from that address (plus a tiled-load offset) into pixout registers.
intrinsic("flush_tile_buffer_pco", src_comp=[1, 1])
|
||||
|
|
|
|||
|
|
@ -30,7 +30,10 @@ typedef struct _pco_shader pco_shader;
|
|||
/**
 * PCO range: a start index and an element count, plus one extra field that is
 * read either as a stride or as an offset depending on what the range
 * describes.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. Going by the
 * hunk header (7 lines before, 10 after), the standalone `unsigned stride;`
 * is the removed line and the anonymous union is its replacement - confirm
 * against the real header before relying on the layout.
 */
typedef struct _pco_range {
|
||||
unsigned start;
|
||||
unsigned count;
|
||||
unsigned stride;
|
||||
/* stride and offset share storage; a given range uses only one of them. */
union {
|
||||
/* Used e.g. by fs.tile_buffers: dwords per tile buffer address. */
unsigned stride;
|
||||
/* Used e.g. by fs.outputs/ias_onchip entries that live in a tile buffer:
 * set from the MRT resource's mem.offset_dw. */
unsigned offset;
|
||||
};
|
||||
} pco_range;
|
||||
|
||||
/** PCO vertex shader-specific data. */
|
||||
|
|
@ -71,17 +74,21 @@ typedef struct _pco_fs_data {
|
|||
/** Results/output mappings. */
|
||||
pco_range outputs[FRAG_RESULT_MAX];
|
||||
|
||||
/** If outputs are to be placed in pixout regs. */
|
||||
bool output_reg[FRAG_RESULT_MAX];
|
||||
/** If outputs are to be placed in tile buffers. */
|
||||
uint8_t output_tile_buffers;
|
||||
|
||||
/** Fragment output formats. */
|
||||
enum pipe_format output_formats[FRAG_RESULT_MAX];
|
||||
|
||||
/** On-chip input attachment mappings. */
|
||||
pco_range ias_onchip[4];
|
||||
pco_range ias_onchip[8];
|
||||
|
||||
/** On-chip input attachment formats. */
|
||||
enum pipe_format ia_formats[4];
|
||||
enum pipe_format ia_formats[8];
|
||||
|
||||
uint8_t ia_tile_buffers;
|
||||
|
||||
unsigned num_tile_buffers;
|
||||
|
||||
pco_range meta;
|
||||
|
||||
|
|
@ -94,6 +101,8 @@ typedef struct _pco_fs_data {
|
|||
|
||||
uint16_t rasterization_samples;
|
||||
|
||||
pco_range tile_buffers;
|
||||
|
||||
struct {
|
||||
bool w; /** Whether the shader uses pos.w. */
|
||||
bool z; /** Whether the shader uses pos.z */
|
||||
|
|
|
|||
|
|
@ -1336,6 +1336,19 @@ encode_map(O_LD,
|
|||
op_ref_maps=[('backend', ['s3'], ['drc', 'imm', ['s0', 's1', 's2', 's3', 's4', 's5']])]
|
||||
)
|
||||
|
||||
# Encoding for the ld.regbl op: maps it onto the I_LD_REGBL instruction.
#   drc          <- DRC taken from source 0
#   srcselbl     <- source selector of source 1
#   srcseladd    <- source selector of source 2
#   cachemode_ld <- the op's MCU load cache mode modifier
# Presumably "bl" is the burst length and "add" the address; the one user
# visible in this change passes pco_zero for source 1 and the address vector
# for source 2 - TODO confirm against the ISA definition.
encode_map(O_LD_REGBL,
|
||||
encodings=[
|
||||
(I_LD_REGBL, [
|
||||
('drc', ('pco_ref_get_drc', SRC(0))),
|
||||
|
||||
('srcseladd', ('pco_ref_srcsel', SRC(2))),
|
||||
('srcselbl', ('pco_ref_srcsel', SRC(1))),
|
||||
('cachemode_ld', OM_MCU_CACHE_MODE_LD)
|
||||
])
|
||||
],
|
||||
# Backend phase: destination restricted to s3; source 0 is a DRC, sources 1
# and 2 may each be any of s0-s5.
op_ref_maps=[('backend', ['s3'], ['drc', ['s0', 's1', 's2', 's3', 's4', 's5'], ['s0', 's1', 's2', 's3', 's4', 's5']])]
|
||||
)
|
||||
|
||||
encode_map(O_ST,
|
||||
encodings=[
|
||||
(I_ST_IMMBL, [
|
||||
|
|
@ -1350,6 +1363,21 @@ encode_map(O_ST,
|
|||
op_ref_maps=[('backend', [], [['s0', 's1', 's2', 's3', 's4', 's5'], 'imm', 'drc', 'imm', ['s0', 's1', 's2', 's3', 's4', 's5'], ['s0', 's1', 's2', 's3', 's4', 's5', '_']])]
|
||||
)
|
||||
|
||||
# Encoding for the st.tiled op: maps it onto the I_ST_IMMBL_TILED instruction.
# Field sources, by op source index:
#   SRC(0) -> srcseldata (source selector of the data)
#   SRC(1) -> dsize      (immediate)
#   SRC(2) -> drc
#   SRC(3) -> burstlen   (immediate)
#   SRC(4) -> srcseladd  (source selector)
#   SRC(5) -> srcmask    (source selector)
# cachemode_st comes from the op's MCU store cache mode modifier.
encode_map(O_ST_TILED,
|
||||
encodings=[
|
||||
(I_ST_IMMBL_TILED, [
|
||||
('drc', ('pco_ref_get_drc', SRC(2))),
|
||||
('srcseladd', ('pco_ref_srcsel', SRC(4))),
|
||||
('burstlen', ('pco_ref_get_imm', SRC(3))),
|
||||
('cachemode_st', OM_MCU_CACHE_MODE_ST),
|
||||
('srcseldata', ('pco_ref_srcsel', SRC(0))),
|
||||
('dsize', ('pco_ref_get_imm', SRC(1))),
|
||||
('srcmask', ('pco_ref_srcsel', SRC(5)))
|
||||
])
|
||||
],
|
||||
# Backend phase, no destinations. Sources 0 and 4 may be any of s0-s5,
# sources 1 and 3 are immediates, source 2 is a DRC, and source 5 may be
# s0-s5 or '_'.
op_ref_maps=[('backend', [], [['s0', 's1', 's2', 's3', 's4', 's5'], 'imm', 'drc', 'imm', ['s0', 's1', 's2', 's3', 's4', 's5'], ['s0', 's1', 's2', 's3', 's4', 's5', '_']])]
|
||||
)
|
||||
|
||||
encode_map(O_ATOMIC,
|
||||
encodings=[
|
||||
(I_ATOMIC, [
|
||||
|
|
@ -2608,6 +2636,25 @@ group_map(O_ST32,
|
|||
]
|
||||
)
|
||||
|
||||
# Instruction-group mapping for st.tiled: a main-header group whose single
# encoded op is the backend st.tiled.
group_map(O_ST_TILED,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'be'),
|
||||
# olchk, cc, end and atom are taken from the op's modifiers; w0p/w1p are
# fixed to False and rpt to 1.
('olchk', OM_OLCHK),
|
||||
('w1p', False),
|
||||
('w0p', False),
|
||||
('cc', OM_EXEC_CND),
|
||||
('end', OM_END),
|
||||
('atom', OM_ATOM),
|
||||
('rpt', 1)
|
||||
]),
|
||||
enc_ops=[('backend', O_ST_TILED)],
|
||||
# Group sources: op source 0 goes to s0, source 4 to s3 and source 5 to s4.
# The remaining op sources (1, 2, 3) are not routed through group sources.
srcs=[
|
||||
('s[0]', ('backend', SRC(0)), 's0'),
|
||||
('s[3]', ('backend', SRC(4)), 's3'),
|
||||
('s[4]', ('backend', SRC(5)), 's4')
|
||||
]
|
||||
)
|
||||
|
||||
group_map(O_IADD32_ATOMIC,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'p0_p1'),
|
||||
|
|
@ -2857,6 +2904,25 @@ group_map(O_LD,
|
|||
]
|
||||
)
|
||||
|
||||
# Instruction-group mapping for ld.regbl: a main-header group whose single
# encoded op is the backend ld.regbl.
group_map(O_LD_REGBL,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'be'),
|
||||
# olchk, cc, end and atom are taken from the op's modifiers; w0p/w1p are
# fixed to False and rpt to 1.
('olchk', OM_OLCHK),
|
||||
('w1p', False),
|
||||
('w0p', False),
|
||||
('cc', OM_EXEC_CND),
|
||||
('end', OM_END),
|
||||
('atom', OM_ATOM),
|
||||
('rpt', 1)
|
||||
]),
|
||||
enc_ops=[('backend', O_LD_REGBL)],
|
||||
# Group sources: op source 2 goes to s0, op source 1 to s1, and the op's
# destination is passed through s3.
srcs=[
|
||||
('s[0]', ('backend', SRC(2)), 's0'),
|
||||
('s[1]', ('backend', SRC(1)), 's1'),
|
||||
('s[3]', ('backend', DEST(0)), 's3')
|
||||
]
|
||||
)
|
||||
|
||||
group_map(O_ATOMIC,
|
||||
hdr=(I_IGRP_HDR_MAIN, [
|
||||
('oporg', 'be'),
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ lower_image_derefs(nir_builder *b, nir_intrinsic_instr *intr, pco_data *data)
|
|||
|
||||
if (ia) {
|
||||
unsigned ia_idx = var->data.index;
|
||||
bool onchip = data->fs.ias_onchip[ia_idx].count > 0;
|
||||
bool onchip = data->fs.ia_formats[ia_idx] != PIPE_FORMAT_NONE;
|
||||
|
||||
if (onchip) {
|
||||
nir_def *elem = array_elem_from_deref(b, deref);
|
||||
|
|
|
|||
|
|
@ -380,7 +380,13 @@ O_FITR = hw_op('fitr', [OM_EXEC_CND, OM_END, OM_ITR_MODE, OM_SAT], 1, 3)
|
|||
O_FITRP = hw_op('fitrp', [OM_EXEC_CND, OM_END, OM_ITR_MODE, OM_SAT], 1, 4)
|
||||
|
||||
O_LD = hw_op('ld', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_LD], 1, 3)
|
||||
# ld.regbl: same modifier set as ld. The trailing "1, 3" presumably means one
# destination and three sources (its encode/group maps use DEST(0) and
# SRC(0)..SRC(2)) - TODO confirm against hw_op().
O_LD_REGBL = hw_op('ld.regbl', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_LD], 1, 3)
|
||||
|
||||
O_ST = hw_direct_op('st', [OM_MCU_CACHE_MODE_ST], 0, 6)
|
||||
|
||||
# st.tiled takes no destination and six sources, in this order:
# { data, data size, drc, chans, addr, cov_msk}
|
||||
O_ST_TILED = hw_op('st.tiled', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST], 0, 6)
|
||||
|
||||
O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2)
|
||||
|
||||
O_SMP = hw_op('smp', OM_ALU_RPT1 + [OM_DIM, OM_PROJ, OM_FCNORM, OM_NNCOORDS,
|
||||
|
|
|
|||
|
|
@ -429,6 +429,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
static pco_instr *
|
||||
trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
|
||||
{
|
||||
pco_fs_data *fs_data = &tctx->shader->data.fs;
|
||||
ASSERTED unsigned base = nir_intrinsic_base(intr);
|
||||
assert(!base);
|
||||
|
||||
|
|
@ -440,15 +441,108 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
|
|||
|
||||
gl_frag_result location = nir_intrinsic_io_semantics(intr).location;
|
||||
|
||||
const pco_range *range = &tctx->shader->data.fs.outputs[location];
|
||||
const pco_range *range = &fs_data->outputs[location];
|
||||
assert(component < range->count);
|
||||
|
||||
ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
|
||||
assert(output_reg);
|
||||
/* TODO: tile buffer support. */
|
||||
unsigned idx = location - FRAG_RESULT_DATA0;
|
||||
bool tile_buffer = fs_data->output_tile_buffers & BITFIELD_BIT(idx);
|
||||
if (!tile_buffer) {
|
||||
pco_ref dest =
|
||||
pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
|
||||
return pco_mov(&tctx->b, dest, src, .olchk = true);
|
||||
}
|
||||
|
||||
pco_ref dest = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
|
||||
return pco_mov(&tctx->b, dest, src, .olchk = true);
|
||||
unsigned tile_buffer_id = range->start;
|
||||
pco_range *tile_buffers = &fs_data->tile_buffers;
|
||||
assert(tile_buffer_id < (tile_buffers->count / tile_buffers->stride));
|
||||
unsigned sh_index =
|
||||
tile_buffers->start + tile_buffer_id * tile_buffers->stride;
|
||||
|
||||
pco_ref base_addr[2];
|
||||
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
|
||||
|
||||
pco_ref addr_data_comps[3] = {
|
||||
[2] = src,
|
||||
};
|
||||
pco_ref_new_ssa_addr_comps(tctx->func, addr_data_comps);
|
||||
|
||||
component += range->offset;
|
||||
assert(component < 8);
|
||||
|
||||
unsigned sr_index = component < 4 ? component + PCO_SR_TILED_ST_COMP0
|
||||
: component + PCO_SR_TILED_ST_COMP4 - 4;
|
||||
pco_ref tiled_offset = pco_ref_hwreg(sr_index, PCO_REG_CLASS_SPEC);
|
||||
|
||||
pco_add64_32(&tctx->b,
|
||||
addr_data_comps[0],
|
||||
addr_data_comps[1],
|
||||
base_addr[0],
|
||||
base_addr[1],
|
||||
tiled_offset,
|
||||
pco_ref_null(),
|
||||
.olchk = true,
|
||||
.s = true);
|
||||
|
||||
unsigned chans = pco_ref_get_chans(src);
|
||||
pco_ref addr_data = pco_ref_new_ssa_addr_data(tctx->func, chans);
|
||||
pco_vec(&tctx->b, addr_data, ARRAY_SIZE(addr_data_comps), addr_data_comps);
|
||||
|
||||
pco_ref data_comp =
|
||||
pco_ref_new_ssa(tctx->func, pco_ref_get_bits(src), chans);
|
||||
pco_comp(&tctx->b, data_comp, addr_data, pco_ref_val16(2));
|
||||
|
||||
pco_ref cov_mask = pco_ref_new_ssa32(tctx->func);
|
||||
pco_ref sample_id = pco_ref_hwreg(PCO_SR_SAMP_NUM, PCO_REG_CLASS_SPEC);
|
||||
pco_shift(&tctx->b,
|
||||
cov_mask,
|
||||
pco_one,
|
||||
sample_id,
|
||||
pco_ref_null(),
|
||||
.shiftop = PCO_SHIFTOP_LSL);
|
||||
|
||||
return pco_st_tiled(&tctx->b,
|
||||
data_comp,
|
||||
pco_ref_imm8(PCO_DSIZE_32BIT),
|
||||
pco_ref_drc(PCO_DRC_0),
|
||||
pco_ref_imm8(chans),
|
||||
addr_data,
|
||||
cov_mask);
|
||||
}
|
||||
|
||||
/**
 * Translates the flush_tile_buffer_pco intrinsic.
 *
 * Builds a 64-bit address from the two 32-bit halves plus the
 * PCO_SR_TILED_LD_COMP0 special register, then emits a ld.regbl from that
 * address whose destination is pixout register 0 addressed through index
 * register 0 (which is written with zero first).
 *
 * tctx        - translation context; instructions are appended via tctx->b.
 * intr        - the NIR intrinsic; not read by this function.
 * src_addr_lo - low 32 bits of the tile buffer address.
 * src_addr_hi - high 32 bits of the tile buffer address.
 *
 * Returns the emitted ld.regbl instruction.
 */
static pco_instr *trans_flush_tile_buffer(trans_ctx *tctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
pco_ref src_addr_lo,
|
||||
pco_ref src_addr_hi)
|
||||
{
|
||||
/* Fresh SSA refs for the low/high halves of the final address. */
pco_ref addr_comps[2];
|
||||
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps);
|
||||
|
||||
/* Special register added onto the base address below. */
pco_ref tiled_offset =
|
||||
pco_ref_hwreg(PCO_SR_TILED_LD_COMP0, PCO_REG_CLASS_SPEC);
|
||||
|
||||
/* addr_comps = (src_addr_hi:src_addr_lo) + tiled_offset */
pco_add64_32(&tctx->b,
|
||||
addr_comps[0],
|
||||
addr_comps[1],
|
||||
src_addr_lo,
|
||||
src_addr_hi,
|
||||
tiled_offset,
|
||||
pco_ref_null(),
|
||||
.olchk = true,
|
||||
.s = true);
|
||||
|
||||
/* Pack the two halves into a single address vector for the load. */
pco_ref addr = pco_ref_new_ssa_addr(tctx->func);
|
||||
pco_vec(&tctx->b, addr, ARRAY_SIZE(addr_comps), addr_comps);
|
||||
|
||||
unsigned idx_reg_num = 0;
|
||||
pco_ref idx_reg =
|
||||
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
|
||||
|
||||
/* Write zero to index register 0 before it is used to index the dest. */
pco_mbyp(&tctx->b, idx_reg, pco_zero);
|
||||
|
||||
/* Destination: pixout register 0, addressed via index register 0. */
pco_ref dest = pco_ref_hwreg(0, PCO_REG_CLASS_PIXOUT);
|
||||
dest = pco_ref_hwreg_idx_from(idx_reg_num, dest);
|
||||
|
||||
/* ld.regbl sources: DRC 0, pco_zero, and the computed address. */
return pco_ld_regbl(&tctx->b, dest, pco_ref_drc(PCO_DRC_0), pco_zero, addr);
|
||||
}
|
||||
|
||||
static unsigned fetch_resource_base_reg(const pco_common_data *common,
|
||||
|
|
@ -515,7 +609,10 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
|
|||
static pco_instr *
|
||||
trans_load_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
||||
{
|
||||
pco_fs_data *fs_data = &tctx->shader->data.fs;
|
||||
unsigned base = nir_intrinsic_base(intr);
|
||||
|
||||
assert(pco_ref_is_scalar(dest));
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
|
||||
ASSERTED const nir_src offset = intr->src[0];
|
||||
|
|
@ -524,26 +621,67 @@ trans_load_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
gl_frag_result location = nir_intrinsic_io_semantics(intr).location;
|
||||
|
||||
const pco_range *range;
|
||||
bool tile_buffer;
|
||||
if (location >= FRAG_RESULT_DATA0) {
|
||||
assert(!base);
|
||||
|
||||
range = &tctx->shader->data.fs.outputs[location];
|
||||
|
||||
ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
|
||||
assert(output_reg);
|
||||
/* TODO: tile buffer support. */
|
||||
unsigned idx = location - FRAG_RESULT_DATA0;
|
||||
tile_buffer = fs_data->output_tile_buffers & BITFIELD_BIT(idx);
|
||||
} else if (location == FRAG_RESULT_COLOR) {
|
||||
/* Special case for on-chip input attachments. */
|
||||
assert(base < ARRAY_SIZE(tctx->shader->data.fs.ias_onchip));
|
||||
range = &tctx->shader->data.fs.ias_onchip[base];
|
||||
tile_buffer = fs_data->ia_tile_buffers & BITFIELD_BIT(base);
|
||||
} else {
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
assert(component < range->count);
|
||||
|
||||
pco_ref src = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
|
||||
return pco_mov(&tctx->b, dest, src, .olchk = true);
|
||||
if (!tile_buffer) {
|
||||
pco_ref src =
|
||||
pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
|
||||
return pco_mov(&tctx->b, dest, src, .olchk = true);
|
||||
}
|
||||
|
||||
unsigned tile_buffer_id = range->start;
|
||||
pco_range *tile_buffers = &fs_data->tile_buffers;
|
||||
assert(tile_buffer_id < (tile_buffers->count / tile_buffers->stride));
|
||||
unsigned sh_index =
|
||||
tile_buffers->start + tile_buffer_id * tile_buffers->stride;
|
||||
|
||||
pco_ref base_addr[2];
|
||||
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
|
||||
|
||||
pco_ref addr_comps[2];
|
||||
pco_ref_new_ssa_addr_comps(tctx->func, addr_comps);
|
||||
|
||||
component += range->offset;
|
||||
assert(component < 8);
|
||||
|
||||
unsigned sr_index = component < 4 ? component + PCO_SR_TILED_LD_COMP0
|
||||
: component + PCO_SR_TILED_LD_COMP4 - 4;
|
||||
pco_ref tiled_offset = pco_ref_hwreg(sr_index, PCO_REG_CLASS_SPEC);
|
||||
|
||||
pco_add64_32(&tctx->b,
|
||||
addr_comps[0],
|
||||
addr_comps[1],
|
||||
base_addr[0],
|
||||
base_addr[1],
|
||||
tiled_offset,
|
||||
pco_ref_null(),
|
||||
.olchk = true,
|
||||
.s = true);
|
||||
|
||||
pco_ref addr = pco_ref_new_ssa_addr(tctx->func);
|
||||
pco_vec(&tctx->b, addr, ARRAY_SIZE(addr_comps), addr_comps);
|
||||
|
||||
unsigned chans = pco_ref_get_chans(dest);
|
||||
return pco_ld(&tctx->b,
|
||||
dest,
|
||||
pco_ref_drc(PCO_DRC_0),
|
||||
pco_ref_imm8(chans),
|
||||
addr);
|
||||
}
|
||||
|
||||
static pco_instr *trans_load_common_store(trans_ctx *tctx,
|
||||
|
|
@ -1295,6 +1433,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
instr = trans_load_output_fs(tctx, intr, dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_flush_tile_buffer_pco:
|
||||
assert(tctx->stage == MESA_SHADER_FRAGMENT);
|
||||
instr = trans_flush_tile_buffer(tctx, intr, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_preamble:
|
||||
instr = pco_mov(&tctx->b,
|
||||
dest,
|
||||
|
|
|
|||
|
|
@ -900,6 +900,7 @@ struct pvr_pds_descriptor_set {
|
|||
#define PVR_BUFFER_TYPE_IA_SAMPLER (8)
|
||||
#define PVR_BUFFER_TYPE_FRONT_FACE_OP (9)
|
||||
#define PVR_BUFFER_TYPE_FS_META (10)
|
||||
/* Special buffer carrying the tile buffer device addresses (one low/high
 * dword pair per tile buffer) for fragment shaders that use tile buffers. */
#define PVR_BUFFER_TYPE_TILE_BUFFERS (11)
|
||||
#define PVR_BUFFER_TYPE_INVALID (~0)
|
||||
|
||||
struct pvr_pds_buffer {
|
||||
|
|
|
|||
|
|
@ -1580,7 +1580,8 @@ void pvr_pds_generate_descriptor_upload_program(
|
|||
case PVR_BUFFER_TYPE_POINT_SAMPLER:
|
||||
case PVR_BUFFER_TYPE_IA_SAMPLER:
|
||||
case PVR_BUFFER_TYPE_FRONT_FACE_OP:
|
||||
case PVR_BUFFER_TYPE_FS_META: {
|
||||
case PVR_BUFFER_TYPE_FS_META:
|
||||
case PVR_BUFFER_TYPE_TILE_BUFFERS: {
|
||||
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
|
||||
|
||||
special_buffer_entry =
|
||||
|
|
|
|||
|
|
@ -489,6 +489,7 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
|||
struct pvr_cmd_buffer *const cmd_buffer,
|
||||
const uint32_t emit_count,
|
||||
const uint32_t *pbe_cs_words,
|
||||
const unsigned *tile_buffer_ids,
|
||||
struct pvr_pds_upload *const pds_upload_out)
|
||||
{
|
||||
struct pvr_pds_event_program pixel_event_program = {
|
||||
|
|
@ -499,6 +500,8 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
|||
PVR_DW_TO_BYTES(cmd_buffer->device->pixel_event_data_size_in_dwords);
|
||||
const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
|
||||
struct pvr_device *const device = cmd_buffer->device;
|
||||
const struct pvr_device_tile_buffer_state *tile_buffer_state =
|
||||
&device->tile_buffer_state;
|
||||
struct pvr_suballoc_bo *usc_eot_program = NULL;
|
||||
struct pvr_eot_props props = {
|
||||
.emit_count = emit_count,
|
||||
|
|
@ -510,6 +513,16 @@ static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
|
|||
pco_shader *eot;
|
||||
VkResult result;
|
||||
|
||||
for (unsigned u = 0; u < emit_count; ++u) {
|
||||
unsigned tile_buffer_id = tile_buffer_ids[u];
|
||||
if (tile_buffer_id == ~0)
|
||||
continue;
|
||||
|
||||
assert(tile_buffer_id < tile_buffer_state->buffer_count);
|
||||
props.tile_buffer_addrs[u] =
|
||||
tile_buffer_state->buffers[tile_buffer_id]->vma->dev_addr.addr;
|
||||
}
|
||||
|
||||
eot = pvr_usc_eot(cmd_buffer->device->pdevice->pco_ctx, &props);
|
||||
usc_temp_count = pco_shader_data(eot)->common.temps;
|
||||
|
||||
|
|
@ -778,6 +791,12 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
|||
buffer_size +=
|
||||
texture_count * sizeof(struct pvr_combined_image_sampler_descriptor);
|
||||
|
||||
unsigned tile_buffer_offset = buffer_size;
|
||||
buffer_size += load_op->num_tile_buffers * sizeof(uint64_t);
|
||||
|
||||
assert(!(buffer_size % sizeof(uint32_t)));
|
||||
assert(buffer_size / sizeof(uint32_t) == load_op->shareds_count);
|
||||
|
||||
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
cmd_buffer->device->heaps.general_heap,
|
||||
buffer_size,
|
||||
|
|
@ -794,6 +813,20 @@ pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
|||
texture_states,
|
||||
texture_count * sizeof(struct pvr_combined_image_sampler_descriptor));
|
||||
|
||||
struct pvr_device *const device = cmd_buffer->device;
|
||||
const struct pvr_device_tile_buffer_state *tile_buffer_state =
|
||||
&device->tile_buffer_state;
|
||||
|
||||
uint32_t *tile_buffers = (uint32_t *)&buffer[tile_buffer_offset];
|
||||
for (unsigned u = 0; u < load_op->num_tile_buffers; ++u) {
|
||||
assert(u < tile_buffer_state->buffer_count);
|
||||
uint64_t tile_buffer_addr =
|
||||
tile_buffer_state->buffers[u]->vma->dev_addr.addr;
|
||||
|
||||
tile_buffers[2 * u] = tile_buffer_addr & 0xffffffff;
|
||||
tile_buffers[2 * u + 1] = tile_buffer_addr >> 32;
|
||||
}
|
||||
|
||||
*addr_out = clear_bo->dev_addr;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -1313,6 +1346,8 @@ struct pvr_emit_state {
|
|||
uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
|
||||
[ROGUE_NUM_PBESTATE_REG_WORDS];
|
||||
|
||||
unsigned tile_buffer_ids[PVR_MAX_COLOR_ATTACHMENTS];
|
||||
|
||||
uint32_t emit_count;
|
||||
};
|
||||
|
||||
|
|
@ -1375,6 +1410,11 @@ pvr_setup_emit_state(const struct pvr_device_info *dev_info,
|
|||
assert(emit_state->emit_count < ARRAY_SIZE(emit_state->pbe_cs_words));
|
||||
assert(emit_state->emit_count < ARRAY_SIZE(emit_state->pbe_reg_words));
|
||||
|
||||
emit_state->tile_buffer_ids[emit_state->emit_count] =
|
||||
mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY
|
||||
? mrt_resource->mem.tile_buffer
|
||||
: ~0;
|
||||
|
||||
pvr_setup_pbe_state(dev_info,
|
||||
framebuffer,
|
||||
emit_state->emit_count,
|
||||
|
|
@ -1451,6 +1491,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
.pixel_event_program_data_offset;
|
||||
} else {
|
||||
struct pvr_emit_state emit_state = { 0 };
|
||||
memset(emit_state.tile_buffer_ids,
|
||||
~0,
|
||||
sizeof(emit_state.tile_buffer_ids));
|
||||
|
||||
pvr_setup_emit_state(dev_info, hw_render, render_pass_info, &emit_state);
|
||||
|
||||
|
|
@ -1462,6 +1505,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
|||
cmd_buffer,
|
||||
emit_state.emit_count,
|
||||
emit_state.pbe_cs_words[0],
|
||||
emit_state.tile_buffer_ids,
|
||||
&pds_pixel_event_program);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
|
@ -3870,6 +3914,44 @@ static VkResult pvr_setup_descriptor_mappings(
|
|||
break;
|
||||
}
|
||||
|
||||
/* Tile buffers special buffer: gather the device address of each tile buffer
 * the bound fragment shader needs, upload them as an array of low/high dword
 * pairs, and write the address of that upload into the PDS data section. */
case PVR_BUFFER_TYPE_TILE_BUFFERS: {
|
||||
const struct pvr_device_tile_buffer_state *tile_buffer_state =
|
||||
&cmd_buffer->device->tile_buffer_state;
|
||||
const struct pvr_graphics_pipeline *const gfx_pipeline =
|
||||
cmd_buffer->state.gfx_pipeline;
|
||||
const pco_data *const fs_data = &gfx_pipeline->fs_data;
|
||||
|
||||
/* tile_buffers.count is in dwords and stride is dwords per address, so
 * the quotient is the number of tile buffers. */
unsigned num_tile_buffers =
|
||||
fs_data->fs.tile_buffers.count / fs_data->fs.tile_buffers.stride;
|
||||
|
||||
/* NOTE(review): only the first num_tile_buffers pairs are written and
 * uploaded. Nothing here checks num_tile_buffers against
 * PVR_MAX_TILE_BUFFER_COUNT; the assert below only compares against
 * buffer_count - confirm that buffer_count never exceeds the maximum. */
uint32_t tile_buffer_addrs[PVR_MAX_TILE_BUFFER_COUNT * 2];
|
||||
|
||||
for (unsigned u = 0; u < num_tile_buffers; ++u) {
|
||||
assert(u < tile_buffer_state->buffer_count);
|
||||
uint64_t tile_buffer_addr =
|
||||
tile_buffer_state->buffers[u]->vma->dev_addr.addr;
|
||||
|
||||
/* Split each 64-bit address into low dword, then high dword. */
tile_buffer_addrs[2 * u] = tile_buffer_addr & 0xffffffff;
|
||||
tile_buffer_addrs[2 * u + 1] = tile_buffer_addr >> 32;
|
||||
}
|
||||
|
||||
struct pvr_suballoc_bo *tile_buffer_bo;
|
||||
result = pvr_cmd_buffer_upload_general(cmd_buffer,
|
||||
&tile_buffer_addrs,
|
||||
num_tile_buffers *
|
||||
sizeof(uint64_t),
|
||||
&tile_buffer_bo);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
/* Store the uploaded array's device address at this entry's const offset. */
PVR_WRITE(qword_buffer,
|
||||
tile_buffer_bo->dev_addr.addr,
|
||||
special_buff_entry->const_offset,
|
||||
pds_info->data_size_in_dwords);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unsupported special buffer type.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -621,6 +621,14 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
};
|
||||
}
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT && data->fs.tile_buffers.count > 0) {
|
||||
program.buffers[program.buffer_count++] = (struct pvr_pds_buffer){
|
||||
.type = PVR_BUFFER_TYPE_TILE_BUFFERS,
|
||||
.size_in_dwords = data->fs.tile_buffers.count,
|
||||
.destination = data->fs.tile_buffers.start,
|
||||
};
|
||||
}
|
||||
|
||||
pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
|
||||
|
||||
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
|
||||
|
|
@ -1844,21 +1852,45 @@ pvr_init_fs_outputs(pco_data *data,
|
|||
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
|
||||
{
|
||||
unsigned u;
|
||||
pco_fs_data *fs = &data->fs;
|
||||
|
||||
for (u = 0; u < subpass->color_count; ++u) {
|
||||
unsigned idx = subpass->color_attachments[u];
|
||||
const struct usc_mrt_resource *mrt_resource;
|
||||
bool tile_buffer;
|
||||
|
||||
if (idx == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
gl_frag_result location = FRAG_RESULT_DATA0 + u;
|
||||
VkFormat vk_format = pass->attachments[idx].vk_format;
|
||||
data->fs.output_formats[location] = vk_format_to_pipe_format(vk_format);
|
||||
fs->output_formats[location] = vk_format_to_pipe_format(vk_format);
|
||||
|
||||
mrt_resource = &hw_subpass->setup.mrt_resources[u];
|
||||
tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
|
||||
if (tile_buffer) {
|
||||
fs->num_tile_buffers =
|
||||
MAX2(fs->num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
|
||||
fs->output_tile_buffers |= BITFIELD_BIT(u);
|
||||
}
|
||||
}
|
||||
|
||||
data->fs.z_replicate = ~0u;
|
||||
fs->z_replicate = ~0u;
|
||||
if (hw_subpass->z_replicate >= 0) {
|
||||
gl_frag_result location = FRAG_RESULT_DATA0 + u;
|
||||
data->fs.output_formats[location] = PIPE_FORMAT_R32_FLOAT;
|
||||
data->fs.z_replicate = location;
|
||||
const struct usc_mrt_resource *mrt_resource =
|
||||
&hw_subpass->setup.mrt_resources[u];
|
||||
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
|
||||
fs->output_formats[location] = PIPE_FORMAT_R32_FLOAT;
|
||||
fs->z_replicate = location;
|
||||
|
||||
if (tile_buffer) {
|
||||
fs->num_tile_buffers =
|
||||
MAX2(fs->num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
|
||||
fs->output_tile_buffers |= BITFIELD_BIT(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1869,13 +1901,14 @@ pvr_setup_fs_outputs(pco_data *data,
|
|||
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
|
||||
{
|
||||
uint64_t outputs_written = nir->info.outputs_written;
|
||||
pco_fs_data *fs = &data->fs;
|
||||
|
||||
unsigned u;
|
||||
for (u = 0; u < subpass->color_count; ++u) {
|
||||
gl_frag_result location = FRAG_RESULT_DATA0 + u;
|
||||
unsigned idx = subpass->color_attachments[u];
|
||||
const struct usc_mrt_resource *mrt_resource;
|
||||
ASSERTED bool output_reg;
|
||||
bool tile_buffer;
|
||||
nir_variable *var;
|
||||
|
||||
if (idx == VK_ATTACHMENT_UNUSED)
|
||||
|
|
@ -1886,16 +1919,16 @@ pvr_setup_fs_outputs(pco_data *data,
|
|||
continue;
|
||||
|
||||
mrt_resource = &hw_subpass->setup.mrt_resources[u];
|
||||
tile_buffer = fs->output_tile_buffers & BITFIELD_BIT(u);
|
||||
|
||||
/* TODO: tile buffer support. */
|
||||
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
assert(output_reg);
|
||||
|
||||
set_var(data->fs.outputs,
|
||||
mrt_resource->reg.output_reg,
|
||||
set_var(fs->outputs,
|
||||
tile_buffer ? mrt_resource->mem.tile_buffer
|
||||
: mrt_resource->reg.output_reg,
|
||||
var,
|
||||
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)));
|
||||
data->fs.output_reg[location] = output_reg;
|
||||
|
||||
if (tile_buffer)
|
||||
fs->outputs[location].offset = mrt_resource->mem.offset_dw;
|
||||
|
||||
outputs_written &= ~BITFIELD64_BIT(location);
|
||||
}
|
||||
|
|
@ -1904,21 +1937,20 @@ pvr_setup_fs_outputs(pco_data *data,
|
|||
const struct usc_mrt_resource *mrt_resource =
|
||||
&hw_subpass->setup.mrt_resources[hw_subpass->z_replicate];
|
||||
gl_frag_result location = FRAG_RESULT_DATA0 + u;
|
||||
ASSERTED bool output_reg;
|
||||
nir_variable *var;
|
||||
|
||||
var = nir_find_variable_with_location(nir, nir_var_shader_out, location);
|
||||
nir_variable *var =
|
||||
nir_find_variable_with_location(nir, nir_var_shader_out, location);
|
||||
if (var) {
|
||||
/* TODO: tile buffer support. */
|
||||
output_reg = mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
assert(output_reg);
|
||||
bool tile_buffer = fs->output_tile_buffers & BITFIELD_BIT(u);
|
||||
|
||||
set_var(data->fs.outputs,
|
||||
mrt_resource->reg.output_reg,
|
||||
set_var(fs->outputs,
|
||||
tile_buffer ? mrt_resource->mem.tile_buffer
|
||||
: mrt_resource->reg.output_reg,
|
||||
var,
|
||||
DIV_ROUND_UP(mrt_resource->intermediate_size,
|
||||
sizeof(uint32_t)));
|
||||
data->fs.output_reg[location] = output_reg;
|
||||
|
||||
if (tile_buffer)
|
||||
fs->outputs[location].offset = mrt_resource->mem.offset_dw;
|
||||
|
||||
outputs_written &= ~BITFIELD64_BIT(location);
|
||||
}
|
||||
|
|
@ -1933,6 +1965,7 @@ static void pvr_init_fs_input_attachments(
|
|||
const struct pvr_render_subpass *const subpass,
|
||||
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
|
||||
{
|
||||
pco_fs_data *fs = &data->fs;
|
||||
for (unsigned u = 0; u < subpass->input_count; ++u) {
|
||||
unsigned idx = subpass->input_attachments[u];
|
||||
if (idx == VK_ATTACHMENT_UNUSED)
|
||||
|
|
@ -1949,22 +1982,20 @@ static void pvr_init_fs_input_attachments(
|
|||
vk_format = VK_FORMAT_R32_SFLOAT;
|
||||
}
|
||||
|
||||
data->fs.ia_formats[u] = vk_format_to_pipe_format(vk_format);
|
||||
fs->ia_formats[u] = vk_format_to_pipe_format(vk_format);
|
||||
assert(fs->ia_formats[u] != PIPE_FORMAT_NONE);
|
||||
|
||||
unsigned mrt_idx = hw_subpass->input_access[u].on_chip_rt;
|
||||
const struct usc_mrt_resource *mrt_resource =
|
||||
&hw_subpass->setup.mrt_resources[mrt_idx];
|
||||
|
||||
ASSERTED bool output_reg = mrt_resource->type ==
|
||||
USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
assert(output_reg);
|
||||
/* TODO: tile buffer support. */
|
||||
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
|
||||
data->fs.ias_onchip[u] = (pco_range){
|
||||
.start = mrt_resource->reg.output_reg,
|
||||
.count =
|
||||
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)),
|
||||
};
|
||||
if (tile_buffer) {
|
||||
fs->num_tile_buffers =
|
||||
MAX2(fs->num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
|
||||
fs->ia_tile_buffers |= BITFIELD_BIT(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2019,7 +2050,33 @@ static void pvr_setup_fs_input_attachments(
|
|||
const struct pvr_render_subpass *const subpass,
|
||||
const struct pvr_renderpass_hwsetup_subpass *hw_subpass)
|
||||
{
|
||||
/* pvr_finishme("pvr_setup_fs_input_attachments"); */
|
||||
pco_fs_data *fs = &data->fs;
|
||||
for (unsigned u = 0; u < subpass->input_count; ++u) {
|
||||
unsigned idx = subpass->input_attachments[u];
|
||||
if (idx == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
bool onchip = hw_subpass->input_access[u].type !=
|
||||
PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
|
||||
if (!onchip)
|
||||
continue;
|
||||
|
||||
unsigned mrt_idx = hw_subpass->input_access[u].on_chip_rt;
|
||||
const struct usc_mrt_resource *mrt_resource =
|
||||
&hw_subpass->setup.mrt_resources[mrt_idx];
|
||||
|
||||
bool tile_buffer = fs->ia_tile_buffers & BITFIELD_BIT(u);
|
||||
|
||||
fs->ias_onchip[u] = (pco_range){
|
||||
.start = tile_buffer ? mrt_resource->mem.tile_buffer
|
||||
: mrt_resource->reg.output_reg,
|
||||
.count =
|
||||
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)),
|
||||
};
|
||||
|
||||
if (tile_buffer)
|
||||
fs->ias_onchip[u].offset = mrt_resource->mem.offset_dw;
|
||||
}
|
||||
}
|
||||
|
||||
static void pvr_setup_fs_blend(pco_data *data)
|
||||
|
|
@ -2036,6 +2093,29 @@ static void pvr_setup_fs_blend(pco_data *data)
|
|||
data->common.shareds += num_blend_consts;
|
||||
}
|
||||
|
||||
/**
 * Sizes the fragment shader's tile buffer address range.
 *
 * Does nothing when fs.num_tile_buffers is zero. Otherwise sets
 * fs.tile_buffers.count to two dwords per tile buffer (one 64-bit address
 * each) and fs.tile_buffers.stride to the dwords per address. The start of the
 * range is left at zero here.
 */
static void pvr_init_fs_tile_buffers(pco_data *data)
|
||||
{
|
||||
if (!data->fs.num_tile_buffers)
|
||||
return;
|
||||
|
||||
/* Each tile buffer address is a uint64_t, i.e. two 32-bit dwords. */
unsigned tile_buffer_addr_dwords =
|
||||
data->fs.num_tile_buffers * (sizeof(uint64_t) / sizeof(uint32_t));
|
||||
|
||||
data->fs.tile_buffers = (pco_range){
|
||||
.count = tile_buffer_addr_dwords,
|
||||
.stride = sizeof(uint64_t) / sizeof(uint32_t),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Allocates shared registers for the tile buffer address range.
 *
 * Does nothing when fs.tile_buffers.count is zero. Otherwise the range starts
 * at the current common.shareds value, and common.shareds is advanced by the
 * range's dword count.
 */
static void pvr_setup_fs_tile_buffers(pco_data *data)
|
||||
{
|
||||
if (!data->fs.tile_buffers.count)
|
||||
return;
|
||||
|
||||
/* Claim the next tile_buffers.count shareds for the addresses. */
data->fs.tile_buffers.start = data->common.shareds;
|
||||
data->common.shareds += data->fs.tile_buffers.count;
|
||||
}
|
||||
|
||||
static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir)
|
||||
{
|
||||
BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
|
||||
|
|
@ -2317,6 +2397,7 @@ pvr_preprocess_shader_data(pco_data *data,
|
|||
pvr_init_fs_outputs(data, pass, subpass, hw_subpass);
|
||||
pvr_init_fs_input_attachments(data, pass, subpass, hw_subpass);
|
||||
pvr_init_fs_blend(data, state->cb);
|
||||
pvr_init_fs_tile_buffers(data);
|
||||
|
||||
if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_SAMPLE_MASK) ||
|
||||
(state->ms && state->ms->sample_mask != 0xffff)) {
|
||||
|
|
@ -2374,6 +2455,7 @@ static void pvr_postprocess_shader_data(pco_data *data,
|
|||
pvr_setup_fs_outputs(data, nir, subpass, hw_subpass);
|
||||
pvr_setup_fs_input_attachments(data, nir, subpass, hw_subpass);
|
||||
pvr_setup_fs_blend(data);
|
||||
pvr_setup_fs_tile_buffers(data);
|
||||
|
||||
/* TODO: push consts, blend consts, dynamic state, etc. */
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1123,6 +1123,7 @@ struct pvr_load_op {
|
|||
struct pvr_suballoc_bo *usc_frag_prog_bo;
|
||||
uint32_t const_shareds_count;
|
||||
uint32_t shareds_count;
|
||||
uint32_t num_tile_buffers;
|
||||
|
||||
struct pvr_pds_upload pds_frag_prog;
|
||||
|
||||
|
|
|
|||
|
|
@ -707,6 +707,7 @@ pvr_spm_init_eot_state(struct pvr_device *device,
|
|||
/* Store off-chip tile data (i.e. tile buffers). */
|
||||
|
||||
for (uint32_t i = 0; i < hw_render->tile_buffers_count; i++) {
|
||||
continue;
|
||||
assert(!"Add support for tile buffers in EOT");
|
||||
pvr_finishme("Add support for tile buffers in EOT");
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,17 @@ pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props)
|
|||
if (u > 0)
|
||||
nir_wop_pco(&b);
|
||||
|
||||
if (props->tile_buffer_addrs[u]) {
|
||||
nir_def *tile_buffer_addr_lo =
|
||||
nir_imm_int(&b, props->tile_buffer_addrs[u] & 0xffffffff);
|
||||
nir_def *tile_buffer_addr_hi =
|
||||
nir_imm_int(&b, props->tile_buffer_addrs[u] >> 32);
|
||||
|
||||
nir_flush_tile_buffer_pco(&b,
|
||||
tile_buffer_addr_lo,
|
||||
tile_buffer_addr_hi);
|
||||
}
|
||||
|
||||
nir_def *state0;
|
||||
nir_def *state1;
|
||||
if (props->shared_words) {
|
||||
|
|
@ -860,7 +871,6 @@ pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
|
|||
.start = 0,
|
||||
.count = pixel_size,
|
||||
};
|
||||
data.fs.output_reg[FRAG_RESULT_DATA0] = true;
|
||||
|
||||
nir_def *loaded_data;
|
||||
nir_def *coords =
|
||||
|
|
@ -1024,15 +1034,21 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
|
|||
}
|
||||
|
||||
struct usc_mrt_resource *mrt_resource = &mrt_setup->mrt_resources[rt_idx];
|
||||
/* TODO: tile buffer support */
|
||||
assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG);
|
||||
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
|
||||
data.fs.outputs[FRAG_RESULT_DATA0 + rt_idx] = (pco_range){
|
||||
.start = mrt_resource->reg.output_reg,
|
||||
.start = tile_buffer ? mrt_resource->mem.tile_buffer
|
||||
: mrt_resource->reg.output_reg,
|
||||
.count = accum_size_dwords,
|
||||
};
|
||||
|
||||
data.fs.output_reg[FRAG_RESULT_DATA0 + rt_idx] = true;
|
||||
if (tile_buffer) {
|
||||
data.fs.num_tile_buffers =
|
||||
MAX2(data.fs.num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
|
||||
data.fs.output_tile_buffers |= BITFIELD_BIT(rt_idx);
|
||||
data.fs.outputs[FRAG_RESULT_DATA0 + rt_idx].offset =
|
||||
mrt_resource->mem.offset_dw;
|
||||
}
|
||||
|
||||
nir_create_variable_with_location(b.shader,
|
||||
nir_var_shader_out,
|
||||
|
|
@ -1064,11 +1080,11 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
|
|||
|
||||
struct usc_mrt_resource *mrt_resource =
|
||||
&mrt_setup->mrt_resources[depth_idx];
|
||||
/* TODO: tile buffer support */
|
||||
assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG);
|
||||
bool tile_buffer = mrt_resource->type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
|
||||
|
||||
assert(DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t)) ==
|
||||
1);
|
||||
unsigned accum_size_dwords =
|
||||
DIV_ROUND_UP(mrt_resource->intermediate_size, sizeof(uint32_t));
|
||||
assert(accum_size_dwords == 1);
|
||||
|
||||
data.fs.output_formats[FRAG_RESULT_DATA0 + depth_idx] =
|
||||
PIPE_FORMAT_R32_FLOAT;
|
||||
|
|
@ -1076,11 +1092,18 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
|
|||
const glsl_type *type = glsl_float_type();
|
||||
|
||||
data.fs.outputs[FRAG_RESULT_DATA0 + depth_idx] = (pco_range){
|
||||
.start = mrt_resource->reg.output_reg,
|
||||
.count = 1,
|
||||
.start = tile_buffer ? mrt_resource->mem.tile_buffer
|
||||
: mrt_resource->reg.output_reg,
|
||||
.count = accum_size_dwords,
|
||||
};
|
||||
|
||||
data.fs.output_reg[FRAG_RESULT_DATA0 + depth_idx] = true;
|
||||
if (tile_buffer) {
|
||||
data.fs.num_tile_buffers =
|
||||
MAX2(data.fs.num_tile_buffers, mrt_resource->mem.tile_buffer + 1);
|
||||
data.fs.output_tile_buffers |= BITFIELD_BIT(depth_idx);
|
||||
data.fs.outputs[FRAG_RESULT_DATA0 + depth_idx].offset =
|
||||
mrt_resource->mem.offset_dw;
|
||||
}
|
||||
|
||||
nir_create_variable_with_location(b.shader,
|
||||
nir_var_shader_out,
|
||||
|
|
@ -1159,6 +1182,21 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
|
|||
}
|
||||
}
|
||||
|
||||
if (data.fs.num_tile_buffers > 0) {
|
||||
unsigned tile_buffer_addr_dwords =
|
||||
data.fs.num_tile_buffers * (sizeof(uint64_t) / sizeof(uint32_t));
|
||||
|
||||
data.fs.tile_buffers = (pco_range){
|
||||
.start = shared_regs,
|
||||
.count = tile_buffer_addr_dwords,
|
||||
.stride = sizeof(uint64_t) / sizeof(uint32_t),
|
||||
};
|
||||
|
||||
shared_regs += tile_buffer_addr_dwords;
|
||||
|
||||
load_op->num_tile_buffers = data.fs.num_tile_buffers;
|
||||
}
|
||||
|
||||
nir_jump(&b, nir_jump_return);
|
||||
|
||||
load_op->const_shareds_count = shared_regs;
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ struct pvr_eot_props {
|
|||
const uint32_t *state_words;
|
||||
const unsigned *state_regs;
|
||||
};
|
||||
|
||||
uint64_t tile_buffer_addrs[PVR_MAX_COLOR_ATTACHMENTS];
|
||||
};
|
||||
|
||||
pco_shader *pvr_usc_eot(pco_ctx *ctx, struct pvr_eot_props *props);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue