pvr, pco: switch to usc generated clear attachment shaders

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37439>
This commit is contained in:
Simon Perretta 2025-08-08 15:35:15 +01:00
parent 6dd0a5ee2d
commit c2127bf4f7
10 changed files with 272 additions and 1764 deletions

View file

@ -2765,8 +2765,17 @@ intrinsic("load_vtxin_pco", src_comp=[1], dest_comp=0, bit_sizes=[32])
# load_coeff_pco(offset)
intrinsic("load_coeff_pco", src_comp=[1], dest_comp=0, bit_sizes=[32])
# frag_store_pco(data, offset/base)
intrinsic("frag_store_pco", src_comp=[1], indices=[BASE], bit_sizes=[32])
# dma_ld_pco(address)
intrinsic("dma_ld_pco", src_comp=[2], dest_comp=0, flags=[CAN_ELIMINATE], bit_sizes=[32])
# dma_st_pco(address_data)
intrinsic("dma_st_pco", src_comp=[0], bit_sizes=[32])
# dma_st_tiled_pco(address_data, valid_mask)
intrinsic("dma_st_tiled_pco", src_comp=[3, 1], bit_sizes=[32])
# load_tiled_offset_pco(component, is_store)
intrinsic("load_tiled_offset_pco", dest_comp=1, indices=[COMPONENT, FLAGS], bit_sizes=[32])

View file

@ -58,4 +58,17 @@ enum pvr_query_reset_data {
_PVR_QUERY_RESET_DATA_COUNT,
};
/**
 * Clear attachment shader data; shared registers.
 *
 * Each enumerator is a dword location in the constants buffer filled in by the
 * driver and read back by the generated clear shader. The DWORD0..DWORD3
 * entries must stay consecutive because the shader generator addresses them
 * as PVR_CLEAR_ATTACH_DATA_DWORD0 + n.
 */
enum pvr_clear_attach_data {
   /* Clear value, one dword per entry. */
   PVR_CLEAR_ATTACH_DATA_DWORD0 = 0,
   PVR_CLEAR_ATTACH_DATA_DWORD1 = 1,
   PVR_CLEAR_ATTACH_DATA_DWORD2 = 2,
   PVR_CLEAR_ATTACH_DATA_DWORD3 = 3,

   /* Tile buffer device address, split into low and high 32 bits. */
   PVR_CLEAR_ATTACH_DATA_TILE_ADDR_LO = 4,
   PVR_CLEAR_ATTACH_DATA_TILE_ADDR_HI = 5,

   /* Number of dwords described above. */
   _PVR_CLEAR_ATTACH_DATA_COUNT = 6,
};
#endif /* PVR_IFACE_H */

View file

@ -280,6 +280,24 @@ static pco_instr *trans_load_reg(trans_ctx *tctx,
return pco_mov(&tctx->b, dest, src, .rpt = chans);
}
/**
 * Translates a load_tiled_offset_pco intrinsic into a move from the matching
 * tiled-offset special register.
 *
 * The intrinsic's COMPONENT index picks the register within a bank and a
 * non-zero FLAGS index selects the store bank instead of the load bank.
 * Components 0..3 are addressed relative to the *_COMP0 register and
 * components 4 and up relative to the *_COMP4 register.
 */
static pco_instr *trans_load_tiled_offset(trans_ctx *tctx,
                                          nir_intrinsic_instr *intr,
                                          pco_ref dest)
{
   const unsigned comp = nir_intrinsic_component(intr);
   const bool is_store = nir_intrinsic_flags(intr) != 0;
   unsigned sr_index;

   if (comp < 4) {
      const unsigned first_reg =
         is_store ? PCO_SR_TILED_ST_COMP0 : PCO_SR_TILED_LD_COMP0;
      sr_index = comp + first_reg;
   } else {
      const unsigned first_reg =
         is_store ? PCO_SR_TILED_ST_COMP4 : PCO_SR_TILED_LD_COMP4;
      sr_index = comp + first_reg - 4;
   }

   return pco_mov(&tctx->b,
                  dest,
                  pco_ref_hwreg(sr_index, PCO_REG_CLASS_SPEC),
                  .olchk = tctx->olchk);
}
static inline pco_instr *build_itr(pco_builder *b,
pco_ref dest,
enum pco_drc drc,
@ -1699,6 +1717,18 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
instr = trans_load_reg(tctx, intr, dest, src[0], PCO_REG_CLASS_COEFF);
break;
case nir_intrinsic_load_tiled_offset_pco:
instr = trans_load_tiled_offset(tctx, intr, dest);
break;
case nir_intrinsic_frag_store_pco: {
unsigned base = nir_intrinsic_base(intr);
pco_ref dest = pco_ref_hwreg(base, PCO_REG_CLASS_PIXOUT);
instr = pco_mov(&tctx->b, dest, src[0], .olchk = tctx->olchk);
break;
}
case nir_intrinsic_load_output:
assert(tctx->stage == MESA_SHADER_FRAGMENT);
instr = trans_load_output_fs(tctx, intr, dest);
@ -1830,6 +1860,25 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
break;
}
case nir_intrinsic_dma_st_tiled_pco: {
unsigned chans = pco_ref_get_chans(src[0]) - 2;
pco_ref data_comp =
pco_ref_new_ssa(tctx->func, pco_ref_get_bits(src[0]), chans);
pco_comp(&tctx->b, data_comp, src[0], pco_ref_val16(2));
instr = pco_st_tiled(&tctx->b,
data_comp,
pco_ref_imm8(PCO_DSIZE_32BIT),
pco_ref_drc(PCO_DRC_0),
pco_ref_imm8(chans),
src[0],
src[1],
.olchk = tctx->olchk);
break;
}
/* Vertex sysvals. */
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:

View file

@ -33,9 +33,8 @@
#include "pvr_formats.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
#include "usc/programs/pvr_shader_factory.h"
#include "usc/programs/pvr_static_shaders.h"
#include "pvr_types.h"
#include "pvr_usc.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/macros.h"
@ -1513,75 +1512,45 @@ pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
struct pvr_cmd_buffer *cmd_buffer,
const struct pvr_shader_factory_info *shader_info,
const struct pvr_clear_attach_props *props,
const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
ASSERTED bool uses_tile_buffer,
bool uses_tile_buffer,
uint32_t tile_buffer_idx,
struct pvr_suballoc_bo **const const_shareds_buffer_out)
{
struct pvr_device *device = cmd_buffer->device;
struct pvr_suballoc_bo *const_shareds_buffer;
struct pvr_bo *tile_buffer;
uint64_t tile_dev_addr;
uint64_t tile_dev_addr = 0;
uint32_t *buffer;
VkResult result;
/* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
* Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
*/
result =
pvr_cmd_buffer_alloc_mem(cmd_buffer,
device->heaps.general_heap,
PVR_DW_TO_BYTES(shader_info->const_shared_regs),
&const_shareds_buffer);
/* TODO: only allocate what's needed, not always
 * _PVR_CLEAR_ATTACH_DATA_COUNT? */
/* _PVR_CLEAR_ATTACH_DATA_COUNT is a number of dwords, and every one of those
 * dwords is written below, so convert it to a byte size for the allocator.
 */
result =
   pvr_cmd_buffer_alloc_mem(cmd_buffer,
                            device->heaps.general_heap,
                            PVR_DW_TO_BYTES(_PVR_CLEAR_ATTACH_DATA_COUNT),
                            &const_shareds_buffer);
if (result != VK_SUCCESS)
return result;
buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);
for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
uint32_t dest_idx = shader_info->driver_const_location_map[i];
buffer[PVR_CLEAR_ATTACH_DATA_DWORD0] = clear_color[0];
buffer[PVR_CLEAR_ATTACH_DATA_DWORD1] = clear_color[1];
buffer[PVR_CLEAR_ATTACH_DATA_DWORD2] = clear_color[2];
buffer[PVR_CLEAR_ATTACH_DATA_DWORD3] = clear_color[3];
if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
continue;
assert(dest_idx < shader_info->const_shared_regs);
switch (i) {
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
buffer[dest_idx] = clear_color[i];
break;
case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
assert(uses_tile_buffer);
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
break;
case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
assert(uses_tile_buffer);
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
buffer[dest_idx] = (uint32_t)tile_dev_addr;
break;
default:
UNREACHABLE("Unsupported clear attachment const type.");
}
if (uses_tile_buffer) {
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
}
for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
const struct pvr_static_buffer *static_buff =
&shader_info->static_const_buffer[i];
assert(static_buff->dst_idx < shader_info->const_shared_regs);
buffer[static_buff->dst_idx] = static_buff->value;
}
buffer[PVR_CLEAR_ATTACH_DATA_TILE_ADDR_LO] = tile_dev_addr & 0xffffffff;
buffer[PVR_CLEAR_ATTACH_DATA_TILE_ADDR_HI] = tile_dev_addr >> 32;
*const_shareds_buffer_out = const_shareds_buffer;
@ -1608,7 +1577,6 @@ static VkResult pvr_clear_color_attachment_static(
const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
struct pvr_pds_pixel_shader_sa_program texture_program;
uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
const struct pvr_shader_factory_info *shader_info;
struct pvr_suballoc_bo *pds_texture_program_bo;
struct pvr_static_clear_ppp_template template;
struct pvr_suballoc_bo *const_shareds_buffer;
@ -1633,15 +1601,17 @@ static VkResult pvr_clear_color_attachment_static(
assert(has_eight_output_registers || out_reg_count + output_offset <= 4);
program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
output_offset,
uses_tile_buffer);
struct pvr_clear_attach_props props = {
.dword_count = out_reg_count,
.offset = output_offset,
.uses_tile_buffer = uses_tile_buffer,
};
shader_info = clear_attachment_collection[program_idx].info;
program_idx = pvr_uscgen_clear_attach_index(&props);
result = pvr_clear_color_attachment_static_create_consts_buffer(
cmd_buffer,
shader_info,
&props,
clear_color,
uses_tile_buffer,
tile_buffer_idx,
@ -1649,20 +1619,18 @@ static VkResult pvr_clear_color_attachment_static(
if (result != VK_SUCCESS)
return result;
/* clang-format off */
texture_program = (struct pvr_pds_pixel_shader_sa_program){
.num_texture_dma_kicks = 1,
.texture_dma_address = {
[0] = const_shareds_buffer->dev_addr.addr,
}
};
/* clang-format on */
texture_program =
(struct pvr_pds_pixel_shader_sa_program){ .num_texture_dma_kicks = 1,
.texture_dma_address = {
[0] = const_shareds_buffer
->dev_addr.addr,
} };
pvr_csb_pack (&texture_program.texture_dma_control[0],
PDSINST_DOUT_FIELDS_DOUTD_SRC1,
doutd_src1) {
doutd_src1.dest = ROGUE_PDSINST_DOUTD_DEST_COMMON_STORE;
doutd_src1.bsize = shader_info->const_shared_regs;
doutd_src1.bsize = _PVR_CLEAR_ATTACH_DATA_COUNT;
}
clear_attachment_program =
@ -1720,7 +1688,7 @@ static VkResult pvr_clear_color_attachment_static(
TA_STATE_PDS_SIZEINFO2,
sizeinfo2) {
sizeinfo2.usc_sharedsize =
DIV_ROUND_UP(shader_info->const_shared_regs,
DIV_ROUND_UP(_PVR_CLEAR_ATTACH_DATA_COUNT,
ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
}

View file

@ -30,8 +30,7 @@
#include "pvr_clear.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "usc/programs/pvr_shader_factory.h"
#include "usc/programs/pvr_static_shaders.h"
#include "pvr_usc.h"
#include "pvr_types.h"
#include "vk_alloc.h"
#include "vk_log.h"
@ -244,43 +243,35 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
struct pvr_device_static_clear_state *clear_state =
&device->static_clear_state;
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
uint32_t pds_texture_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
uint32_t pds_pixel_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
uint32_t usc_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
uint32_t pds_texture_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
uint32_t pds_pixel_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
uint32_t usc_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
pco_shader *shaders[PVR_NUM_CLEAR_ATTACH_SHADERS];
struct pvr_clear_attach_props props;
uint64_t usc_upload_offset;
uint64_t pds_upload_offset;
uint32_t alloc_size = 0;
VkResult result;
uint8_t *ptr;
#if !defined(NDEBUG)
uint32_t clear_attachment_info_count = 0;
/* Build and upload USC fragment shaders. */
for (unsigned dword_count = 1; dword_count <= 4; ++dword_count) {
for (unsigned offset = 0; offset <= 3; ++offset) {
for (unsigned uses_tile_buffer = 0; uses_tile_buffer <= 1;
++uses_tile_buffer) {
if (dword_count + offset > 4)
continue;
for (uint32_t i = 0; i < ARRAY_SIZE(clear_attachment_collection); i++) {
if (!clear_attachment_collection[i].info)
continue;
props.dword_count = dword_count;
props.offset = offset;
props.uses_tile_buffer = uses_tile_buffer;
clear_attachment_info_count++;
}
assert(clear_attachment_info_count == PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT);
#endif
/* Upload USC fragment shaders. */
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
if (!clear_attachment_collection[i].info)
continue;
usc_program_offsets[offset_idx] = alloc_size;
/* TODO: The compiler will likely give us a pre-aligned size for the USC
* shader so don't bother aligning here when it's hooked up.
*/
alloc_size += ALIGN_POT(clear_attachment_collection[i].size, 4);
offset_idx++;
unsigned u = pvr_uscgen_clear_attach_index(&props);
shaders[u] =
pvr_uscgen_clear_attach(device->pdevice->pco_ctx, &props);
alloc_size += pco_shader_binary_size(shaders[u]);
}
}
}
result = pvr_bo_suballoc(&device->suballoc_usc,
@ -294,55 +285,46 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
usc_upload_offset =
clear_state->usc_clear_attachment_programs->dev_addr.addr -
device->heaps.usc_heap->base_addr.addr;
ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(
clear_state->usc_clear_attachment_programs);
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
if (!clear_attachment_collection[i].info)
continue;
unsigned offset = 0;
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
unsigned shader_size = pco_shader_binary_size(shaders[u]);
memcpy(ptr + usc_program_offsets[offset_idx],
clear_attachment_collection[i].code,
clear_attachment_collection[i].size);
usc_program_offsets[u] = offset;
memcpy(&ptr[offset], pco_shader_binary_data(shaders[u]), shader_size);
offset_idx++;
offset += shader_size;
}
/* Upload PDS programs. */
alloc_size = 0;
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
struct pvr_pds_kickusc_program pixel_shader_pds_program;
uint32_t program_size;
if (!clear_attachment_collection[i].info)
continue;
/* Texture program to load colors. */
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
.num_texture_dma_kicks = 1,
};
pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&texture_pds_program);
pds_texture_program_offsets[offset_idx] = alloc_size;
pds_texture_program_offsets[u] = alloc_size;
alloc_size += ALIGN_POT(PVR_DW_TO_BYTES(texture_pds_program.code_size),
pds_prog_alignment);
/* Pixel program to load fragment shader. */
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
usc_upload_offset + usc_program_offsets[offset_idx],
clear_attachment_collection[i].info->temps_required,
usc_upload_offset + usc_program_offsets[u],
pco_shader_data(shaders[u])->common.temps,
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
false);
@ -352,10 +334,8 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
pixel_shader_pds_program.data_size;
program_size = PVR_DW_TO_BYTES(program_size);
pds_pixel_program_offsets[offset_idx] = alloc_size;
pds_pixel_program_offsets[u] = alloc_size;
alloc_size += ALIGN_POT(program_size, pds_prog_alignment);
offset_idx++;
}
result = pvr_bo_suballoc(&device->suballoc_pds,
@ -371,65 +351,55 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
pds_upload_offset =
clear_state->pds_clear_attachment_programs->dev_addr.addr -
device->heaps.pds_heap->base_addr.addr;
ptr =
pvr_bo_suballoc_get_map_addr(clear_state->pds_clear_attachment_programs);
for (uint32_t i = 0, offset_idx = 0;
i < ARRAY_SIZE(clear_attachment_collection);
i++) {
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
struct pvr_pds_kickusc_program pixel_shader_pds_program;
if (!clear_attachment_collection[i].info) {
clear_state->pds_clear_attachment_program_info[i] =
(struct pvr_pds_clear_attachment_program_info){ 0 };
continue;
}
/* Texture program to load colors. */
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
.num_texture_dma_kicks = 1,
};
pvr_pds_generate_pixel_shader_sa_code_segment(
&texture_pds_program,
(uint32_t *)(ptr + pds_texture_program_offsets[offset_idx]));
(uint32_t *)(ptr + pds_texture_program_offsets[u]));
/* Pixel program to load fragment shader. */
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
usc_upload_offset + usc_program_offsets[offset_idx],
clear_attachment_collection[i].info->temps_required,
usc_upload_offset + usc_program_offsets[u],
pco_shader_data(shaders[u])->common.temps,
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
false);
pvr_pds_generate_pixel_shader_program(
&pixel_shader_pds_program,
(uint32_t *)(ptr + pds_pixel_program_offsets[offset_idx]));
(uint32_t *)(ptr + pds_pixel_program_offsets[u]));
/* Setup the PDS program info. */
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_pds_program,
dev_info);
clear_state->pds_clear_attachment_program_info[i] =
clear_state->pds_clear_attachment_program_info[u] =
(struct pvr_pds_clear_attachment_program_info){
.texture_program_offset = PVR_DEV_ADDR(
pds_upload_offset + pds_texture_program_offsets[offset_idx]),
.pixel_program_offset = PVR_DEV_ADDR(
pds_upload_offset + pds_pixel_program_offsets[offset_idx]),
.texture_program_offset =
PVR_DEV_ADDR(pds_upload_offset + pds_texture_program_offsets[u]),
.pixel_program_offset =
PVR_DEV_ADDR(pds_upload_offset + pds_pixel_program_offsets[u]),
.texture_program_pds_temps_count = texture_pds_program.temps_used,
.texture_program_data_size = texture_pds_program.data_size,
};
offset_idx++;
}
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u)
ralloc_free(shaders[u]);
return VK_SUCCESS;
}

View file

@ -50,6 +50,7 @@
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_usc.h"
#include "usc/programs/pvr_shader_factory.h"
#include "pvr_spm.h"
#include "pvr_types.h"
@ -252,8 +253,7 @@ struct pvr_device {
uint32_t texture_program_pds_temps_count;
/* Size in dwords. */
uint32_t texture_program_data_size;
} pds_clear_attachment_program_info
[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES];
} pds_clear_attachment_program_info[PVR_NUM_CLEAR_ATTACH_SHADERS];
} static_clear_state;
struct {

View file

@ -16,10 +16,12 @@
#include "nir/nir_format_convert.h"
#include "nir/nir_conversion_builder.h"
#include "pco/pco.h"
#include "pco/pco_common.h"
#include "pco/pco_data.h"
#include "pco_uscgen_programs.h"
#include "pvr_common.h"
#include "pvr_formats.h"
#include "pvr_private.h"
#include "pvr_usc.h"
#include "usc/pvr_uscgen.h"
#include "util/macros.h"
@ -1226,3 +1228,71 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
return build_shader(ctx, b.shader, &data);
}
/**
 * Generates the fragment shader that clears an attachment to a constant value.
 *
 * The clear value is read with nir_load_preamble from the
 * PVR_CLEAR_ATTACH_DATA_DWORD* locations. Depending on
 * props->uses_tile_buffer the value is either stored with frag_store_pco at
 * base (offset + n), or written with a tiled DMA store to the tile buffer
 * whose address is read from the PVR_CLEAR_ATTACH_DATA_TILE_ADDR_* locations.
 *
 * \param ctx PCO context handed to build_shader().
 * \param props Shader variant; dword_count + offset must not exceed 4.
 * \return The shader returned by build_shader().
 */
pco_shader *pvr_uscgen_clear_attach(pco_ctx *ctx,
                                    struct pvr_clear_attach_props *props)
{
   const unsigned num_dwords = props->dword_count;
   const unsigned first_dword = props->offset;
   const bool tiled = props->uses_tile_buffer;
   pco_data shader_data = { 0 };

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT,
      pco_nir_options(),
      "clear_attach(%s, %u dwords, %u offset)",
      tiled ? "tiled" : "register",
      num_dwords,
      first_dword);

   assert(num_dwords + first_dword <= 4);

   if (!tiled) {
      /* Register variant: one store per dword of the clear value. */
      for (unsigned dword = 0; dword < num_dwords; ++dword) {
         nir_def *clear_value =
            nir_load_preamble(&b,
                              1,
                              32,
                              .base = PVR_CLEAR_ATTACH_DATA_DWORD0 + dword);

         nir_frag_store_pco(&b, clear_value, dword + first_dword);
      }
   } else {
      nir_def *valid_mask = nir_load_savmsk_vm_pco(&b);

      nir_def *tile_addr_lo =
         nir_load_preamble(&b,
                           1,
                           32,
                           .base = PVR_CLEAR_ATTACH_DATA_TILE_ADDR_LO);

      nir_def *tile_addr_hi =
         nir_load_preamble(&b,
                           1,
                           32,
                           .base = PVR_CLEAR_ATTACH_DATA_TILE_ADDR_HI);

      for (unsigned dword = 0; dword < num_dwords; ++dword) {
         /* NOTE(review): .flags is not set here, so it presumably defaults to
          * 0 and the load (not store) tiled offset registers get selected even
          * though the offset feeds a tiled store - confirm this is intended.
          */
         nir_def *tiled_offset =
            nir_load_tiled_offset_pco(&b, .component = dword + first_dword);

         /* 64-bit tile buffer address plus the 32-bit tiled offset. */
         nir_def *addr =
            nir_uadd64_32(&b, tile_addr_lo, tile_addr_hi, tiled_offset);

         nir_def *clear_value =
            nir_load_preamble(&b,
                              1,
                              32,
                              .base = PVR_CLEAR_ATTACH_DATA_DWORD0 + dword);

         /* The store source is (address low, address high, value). */
         nir_def *addr_lo = nir_channel(&b, addr, 0);
         nir_def *addr_hi = nir_channel(&b, addr, 1);
         nir_def *addr_data = nir_vec3(&b, addr_lo, addr_hi, clear_value);

         nir_dma_st_tiled_pco(&b, addr_data, valid_mask);
      }

      nir_dummy_load_store_pco(&b);
   }

   nir_jump(&b, nir_jump_return);

   return build_shader(ctx, b.shader, &shader_data);
}

View file

@ -13,6 +13,7 @@
* \brief USC internal shader generation header.
*/
#include "common/pvr_iface.h"
#include "compiler/shader_enums.h"
#include "pco/pco.h"
#include "pvr_private.h"
@ -49,4 +50,50 @@ pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op);
/* Clear attachment shader generation. */
/* Properties selecting one clear attachment shader variant.
 * pvr_uscgen_clear_attach() asserts that dword_count + offset <= 4.
 */
struct pvr_clear_attach_props {
/* Number of dwords of clear value the shader writes; the index helper below
 * accepts 1..4.
 */
unsigned dword_count;
/* First dword to write: added to the frag_store_pco base in the register
 * variant and to the load_tiled_offset_pco component in the tiled variant.
 */
unsigned offset;
/* True to write to a tile buffer with tiled DMA stores, false to store with
 * frag_store_pco.
 */
bool uses_tile_buffer;
};
/* Builds the clear attachment shader variant described by props. */
pco_shader *pvr_uscgen_clear_attach(pco_ctx *ctx,
struct pvr_clear_attach_props *props);
/**
 * Maps clear attachment shader properties to a dense shader index.
 *
 * Register variants occupy indices 0..9 and tile buffer variants 10..19. In
 * each half the shaders are ordered by dword_count first and offset second,
 * and only combinations with dword_count in 1..4 and
 * dword_count + offset <= 4 are valid.
 */
inline static unsigned
pvr_uscgen_clear_attach_index(struct pvr_clear_attach_props *props)
{
   /* Index of the offset 0 shader for dword_count 1, 2, 3 and 4. */
   static const unsigned first_index[] = { 0, 4, 7, 9 };
   /* Number of register variants preceding the tile buffer variants. */
   const unsigned register_variants = 10;

   const unsigned dwords = props->dword_count;
   const unsigned first_dword = props->offset;

   if (dwords < 1 || dwords > 4 || first_dword > 4 - dwords)
      UNREACHABLE("Invalid clear attachment shader properties.");

   return first_index[dwords - 1] + first_dword +
          (props->uses_tile_buffer ? register_variants : 0);
}

/* Total number of shader variants indexed by pvr_uscgen_clear_attach_index(). */
#define PVR_NUM_CLEAR_ATTACH_SHADERS 20U
#endif /* PVR_USC_H */

View file

@ -31,78 +31,6 @@
#include "util/bitscan.h"
#include "util/u_math.h"
/* ClearAttachments. */
enum pvr_clear_attachment_const {
   /* Don't change the component values: they index the clear color array. */
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0 = 0,
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1 = 1,
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2 = 2,
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3 = 3,

   /* Upper and lower 32 bits of the tile buffer device address. */
   PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER = 4,
   PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER = 5,

   PVR_CLEAR_ATTACHMENT_CONST_COUNT = 6,
};
#define PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED (~0U)
/* 8 + 8 = 16 <- 1 Dword, 8 offsets, to registers/tile buffers
* 7 + 7 = 14 <- 2 Dwords, 7 offsets, to registers/tile buffers
* 6 + 6 = 12 <- 3 Dwords, 6 offsets, to registers/tile buffers
* 5 + 5 = 10 <- 4 Dwords, 5 offsets, to registers/tile buffers
*/
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT 52
/* This defines the max theoretic number of clear attachment programs. In cases
* where the dword count goes past the number of on-chip on-tile-buffer targets
* there are unused elements. There are 4 versions for clearing 1..4 dwords, 8
* versions for clearing offsets 0..7 and 2 versions for clearing either on
* chip or in memory calculated as 4 * 8 * 2 = 64.
*/
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES 64
/**
 * \brief Returns the index of the clear attachment USC program.
 *
 * For programs writing to output registers, "dword_count" is the number of
 * output registers used and "offset" is the first one, e.g. dword_count 3 and
 * offset 1 uses o1, o2 and o3.
 *
 * For programs writing to a tile buffer, "dword_count" is the number of dwords
 * written and "offset" is the offset at which writing starts.
 *
 * The index is a bitfield:
 *   bit  0    - destination, set when a tile buffer is used.
 *   bits 1..3 - clear offset, 0..7.
 *   bits 4..5 - dword count minus one, for 1..4 dwords.
 */
static inline uint32_t
pvr_get_clear_attachment_program_index(uint32_t dword_count,
                                       uint32_t offset,
                                       bool uses_tile_buffer)
{
   const uint32_t dest_bits = util_bitpack_uint(uses_tile_buffer, 0, 0);
   const uint32_t offset_bits = util_bitpack_uint(offset, 1, 3);
   const uint32_t count_bits = util_bitpack_uint(dword_count - 1, 4, 5);

   return dest_bits | offset_bits | count_bits;
}
enum pvr_spm_load_const {
SPM_LOAD_CONST_TILE_BUFFER_1_UPPER,
SPM_LOAD_CONST_TILE_BUFFER_1_LOWER,

File diff suppressed because it is too large Load diff