mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 10:40:11 +01:00
pvr, pco: switch to usc generated clear attachment shaders
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37439>
This commit is contained in:
parent
6dd0a5ee2d
commit
c2127bf4f7
10 changed files with 272 additions and 1764 deletions
|
|
@ -2765,8 +2765,17 @@ intrinsic("load_vtxin_pco", src_comp=[1], dest_comp=0, bit_sizes=[32])
|
|||
# load_coeff_pco(offset)
|
||||
intrinsic("load_coeff_pco", src_comp=[1], dest_comp=0, bit_sizes=[32])
|
||||
|
||||
# frag_store_pco(data, offset/base)
|
||||
intrinsic("frag_store_pco", src_comp=[1], indices=[BASE], bit_sizes=[32])
|
||||
|
||||
# dma_ld_pco(address)
|
||||
intrinsic("dma_ld_pco", src_comp=[2], dest_comp=0, flags=[CAN_ELIMINATE], bit_sizes=[32])
|
||||
|
||||
# dma_st_pco(address_data)
|
||||
intrinsic("dma_st_pco", src_comp=[0], bit_sizes=[32])
|
||||
|
||||
# dma_st_tiled_pco(address_data, valid_mask)
|
||||
intrinsic("dma_st_tiled_pco", src_comp=[3, 1], bit_sizes=[32])
|
||||
|
||||
# load_tiled_offset_pco(); COMPONENT selects the component, non-zero FLAGS (is_store) selects the store offset instead of the load offset
|
||||
intrinsic("load_tiled_offset_pco", dest_comp=1, indices=[COMPONENT, FLAGS], bit_sizes=[32])
|
||||
|
|
|
|||
|
|
@ -58,4 +58,17 @@ enum pvr_query_reset_data {
|
|||
_PVR_QUERY_RESET_DATA_COUNT,
|
||||
};
|
||||
|
||||
/**
 * Clear attachment shader data; shared registers.
 *
 * Each enumerator is the dword index of one entry in the data buffer that the
 * driver fills for the clear attachment shaders.
 */
|
||||
enum pvr_clear_attach_data {
|
||||
/* Clear value, dwords 0-3. */
PVR_CLEAR_ATTACH_DATA_DWORD0,
|
||||
PVR_CLEAR_ATTACH_DATA_DWORD1,
|
||||
PVR_CLEAR_ATTACH_DATA_DWORD2,
|
||||
PVR_CLEAR_ATTACH_DATA_DWORD3,
|
||||
|
||||
/* Low 32 bits of the tile buffer device address. */
PVR_CLEAR_ATTACH_DATA_TILE_ADDR_LO,
|
||||
/* High 32 bits of the tile buffer device address. */
PVR_CLEAR_ATTACH_DATA_TILE_ADDR_HI,
|
||||
|
||||
/* Number of dword entries above; not an entry itself. */
_PVR_CLEAR_ATTACH_DATA_COUNT,
|
||||
};
|
||||
|
||||
#endif /* PVR_IFACE_H */
|
||||
|
|
|
|||
|
|
@ -280,6 +280,24 @@ static pco_instr *trans_load_reg(trans_ctx *tctx,
|
|||
return pco_mov(&tctx->b, dest, src, .rpt = chans);
|
||||
}
|
||||
|
||||
static pco_instr *trans_load_tiled_offset(trans_ctx *tctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
pco_ref dest)
|
||||
{
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
bool store = !!nir_intrinsic_flags(intr);
|
||||
|
||||
unsigned base0 = store ? PCO_SR_TILED_ST_COMP0 : PCO_SR_TILED_LD_COMP0;
|
||||
unsigned base4 = store ? PCO_SR_TILED_ST_COMP4 : PCO_SR_TILED_LD_COMP4;
|
||||
|
||||
unsigned sr_index = component < 4 ? component + base0
|
||||
: component + base4 - 4;
|
||||
|
||||
pco_ref tiled_offset = pco_ref_hwreg(sr_index, PCO_REG_CLASS_SPEC);
|
||||
|
||||
return pco_mov(&tctx->b, dest, tiled_offset, .olchk = tctx->olchk);
|
||||
}
|
||||
|
||||
static inline pco_instr *build_itr(pco_builder *b,
|
||||
pco_ref dest,
|
||||
enum pco_drc drc,
|
||||
|
|
@ -1699,6 +1717,18 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
instr = trans_load_reg(tctx, intr, dest, src[0], PCO_REG_CLASS_COEFF);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_tiled_offset_pco:
|
||||
instr = trans_load_tiled_offset(tctx, intr, dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_frag_store_pco: {
|
||||
unsigned base = nir_intrinsic_base(intr);
|
||||
|
||||
pco_ref dest = pco_ref_hwreg(base, PCO_REG_CLASS_PIXOUT);
|
||||
instr = pco_mov(&tctx->b, dest, src[0], .olchk = tctx->olchk);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_output:
|
||||
assert(tctx->stage == MESA_SHADER_FRAGMENT);
|
||||
instr = trans_load_output_fs(tctx, intr, dest);
|
||||
|
|
@ -1830,6 +1860,25 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_dma_st_tiled_pco: {
|
||||
unsigned chans = pco_ref_get_chans(src[0]) - 2;
|
||||
|
||||
pco_ref data_comp =
|
||||
pco_ref_new_ssa(tctx->func, pco_ref_get_bits(src[0]), chans);
|
||||
pco_comp(&tctx->b, data_comp, src[0], pco_ref_val16(2));
|
||||
|
||||
instr = pco_st_tiled(&tctx->b,
|
||||
data_comp,
|
||||
pco_ref_imm8(PCO_DSIZE_32BIT),
|
||||
pco_ref_drc(PCO_DRC_0),
|
||||
pco_ref_imm8(chans),
|
||||
src[0],
|
||||
src[1],
|
||||
.olchk = tctx->olchk);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/* Vertex sysvals. */
|
||||
case nir_intrinsic_load_vertex_id:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
|
|
|
|||
|
|
@ -33,9 +33,8 @@
|
|||
#include "pvr_formats.h"
|
||||
#include "pvr_job_transfer.h"
|
||||
#include "pvr_private.h"
|
||||
#include "usc/programs/pvr_shader_factory.h"
|
||||
#include "usc/programs/pvr_static_shaders.h"
|
||||
#include "pvr_types.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/list.h"
|
||||
#include "util/macros.h"
|
||||
|
|
@ -1513,75 +1512,45 @@ pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
|
|||
|
||||
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
|
||||
struct pvr_cmd_buffer *cmd_buffer,
|
||||
const struct pvr_shader_factory_info *shader_info,
|
||||
const struct pvr_clear_attach_props *props,
|
||||
const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
|
||||
ASSERTED bool uses_tile_buffer,
|
||||
bool uses_tile_buffer,
|
||||
uint32_t tile_buffer_idx,
|
||||
struct pvr_suballoc_bo **const const_shareds_buffer_out)
|
||||
{
|
||||
struct pvr_device *device = cmd_buffer->device;
|
||||
struct pvr_suballoc_bo *const_shareds_buffer;
|
||||
struct pvr_bo *tile_buffer;
|
||||
uint64_t tile_dev_addr;
|
||||
uint64_t tile_dev_addr = 0;
|
||||
uint32_t *buffer;
|
||||
VkResult result;
|
||||
|
||||
/* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
|
||||
* Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
|
||||
*/
|
||||
result =
|
||||
pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
device->heaps.general_heap,
|
||||
PVR_DW_TO_BYTES(shader_info->const_shared_regs),
|
||||
&const_shareds_buffer);
|
||||
/* TODO: only allocate what's needed, not always
|
||||
* _PVR_CLEAR_ATTACH_DATA_COUNT? */
|
||||
result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
|
||||
device->heaps.general_heap,
|
||||
_PVR_CLEAR_ATTACH_DATA_COUNT,
|
||||
&const_shareds_buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);
|
||||
|
||||
for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
|
||||
uint32_t dest_idx = shader_info->driver_const_location_map[i];
|
||||
buffer[PVR_CLEAR_ATTACH_DATA_DWORD0] = clear_color[0];
|
||||
buffer[PVR_CLEAR_ATTACH_DATA_DWORD1] = clear_color[1];
|
||||
buffer[PVR_CLEAR_ATTACH_DATA_DWORD2] = clear_color[2];
|
||||
buffer[PVR_CLEAR_ATTACH_DATA_DWORD3] = clear_color[3];
|
||||
|
||||
if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
|
||||
continue;
|
||||
|
||||
assert(dest_idx < shader_info->const_shared_regs);
|
||||
|
||||
switch (i) {
|
||||
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
|
||||
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
|
||||
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
|
||||
case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
|
||||
buffer[dest_idx] = clear_color[i];
|
||||
break;
|
||||
|
||||
case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
|
||||
assert(uses_tile_buffer);
|
||||
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
|
||||
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
|
||||
buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
|
||||
break;
|
||||
|
||||
case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
|
||||
assert(uses_tile_buffer);
|
||||
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
|
||||
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
|
||||
buffer[dest_idx] = (uint32_t)tile_dev_addr;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unsupported clear attachment const type.");
|
||||
}
|
||||
if (uses_tile_buffer) {
|
||||
tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
|
||||
tile_dev_addr = tile_buffer->vma->dev_addr.addr;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
|
||||
const struct pvr_static_buffer *static_buff =
|
||||
&shader_info->static_const_buffer[i];
|
||||
|
||||
assert(static_buff->dst_idx < shader_info->const_shared_regs);
|
||||
|
||||
buffer[static_buff->dst_idx] = static_buff->value;
|
||||
}
|
||||
buffer[PVR_CLEAR_ATTACH_DATA_TILE_ADDR_LO] = tile_dev_addr & 0xffffffff;
|
||||
buffer[PVR_CLEAR_ATTACH_DATA_TILE_ADDR_HI] = tile_dev_addr >> 32;
|
||||
|
||||
*const_shareds_buffer_out = const_shareds_buffer;
|
||||
|
||||
|
|
@ -1608,7 +1577,6 @@ static VkResult pvr_clear_color_attachment_static(
|
|||
const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
|
||||
struct pvr_pds_pixel_shader_sa_program texture_program;
|
||||
uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
|
||||
const struct pvr_shader_factory_info *shader_info;
|
||||
struct pvr_suballoc_bo *pds_texture_program_bo;
|
||||
struct pvr_static_clear_ppp_template template;
|
||||
struct pvr_suballoc_bo *const_shareds_buffer;
|
||||
|
|
@ -1633,15 +1601,17 @@ static VkResult pvr_clear_color_attachment_static(
|
|||
|
||||
assert(has_eight_output_registers || out_reg_count + output_offset <= 4);
|
||||
|
||||
program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
|
||||
output_offset,
|
||||
uses_tile_buffer);
|
||||
struct pvr_clear_attach_props props = {
|
||||
.dword_count = out_reg_count,
|
||||
.offset = output_offset,
|
||||
.uses_tile_buffer = uses_tile_buffer,
|
||||
};
|
||||
|
||||
shader_info = clear_attachment_collection[program_idx].info;
|
||||
program_idx = pvr_uscgen_clear_attach_index(&props);
|
||||
|
||||
result = pvr_clear_color_attachment_static_create_consts_buffer(
|
||||
cmd_buffer,
|
||||
shader_info,
|
||||
&props,
|
||||
clear_color,
|
||||
uses_tile_buffer,
|
||||
tile_buffer_idx,
|
||||
|
|
@ -1649,20 +1619,18 @@ static VkResult pvr_clear_color_attachment_static(
|
|||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
/* clang-format off */
|
||||
texture_program = (struct pvr_pds_pixel_shader_sa_program){
|
||||
.num_texture_dma_kicks = 1,
|
||||
.texture_dma_address = {
|
||||
[0] = const_shareds_buffer->dev_addr.addr,
|
||||
}
|
||||
};
|
||||
/* clang-format on */
|
||||
texture_program =
|
||||
(struct pvr_pds_pixel_shader_sa_program){ .num_texture_dma_kicks = 1,
|
||||
.texture_dma_address = {
|
||||
[0] = const_shareds_buffer
|
||||
->dev_addr.addr,
|
||||
} };
|
||||
|
||||
pvr_csb_pack (&texture_program.texture_dma_control[0],
|
||||
PDSINST_DOUT_FIELDS_DOUTD_SRC1,
|
||||
doutd_src1) {
|
||||
doutd_src1.dest = ROGUE_PDSINST_DOUTD_DEST_COMMON_STORE;
|
||||
doutd_src1.bsize = shader_info->const_shared_regs;
|
||||
doutd_src1.bsize = _PVR_CLEAR_ATTACH_DATA_COUNT;
|
||||
}
|
||||
|
||||
clear_attachment_program =
|
||||
|
|
@ -1720,7 +1688,7 @@ static VkResult pvr_clear_color_attachment_static(
|
|||
TA_STATE_PDS_SIZEINFO2,
|
||||
sizeinfo2) {
|
||||
sizeinfo2.usc_sharedsize =
|
||||
DIV_ROUND_UP(shader_info->const_shared_regs,
|
||||
DIV_ROUND_UP(_PVR_CLEAR_ATTACH_DATA_COUNT,
|
||||
ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,8 +30,7 @@
|
|||
#include "pvr_clear.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_private.h"
|
||||
#include "usc/programs/pvr_shader_factory.h"
|
||||
#include "usc/programs/pvr_static_shaders.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "pvr_types.h"
|
||||
#include "vk_alloc.h"
|
||||
#include "vk_log.h"
|
||||
|
|
@ -244,43 +243,35 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
|
|||
struct pvr_device_static_clear_state *clear_state =
|
||||
&device->static_clear_state;
|
||||
const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
uint32_t pds_texture_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
|
||||
uint32_t pds_pixel_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
|
||||
uint32_t usc_program_offsets[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT];
|
||||
uint32_t pds_texture_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
||||
uint32_t pds_pixel_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
||||
uint32_t usc_program_offsets[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
||||
pco_shader *shaders[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
||||
struct pvr_clear_attach_props props;
|
||||
uint64_t usc_upload_offset;
|
||||
uint64_t pds_upload_offset;
|
||||
uint32_t alloc_size = 0;
|
||||
VkResult result;
|
||||
uint8_t *ptr;
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
uint32_t clear_attachment_info_count = 0;
|
||||
/* Build and upload USC fragment shaders. */
|
||||
for (unsigned dword_count = 1; dword_count <= 4; ++dword_count) {
|
||||
for (unsigned offset = 0; offset <= 3; ++offset) {
|
||||
for (unsigned uses_tile_buffer = 0; uses_tile_buffer <= 1;
|
||||
++uses_tile_buffer) {
|
||||
if (dword_count + offset > 4)
|
||||
continue;
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(clear_attachment_collection); i++) {
|
||||
if (!clear_attachment_collection[i].info)
|
||||
continue;
|
||||
props.dword_count = dword_count;
|
||||
props.offset = offset;
|
||||
props.uses_tile_buffer = uses_tile_buffer;
|
||||
|
||||
clear_attachment_info_count++;
|
||||
}
|
||||
|
||||
assert(clear_attachment_info_count == PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT);
|
||||
#endif
|
||||
|
||||
/* Upload USC fragment shaders. */
|
||||
|
||||
for (uint32_t i = 0, offset_idx = 0;
|
||||
i < ARRAY_SIZE(clear_attachment_collection);
|
||||
i++) {
|
||||
if (!clear_attachment_collection[i].info)
|
||||
continue;
|
||||
|
||||
usc_program_offsets[offset_idx] = alloc_size;
|
||||
/* TODO: The compiler will likely give us a pre-aligned size for the USC
|
||||
* shader so don't bother aligning here when it's hooked up.
|
||||
*/
|
||||
alloc_size += ALIGN_POT(clear_attachment_collection[i].size, 4);
|
||||
|
||||
offset_idx++;
|
||||
unsigned u = pvr_uscgen_clear_attach_index(&props);
|
||||
shaders[u] =
|
||||
pvr_uscgen_clear_attach(device->pdevice->pco_ctx, &props);
|
||||
alloc_size += pco_shader_binary_size(shaders[u]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result = pvr_bo_suballoc(&device->suballoc_usc,
|
||||
|
|
@ -294,55 +285,46 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
|
|||
usc_upload_offset =
|
||||
clear_state->usc_clear_attachment_programs->dev_addr.addr -
|
||||
device->heaps.usc_heap->base_addr.addr;
|
||||
|
||||
ptr = (uint8_t *)pvr_bo_suballoc_get_map_addr(
|
||||
clear_state->usc_clear_attachment_programs);
|
||||
|
||||
for (uint32_t i = 0, offset_idx = 0;
|
||||
i < ARRAY_SIZE(clear_attachment_collection);
|
||||
i++) {
|
||||
if (!clear_attachment_collection[i].info)
|
||||
continue;
|
||||
unsigned offset = 0;
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
||||
unsigned shader_size = pco_shader_binary_size(shaders[u]);
|
||||
|
||||
memcpy(ptr + usc_program_offsets[offset_idx],
|
||||
clear_attachment_collection[i].code,
|
||||
clear_attachment_collection[i].size);
|
||||
usc_program_offsets[u] = offset;
|
||||
memcpy(&ptr[offset], pco_shader_binary_data(shaders[u]), shader_size);
|
||||
|
||||
offset_idx++;
|
||||
offset += shader_size;
|
||||
}
|
||||
|
||||
/* Upload PDS programs. */
|
||||
|
||||
alloc_size = 0;
|
||||
|
||||
for (uint32_t i = 0, offset_idx = 0;
|
||||
i < ARRAY_SIZE(clear_attachment_collection);
|
||||
i++) {
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
||||
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
|
||||
struct pvr_pds_kickusc_program pixel_shader_pds_program;
|
||||
uint32_t program_size;
|
||||
|
||||
if (!clear_attachment_collection[i].info)
|
||||
continue;
|
||||
|
||||
/* Texture program to load colors. */
|
||||
|
||||
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
|
||||
.num_texture_dma_kicks = 1,
|
||||
};
|
||||
|
||||
pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&texture_pds_program);
|
||||
|
||||
pds_texture_program_offsets[offset_idx] = alloc_size;
|
||||
pds_texture_program_offsets[u] = alloc_size;
|
||||
alloc_size += ALIGN_POT(PVR_DW_TO_BYTES(texture_pds_program.code_size),
|
||||
pds_prog_alignment);
|
||||
|
||||
/* Pixel program to load fragment shader. */
|
||||
|
||||
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
|
||||
|
||||
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
|
||||
usc_upload_offset + usc_program_offsets[offset_idx],
|
||||
clear_attachment_collection[i].info->temps_required,
|
||||
usc_upload_offset + usc_program_offsets[u],
|
||||
pco_shader_data(shaders[u])->common.temps,
|
||||
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
||||
false);
|
||||
|
||||
|
|
@ -352,10 +334,8 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
|
|||
pixel_shader_pds_program.data_size;
|
||||
program_size = PVR_DW_TO_BYTES(program_size);
|
||||
|
||||
pds_pixel_program_offsets[offset_idx] = alloc_size;
|
||||
pds_pixel_program_offsets[u] = alloc_size;
|
||||
alloc_size += ALIGN_POT(program_size, pds_prog_alignment);
|
||||
|
||||
offset_idx++;
|
||||
}
|
||||
|
||||
result = pvr_bo_suballoc(&device->suballoc_pds,
|
||||
|
|
@ -371,65 +351,55 @@ pvr_device_init_clear_attachment_programs(struct pvr_device *device)
|
|||
pds_upload_offset =
|
||||
clear_state->pds_clear_attachment_programs->dev_addr.addr -
|
||||
device->heaps.pds_heap->base_addr.addr;
|
||||
|
||||
ptr =
|
||||
pvr_bo_suballoc_get_map_addr(clear_state->pds_clear_attachment_programs);
|
||||
|
||||
for (uint32_t i = 0, offset_idx = 0;
|
||||
i < ARRAY_SIZE(clear_attachment_collection);
|
||||
i++) {
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u) {
|
||||
struct pvr_pds_pixel_shader_sa_program texture_pds_program;
|
||||
struct pvr_pds_kickusc_program pixel_shader_pds_program;
|
||||
|
||||
if (!clear_attachment_collection[i].info) {
|
||||
clear_state->pds_clear_attachment_program_info[i] =
|
||||
(struct pvr_pds_clear_attachment_program_info){ 0 };
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Texture program to load colors. */
|
||||
|
||||
texture_pds_program = (struct pvr_pds_pixel_shader_sa_program){
|
||||
.num_texture_dma_kicks = 1,
|
||||
};
|
||||
|
||||
pvr_pds_generate_pixel_shader_sa_code_segment(
|
||||
&texture_pds_program,
|
||||
(uint32_t *)(ptr + pds_texture_program_offsets[offset_idx]));
|
||||
(uint32_t *)(ptr + pds_texture_program_offsets[u]));
|
||||
|
||||
/* Pixel program to load fragment shader. */
|
||||
|
||||
pixel_shader_pds_program = (struct pvr_pds_kickusc_program){ 0 };
|
||||
|
||||
pvr_pds_setup_doutu(&pixel_shader_pds_program.usc_task_control,
|
||||
usc_upload_offset + usc_program_offsets[offset_idx],
|
||||
clear_attachment_collection[i].info->temps_required,
|
||||
usc_upload_offset + usc_program_offsets[u],
|
||||
pco_shader_data(shaders[u])->common.temps,
|
||||
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
||||
false);
|
||||
|
||||
pvr_pds_generate_pixel_shader_program(
|
||||
&pixel_shader_pds_program,
|
||||
(uint32_t *)(ptr + pds_pixel_program_offsets[offset_idx]));
|
||||
(uint32_t *)(ptr + pds_pixel_program_offsets[u]));
|
||||
|
||||
/* Setup the PDS program info. */
|
||||
|
||||
pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_pds_program,
|
||||
dev_info);
|
||||
|
||||
clear_state->pds_clear_attachment_program_info[i] =
|
||||
clear_state->pds_clear_attachment_program_info[u] =
|
||||
(struct pvr_pds_clear_attachment_program_info){
|
||||
.texture_program_offset = PVR_DEV_ADDR(
|
||||
pds_upload_offset + pds_texture_program_offsets[offset_idx]),
|
||||
.pixel_program_offset = PVR_DEV_ADDR(
|
||||
pds_upload_offset + pds_pixel_program_offsets[offset_idx]),
|
||||
.texture_program_offset =
|
||||
PVR_DEV_ADDR(pds_upload_offset + pds_texture_program_offsets[u]),
|
||||
.pixel_program_offset =
|
||||
PVR_DEV_ADDR(pds_upload_offset + pds_pixel_program_offsets[u]),
|
||||
|
||||
.texture_program_pds_temps_count = texture_pds_program.temps_used,
|
||||
.texture_program_data_size = texture_pds_program.data_size,
|
||||
};
|
||||
|
||||
offset_idx++;
|
||||
}
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(shaders); ++u)
|
||||
ralloc_free(shaders[u]);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@
|
|||
#include "pvr_job_render.h"
|
||||
#include "pvr_limits.h"
|
||||
#include "pvr_pds.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "usc/programs/pvr_shader_factory.h"
|
||||
#include "pvr_spm.h"
|
||||
#include "pvr_types.h"
|
||||
|
|
@ -252,8 +253,7 @@ struct pvr_device {
|
|||
uint32_t texture_program_pds_temps_count;
|
||||
/* Size in dwords. */
|
||||
uint32_t texture_program_data_size;
|
||||
} pds_clear_attachment_program_info
|
||||
[PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES];
|
||||
} pds_clear_attachment_program_info[PVR_NUM_CLEAR_ATTACH_SHADERS];
|
||||
} static_clear_state;
|
||||
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -16,10 +16,12 @@
|
|||
#include "nir/nir_format_convert.h"
|
||||
#include "nir/nir_conversion_builder.h"
|
||||
#include "pco/pco.h"
|
||||
#include "pco/pco_common.h"
|
||||
#include "pco/pco_data.h"
|
||||
#include "pco_uscgen_programs.h"
|
||||
#include "pvr_common.h"
|
||||
#include "pvr_formats.h"
|
||||
#include "pvr_private.h"
|
||||
#include "pvr_usc.h"
|
||||
#include "usc/pvr_uscgen.h"
|
||||
#include "util/macros.h"
|
||||
|
|
@ -1226,3 +1228,71 @@ pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op)
|
|||
|
||||
return build_shader(ctx, b.shader, &data);
|
||||
}
|
||||
|
||||
/**
 * \brief Generates a fragment shader that clears part of an attachment.
 *
 * The clear value dwords and, for the tile buffer variant, the tile buffer
 * address are read with load_preamble at the pvr_clear_attach_data indices.
 *
 * - Tile buffer variant: dword n is written with dma_st_tiled_pco to the tile
 *   buffer address plus the tiled offset of component (offset + n), using the
 *   savmsk valid mask.
 * - Register variant: dword n is written with frag_store_pco at base
 *   (offset + n).
 *
 * \param[in] ctx PCO compiler context.
 * \param[in] props Shader properties; dword_count + offset must not exceed 4.
 * \return The shader returned by build_shader().
 */
pco_shader *pvr_uscgen_clear_attach(pco_ctx *ctx,
                                    struct pvr_clear_attach_props *props)
{
   pco_data data = { 0 };

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT,
      pco_nir_options(),
      "clear_attach(%s, %u dwords, %u offset)",
      props->uses_tile_buffer ? "tiled" : "register",
      props->dword_count,
      props->offset);

   assert(props->dword_count + props->offset <= 4);

   if (props->uses_tile_buffer) {
      nir_def *valid_mask = nir_load_savmsk_vm_pco(&b);

      nir_def *tile_addr_lo =
         nir_load_preamble(&b,
                           1,
                           32,
                           .base = PVR_CLEAR_ATTACH_DATA_TILE_ADDR_LO);
      nir_def *tile_addr_hi =
         nir_load_preamble(&b,
                           1,
                           32,
                           .base = PVR_CLEAR_ATTACH_DATA_TILE_ADDR_HI);

      for (unsigned u = 0; u < props->dword_count; ++u) {
         /* NOTE(review): only COMPONENT is set here, so FLAGS (is_store) is
          * presumably left at zero even though the address is used for a
          * store - confirm the load offsets are intended.
          */
         nir_def *tiled_offset =
            nir_load_tiled_offset_pco(&b, .component = u + props->offset);

         nir_def *addr =
            nir_uadd64_32(&b, tile_addr_lo, tile_addr_hi, tiled_offset);

         /* Not named "data" to avoid shadowing the pco_data above. */
         nir_def *clear_dword =
            nir_load_preamble(&b,
                              1,
                              32,
                              .base = PVR_CLEAR_ATTACH_DATA_DWORD0 + u);

         /* Store source layout: address low, address high, then the data. */
         nir_def *addr_data = nir_vec3(&b,
                                       nir_channel(&b, addr, 0),
                                       nir_channel(&b, addr, 1),
                                       clear_dword);

         nir_dma_st_tiled_pco(&b, addr_data, valid_mask);
      }

      nir_dummy_load_store_pco(&b);
   } else {
      for (unsigned u = 0; u < props->dword_count; ++u) {
         /* Not named "data" to avoid shadowing the pco_data above. */
         nir_def *clear_dword =
            nir_load_preamble(&b,
                              1,
                              32,
                              .base = PVR_CLEAR_ATTACH_DATA_DWORD0 + u);

         nir_frag_store_pco(&b, clear_dword, u + props->offset);
      }
   }

   nir_jump(&b, nir_jump_return);

   return build_shader(ctx, b.shader, &data);
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
* \brief USC internal shader generation header.
|
||||
*/
|
||||
|
||||
#include "common/pvr_iface.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "pco/pco.h"
|
||||
#include "pvr_private.h"
|
||||
|
|
@ -49,4 +50,50 @@ pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
|
|||
|
||||
pco_shader *pvr_uscgen_loadop(pco_ctx *ctx, struct pvr_load_op *load_op);
|
||||
|
||||
/* Clear attachment shader generation. */
|
||||
/** Properties selecting a clear attachment shader variant. */
struct pvr_clear_attach_props {
|
||||
/* Number of dwords the shader clears. */
unsigned dword_count;
|
||||
/* First output register (register variant) or first tiled-offset component
 * (tile buffer variant) to write.
 */
unsigned offset;
|
||||
/* Write to the tile buffer in memory instead of output registers. */
bool uses_tile_buffer;
|
||||
};
|
||||
|
||||
/* Builds the clear attachment fragment shader variant described by props. */
pco_shader *pvr_uscgen_clear_attach(pco_ctx *ctx,
|
||||
struct pvr_clear_attach_props *props);
|
||||
|
||||
#define INDEX(d_c, o, u_t_b, i) \
|
||||
if (props->dword_count == d_c && props->offset == o && \
|
||||
props->uses_tile_buffer == u_t_b) \
|
||||
return i
|
||||
|
||||
inline static unsigned
|
||||
pvr_uscgen_clear_attach_index(struct pvr_clear_attach_props *props)
|
||||
{
|
||||
INDEX(1, 0, false, 0);
|
||||
INDEX(1, 1, false, 1);
|
||||
INDEX(1, 2, false, 2);
|
||||
INDEX(1, 3, false, 3);
|
||||
INDEX(2, 0, false, 4);
|
||||
INDEX(2, 1, false, 5);
|
||||
INDEX(2, 2, false, 6);
|
||||
INDEX(3, 0, false, 7);
|
||||
INDEX(3, 1, false, 8);
|
||||
INDEX(4, 0, false, 9);
|
||||
|
||||
INDEX(1, 0, true, 10);
|
||||
INDEX(1, 1, true, 11);
|
||||
INDEX(1, 2, true, 12);
|
||||
INDEX(1, 3, true, 13);
|
||||
INDEX(2, 0, true, 14);
|
||||
INDEX(2, 1, true, 15);
|
||||
INDEX(2, 2, true, 16);
|
||||
INDEX(3, 0, true, 17);
|
||||
INDEX(3, 1, true, 18);
|
||||
INDEX(4, 0, true, 19);
|
||||
|
||||
UNREACHABLE("Invalid clear attachment shader properties.");
|
||||
}
|
||||
#undef INDEX
|
||||
|
||||
#define PVR_NUM_CLEAR_ATTACH_SHADERS 20U
|
||||
|
||||
#endif /* PVR_USC_H */
|
||||
|
|
|
|||
|
|
@ -31,78 +31,6 @@
|
|||
#include "util/bitscan.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
/* ClearAttachments. */
|
||||
enum pvr_clear_attachment_const {
|
||||
PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0 = 0, /* Don't change. Indexes array.
|
||||
*/
|
||||
PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1 = 1, /* Don't change. Indexes array.
|
||||
*/
|
||||
PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2 = 2, /* Don't change. Indexes array.
|
||||
*/
|
||||
PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3 = 3, /* Don't change. Indexes array.
|
||||
*/
|
||||
PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER,
|
||||
PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER,
|
||||
PVR_CLEAR_ATTACHMENT_CONST_COUNT,
|
||||
};
|
||||
|
||||
#define PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED (~0U)
|
||||
|
||||
/* 8 + 8 = 16 <- 1 Dword, 8 offsets, to registers/tile buffers
|
||||
* 7 + 7 = 14 <- 2 Dwords, 7 offsets, to registers/tile buffers
|
||||
* 6 + 6 = 12 <- 3 Dwords, 6 offsets, to registers/tile buffers
|
||||
* 5 + 5 = 10 <- 4 Dwords, 5 offsets, to registers/tile buffers
|
||||
*/
|
||||
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT 52
|
||||
|
||||
/* This defines the max theoretic number of clear attachment programs. In cases
|
||||
* where the dword count goes past the number of on-chip on-tile-buffer targets
|
||||
* there are unused elements. There are 4 versions for clearing 1..4 dwords, 8
|
||||
* versions for clearing offsets 0..7 and 2 versions for clearing either on
|
||||
* chip or in memory calculated as 4 * 8 * 2 = 64.
|
||||
*/
|
||||
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES 64
|
||||
|
||||
/**
|
||||
* \brief Returns the index of the clear attachment USC program.
|
||||
*
|
||||
* For shaders which use output registers "dword_count" is essentially the
|
||||
* count of output registers to use, and "offset" is the first output reg to
|
||||
* use. E.g. dword_count 3, offset 1, will use o1, o2, o3.
|
||||
*
|
||||
* For shaders which use tile buffers as the destination "dword_count" is the
|
||||
* the amount of dwords to write to the tile buffer and "offset" is the offset
|
||||
* at which to start writing at.
|
||||
*/
|
||||
static inline uint32_t
|
||||
pvr_get_clear_attachment_program_index(uint32_t dword_count,
|
||||
uint32_t offset,
|
||||
bool uses_tile_buffer)
|
||||
{
|
||||
/* dest - Clear on chip or in memory.
|
||||
* offset - Clear offset 0..7 .
|
||||
* dword_count - Clear from 1..4 dwords.
|
||||
*/
|
||||
const uint32_t dest_start = 0;
|
||||
const uint32_t dest_end = 0;
|
||||
|
||||
const uint32_t offset_start = 1;
|
||||
const uint32_t offset_end = 3;
|
||||
|
||||
const uint32_t dword_count_start = 4;
|
||||
const uint32_t dword_count_end = 5;
|
||||
|
||||
uint32_t idx = 0;
|
||||
|
||||
dword_count -= 1;
|
||||
|
||||
idx |= util_bitpack_uint(uses_tile_buffer, dest_start, dest_end);
|
||||
idx |= util_bitpack_uint(offset, offset_start, offset_end);
|
||||
idx |= util_bitpack_uint(dword_count, dword_count_start, dword_count_end);
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
enum pvr_spm_load_const {
|
||||
SPM_LOAD_CONST_TILE_BUFFER_1_UPPER,
|
||||
SPM_LOAD_CONST_TILE_BUFFER_1_LOWER,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue