pvr: dynamically handle shademodel for flat shaded varyings

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-03-05 18:32:16 +00:00 committed by Marge Bot
parent 0c7dc07c93
commit 1b7954fae6
5 changed files with 100 additions and 44 deletions

View file

@ -4151,6 +4151,11 @@ uint32_t *pvr_pds_coefficient_loading(
/* Copy the USC task control words to constants. */
constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
if (BITSET_TEST(program->flat_iter_mask, iterator))
program->dout_src_offsets[iterator] = constant;
else
program->dout_src_offsets[iterator] = ~0u;
/* Write the first iterator. */
iterator_word =
(uint64_t)program->FPU_iterators[iterator]

View file

@ -29,6 +29,7 @@
#include "pvr_device_info.h"
#include "pvr_limits.h"
#include "pds/pvr_rogue_pds_defs.h"
#include "util/bitset.h"
#include "util/macros.h"
#ifdef __cplusplus
@ -268,6 +269,9 @@ struct pvr_pds_coeff_loading_program {
uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS];
uint32_t destination[PVR_MAXIMUM_ITERATIONS];
BITSET_DECLARE(flat_iter_mask, PVR_MAXIMUM_ITERATIONS);
uint32_t dout_src_offsets[PVR_MAXIMUM_ITERATIONS];
uint32_t data_size;
uint32_t code_size;

View file

@ -4925,7 +4925,58 @@ pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer,
}
}
static void
/**
 * Finalise and upload the PDS coefficient loading program for the currently
 * bound graphics pipeline.
 *
 * For every iterator flagged in the program's flat_iter_mask, the DOUTI source
 * word stored at dout_src_offsets[i] is unpacked, its shademodel is set from
 * the dynamic primitive topology (FLAT_VERTEX1 for triangle fans,
 * FLAT_VERTEX0 for everything else) and the word is packed back in place.
 * The data and code segments are then uploaded through the command buffer.
 *
 * NOTE(review): the words are patched in the buffer reached through
 * state->gfx_pipeline rather than in a private copy - confirm this is safe
 * if the same pipeline can be recorded from several command buffers at once.
 *
 * \param[in] cmd_buffer         Command buffer supplying the bound pipeline,
 *                               the dynamic state and the upload target.
 * \param[out] pds_coeff_program Zeroed on entry; handed to
 *                               pvr_cmd_buffer_upload_pds() to be filled in.
 * \return VK_SUCCESS when the pipeline has no coefficient program buffer
 *         (nothing is uploaded), otherwise the result of
 *         pvr_cmd_buffer_upload_pds().
 */
static VkResult
setup_pds_coeff_program(struct pvr_cmd_buffer *const cmd_buffer,
                        struct pvr_pds_upload *pds_coeff_program)
{
   const struct pvr_fragment_shader_state *const fs_state =
      &cmd_buffer->state.gfx_pipeline->shader_state.fragment;
   const struct pvr_pds_coeff_loading_program *const coeff_prog =
      &fs_state->pds_coeff_program;
   uint32_t *const words = fs_state->pds_coeff_program_buffer;
   const VkPrimitiveTopology bound_topology =
      cmd_buffer->vk.dynamic_graphics_state.ia.primitive_topology;
   unsigned iter;

   /* Start from an empty upload so the early-out below leaves it cleared. */
   memset(pds_coeff_program, 0, sizeof(*pds_coeff_program));

   /* No buffer means there is no coefficient program to patch or upload. */
   if (words == NULL)
      return VK_SUCCESS;

   BITSET_FOREACH_SET (iter,
                       coeff_prog->flat_iter_mask,
                       PVR_MAXIMUM_ITERATIONS) {
      const uint32_t word_idx = coeff_prog->dout_src_offsets[iter];
      struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC src_word;

      /* ~0u is the value recorded for iterators that are not flat shaded. */
      assert(word_idx != ~0u);

      ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_unpack(words + word_idx, &src_word);

      src_word.shademodel =
         (bound_topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN)
            ? ROGUE_PDSINST_DOUTI_SHADEMODEL_FLAT_VERTEX1
            : ROGUE_PDSINST_DOUTI_SHADEMODEL_FLAT_VERTEX0;

      ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(words + word_idx, &src_word);
   }

   /* The data segment sits at the start of the buffer, with the code segment
    * immediately after it.
    */
   /* FIXME: Figure out the define for alignment of 16. */
   return pvr_cmd_buffer_upload_pds(cmd_buffer,
                                    words,
                                    coeff_prog->data_size,
                                    16,
                                    words + coeff_prog->data_size,
                                    coeff_prog->code_size,
                                    16,
                                    16,
                                    pds_coeff_program);
}
static VkResult
pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
struct pvr_sub_cmd_gfx *const sub_cmd)
{
@ -4938,8 +4989,12 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
&fragment_shader_state->descriptor_state;
const struct pvr_pipeline_stage_state *fragment_state =
&fragment_shader_state->stage_state;
const struct pvr_pds_upload *pds_coeff_program =
&fragment_shader_state->pds_coeff_program;
struct pvr_pds_upload pds_coeff_program;
VkResult result;
result = setup_pds_coeff_program(cmd_buffer, &pds_coeff_program);
if (result != VK_SUCCESS)
return result;
const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
struct ROGUE_TA_STATE_HEADER *const header = &state->emit_header;
@ -4950,7 +5005,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE);
const uint32_t pds_varying_state_size =
DIV_ROUND_UP(pds_coeff_program->data_size,
DIV_ROUND_UP(pds_coeff_program.data_size,
ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE);
const uint32_t usc_varying_size =
@ -5018,13 +5073,13 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
ppp_state->pds.size_info2 |= size_info2;
if (pds_coeff_program->pvr_bo) {
if (pds_coeff_program.pvr_bo) {
header->pres_pds_state_ptr1 = true;
pvr_csb_pack (&ppp_state->pds.varying_base,
TA_STATE_PDS_VARYINGBASE,
base) {
base.addr = PVR_DEV_ADDR(pds_coeff_program->data_offset);
base.addr = PVR_DEV_ADDR(pds_coeff_program.data_offset);
}
} else {
ppp_state->pds.varying_base = 0U;
@ -5038,6 +5093,8 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
header->pres_pds_state_ptr0 = true;
header->pres_pds_state_ptr3 = true;
return result;
}
static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer)
@ -5540,7 +5597,9 @@ pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer,
state->dirty.fragment_descriptors &&
state->gfx_pipeline->shader_state.fragment.shader_bo &&
!state->gfx_pipeline->fs_data.common.uses.empty) {
pvr_setup_fragment_state_pointers(cmd_buffer, sub_cmd);
result = pvr_setup_fragment_state_pointers(cmd_buffer, sub_cmd);
if (result != VK_SUCCESS)
return result;
}
pvr_setup_isp_depth_bias_scissor_state(cmd_buffer);

View file

@ -69,12 +69,12 @@
static VkResult pvr_pds_coeff_program_create_and_upload(
struct pvr_device *device,
const VkAllocationCallbacks *allocator,
struct pvr_pds_coeff_loading_program *program,
struct pvr_fragment_shader_state *fragment_state)
{
struct pvr_pds_coeff_loading_program *program =
&fragment_state->pds_coeff_program;
uint32_t staging_buffer_size;
uint32_t *staging_buffer;
VkResult result;
assert(program->num_fpu_iterators < PVR_MAXIMUM_ITERATIONS);
@ -82,9 +82,7 @@ static VkResult pvr_pds_coeff_program_create_and_upload(
pvr_pds_coefficient_loading(program, NULL, PDS_GENERATE_SIZES);
if (!program->code_size) {
fragment_state->pds_coeff_program.pvr_bo = NULL;
fragment_state->pds_coeff_program.code_size = 0;
fragment_state->pds_coeff_program.data_size = 0;
fragment_state->pds_coeff_program_buffer = NULL;
fragment_state->stage_state.pds_temps_count = 0;
return VK_SUCCESS;
@ -106,23 +104,7 @@ static VkResult pvr_pds_coeff_program_create_and_upload(
staging_buffer,
PDS_GENERATE_CODEDATA_SEGMENTS);
/* FIXME: Figure out the define for alignment of 16. */
result = pvr_gpu_upload_pds(device,
&staging_buffer[0],
program->data_size,
16,
&staging_buffer[program->data_size],
program->code_size,
16,
16,
&fragment_state->pds_coeff_program);
if (result != VK_SUCCESS) {
vk_free2(&device->vk.alloc, allocator, staging_buffer);
return result;
}
vk_free2(&device->vk.alloc, allocator, staging_buffer);
fragment_state->pds_coeff_program_buffer = staging_buffer;
fragment_state->stage_state.pds_temps_count = program->temps_used;
return VK_SUCCESS;
@ -1178,6 +1160,8 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device,
{
const uint32_t num_vertex_attrib_programs =
ARRAY_SIZE(gfx_pipeline->shader_state.vertex.pds_attrib_programs);
struct pvr_fragment_shader_state *fragment_state =
&gfx_pipeline->shader_state.fragment;
pvr_pds_descriptor_program_destroy(
device,
@ -1198,8 +1182,9 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device,
pvr_bo_suballoc_free(
gfx_pipeline->shader_state.fragment.pds_fragment_program.pvr_bo);
pvr_bo_suballoc_free(
gfx_pipeline->shader_state.fragment.pds_coeff_program.pvr_bo);
vk_free2(&device->vk.alloc,
allocator,
fragment_state->pds_coeff_program_buffer);
pvr_bo_suballoc_free(gfx_pipeline->shader_state.fragment.shader_bo);
pvr_bo_suballoc_free(gfx_pipeline->shader_state.vertex.shader_bo);
@ -1384,9 +1369,13 @@ static void pvr_graphics_pipeline_setup_vertex_dma(
static void pvr_graphics_pipeline_setup_fragment_coeff_program(
struct pvr_graphics_pipeline *gfx_pipeline,
nir_shader *fs,
struct pvr_pds_coeff_loading_program *frag_coeff_program)
nir_shader *fs)
{
struct pvr_fragment_shader_state *fragment_state =
&gfx_pipeline->shader_state.fragment;
struct pvr_pds_coeff_loading_program *frag_coeff_program =
&fragment_state->pds_coeff_program;
uint64_t varyings_used = fs->info.inputs_read &
BITFIELD64_RANGE(VARYING_SLOT_VAR0, MAX_VARYING);
pco_vs_data *vs_data = &gfx_pipeline->vs_data.vs;
@ -1478,17 +1467,16 @@ static void pvr_graphics_pipeline_setup_fragment_coeff_program(
switch (var->data.interpolation) {
case INTERP_MODE_SMOOTH:
douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
douti_src.perspective = true;
break;
FALLTHROUGH;
case INTERP_MODE_NOPERSPECTIVE:
douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
break;
case INTERP_MODE_FLAT:
/* TODO: triangle fan, provoking vertex last. */
douti_src.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_FLAT_VERTEX0;
/* Shademodel will be set up later for flat. */
BITSET_SET(frag_coeff_program->flat_iter_mask, fpu);
break;
default:
@ -2294,8 +2282,6 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
struct pvr_pds_vertex_dma vtx_dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
uint32_t vtx_dma_count = 0;
struct pvr_pds_coeff_loading_program frag_coeff_program = { 0 };
for (mesa_shader_stage stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
size_t stage_index = gfx_pipeline->stage_indices[stage];
@ -2398,8 +2384,7 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
pvr_graphics_pipeline_setup_fragment_coeff_program(
gfx_pipeline,
nir_shaders[MESA_SHADER_FRAGMENT],
&frag_coeff_program);
nir_shaders[MESA_SHADER_FRAGMENT]);
result = pvr_gpu_upload_usc(device,
pco_shader_binary_data(*fs),
@ -2411,7 +2396,6 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
result = pvr_pds_coeff_program_create_and_upload(device,
allocator,
&frag_coeff_program,
fragment_state);
if (result != VK_SUCCESS)
goto err_free_fragment_bo;
@ -2485,7 +2469,9 @@ err_free_frag_descriptor_program:
err_free_frag_program:
pvr_bo_suballoc_free(fragment_state->pds_fragment_program.pvr_bo);
err_free_coeff_program:
pvr_bo_suballoc_free(fragment_state->pds_coeff_program.pvr_bo);
vk_free2(&device->vk.alloc,
allocator,
fragment_state->pds_coeff_program_buffer);
err_free_fragment_bo:
pvr_bo_suballoc_free(fragment_state->shader_bo);
err_free_vertex_bo:

View file

@ -886,7 +886,9 @@ struct pvr_fragment_shader_state {
enum ROGUE_TA_PASSTYPE pass_type;
enum ROGUE_PDSINST_DOUTU_SAMPLE_RATE sample_rate;
struct pvr_pds_upload pds_coeff_program;
struct pvr_pds_coeff_loading_program pds_coeff_program;
uint32_t *pds_coeff_program_buffer;
struct pvr_pds_upload pds_fragment_program;
};