mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
pvr, pco: add base compute support
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
parent
ad2b623744
commit
4f79bc2e30
12 changed files with 778 additions and 421 deletions
|
|
@ -16,6 +16,7 @@ libpowervr_compiler_files = files(
|
|||
'pco_ir.c',
|
||||
'pco_legalize.c',
|
||||
'pco_nir.c',
|
||||
'pco_nir_compute.c',
|
||||
'pco_nir_pvfio.c',
|
||||
'pco_nir_vk.c',
|
||||
'pco_opt.c',
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ typedef struct _pco_fs_data {
|
|||
|
||||
/** PCO compute shader-specific data. */
|
||||
typedef struct _pco_cs_data {
|
||||
/**/
|
||||
unsigned workgroup_size[3]; /** Workgroup size. */
|
||||
} pco_cs_data;
|
||||
|
||||
/** PCO descriptor binding data. */
|
||||
|
|
|
|||
|
|
@ -1531,6 +1531,7 @@ bool pco_end(pco_shader *shader);
|
|||
bool pco_group_instrs(pco_shader *shader);
|
||||
bool pco_index(pco_shader *shader, bool skip_ssa);
|
||||
bool pco_legalize(pco_shader *shader);
|
||||
bool pco_nir_compute_instance_check(nir_shader *shader);
|
||||
bool pco_nir_lower_algebraic(nir_shader *shader);
|
||||
bool pco_nir_lower_algebraic_late(nir_shader *shader);
|
||||
bool pco_nir_lower_vk(nir_shader *shader, pco_common_data *common);
|
||||
|
|
|
|||
|
|
@ -81,6 +81,9 @@ const nir_shader_compiler_options *pco_nir_options(void)
|
|||
*/
|
||||
void pco_preprocess_nir(pco_ctx *ctx, nir_shader *nir)
|
||||
{
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE)
|
||||
NIR_PASS(_, nir, pco_nir_compute_instance_check);
|
||||
|
||||
if (nir->info.internal)
|
||||
NIR_PASS(_, nir, nir_lower_returns);
|
||||
|
||||
|
|
@ -108,6 +111,15 @@ void pco_preprocess_nir(pco_ctx *ctx, nir_shader *nir)
|
|||
|
||||
NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE) {
|
||||
NIR_PASS(_,
|
||||
nir,
|
||||
nir_lower_compute_system_values,
|
||||
&(nir_lower_compute_system_values_options){
|
||||
.lower_cs_local_id_to_index = true,
|
||||
});
|
||||
}
|
||||
|
||||
NIR_PASS(_,
|
||||
nir,
|
||||
nir_lower_io_vars_to_temporaries,
|
||||
|
|
@ -396,6 +408,18 @@ static void gather_fs_data(nir_shader *nir, pco_data *data)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Gathers compute shader data.
|
||||
*
|
||||
* \param[in] nir NIR shader.
|
||||
* \param[in,out] data Shader data.
|
||||
*/
|
||||
static void gather_cs_data(nir_shader *nir, pco_data *data)
|
||||
{
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(data->cs.workgroup_size); ++u)
|
||||
data->cs.workgroup_size[u] = nir->info.workgroup_size[u];
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Checks whether a NIR intrinsic op is atomic.
|
||||
*
|
||||
|
|
@ -462,12 +486,16 @@ static void gather_data(nir_shader *nir, pco_data *data)
|
|||
return gather_fs_data(nir, data);
|
||||
|
||||
case MESA_SHADER_VERTEX:
|
||||
/* TODO */
|
||||
break;
|
||||
return;
|
||||
|
||||
case MESA_SHADER_COMPUTE:
|
||||
return gather_cs_data(nir, data);
|
||||
|
||||
default:
|
||||
UNREACHABLE("");
|
||||
break;
|
||||
}
|
||||
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
98
src/imagination/pco/pco_nir_compute.c
Normal file
98
src/imagination/pco/pco_nir_compute.c
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright © 2025 Imagination Technologies Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file pco_nir_compute.c
|
||||
*
|
||||
* \brief PCO NIR compute-specific passes.
|
||||
*/
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "pco.h"
|
||||
#include "pco_builder.h"
|
||||
#include "pco_internal.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define INST_CHK_FUNC "@pco_inst_chk"
|
||||
|
||||
/**
|
||||
* \brief Inserts the instance check.
|
||||
*
|
||||
* \param[in,out] shader NIR shader.
|
||||
*/
|
||||
static void insert_instance_check(nir_shader *shader)
|
||||
{
|
||||
/* Get original entrypoint. */
|
||||
nir_function *orig_entrypoint = nir_shader_get_entrypoint(shader)->function;
|
||||
|
||||
/* Create a function for the instance check which will serve as the new
|
||||
* entrypoint.
|
||||
*/
|
||||
nir_function *inst_chk_func = nir_function_create(shader, INST_CHK_FUNC);
|
||||
|
||||
inst_chk_func->is_entrypoint = true;
|
||||
orig_entrypoint->is_entrypoint = false;
|
||||
|
||||
nir_builder b = nir_builder_create(nir_function_impl_create(inst_chk_func));
|
||||
b.cursor = nir_after_cf_list(&b.impl->body);
|
||||
|
||||
/* If the current instance index is greater than the total workgroup size,
|
||||
* we don't execute.
|
||||
*/
|
||||
nir_def *local_size = nir_load_workgroup_size(&b);
|
||||
nir_def *size_x = nir_channel(&b, local_size, 0);
|
||||
nir_def *size_y = nir_channel(&b, local_size, 1);
|
||||
nir_def *size_z = nir_channel(&b, local_size, 2);
|
||||
nir_def *flat_size = nir_imul(&b, nir_imul(&b, size_x, size_y), size_z);
|
||||
|
||||
nir_def *flat_id = nir_load_local_invocation_index(&b);
|
||||
|
||||
nir_def *cond_inst_valid = nir_ilt(&b, flat_id, flat_size);
|
||||
nir_if *nif = nir_push_if(&b, cond_inst_valid);
|
||||
{
|
||||
nir_call(&b, orig_entrypoint);
|
||||
}
|
||||
nir_pop_if(&b, nif);
|
||||
nir_jump(&b, nir_jump_return);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Inserts an instance check for compute shaders.
|
||||
*
|
||||
* \param[in,out] shader NIR shader.
|
||||
* \return True if the pass made progress.
|
||||
*/
|
||||
bool pco_nir_compute_instance_check(nir_shader *shader)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_COMPUTE);
|
||||
|
||||
if (shader->info.internal)
|
||||
return false;
|
||||
|
||||
/* Check we haven't already done this. */
|
||||
nir_foreach_function (function, shader) {
|
||||
if (function->name && !strcmp(function->name, INST_CHK_FUNC))
|
||||
return false;
|
||||
}
|
||||
|
||||
insert_instance_check(shader);
|
||||
|
||||
/* Re-inline. */
|
||||
NIR_PASS(_, shader, nir_lower_variable_initializers, nir_var_function_temp);
|
||||
NIR_PASS(_, shader, nir_lower_returns);
|
||||
NIR_PASS(_, shader, nir_inline_functions);
|
||||
NIR_PASS(_, shader, nir_copy_prop);
|
||||
NIR_PASS(_, shader, nir_opt_deref);
|
||||
nir_remove_non_entrypoints(shader);
|
||||
NIR_PASS(_, shader, nir_lower_variable_initializers, ~0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -630,8 +630,36 @@ static pco_instr *trans_atomic_buffer(trans_ctx *tctx,
|
|||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
static inline enum pco_reg_class sys_val_to_reg_class(gl_system_value sys_val,
|
||||
mesa_shader_stage stage)
|
||||
{
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
return PCO_REG_CLASS_VTXIN;
|
||||
|
||||
case MESA_SHADER_COMPUTE:
|
||||
switch (sys_val) {
|
||||
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
|
||||
return PCO_REG_CLASS_VTXIN;
|
||||
|
||||
case SYSTEM_VALUE_WORKGROUP_ID:
|
||||
case SYSTEM_VALUE_NUM_WORKGROUPS:
|
||||
return PCO_REG_CLASS_COEFF;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Translates a NIR vs load system value intrinsic into PCO.
|
||||
* \brief Translates a NIR load system value intrinsic into PCO.
|
||||
*
|
||||
* \param[in,out] tctx Translation context.
|
||||
* \param[in] intr System value intrinsic.
|
||||
|
|
@ -639,7 +667,7 @@ static pco_instr *trans_atomic_buffer(trans_ctx *tctx,
|
|||
* \return The translated PCO instruction.
|
||||
*/
|
||||
static pco_instr *
|
||||
trans_load_sysval_vs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
||||
trans_load_sysval(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
||||
{
|
||||
gl_system_value sys_val = nir_system_value_from_intrinsic(intr->intrinsic);
|
||||
const pco_range *range = &tctx->shader->data.common.sys_vals[sys_val];
|
||||
|
|
@ -647,7 +675,9 @@ trans_load_sysval_vs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
unsigned chans = pco_ref_get_chans(dest);
|
||||
assert(chans == range->count);
|
||||
|
||||
pco_ref src = pco_ref_hwreg_vec(range->start, PCO_REG_CLASS_VTXIN, chans);
|
||||
pco_ref src = pco_ref_hwreg_vec(range->start,
|
||||
sys_val_to_reg_class(sys_val, tctx->stage),
|
||||
chans);
|
||||
return pco_mov(&tctx->b, dest, src, .rpt = chans);
|
||||
}
|
||||
|
||||
|
|
@ -702,12 +732,18 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
instr = trans_atomic_buffer(tctx, intr, dest, src[1], src[2]);
|
||||
break;
|
||||
|
||||
/* Vertex sysvals. */
|
||||
case nir_intrinsic_load_vertex_id:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_load_base_instance:
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_draw_id:
|
||||
instr = trans_load_sysval_vs(tctx, intr, dest);
|
||||
|
||||
/* Compute sysvals. */
|
||||
case nir_intrinsic_load_local_invocation_index:
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
instr = trans_load_sysval(tctx, intr, dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_ddx:
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@
|
|||
*/
|
||||
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
|
||||
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
|
||||
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
|
||||
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 3
|
||||
/* Local IDs are available in every task. */
|
||||
#define PVR_PDS_CDM_LOCAL_ID_X 0
|
||||
#define PVR_PDS_CDM_LOCAL_ID_YZ 1
|
||||
|
|
@ -91,6 +91,12 @@ static const uint32_t cache_control_const[2][2] = {
|
|||
{ 0, 0 }
|
||||
};
|
||||
|
||||
static const uint32_t wg_id_temps[3] = {
|
||||
PVR_PDS_CDM_WORK_GROUP_ID_X,
|
||||
PVR_PDS_CDM_WORK_GROUP_ID_Y,
|
||||
PVR_PDS_CDM_WORK_GROUP_ID_Z,
|
||||
};
|
||||
|
||||
/*****************************************************************************
|
||||
Function definitions
|
||||
*****************************************************************************/
|
||||
|
|
@ -1768,6 +1774,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
* DOUTW for local, and two for global.
|
||||
*/
|
||||
uint32_t work_group_id_ctrl_words[2] = { 0 };
|
||||
uint32_t num_work_groups_ctrl_words[2] = { 0 };
|
||||
uint32_t local_id_ctrl_word = 0;
|
||||
uint32_t local_input_register;
|
||||
|
||||
|
|
@ -1795,6 +1802,42 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
|
||||
uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
|
||||
|
||||
const bool has_local_input_regs =
|
||||
(program->local_input_regs[0] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[2] != PVR_PDS_REG_UNUSED);
|
||||
|
||||
const bool has_local_input_reg[3] = {
|
||||
[0] = program->local_input_regs[0] != PVR_PDS_REG_UNUSED,
|
||||
[1] = program->local_input_regs[1] != PVR_PDS_REG_UNUSED,
|
||||
[2] = program->local_input_regs[2] != PVR_PDS_REG_UNUSED,
|
||||
};
|
||||
|
||||
const bool has_work_group_input_regs =
|
||||
(program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED);
|
||||
|
||||
const bool has_work_group_input_reg[3] = {
|
||||
[0] = program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED,
|
||||
[1] = program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED,
|
||||
[2] = program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED,
|
||||
};
|
||||
|
||||
const bool has_num_work_groups_regs =
|
||||
(program->num_work_groups_regs[0] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->num_work_groups_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->num_work_groups_regs[2] != PVR_PDS_REG_UNUSED);
|
||||
|
||||
const bool has_num_work_groups_reg[3] = {
|
||||
[0] = program->num_work_groups_regs[0] != PVR_PDS_REG_UNUSED,
|
||||
[1] = program->num_work_groups_regs[1] != PVR_PDS_REG_UNUSED,
|
||||
[2] = program->num_work_groups_regs[2] != PVR_PDS_REG_UNUSED,
|
||||
};
|
||||
|
||||
const bool has_barrier_coefficient = program->barrier_coefficient !=
|
||||
PVR_PDS_REG_UNUSED;
|
||||
|
||||
if (program->kick_usc) {
|
||||
/* Copy the USC task control words to constants. */
|
||||
usc_control_constant64 =
|
||||
|
|
@ -1822,13 +1865,12 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
program->cond_render_pred_temp = cond_render_pred_temp;
|
||||
}
|
||||
|
||||
if ((program->barrier_coefficient != PVR_PDS_REG_UNUSED) ||
|
||||
(program->clear_pds_barrier) ||
|
||||
if (has_barrier_coefficient || program->clear_pds_barrier ||
|
||||
(program->kick_usc && program->conditional_render)) {
|
||||
zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
|
||||
}
|
||||
|
||||
if (program->barrier_coefficient != PVR_PDS_REG_UNUSED) {
|
||||
if (has_barrier_coefficient) {
|
||||
barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
if (PVR_HAS_QUIRK(dev_info, 51210)) {
|
||||
barrier_ctrl_word2 =
|
||||
|
|
@ -1836,35 +1878,51 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
}
|
||||
}
|
||||
|
||||
if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED ||
|
||||
program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) {
|
||||
/* For DOUTW */
|
||||
if (has_work_group_input_reg[0] || has_work_group_input_reg[1]) {
|
||||
work_group_id_ctrl_words[0] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
|
||||
if (program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_work_group_input_reg[2]) {
|
||||
work_group_id_ctrl_words[1] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
|
||||
if ((program->local_input_regs[0] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
|
||||
/* For DOUTW */
|
||||
if (has_num_work_groups_reg[0] || has_num_work_groups_reg[1]) {
|
||||
num_work_groups_ctrl_words[0] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
|
||||
if (has_num_work_groups_reg[2]) {
|
||||
num_work_groups_ctrl_words[1] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
|
||||
/* For DOUTW */
|
||||
if (has_local_input_regs) {
|
||||
local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
|
||||
if (program->add_base_workgroup) {
|
||||
for (uint32_t workgroup_component = 0; workgroup_component < 3;
|
||||
workgroup_component++) {
|
||||
if (program->work_group_input_regs[workgroup_component] !=
|
||||
PVR_PDS_REG_UNUSED) {
|
||||
program
|
||||
->base_workgroup_constant_offset_in_dwords[workgroup_component] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
/* Patch constants. */
|
||||
for (uint32_t wg_comp = 0; wg_comp < 3; ++wg_comp) {
|
||||
if (has_work_group_input_reg[wg_comp]) {
|
||||
program->base_workgroup_constant_offset_in_dwords[wg_comp] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (has_num_work_groups_regs) {
|
||||
/* Ensure 64-bit alignment. */
|
||||
program->num_workgroups_constant_offset_in_dwords[0] =
|
||||
pvr_pds_get_constants(&next_constant, 2, &data_size);
|
||||
program->num_workgroups_constant_offset_in_dwords[1] =
|
||||
program->num_workgroups_constant_offset_in_dwords[0];
|
||||
program->num_workgroups_constant_offset_in_dwords[2] =
|
||||
pvr_pds_get_constants(&next_constant, 1, &data_size);
|
||||
}
|
||||
|
||||
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
|
||||
if (program->kick_usc) {
|
||||
/* Src0 for DOUTU */
|
||||
|
|
@ -1883,15 +1941,14 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
|
||||
}
|
||||
|
||||
if ((program->barrier_coefficient != PVR_PDS_REG_UNUSED) ||
|
||||
(program->clear_pds_barrier) ||
|
||||
if (has_barrier_coefficient || program->clear_pds_barrier ||
|
||||
(program->kick_usc && program->conditional_render)) {
|
||||
pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
|
||||
* Src0
|
||||
*/
|
||||
}
|
||||
|
||||
if (program->barrier_coefficient != PVR_PDS_REG_UNUSED) {
|
||||
if (has_barrier_coefficient) {
|
||||
if (PVR_HAS_QUIRK(dev_info, 51210)) {
|
||||
/* Write the constant for the coefficient register write. */
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
|
|
@ -1913,18 +1970,16 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
/* Check whether the barrier is going to be the last DOUTW done by
|
||||
* the coefficient sync task.
|
||||
*/
|
||||
if ((program->work_group_input_regs[0] == PVR_PDS_REG_UNUSED) &&
|
||||
(program->work_group_input_regs[1] == PVR_PDS_REG_UNUSED) &&
|
||||
(program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)) {
|
||||
if (!has_work_group_input_regs && !has_num_work_groups_regs)
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
}
|
||||
|
||||
pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
|
||||
}
|
||||
|
||||
/**/
|
||||
|
||||
/* If we want work-group id X, see if we also want work-group id Y. */
|
||||
if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED &&
|
||||
program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_work_group_input_reg[0] && has_work_group_input_reg[1]) {
|
||||
/* Make sure we are going to DOUTW them into adjacent registers
|
||||
* otherwise we can't do it in one.
|
||||
*/
|
||||
|
|
@ -1940,14 +1995,14 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
|
||||
/* If we don't want the Z work-group id then this is the last one.
|
||||
*/
|
||||
if (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)
|
||||
if (!has_work_group_input_reg[2] && !has_num_work_groups_regs)
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
|
||||
}
|
||||
/* If we only want one of X or Y then handle them separately. */
|
||||
else {
|
||||
if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_work_group_input_reg[0]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->work_group_input_regs[0],
|
||||
PVR_PDS_DOUTW_LOWER32,
|
||||
|
|
@ -1958,13 +2013,13 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
/* If we don't want the Z work-group id then this is the last
|
||||
* one.
|
||||
*/
|
||||
if (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)
|
||||
if (!has_work_group_input_reg[2] && !has_num_work_groups_regs)
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer,
|
||||
work_group_id_ctrl_words[0],
|
||||
doutw);
|
||||
} else if (program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) {
|
||||
} else if (has_work_group_input_reg[1]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->work_group_input_regs[1],
|
||||
PVR_PDS_DOUTW_UPPER32,
|
||||
|
|
@ -1975,7 +2030,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
/* If we don't want the Z work-group id then this is the last
|
||||
* one.
|
||||
*/
|
||||
if (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)
|
||||
if (!has_work_group_input_reg[2] && !has_num_work_groups_regs)
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer,
|
||||
|
|
@ -1985,35 +2040,111 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
}
|
||||
|
||||
/* Handle work-group id Z. */
|
||||
if (program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_work_group_input_reg[2]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->work_group_input_regs[2],
|
||||
PVR_PDS_DOUTW_UPPER32,
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
||||
true,
|
||||
dev_info);
|
||||
|
||||
if (!has_num_work_groups_regs)
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
|
||||
}
|
||||
|
||||
/**/
|
||||
|
||||
/* If we want num work-groups X, see if we also want num work-groups Y. */
|
||||
if (has_num_work_groups_reg[0] && has_num_work_groups_reg[1]) {
|
||||
/* Make sure we are going to DOUTW them into adjacent registers
|
||||
* otherwise we can't do it in one.
|
||||
*/
|
||||
assert(program->num_work_groups_regs[1] ==
|
||||
(program->num_work_groups_regs[0] + 1));
|
||||
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->num_work_groups_regs[0],
|
||||
PVR_PDS_DOUTW_LOWER64,
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
||||
true,
|
||||
dev_info);
|
||||
|
||||
/* If we don't want num work-groups Z then this is the last one.
|
||||
*/
|
||||
if (!has_num_work_groups_reg[2])
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[0], doutw);
|
||||
}
|
||||
/* If we only want one of X or Y then handle them separately. */
|
||||
else {
|
||||
if (has_num_work_groups_reg[0]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->num_work_groups_regs[0],
|
||||
PVR_PDS_DOUTW_LOWER32,
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
||||
true,
|
||||
dev_info);
|
||||
|
||||
/* If we don't want num work-groups Z then this is the last
|
||||
* one.
|
||||
*/
|
||||
if (has_num_work_groups_reg[2])
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer,
|
||||
num_work_groups_ctrl_words[0],
|
||||
doutw);
|
||||
} else if (has_num_work_groups_reg[1]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->num_work_groups_regs[1],
|
||||
PVR_PDS_DOUTW_UPPER32,
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
|
||||
true,
|
||||
dev_info);
|
||||
|
||||
/* If we don't want num work-groups Z then this is the last
|
||||
* one.
|
||||
*/
|
||||
if (!has_num_work_groups_reg[2])
|
||||
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
|
||||
|
||||
pvr_pds_write_constant32(buffer,
|
||||
num_work_groups_ctrl_words[0],
|
||||
doutw);
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle num work-groups Z. */
|
||||
if (has_num_work_groups_reg[2]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->num_work_groups_regs[2],
|
||||
PVR_PDS_DOUTW_LOWER32,
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
|
||||
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
|
||||
true,
|
||||
dev_info);
|
||||
|
||||
pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
|
||||
pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[1], doutw);
|
||||
}
|
||||
|
||||
/* Handle the local IDs. */
|
||||
if ((program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
|
||||
if (has_local_input_reg[1] || has_local_input_reg[2]) {
|
||||
uint32_t dest_reg;
|
||||
|
||||
/* If we want local id Y and Z make sure the compiler wants them in
|
||||
* the same register.
|
||||
*/
|
||||
if (!program->flattened_work_groups) {
|
||||
if ((program->local_input_regs[1] != PVR_PDS_REG_UNUSED) &&
|
||||
(program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
|
||||
if (has_local_input_reg[1] && has_local_input_reg[2]) {
|
||||
assert(program->local_input_regs[1] ==
|
||||
program->local_input_regs[2]);
|
||||
}
|
||||
}
|
||||
|
||||
if (program->local_input_regs[1] != PVR_PDS_REG_UNUSED)
|
||||
if (has_local_input_reg[1])
|
||||
dest_reg = program->local_input_regs[1];
|
||||
else
|
||||
dest_reg = program->local_input_regs[2];
|
||||
|
|
@ -2021,7 +2152,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
/* If we want local id X and (Y or Z) then we can do that in a
|
||||
* single 64-bit DOUTW.
|
||||
*/
|
||||
if (program->local_input_regs[0] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_local_input_reg[0]) {
|
||||
assert(dest_reg == (program->local_input_regs[0] + 1));
|
||||
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
|
|
@ -2052,7 +2183,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
}
|
||||
/* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
|
||||
*/
|
||||
else if (program->local_input_regs[0] != PVR_PDS_REG_UNUSED) {
|
||||
else if (has_local_input_reg[0]) {
|
||||
doutw = pvr_pds_encode_doutw_src1(
|
||||
program->local_input_regs[0],
|
||||
PVR_PDS_DOUTW_LOWER32,
|
||||
|
|
@ -2090,7 +2221,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
program->coeff_update_task_branch_size /* ADDR */));
|
||||
|
||||
/* Do we need to initialize the barrier coefficient? */
|
||||
if (program->barrier_coefficient != PVR_PDS_REG_UNUSED) {
|
||||
if (has_barrier_coefficient) {
|
||||
if (PVR_HAS_QUIRK(dev_info, 51210)) {
|
||||
/* Initialize the second barrier coefficient registers to zero.
|
||||
*/
|
||||
|
|
@ -2106,51 +2237,42 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
zero_constant64 >> 1)); /* SRC0 */
|
||||
}
|
||||
|
||||
if (program->add_base_workgroup) {
|
||||
const uint32_t temp_values[3] = { 0, 1, 3 };
|
||||
for (uint32_t workgroup_component = 0; workgroup_component < 3;
|
||||
workgroup_component++) {
|
||||
if (program->work_group_input_regs[workgroup_component] ==
|
||||
PVR_PDS_REG_UNUSED) {
|
||||
continue;
|
||||
}
|
||||
/* Add base workgroup to workgroup ids. */
|
||||
for (uint32_t wg_comp = 0; wg_comp < 3; ++wg_comp) {
|
||||
if (!has_work_group_input_reg[wg_comp])
|
||||
continue;
|
||||
|
||||
APPEND(pvr_pds_inst_encode_add32(
|
||||
/* cc */ 0x0,
|
||||
/* ALUM */ 0,
|
||||
/* SNA */ 0,
|
||||
/* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
|
||||
program->base_workgroup_constant_offset_in_dwords
|
||||
[workgroup_component],
|
||||
/* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
|
||||
PVR_PDS_CDM_WORK_GROUP_ID_X +
|
||||
temp_values[workgroup_component],
|
||||
/* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
|
||||
PVR_PDS_CDM_WORK_GROUP_ID_X +
|
||||
temp_values[workgroup_component]));
|
||||
}
|
||||
APPEND(pvr_pds_inst_encode_add32(
|
||||
/* cc */ 0x0,
|
||||
/* ALUM */ 0,
|
||||
/* SNA */ 0,
|
||||
/* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
|
||||
program->base_workgroup_constant_offset_in_dwords[wg_comp],
|
||||
/* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
|
||||
wg_id_temps[wg_comp],
|
||||
/* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
|
||||
wg_id_temps[wg_comp]));
|
||||
}
|
||||
|
||||
/* If we are going to put the work-group IDs in coefficients then we
|
||||
* just need to do the DOUTWs.
|
||||
*/
|
||||
if ((program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED)) {
|
||||
uint32_t dest_reg;
|
||||
if (has_work_group_input_reg[0] || has_work_group_input_reg[1]) {
|
||||
uint32_t src_reg;
|
||||
|
||||
if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED)
|
||||
dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
|
||||
if (has_work_group_input_reg[0])
|
||||
src_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
|
||||
else
|
||||
dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
|
||||
src_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
|
||||
|
||||
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
||||
0, /* END */
|
||||
work_group_id_ctrl_words[0], /* SRC1
|
||||
*/
|
||||
dest_reg >> 1)); /* SRC0 */
|
||||
src_reg >> 1)); /* SRC0 */
|
||||
}
|
||||
|
||||
if (program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_work_group_input_reg[2]) {
|
||||
APPEND(pvr_pds_encode_doutw64(
|
||||
0, /* cc */
|
||||
0, /* END */
|
||||
|
|
@ -2159,6 +2281,35 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
1)); /* SRC0 */
|
||||
}
|
||||
|
||||
/* If we are going to put the num work-groups in coefficients then we
|
||||
* just need to do the DOUTWs.
|
||||
*/
|
||||
if (has_num_work_groups_reg[0] || has_num_work_groups_reg[1]) {
|
||||
uint32_t src_reg;
|
||||
|
||||
if (has_num_work_groups_reg[0])
|
||||
src_reg = PVR_PDS_CONSTANTS_BLOCK_BASE +
|
||||
program->num_workgroups_constant_offset_in_dwords[0];
|
||||
else
|
||||
src_reg = PVR_PDS_CONSTANTS_BLOCK_BASE +
|
||||
program->num_workgroups_constant_offset_in_dwords[1];
|
||||
|
||||
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
||||
0, /* END */
|
||||
num_work_groups_ctrl_words[0], /* SRC1 */
|
||||
src_reg >> 1)); /* SRC0 */
|
||||
}
|
||||
|
||||
if (has_num_work_groups_reg[2]) {
|
||||
APPEND(pvr_pds_encode_doutw64(
|
||||
0, /* cc */
|
||||
0, /* END */
|
||||
num_work_groups_ctrl_words[1], /* SRC1 */
|
||||
(PVR_PDS_CONSTANTS_BLOCK_BASE +
|
||||
program->num_workgroups_constant_offset_in_dwords[2]) >>
|
||||
1)); /* SRC0 */
|
||||
}
|
||||
|
||||
/* Issue the task to the USC. */
|
||||
if (program->kick_usc && program->has_coefficient_update_task) {
|
||||
APPEND(pvr_pds_encode_doutu(0, /* cc */
|
||||
|
|
@ -2176,28 +2327,24 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
|
|||
/* DOUTW in the local IDs. */
|
||||
|
||||
/* If we want X and Y or Z, we only need one DOUTW. */
|
||||
if ((program->local_input_regs[0] != PVR_PDS_REG_UNUSED) &&
|
||||
((program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[2] != PVR_PDS_REG_UNUSED))) {
|
||||
if (has_local_input_reg[0] &&
|
||||
(has_local_input_reg[1] || has_local_input_reg[2])) {
|
||||
local_input_register =
|
||||
PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
|
||||
} else {
|
||||
/* If we just want X. */
|
||||
if (program->local_input_regs[0] != PVR_PDS_REG_UNUSED) {
|
||||
if (has_local_input_reg[0]) {
|
||||
local_input_register =
|
||||
PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
|
||||
}
|
||||
/* If we just want Y or Z. */
|
||||
else if (program->local_input_regs[1] != PVR_PDS_REG_UNUSED ||
|
||||
program->local_input_regs[2] != PVR_PDS_REG_UNUSED) {
|
||||
else if (has_local_input_reg[1] || has_local_input_reg[2]) {
|
||||
local_input_register =
|
||||
PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
|
||||
}
|
||||
}
|
||||
|
||||
if ((program->local_input_regs[0] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
|
||||
(program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
|
||||
if (has_local_input_regs) {
|
||||
APPEND(pvr_pds_encode_doutw64(0, /* cc */
|
||||
0, /* END */
|
||||
local_id_ctrl_word, /* SRC1 */
|
||||
|
|
|
|||
|
|
@ -539,6 +539,7 @@ struct pvr_pds_compute_shader_program {
|
|||
|
||||
uint32_t local_input_regs[3];
|
||||
uint32_t work_group_input_regs[3];
|
||||
uint32_t num_work_groups_regs[3];
|
||||
uint32_t global_input_regs[3];
|
||||
|
||||
uint32_t barrier_coefficient;
|
||||
|
|
@ -553,8 +554,8 @@ struct pvr_pds_compute_shader_program {
|
|||
|
||||
uint32_t coeff_update_task_branch_size;
|
||||
|
||||
bool add_base_workgroup;
|
||||
uint32_t base_workgroup_constant_offset_in_dwords[3];
|
||||
uint32_t num_workgroups_constant_offset_in_dwords[3];
|
||||
|
||||
bool kick_usc;
|
||||
|
||||
|
|
@ -585,6 +586,11 @@ static inline void pvr_pds_compute_shader_program_init(
|
|||
PVR_PDS_REG_UNUSED,
|
||||
PVR_PDS_REG_UNUSED,
|
||||
},
|
||||
.num_work_groups_regs = {
|
||||
PVR_PDS_REG_UNUSED,
|
||||
PVR_PDS_REG_UNUSED,
|
||||
PVR_PDS_REG_UNUSED,
|
||||
},
|
||||
.global_input_regs = {
|
||||
PVR_PDS_REG_UNUSED,
|
||||
PVR_PDS_REG_UNUSED,
|
||||
|
|
|
|||
|
|
@ -2609,26 +2609,37 @@ void pvr_CmdBindDescriptorSets2KHR(
|
|||
|
||||
PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
|
||||
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
|
||||
struct pvr_descriptor_state *desc_state =
|
||||
&cmd_buffer->state.gfx_desc_state;
|
||||
struct pvr_descriptor_state *graphics_desc_state =
|
||||
&cmd_buffer->state.gfx_desc_state;
|
||||
struct pvr_descriptor_state *compute_desc_state =
|
||||
&cmd_buffer->state.compute_desc_state;
|
||||
|
||||
for (unsigned u = 0; u < pBindDescriptorSetsInfo->descriptorSetCount;
|
||||
++u) {
|
||||
VK_FROM_HANDLE(pvr_descriptor_set,
|
||||
set,
|
||||
pBindDescriptorSetsInfo->pDescriptorSets[u]);
|
||||
unsigned desc_set = u + pBindDescriptorSetsInfo->firstSet;
|
||||
for (unsigned u = 0; u < pBindDescriptorSetsInfo->descriptorSetCount; ++u) {
|
||||
VK_FROM_HANDLE(pvr_descriptor_set,
|
||||
set,
|
||||
pBindDescriptorSetsInfo->pDescriptorSets[u]);
|
||||
unsigned desc_set = u + pBindDescriptorSetsInfo->firstSet;
|
||||
|
||||
if (desc_state->sets[desc_set] != set) {
|
||||
desc_state->sets[desc_set] = set;
|
||||
desc_state->dirty_sets |= BITFIELD_BIT(desc_set);
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
|
||||
if (graphics_desc_state->sets[desc_set] != set) {
|
||||
graphics_desc_state->sets[desc_set] = set;
|
||||
graphics_desc_state->dirty_sets |= BITFIELD_BIT(desc_set);
|
||||
}
|
||||
}
|
||||
|
||||
cmd_buffer->state.dirty.gfx_desc_dirty = true;
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
if (compute_desc_state->sets[desc_set] != set) {
|
||||
compute_desc_state->sets[desc_set] = set;
|
||||
compute_desc_state->dirty_sets |= BITFIELD_BIT(desc_set);
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(!(pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT));
|
||||
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
|
||||
cmd_buffer->state.dirty.gfx_desc_dirty = true;
|
||||
|
||||
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
|
||||
cmd_buffer->state.dirty.compute_desc_dirty = true;
|
||||
}
|
||||
|
||||
void pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
|
||||
|
|
@ -3600,8 +3611,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
|
|||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_csb *csb = &sub_cmd->control_stream;
|
||||
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
|
||||
const uint32_t const_shared_regs =
|
||||
pipeline->shader_state.const_shared_reg_count;
|
||||
const uint32_t const_shared_regs = pipeline->cs_data.common.shareds;
|
||||
struct pvr_compute_kernel_info info;
|
||||
|
||||
/* No shared regs, no need to use an allocation kernel. */
|
||||
|
|
@ -3624,7 +3634,7 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
|
|||
.usc_target = ROGUE_CDMCTRL_USC_TARGET_ALL,
|
||||
.usc_common_shared = true,
|
||||
.usc_common_size =
|
||||
DIV_ROUND_UP(const_shared_regs,
|
||||
DIV_ROUND_UP(PVR_DW_TO_BYTES(const_shared_regs),
|
||||
ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE),
|
||||
|
||||
.global_size = { 1, 1, 1 },
|
||||
|
|
@ -3748,8 +3758,6 @@ void pvr_compute_update_kernel_private(
|
|||
const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS])
|
||||
{
|
||||
const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
|
||||
const struct pvr_device_runtime_info *dev_runtime_info =
|
||||
&pdevice->dev_runtime_info;
|
||||
struct pvr_csb *csb = &sub_cmd->control_stream;
|
||||
|
||||
struct pvr_compute_kernel_info info = {
|
||||
|
|
@ -3783,15 +3791,8 @@ void pvr_compute_update_kernel_private(
|
|||
uint32_t work_size = pipeline->workgroup_size.width *
|
||||
pipeline->workgroup_size.height *
|
||||
pipeline->workgroup_size.depth;
|
||||
uint32_t coeff_regs;
|
||||
|
||||
if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
|
||||
/* Enforce a single workgroup per cluster through allocation starvation.
|
||||
*/
|
||||
coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs;
|
||||
} else {
|
||||
coeff_regs = pipeline->coeff_regs_count;
|
||||
}
|
||||
uint32_t coeff_regs =
|
||||
pipeline->coeff_regs_count + pipeline->const_shared_regs_count;
|
||||
|
||||
info.usc_common_size =
|
||||
DIV_ROUND_UP(PVR_DW_TO_BYTES(coeff_regs),
|
||||
|
|
@ -3800,8 +3801,6 @@ void pvr_compute_update_kernel_private(
|
|||
/* Use a whole slot per workgroup. */
|
||||
work_size = MAX2(work_size, ROGUE_MAX_INSTANCES_PER_TASK);
|
||||
|
||||
coeff_regs += pipeline->const_shared_regs_count;
|
||||
|
||||
if (pipeline->const_shared_regs_count > 0)
|
||||
info.sd_type = ROGUE_CDMCTRL_SD_TYPE_USC;
|
||||
|
||||
|
|
@ -3818,24 +3817,53 @@ void pvr_compute_update_kernel_private(
|
|||
pvr_compute_generate_control_stream(csb, sub_cmd, &info);
|
||||
}
|
||||
|
||||
/* TODO: Wire up the base_workgroup variant program when implementing
|
||||
* VK_KHR_device_group. The values will also need patching into the program.
|
||||
*/
|
||||
static void pvr_compute_update_kernel(
|
||||
struct pvr_cmd_buffer *cmd_buffer,
|
||||
struct pvr_sub_cmd_compute *const sub_cmd,
|
||||
pvr_dev_addr_t indirect_addr,
|
||||
const uint32_t global_base_group[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS])
|
||||
{
|
||||
const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
|
||||
const struct pvr_device_runtime_info *dev_runtime_info =
|
||||
&pdevice->dev_runtime_info;
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
struct pvr_csb *csb = &sub_cmd->control_stream;
|
||||
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
|
||||
const struct pvr_compute_shader_state *shader_state =
|
||||
&pipeline->shader_state;
|
||||
const struct pvr_pds_info *program_info = &pipeline->primary_program_info;
|
||||
const pco_data *const cs_data = &pipeline->cs_data;
|
||||
const struct pvr_pds_info *program_info = &pipeline->pds_cs_program_info;
|
||||
bool uses_wg_id = pipeline->base_workgroup_data_patching_offset != ~0u;
|
||||
bool uses_num_wgs = pipeline->num_workgroups_data_patching_offset != ~0u;
|
||||
bool base_group_set = !!global_base_group[0] || !!global_base_group[1] ||
|
||||
!!global_base_group[2];
|
||||
uint32_t pds_data_offset = pipeline->pds_cs_program.data_offset;
|
||||
|
||||
/* Does the PDS data segment need patching, or can the default be used? */
|
||||
if ((uses_wg_id && base_group_set) || uses_num_wgs) {
|
||||
struct pvr_pds_upload pds_data_upload;
|
||||
uint32_t *pds_data;
|
||||
|
||||
/* Upload and patch PDS data segment. */
|
||||
pvr_cmd_buffer_upload_pds_data(cmd_buffer,
|
||||
pipeline->pds_cs_data_section,
|
||||
program_info->data_size_in_dwords,
|
||||
16,
|
||||
&pds_data_upload);
|
||||
pds_data_offset = pds_data_upload.data_offset;
|
||||
pds_data = pvr_bo_suballoc_get_map_addr(pds_data_upload.pvr_bo);
|
||||
|
||||
if (uses_wg_id && base_group_set) {
|
||||
unsigned offset = pipeline->base_workgroup_data_patching_offset;
|
||||
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
|
||||
pds_data[offset + u] = global_base_group[u];
|
||||
}
|
||||
}
|
||||
|
||||
if (uses_num_wgs) {
|
||||
unsigned offset = pipeline->num_workgroups_data_patching_offset;
|
||||
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
|
||||
pds_data[offset + u] = global_workgroup_size[u];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct pvr_compute_kernel_info info = {
|
||||
.indirect_buffer_addr = indirect_addr,
|
||||
|
|
@ -3847,13 +3875,13 @@ static void pvr_compute_update_kernel(
|
|||
.pds_data_size =
|
||||
DIV_ROUND_UP(PVR_DW_TO_BYTES(program_info->data_size_in_dwords),
|
||||
ROGUE_CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE),
|
||||
.pds_data_offset = pipeline->primary_program.data_offset,
|
||||
.pds_code_offset = pipeline->primary_program.code_offset,
|
||||
.pds_data_offset = pds_data_offset,
|
||||
.pds_code_offset = pipeline->pds_cs_program.code_offset,
|
||||
|
||||
.sd_type = ROGUE_CDMCTRL_SD_TYPE_NONE,
|
||||
|
||||
.usc_unified_size =
|
||||
DIV_ROUND_UP(shader_state->input_register_count << 2U,
|
||||
DIV_ROUND_UP(cs_data->common.vtxins << 2U,
|
||||
ROGUE_CDMCTRL_KERNEL0_USC_UNIFIED_SIZE_UNIT_SIZE),
|
||||
|
||||
/* clang-format off */
|
||||
|
|
@ -3865,16 +3893,10 @@ static void pvr_compute_update_kernel(
|
|||
/* clang-format on */
|
||||
};
|
||||
|
||||
uint32_t work_size = shader_state->work_size;
|
||||
uint32_t coeff_regs;
|
||||
|
||||
if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
|
||||
/* Enforce a single workgroup per cluster through allocation starvation.
|
||||
*/
|
||||
coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs;
|
||||
} else {
|
||||
coeff_regs = shader_state->coefficient_register_count;
|
||||
}
|
||||
uint32_t work_size = cs_data->cs.workgroup_size[0] *
|
||||
cs_data->cs.workgroup_size[1] *
|
||||
cs_data->cs.workgroup_size[2];
|
||||
uint32_t coeff_regs = cs_data->common.coeffs + cs_data->common.shareds;
|
||||
|
||||
info.usc_common_size =
|
||||
DIV_ROUND_UP(PVR_DW_TO_BYTES(coeff_regs),
|
||||
|
|
@ -3883,9 +3905,7 @@ static void pvr_compute_update_kernel(
|
|||
/* Use a whole slot per workgroup. */
|
||||
work_size = MAX2(work_size, ROGUE_MAX_INSTANCES_PER_TASK);
|
||||
|
||||
coeff_regs += shader_state->const_shared_reg_count;
|
||||
|
||||
if (shader_state->const_shared_reg_count > 0)
|
||||
if (cs_data->common.shareds > 0)
|
||||
info.sd_type = ROGUE_CDMCTRL_SD_TYPE_USC;
|
||||
|
||||
work_size =
|
||||
|
|
@ -3947,19 +3967,21 @@ static VkResult pvr_cmd_upload_push_consts(struct pvr_cmd_buffer *cmd_buffer)
|
|||
static void pvr_cmd_dispatch(
|
||||
struct pvr_cmd_buffer *const cmd_buffer,
|
||||
const pvr_dev_addr_t indirect_addr,
|
||||
const uint32_t base_group[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
const uint32_t workgroup_size[static const PVR_WORKGROUP_DIMENSIONS])
|
||||
{
|
||||
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||
const struct pvr_compute_pipeline *compute_pipeline =
|
||||
state->compute_pipeline;
|
||||
const pco_data *const cs_data = &compute_pipeline->cs_data;
|
||||
struct pvr_sub_cmd_compute *sub_cmd;
|
||||
VkResult result;
|
||||
|
||||
pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_COMPUTE);
|
||||
|
||||
sub_cmd = &state->current_sub_cmd->compute;
|
||||
sub_cmd->uses_atomic_ops |= compute_pipeline->shader_state.uses_atomic_ops;
|
||||
sub_cmd->uses_barrier |= compute_pipeline->shader_state.uses_barrier;
|
||||
sub_cmd->uses_atomic_ops |= cs_data->common.uses.atomics;
|
||||
sub_cmd->uses_barrier |= cs_data->common.uses.barriers;
|
||||
|
||||
if (state->push_constants.dirty_stages & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
result = pvr_cmd_upload_push_consts(cmd_buffer);
|
||||
|
|
@ -3972,16 +3994,33 @@ static void pvr_cmd_dispatch(
|
|||
state->push_constants.dirty_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
}
|
||||
|
||||
UNREACHABLE("compute descriptor support");
|
||||
if (state->dirty.compute_desc_dirty ||
|
||||
state->dirty.compute_pipeline_binding) {
|
||||
result = pvr_setup_descriptor_mappings(
|
||||
cmd_buffer,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->descriptor_state,
|
||||
NULL,
|
||||
&state->pds_compute_descriptor_data_offset);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
}
|
||||
|
||||
pvr_compute_update_shared(cmd_buffer, sub_cmd);
|
||||
pvr_compute_update_kernel(cmd_buffer, sub_cmd, indirect_addr, workgroup_size);
|
||||
pvr_compute_update_kernel(cmd_buffer,
|
||||
sub_cmd,
|
||||
indirect_addr,
|
||||
base_group,
|
||||
workgroup_size);
|
||||
}
|
||||
|
||||
void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
||||
uint32_t groupCountX,
|
||||
uint32_t groupCountY,
|
||||
uint32_t groupCountZ)
|
||||
void pvr_CmdDispatchBase(VkCommandBuffer commandBuffer,
|
||||
uint32_t baseGroupX,
|
||||
uint32_t baseGroupY,
|
||||
uint32_t baseGroupZ,
|
||||
uint32_t groupCountX,
|
||||
uint32_t groupCountY,
|
||||
uint32_t groupCountZ)
|
||||
{
|
||||
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
|
|
@ -3992,6 +4031,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
|||
|
||||
pvr_cmd_dispatch(cmd_buffer,
|
||||
PVR_DEV_ADDR_INVALID,
|
||||
(uint32_t[]){ baseGroupX, baseGroupY, baseGroupZ },
|
||||
(uint32_t[]){ groupCountX, groupCountY, groupCountZ });
|
||||
}
|
||||
|
||||
|
|
@ -4006,6 +4046,7 @@ void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
|||
|
||||
pvr_cmd_dispatch(cmd_buffer,
|
||||
PVR_DEV_ADDR_OFFSET(buffer->dev_addr, offset),
|
||||
(uint32_t[]){ 0, 0, 0 },
|
||||
(uint32_t[]){ 1, 1, 1 });
|
||||
}
|
||||
|
||||
|
|
@ -4070,7 +4111,7 @@ pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
|
|||
state0.usc_target = ROGUE_VDMCTRL_USC_TARGET_ALL;
|
||||
|
||||
state0.usc_common_size =
|
||||
DIV_ROUND_UP(vs_data->common.shareds,
|
||||
DIV_ROUND_UP(PVR_DW_TO_BYTES(vs_data->common.shareds),
|
||||
ROGUE_VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE);
|
||||
|
||||
state0.pds_data_size = DIV_ROUND_UP(
|
||||
|
|
|
|||
|
|
@ -102,14 +102,6 @@ static const struct pvr_hard_coding_data {
|
|||
.shader_size = sizeof(pvr_simple_compute_shader),
|
||||
|
||||
.shader_info = {
|
||||
.uses_atomic_ops = false,
|
||||
.uses_barrier = false,
|
||||
.uses_num_workgroups = false,
|
||||
|
||||
.const_shared_reg_count = 4,
|
||||
.input_register_count = 8,
|
||||
.work_size = 1 * 1 * 1,
|
||||
.coefficient_register_count = 4,
|
||||
},
|
||||
|
||||
.build_info = {
|
||||
|
|
|
|||
|
|
@ -669,221 +669,164 @@ static void pvr_pds_descriptor_program_destroy(
|
|||
|
||||
static void pvr_pds_compute_program_setup(
|
||||
const struct pvr_device_info *dev_info,
|
||||
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
uint32_t barrier_coefficient,
|
||||
bool add_base_workgroup,
|
||||
uint32_t usc_temps,
|
||||
pvr_dev_addr_t usc_shader_dev_addr,
|
||||
pco_data *cs_data,
|
||||
struct pvr_compute_shader_state *compute_state,
|
||||
struct pvr_pds_compute_shader_program *const program)
|
||||
{
|
||||
pco_range *sys_vals = cs_data->common.sys_vals;
|
||||
|
||||
pvr_pds_compute_shader_program_init(program);
|
||||
program->local_input_regs[0] = local_input_regs[0];
|
||||
program->local_input_regs[1] = local_input_regs[1];
|
||||
program->local_input_regs[2] = local_input_regs[2];
|
||||
program->work_group_input_regs[0] = work_group_input_regs[0];
|
||||
program->work_group_input_regs[1] = work_group_input_regs[1];
|
||||
program->work_group_input_regs[2] = work_group_input_regs[2];
|
||||
program->barrier_coefficient = barrier_coefficient;
|
||||
program->add_base_workgroup = add_base_workgroup;
|
||||
|
||||
if (sys_vals[SYSTEM_VALUE_LOCAL_INVOCATION_INDEX].count > 0) {
|
||||
program->local_input_regs[0] =
|
||||
sys_vals[SYSTEM_VALUE_LOCAL_INVOCATION_INDEX].start;
|
||||
}
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(program->work_group_input_regs); ++u) {
|
||||
if (sys_vals[SYSTEM_VALUE_WORKGROUP_ID].count > u) {
|
||||
program->work_group_input_regs[u] =
|
||||
sys_vals[SYSTEM_VALUE_WORKGROUP_ID].start + u;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(program->num_work_groups_regs); ++u) {
|
||||
if (sys_vals[SYSTEM_VALUE_NUM_WORKGROUPS].count > u) {
|
||||
program->num_work_groups_regs[u] =
|
||||
sys_vals[SYSTEM_VALUE_NUM_WORKGROUPS].start + u;
|
||||
}
|
||||
}
|
||||
|
||||
program->flattened_work_groups = true;
|
||||
program->kick_usc = true;
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(program->local_input_regs) ==
|
||||
PVR_WORKGROUP_DIMENSIONS);
|
||||
STATIC_ASSERT(ARRAY_SIZE(program->work_group_input_regs) ==
|
||||
PVR_WORKGROUP_DIMENSIONS);
|
||||
STATIC_ASSERT(ARRAY_SIZE(program->global_input_regs) ==
|
||||
PVR_WORKGROUP_DIMENSIONS);
|
||||
|
||||
pvr_pds_setup_doutu(&program->usc_task_control,
|
||||
usc_shader_dev_addr.addr,
|
||||
usc_temps,
|
||||
compute_state->bo->dev_addr.addr,
|
||||
cs_data->common.temps,
|
||||
ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
|
||||
false);
|
||||
|
||||
pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
|
||||
}
|
||||
|
||||
/* FIXME: See if pvr_device_init_compute_pds_program() and this could be merged.
|
||||
/* This uploads the code segment and base data segment variant.
|
||||
* This can be patched at dispatch time.
|
||||
*/
|
||||
static VkResult pvr_pds_compute_program_create_and_upload(
|
||||
struct pvr_device *const device,
|
||||
const VkAllocationCallbacks *const allocator,
|
||||
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
uint32_t barrier_coefficient,
|
||||
uint32_t usc_temps,
|
||||
pvr_dev_addr_t usc_shader_dev_addr,
|
||||
struct pvr_pds_upload *const pds_upload_out,
|
||||
struct pvr_pds_info *const pds_info_out)
|
||||
struct pvr_compute_shader_state *compute_state,
|
||||
struct pvr_compute_pipeline *compute_pipeline)
|
||||
{
|
||||
pco_range *sys_vals = compute_pipeline->cs_data.common.sys_vals;
|
||||
struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
struct pvr_pds_compute_shader_program program;
|
||||
uint32_t staging_buffer_size;
|
||||
uint32_t *staging_buffer;
|
||||
uint32_t *code_buffer;
|
||||
uint32_t *data_buffer;
|
||||
VkResult result;
|
||||
|
||||
bool uses_wg_id = sys_vals[SYSTEM_VALUE_WORKGROUP_ID].count > 0;
|
||||
bool uses_num_wgs = sys_vals[SYSTEM_VALUE_NUM_WORKGROUPS].count > 0;
|
||||
|
||||
pvr_pds_compute_program_setup(dev_info,
|
||||
local_input_regs,
|
||||
work_group_input_regs,
|
||||
barrier_coefficient,
|
||||
false,
|
||||
usc_temps,
|
||||
usc_shader_dev_addr,
|
||||
&compute_pipeline->cs_data,
|
||||
compute_state,
|
||||
&program);
|
||||
|
||||
/* FIXME: According to pvr_device_init_compute_pds_program() the code size
|
||||
* is in bytes. Investigate this.
|
||||
*/
|
||||
staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);
|
||||
|
||||
staging_buffer = vk_alloc2(&device->vk.alloc,
|
||||
allocator,
|
||||
staging_buffer_size,
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!staging_buffer)
|
||||
code_buffer = vk_alloc2(&device->vk.alloc,
|
||||
allocator,
|
||||
PVR_DW_TO_BYTES(program.code_size),
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!code_buffer)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
/* FIXME: pvr_pds_compute_shader doesn't implement
|
||||
* PDS_GENERATE_CODEDATA_SEGMENTS.
|
||||
*/
|
||||
data_buffer = vk_alloc2(&device->vk.alloc,
|
||||
allocator,
|
||||
PVR_DW_TO_BYTES(program.code_size),
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!data_buffer) {
|
||||
vk_free2(&device->vk.alloc, allocator, code_buffer);
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
pvr_pds_compute_shader(&program,
|
||||
&staging_buffer[0],
|
||||
&code_buffer[0],
|
||||
PDS_GENERATE_CODE_SEGMENT,
|
||||
dev_info);
|
||||
|
||||
pvr_pds_compute_shader(&program,
|
||||
&staging_buffer[program.code_size],
|
||||
&data_buffer[0],
|
||||
PDS_GENERATE_DATA_SEGMENT,
|
||||
dev_info);
|
||||
|
||||
/* Initialize. */
|
||||
if (uses_wg_id) {
|
||||
unsigned offset = program.base_workgroup_constant_offset_in_dwords[0];
|
||||
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
|
||||
data_buffer[offset + u] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (uses_num_wgs) {
|
||||
unsigned offset = program.num_workgroups_constant_offset_in_dwords[0];
|
||||
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
|
||||
data_buffer[offset + u] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* FIXME: Figure out the define for alignment of 16. */
|
||||
result = pvr_gpu_upload_pds(device,
|
||||
&staging_buffer[program.code_size],
|
||||
data_buffer,
|
||||
program.data_size,
|
||||
16,
|
||||
&staging_buffer[0],
|
||||
code_buffer,
|
||||
program.code_size,
|
||||
16,
|
||||
16,
|
||||
pds_upload_out);
|
||||
&compute_pipeline->pds_cs_program);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free2(&device->vk.alloc, allocator, staging_buffer);
|
||||
vk_free2(&device->vk.alloc, allocator, code_buffer);
|
||||
vk_free2(&device->vk.alloc, allocator, data_buffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
*pds_info_out = (struct pvr_pds_info){
|
||||
compute_pipeline->pds_cs_data_section = data_buffer;
|
||||
|
||||
/* The base workgroup and num workgroups can be patched in the
|
||||
* PDS data section before dispatch so we save their offsets.
|
||||
*/
|
||||
compute_pipeline->base_workgroup_data_patching_offset = ~0u;
|
||||
if (uses_wg_id) {
|
||||
compute_pipeline->base_workgroup_data_patching_offset =
|
||||
program.base_workgroup_constant_offset_in_dwords[0];
|
||||
}
|
||||
|
||||
compute_pipeline->num_workgroups_data_patching_offset = ~0u;
|
||||
if (uses_num_wgs) {
|
||||
compute_pipeline->num_workgroups_data_patching_offset =
|
||||
program.num_workgroups_constant_offset_in_dwords[0];
|
||||
}
|
||||
|
||||
compute_pipeline->pds_cs_program_info = (struct pvr_pds_info){
|
||||
.temps_required = program.highest_temp,
|
||||
.code_size_in_dwords = program.code_size,
|
||||
.data_size_in_dwords = program.data_size,
|
||||
};
|
||||
|
||||
vk_free2(&device->vk.alloc, allocator, staging_buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
};
|
||||
|
||||
static void pvr_pds_compute_program_destroy(
|
||||
struct pvr_device *const device,
|
||||
const struct VkAllocationCallbacks *const allocator,
|
||||
struct pvr_pds_upload *const pds_program,
|
||||
struct pvr_pds_info *const pds_info)
|
||||
{
|
||||
/* We don't allocate an entries buffer so we don't need to free it */
|
||||
pvr_bo_suballoc_free(pds_program->pvr_bo);
|
||||
}
|
||||
|
||||
/* This only uploads the code segment. The data segment will need to be patched
|
||||
* with the base workgroup before uploading.
|
||||
*/
|
||||
static VkResult pvr_pds_compute_base_workgroup_variant_program_init(
|
||||
struct pvr_device *const device,
|
||||
const VkAllocationCallbacks *const allocator,
|
||||
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||
uint32_t barrier_coefficient,
|
||||
uint32_t usc_temps,
|
||||
pvr_dev_addr_t usc_shader_dev_addr,
|
||||
struct pvr_pds_base_workgroup_program *program_out)
|
||||
{
|
||||
struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||
struct pvr_pds_compute_shader_program program;
|
||||
uint32_t buffer_size;
|
||||
uint32_t *buffer;
|
||||
VkResult result;
|
||||
|
||||
pvr_pds_compute_program_setup(dev_info,
|
||||
local_input_regs,
|
||||
work_group_input_regs,
|
||||
barrier_coefficient,
|
||||
true,
|
||||
usc_temps,
|
||||
usc_shader_dev_addr,
|
||||
&program);
|
||||
|
||||
/* FIXME: According to pvr_device_init_compute_pds_program() the code size
|
||||
* is in bytes. Investigate this.
|
||||
*/
|
||||
buffer_size = PVR_DW_TO_BYTES(MAX2(program.code_size, program.data_size));
|
||||
|
||||
buffer = vk_alloc2(&device->vk.alloc,
|
||||
allocator,
|
||||
buffer_size,
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!buffer)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
pvr_pds_compute_shader(&program,
|
||||
&buffer[0],
|
||||
PDS_GENERATE_CODE_SEGMENT,
|
||||
dev_info);
|
||||
|
||||
/* FIXME: Figure out the define for alignment of 16. */
|
||||
result = pvr_gpu_upload_pds(device,
|
||||
NULL,
|
||||
0,
|
||||
0,
|
||||
buffer,
|
||||
program.code_size,
|
||||
16,
|
||||
16,
|
||||
&program_out->code_upload);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free2(&device->vk.alloc, allocator, buffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
pvr_pds_compute_shader(&program, buffer, PDS_GENERATE_DATA_SEGMENT, dev_info);
|
||||
|
||||
program_out->data_section = buffer;
|
||||
|
||||
/* We'll need to patch the base workgroup in the PDS data section before
|
||||
* dispatch so we save the offsets at which to patch. We only need to save
|
||||
* the offset for the first workgroup id since the workgroup ids are stored
|
||||
* contiguously in the data segment.
|
||||
*/
|
||||
program_out->base_workgroup_data_patching_offset =
|
||||
program.base_workgroup_constant_offset_in_dwords[0];
|
||||
|
||||
program_out->info = (struct pvr_pds_info){
|
||||
.temps_required = program.highest_temp,
|
||||
.code_size_in_dwords = program.code_size,
|
||||
.data_size_in_dwords = program.data_size,
|
||||
};
|
||||
vk_free2(&device->vk.alloc, allocator, code_buffer);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void pvr_pds_compute_base_workgroup_variant_program_finish(
|
||||
struct pvr_device *device,
|
||||
const VkAllocationCallbacks *const allocator,
|
||||
struct pvr_pds_base_workgroup_program *const state)
|
||||
static void
|
||||
pvr_pds_compute_program_destroy(struct pvr_device *device,
|
||||
const VkAllocationCallbacks *const allocator,
|
||||
struct pvr_pds_upload *const pds_cs_program,
|
||||
uint32_t *pds_cs_data_section)
|
||||
{
|
||||
pvr_bo_suballoc_free(state->code_upload.pvr_bo);
|
||||
vk_free2(&device->vk.alloc, allocator, state->data_section);
|
||||
pvr_bo_suballoc_free(pds_cs_program->pvr_bo);
|
||||
vk_free2(&device->vk.alloc, allocator, pds_cs_data_section);
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
|
|
@ -917,10 +860,28 @@ static void pvr_pipeline_finish(struct pvr_device *device,
|
|||
#define PVR_DEV_ADDR_SIZE_IN_SH_REGS \
|
||||
DIV_ROUND_UP(sizeof(pvr_dev_addr_t), sizeof(uint32_t))
|
||||
|
||||
static void pvr_preprocess_shader_data(pco_data *data,
|
||||
nir_shader *nir,
|
||||
const void *pCreateInfo,
|
||||
struct vk_pipeline_layout *layout);
|
||||
|
||||
static void pvr_postprocess_shader_data(pco_data *data,
|
||||
nir_shader *nir,
|
||||
const void *pCreateInfo,
|
||||
struct vk_pipeline_layout *layout);
|
||||
|
||||
/******************************************************************************
|
||||
Compute pipeline functions
|
||||
******************************************************************************/
|
||||
|
||||
static void
|
||||
pvr_compute_state_save(struct pvr_compute_pipeline *compute_pipeline,
|
||||
pco_shader *cs)
|
||||
{
|
||||
const pco_data *shader_data = pco_shader_data(cs);
|
||||
memcpy(&compute_pipeline->cs_data, shader_data, sizeof(*shader_data));
|
||||
}
|
||||
|
||||
/* Compiles and uploads shaders and PDS programs. */
|
||||
static VkResult pvr_compute_pipeline_compile(
|
||||
struct pvr_device *const device,
|
||||
|
|
@ -930,71 +891,76 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
struct pvr_compute_pipeline *const compute_pipeline)
|
||||
{
|
||||
struct vk_pipeline_layout *layout = compute_pipeline->base.layout;
|
||||
uint32_t work_group_input_regs[PVR_WORKGROUP_DIMENSIONS];
|
||||
uint32_t local_input_regs[PVR_WORKGROUP_DIMENSIONS];
|
||||
uint32_t barrier_coefficient;
|
||||
uint32_t usc_temps;
|
||||
const uint32_t cache_line_size =
|
||||
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
|
||||
pco_ctx *pco_ctx = device->pdevice->pco_ctx;
|
||||
void *shader_mem_ctx = ralloc_context(NULL);
|
||||
pco_data shader_data = { 0 };
|
||||
nir_shader *nir;
|
||||
pco_shader *cs;
|
||||
|
||||
struct pvr_compute_shader_state *compute_state =
|
||||
&compute_pipeline->shader_state;
|
||||
|
||||
VkResult result;
|
||||
|
||||
compute_pipeline->shader_state.const_shared_reg_count = 0;
|
||||
result =
|
||||
vk_pipeline_shader_stage_to_nir(&device->vk,
|
||||
compute_pipeline->base.pipeline_flags,
|
||||
&pCreateInfo->stage,
|
||||
pco_spirv_options(),
|
||||
pco_nir_options(),
|
||||
shader_mem_ctx,
|
||||
&nir);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_build_context;
|
||||
|
||||
/* FIXME: Compile and upload the shader. */
|
||||
/* FIXME: Initialize the shader state and setup build info. */
|
||||
UNREACHABLE("finishme: compute support");
|
||||
pco_preprocess_nir(pco_ctx, nir);
|
||||
pvr_preprocess_shader_data(&shader_data, nir, pCreateInfo, layout);
|
||||
pco_lower_nir(pco_ctx, nir, &shader_data);
|
||||
pco_postprocess_nir(pco_ctx, nir, &shader_data);
|
||||
pvr_postprocess_shader_data(&shader_data, nir, pCreateInfo, layout);
|
||||
|
||||
cs = pco_trans_nir(pco_ctx, nir, &shader_data, shader_mem_ctx);
|
||||
if (!cs) {
|
||||
result = VK_ERROR_INITIALIZATION_FAILED;
|
||||
goto err_free_build_context;
|
||||
}
|
||||
|
||||
pco_process_ir(pco_ctx, cs);
|
||||
pco_encode_ir(pco_ctx, cs);
|
||||
|
||||
pvr_compute_state_save(compute_pipeline, cs);
|
||||
|
||||
result = pvr_gpu_upload_usc(device,
|
||||
pco_shader_binary_data(cs),
|
||||
pco_shader_binary_size(cs),
|
||||
cache_line_size,
|
||||
&compute_pipeline->shader_state.bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_build_context;
|
||||
|
||||
result = pvr_pds_descriptor_program_create_and_upload(
|
||||
device,
|
||||
allocator,
|
||||
layout,
|
||||
MESA_SHADER_COMPUTE,
|
||||
NULL,
|
||||
&compute_pipeline->cs_data,
|
||||
&compute_pipeline->descriptor_state);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_shader;
|
||||
|
||||
result = pvr_pds_compute_program_create_and_upload(
|
||||
device,
|
||||
allocator,
|
||||
local_input_regs,
|
||||
work_group_input_regs,
|
||||
barrier_coefficient,
|
||||
usc_temps,
|
||||
compute_pipeline->shader_state.bo->dev_addr,
|
||||
&compute_pipeline->primary_program,
|
||||
&compute_pipeline->primary_program_info);
|
||||
result = pvr_pds_compute_program_create_and_upload(device,
|
||||
allocator,
|
||||
compute_state,
|
||||
compute_pipeline);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_descriptor_program;
|
||||
|
||||
/* If the workgroup ID is required, then we require the base workgroup
|
||||
* variant of the PDS compute program as well.
|
||||
*/
|
||||
compute_pipeline->flags.base_workgroup =
|
||||
work_group_input_regs[0] != PVR_PDS_REG_UNUSED ||
|
||||
work_group_input_regs[1] != PVR_PDS_REG_UNUSED ||
|
||||
work_group_input_regs[2] != PVR_PDS_REG_UNUSED;
|
||||
|
||||
if (compute_pipeline->flags.base_workgroup) {
|
||||
result = pvr_pds_compute_base_workgroup_variant_program_init(
|
||||
device,
|
||||
allocator,
|
||||
local_input_regs,
|
||||
work_group_input_regs,
|
||||
barrier_coefficient,
|
||||
usc_temps,
|
||||
compute_pipeline->shader_state.bo->dev_addr,
|
||||
&compute_pipeline->primary_base_workgroup_variant_program);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_destroy_compute_program;
|
||||
}
|
||||
ralloc_free(shader_mem_ctx);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
err_destroy_compute_program:
|
||||
pvr_pds_compute_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->primary_program,
|
||||
&compute_pipeline->primary_program_info);
|
||||
|
||||
err_free_descriptor_program:
|
||||
pvr_pds_descriptor_program_destroy(device,
|
||||
allocator,
|
||||
|
|
@ -1003,6 +969,8 @@ err_free_descriptor_program:
|
|||
err_free_shader:
|
||||
pvr_bo_suballoc_free(compute_pipeline->shader_state.bo);
|
||||
|
||||
err_free_build_context:
|
||||
ralloc_free(shader_mem_ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -1067,27 +1035,24 @@ pvr_compute_pipeline_create(struct pvr_device *device,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void pvr_pipeline_destroy_shader_data(pco_data *data);
|
||||
|
||||
static void pvr_compute_pipeline_destroy(
|
||||
struct pvr_device *const device,
|
||||
const VkAllocationCallbacks *const allocator,
|
||||
struct pvr_compute_pipeline *const compute_pipeline)
|
||||
{
|
||||
if (compute_pipeline->flags.base_workgroup) {
|
||||
pvr_pds_compute_base_workgroup_variant_program_finish(
|
||||
device,
|
||||
allocator,
|
||||
&compute_pipeline->primary_base_workgroup_variant_program);
|
||||
}
|
||||
|
||||
pvr_pds_compute_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->primary_program,
|
||||
&compute_pipeline->primary_program_info);
|
||||
&compute_pipeline->pds_cs_program,
|
||||
compute_pipeline->pds_cs_data_section);
|
||||
pvr_pds_descriptor_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->descriptor_state);
|
||||
pvr_bo_suballoc_free(compute_pipeline->shader_state.bo);
|
||||
|
||||
pvr_pipeline_destroy_shader_data(&compute_pipeline->cs_data);
|
||||
|
||||
pvr_pipeline_finish(device, &compute_pipeline->base);
|
||||
|
||||
vk_free2(&device->vk.alloc, allocator, compute_pipeline);
|
||||
|
|
@ -1915,6 +1880,58 @@ static void pvr_setup_fs_input_attachments(
|
|||
pvr_finishme("pvr_setup_fs_input_attachments");
|
||||
}
|
||||
|
||||
static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir)
|
||||
{
|
||||
BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
|
||||
BITSET_COPY(system_values_read, nir->info.system_values_read);
|
||||
|
||||
gl_system_value vtxin_sys_vals[] = {
|
||||
SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
};
|
||||
|
||||
gl_system_value coeff_sys_vals[] = {
|
||||
SYSTEM_VALUE_WORKGROUP_ID,
|
||||
SYSTEM_VALUE_NUM_WORKGROUPS,
|
||||
};
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(vtxin_sys_vals); ++u) {
|
||||
if (BITSET_TEST(system_values_read, vtxin_sys_vals[u])) {
|
||||
nir_intrinsic_op op =
|
||||
nir_intrinsic_from_system_value(vtxin_sys_vals[u]);
|
||||
unsigned dwords = nir_intrinsic_infos[op].dest_components;
|
||||
assert(dwords > 0);
|
||||
|
||||
allocate_val(data->common.sys_vals,
|
||||
&data->common.vtxins,
|
||||
vtxin_sys_vals[u],
|
||||
dwords);
|
||||
|
||||
BITSET_CLEAR(system_values_read, vtxin_sys_vals[u]);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(coeff_sys_vals); ++u) {
|
||||
if (BITSET_TEST(system_values_read, coeff_sys_vals[u])) {
|
||||
nir_intrinsic_op op =
|
||||
nir_intrinsic_from_system_value(coeff_sys_vals[u]);
|
||||
unsigned dwords = nir_intrinsic_infos[op].dest_components;
|
||||
assert(dwords > 0);
|
||||
|
||||
if (dwords > 1 && data->common.coeffs & 1)
|
||||
++data->common.coeffs;
|
||||
|
||||
allocate_val(data->common.sys_vals,
|
||||
&data->common.coeffs,
|
||||
coeff_sys_vals[u],
|
||||
dwords);
|
||||
|
||||
BITSET_CLEAR(system_values_read, coeff_sys_vals[u]);
|
||||
}
|
||||
}
|
||||
|
||||
assert(BITSET_IS_EMPTY(system_values_read));
|
||||
}
|
||||
|
||||
static void pvr_init_descriptors(pco_data *data,
|
||||
nir_shader *nir,
|
||||
struct vk_pipeline_layout *layout)
|
||||
|
|
@ -1985,27 +2002,28 @@ static void pvr_setup_descriptors(pco_data *data,
|
|||
assert(data->common.shareds < 256);
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_preprocess_shader_data(pco_data *data,
|
||||
nir_shader *nir,
|
||||
const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||
struct vk_pipeline_layout *layout)
|
||||
static void pvr_preprocess_shader_data(pco_data *data,
|
||||
nir_shader *nir,
|
||||
const void *pCreateInfo,
|
||||
struct vk_pipeline_layout *layout)
|
||||
{
|
||||
const VkGraphicsPipelineCreateInfo *pGraphicsCreateInfo = pCreateInfo;
|
||||
|
||||
switch (nir->info.stage) {
|
||||
case MESA_SHADER_VERTEX: {
|
||||
const VkPipelineVertexInputStateCreateInfo *const vertex_input_state =
|
||||
pCreateInfo->pVertexInputState;
|
||||
pGraphicsCreateInfo->pVertexInputState;
|
||||
|
||||
pvr_init_vs_attribs(data, vertex_input_state);
|
||||
break;
|
||||
}
|
||||
|
||||
case MESA_SHADER_FRAGMENT: {
|
||||
PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
|
||||
PVR_FROM_HANDLE(pvr_render_pass, pass, pGraphicsCreateInfo->renderPass);
|
||||
const struct pvr_render_subpass *const subpass =
|
||||
&pass->subpasses[pCreateInfo->subpass];
|
||||
&pass->subpasses[pGraphicsCreateInfo->subpass];
|
||||
const struct pvr_renderpass_hw_map *subpass_map =
|
||||
&pass->hw_setup->subpass_map[pCreateInfo->subpass];
|
||||
&pass->hw_setup->subpass_map[pGraphicsCreateInfo->subpass];
|
||||
const struct pvr_renderpass_hwsetup_subpass *hw_subpass =
|
||||
&pass->hw_setup->renders[subpass_map->render]
|
||||
.subpasses[subpass_map->subpass];
|
||||
|
|
@ -2017,6 +2035,10 @@ pvr_preprocess_shader_data(pco_data *data,
|
|||
break;
|
||||
}
|
||||
|
||||
case MESA_SHADER_COMPUTE: {
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
|
@ -2026,12 +2048,13 @@ pvr_preprocess_shader_data(pco_data *data,
|
|||
/* TODO: common things, like large constants being put into shareds. */
|
||||
}
|
||||
|
||||
static void
|
||||
pvr_postprocess_shader_data(pco_data *data,
|
||||
nir_shader *nir,
|
||||
const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||
struct vk_pipeline_layout *layout)
|
||||
static void pvr_postprocess_shader_data(pco_data *data,
|
||||
nir_shader *nir,
|
||||
const void *pCreateInfo,
|
||||
struct vk_pipeline_layout *layout)
|
||||
{
|
||||
const VkGraphicsPipelineCreateInfo *pGraphicsCreateInfo = pCreateInfo;
|
||||
|
||||
switch (nir->info.stage) {
|
||||
case MESA_SHADER_VERTEX: {
|
||||
pvr_alloc_vs_sysvals(data, nir);
|
||||
|
|
@ -2041,11 +2064,11 @@ pvr_postprocess_shader_data(pco_data *data,
|
|||
}
|
||||
|
||||
case MESA_SHADER_FRAGMENT: {
|
||||
PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
|
||||
PVR_FROM_HANDLE(pvr_render_pass, pass, pGraphicsCreateInfo->renderPass);
|
||||
const struct pvr_render_subpass *const subpass =
|
||||
&pass->subpasses[pCreateInfo->subpass];
|
||||
&pass->subpasses[pGraphicsCreateInfo->subpass];
|
||||
const struct pvr_renderpass_hw_map *subpass_map =
|
||||
&pass->hw_setup->subpass_map[pCreateInfo->subpass];
|
||||
&pass->hw_setup->subpass_map[pGraphicsCreateInfo->subpass];
|
||||
const struct pvr_renderpass_hwsetup_subpass *hw_subpass =
|
||||
&pass->hw_setup->renders[subpass_map->render]
|
||||
.subpasses[subpass_map->subpass];
|
||||
|
|
@ -2059,6 +2082,11 @@ pvr_postprocess_shader_data(pco_data *data,
|
|||
break;
|
||||
}
|
||||
|
||||
case MESA_SHADER_COMPUTE: {
|
||||
pvr_alloc_cs_sysvals(data, nir);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -862,16 +862,6 @@ struct pvr_pipeline_stage_state {
|
|||
struct pvr_compute_shader_state {
|
||||
/* Pointer to a buffer object that contains the shader binary. */
|
||||
struct pvr_suballoc_bo *bo;
|
||||
|
||||
bool uses_atomic_ops;
|
||||
bool uses_barrier;
|
||||
/* E.g. GLSL shader uses gl_NumWorkGroups. */
|
||||
bool uses_num_workgroups;
|
||||
|
||||
uint32_t const_shared_reg_count;
|
||||
uint32_t input_register_count;
|
||||
uint32_t work_size;
|
||||
uint32_t coefficient_register_count;
|
||||
};
|
||||
|
||||
struct pvr_vertex_shader_state {
|
||||
|
|
@ -910,28 +900,17 @@ struct pvr_pipeline {
|
|||
struct pvr_compute_pipeline {
|
||||
struct pvr_pipeline base;
|
||||
|
||||
pco_data cs_data;
|
||||
|
||||
struct pvr_compute_shader_state shader_state;
|
||||
|
||||
struct {
|
||||
uint32_t base_workgroup : 1;
|
||||
} flags;
|
||||
|
||||
struct pvr_stage_allocation_descriptor_state descriptor_state;
|
||||
|
||||
struct pvr_pds_upload primary_program;
|
||||
struct pvr_pds_info primary_program_info;
|
||||
struct pvr_pds_upload pds_cs_program;
|
||||
struct pvr_pds_info pds_cs_program_info;
|
||||
|
||||
struct pvr_pds_base_workgroup_program {
|
||||
struct pvr_pds_upload code_upload;
|
||||
|
||||
uint32_t *data_section;
|
||||
/* Offset within the PDS data section at which the base workgroup id
|
||||
* resides.
|
||||
*/
|
||||
uint32_t base_workgroup_data_patching_offset;
|
||||
|
||||
struct pvr_pds_info info;
|
||||
} primary_base_workgroup_variant_program;
|
||||
uint32_t *pds_cs_data_section;
|
||||
uint32_t base_workgroup_data_patching_offset;
|
||||
uint32_t num_workgroups_data_patching_offset;
|
||||
};
|
||||
|
||||
struct pvr_graphics_pipeline {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue