pvr, pco: implement prerequisites for sampleRateShading

- Implement load_interpolated_input and friends.
- Optimize load_barycentric_* cases that can be simplified.
- Initial support for non-standard sample locations.

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37540>
This commit is contained in:
Simon Perretta 2025-08-27 15:06:46 +01:00
parent 83aecc8f3f
commit b111b8a844
11 changed files with 506 additions and 8 deletions

View file

@ -2790,3 +2790,10 @@ intrinsic("dma_ld_shregs_pco", src_comp=[2, 1, 1], bit_sizes=[32])
# dma_st_shregs_pco(address, burst_len, shreg_offset, emit_idf)
intrinsic("dma_st_shregs_pco", src_comp=[2, 1, 1], indices=[FLAGS], bit_sizes=[32])
intrinsic("load_tile_coord_pco", dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[COMPONENT], bit_sizes=[32])
intrinsic("load_fs_coeffs_pco", dest_comp=3, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[COMPONENT, IO_SEMANTICS], bit_sizes=[32])
# load_packed_sample_location_pco(dword_index)
intrinsic("load_packed_sample_location_pco", src_comp=[1], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])

View file

@ -1446,6 +1446,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
case nir_intrinsic_load_input_vertex:
case nir_intrinsic_load_coefficients_agx:
case nir_intrinsic_load_attribute_pan:
case nir_intrinsic_load_fs_coeffs_pco:
mode = nir_var_shader_in;
break;

View file

@ -104,6 +104,8 @@ typedef struct _pco_fs_data {
pco_range tile_buffers;
pco_range sample_locations;
struct {
bool w; /**< Whether the shader uses pos.w. */
bool z; /**< Whether the shader uses pos.z. */
@ -114,6 +116,7 @@ typedef struct _pco_fs_data {
bool discard;
bool early_frag;
bool sample_shading;
bool sample_locations;
bool alpha_to_coverage;
bool olchk_skip;
} uses;

View file

@ -1764,6 +1764,7 @@ bool pco_nir_lower_clip_cull_vars(nir_shader *shader);
bool pco_nir_lower_demote_samples(nir_shader *shader);
bool pco_nir_lower_fs_intrinsics(nir_shader *shader);
bool pco_nir_lower_images(nir_shader *shader, pco_data *data);
bool pco_nir_lower_interpolation(nir_shader *shader, pco_fs_data *fs);
bool pco_nir_lower_io(nir_shader *shader);
bool pco_nir_lower_tex(nir_shader *shader, pco_data *data);
bool pco_nir_lower_variables(nir_shader *shader, bool inputs, bool outputs);

View file

@ -64,6 +64,8 @@ static const nir_shader_compiler_options nir_options = {
.lower_pack_64_2x32_split = true,
.lower_unpack_64_2x32_split = true,
.lower_interpolate_at = true,
.max_unroll_iterations = 16,
.io_options = nir_io_vectorizer_ignores_types,
@ -199,7 +201,8 @@ static bool gather_fs_data_pass(struct nir_builder *b,
switch (intr->intrinsic) {
/* Check whether the shader accesses z/w. */
case nir_intrinsic_load_input: {
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input: {
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
if (io_semantics.location != VARYING_SLOT_POS)
return false;
@ -213,6 +216,19 @@ static bool gather_fs_data_pass(struct nir_builder *b,
break;
}
case nir_intrinsic_load_fs_coeffs_pco: {
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
b->shader->info.inputs_read |= BITFIELD64_BIT(io_semantics.location);
if (io_semantics.location != VARYING_SLOT_POS)
return false;
unsigned component = nir_intrinsic_component(intr);
data->fs.uses.z |= (component == 2);
data->fs.uses.w |= (component == 3);
break;
}
case nir_intrinsic_load_blend_const_color_rgba:
data->fs.blend_consts_needed |= PIPE_MASK_RGBA;
break;
@ -860,6 +876,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
.discard_ok = true,
};
NIR_PASS(_, nir, nir_opt_peephole_select, &peep_opts);
NIR_PASS(_, nir, pco_nir_lower_interpolation, &data->fs);
NIR_PASS(_, nir, pco_nir_pfo, &data->fs);
NIR_PASS(_, nir, pco_nir_lower_fs_intrinsics);
} else if (nir->info.stage == MESA_SHADER_VERTEX) {

View file

@ -1257,3 +1257,280 @@ bool pco_nir_link_clip_cull_vars(nir_shader *producer, nir_shader *consumer)
return true;
}
/**
 * \brief Lowers a load_barycentric_at_sample intrinsic.
 *
 * Simple cases are rewritten directly to load_barycentric_sample; the general
 * case converts the sample id to a pixel-relative offset and re-emits the
 * intrinsic as load_barycentric_at_offset (which lower_interp handles later).
 *
 * \param[in] b NIR builder, cursor already placed before \p intr.
 * \param[in] intr The load_barycentric_at_sample intrinsic.
 * \return Always true (the intrinsic is always replaced).
 */
static bool lower_bary_at_sample(nir_builder *b, nir_intrinsic_instr *intr)
{
   /* Check for and handle simple replacement cases:
    * - Flat interpolation - don't care about sample num, will get consumed.
    * - Sample num is current sample.
    */
   enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(intr);
   /* NULL if src[0] isn't produced by an intrinsic (e.g. a constant). */
   nir_intrinsic_instr *sample = nir_src_as_intrinsic(intr->src[0]);
   if (interp_mode == INTERP_MODE_FLAT ||
       (sample && sample->intrinsic == nir_intrinsic_load_sample_id)) {
      nir_def *repl = nir_load_barycentric_sample(
         b,
         intr->def.bit_size,
         .interp_mode = nir_intrinsic_interp_mode(intr));

      nir_def_replace(&intr->def, repl);
      nir_instr_free(&intr->instr);
      return true;
   }

   /* Turn the sample id into a position. */
   nir_def *offset =
      nir_load_sample_pos_from_id(b, intr->def.bit_size, intr->src[0].ssa);
   /* Sample positions are in [0,1); re-center to a pixel-relative offset. */
   offset = nir_fadd_imm(b, offset, -0.5f);

   nir_def *repl = nir_load_barycentric_at_offset(
      b,
      intr->def.bit_size,
      offset,
      .interp_mode = nir_intrinsic_interp_mode(intr));

   nir_def_replace(&intr->def, repl);
   nir_instr_free(&intr->instr);
   return true;
}
/**
 * \brief Checks whether a source is a vec2 of (load_sample_pos.xy - 0.5).
 *
 * Matches both fadd(sample_pos, -0.5) and fsub(sample_pos, 0.5) forms,
 * chasing movs on each vec2 component, and verifies that component u of the
 * vec2 references component u of load_sample_pos.
 *
 * \param[in] src The source to check.
 * \return True if the source matches the pattern.
 */
static bool src_is_vec2_sample_pos_minus_half(nir_src src)
{
   nir_alu_instr *alu = nir_src_as_alu_instr(src);
   if (!alu || alu->op != nir_op_vec2)
      return false;

   /* Check both vec2 components. */
   for (unsigned u = 0; u < 2; ++u) {
      nir_scalar comp = nir_get_scalar(&alu->def, u);
      comp = nir_scalar_chase_movs(comp);
      if (!nir_scalar_is_alu(comp))
         return false;

      /* Look for fadd(sample_pos.x/y, -0.5f) or fsub(sample_pos.x/y, +0.5f) */
      nir_op op = nir_scalar_alu_op(comp);
      if (op != nir_op_fadd && op != nir_op_fsub)
         return false;

      /* Exact float compare is intentional: we only match literal ±0.5. */
      float half_val = op == nir_op_fadd ? -0.5f : +0.5f;

      unsigned sample_pos_srcn = ~0U;
      unsigned half_srcn = ~0U;

      /* Check both fadd/fsub sources. Renamed from 'src' to avoid shadowing
       * the function parameter (-Wshadow).
       */
      for (unsigned n = 0; n < 2; ++n) {
         nir_scalar alu_src = nir_scalar_chase_alu_src(comp, n);
         if (nir_scalar_is_intrinsic(alu_src) &&
             nir_scalar_intrinsic_op(alu_src) ==
                nir_intrinsic_load_sample_pos) {
            sample_pos_srcn = n;
         } else if (nir_scalar_is_const(alu_src) &&
                    nir_scalar_as_const_value(alu_src).f32 == half_val) {
            half_srcn = n;
         }
      }

      /* One or more operands not found. */
      if (sample_pos_srcn == ~0U || half_srcn == ~0U)
         return false;

      /* fsub is not commutative. */
      if (op == nir_op_fsub && (sample_pos_srcn != 0 || half_srcn != 1))
         return false;

      /* vec2.{x,y} needs to be referencing load_sample_pos.{x,y}. */
      nir_scalar sample_pos_src =
         nir_scalar_chase_alu_src(comp, sample_pos_srcn);
      if (sample_pos_src.comp != u)
         return false;
   }

   return true;
}
/**
 * \brief Lowers a load_barycentric_at_offset intrinsic where possible.
 *
 * Simple replacement cases:
 * - Flat interpolation - the offset is irrelevant and will get consumed.
 * - A constant zero offset - equivalent to load_barycentric_pixel.
 * - An offset of (sample_pos - 0.5f) - equivalent to load_barycentric_sample.
 *
 * Non-zero offsets are left in place and handled in lower_interp.
 *
 * \param[in] b NIR builder, cursor already placed before \p intr.
 * \param[in] intr The load_barycentric_at_offset intrinsic.
 * \return True if the intrinsic was replaced.
 */
static bool lower_bary_at_offset(nir_builder *b, nir_intrinsic_instr *intr)
{
   const enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(intr);
   nir_src offset_src = intr->src[0];

   const bool zero_offset = nir_src_is_const(offset_src) &&
                            !nir_src_comp_as_int(offset_src, 0) &&
                            !nir_src_comp_as_int(offset_src, 1);

   nir_def *repl = NULL;
   if (interp_mode == INTERP_MODE_FLAT || zero_offset) {
      repl = nir_load_barycentric_pixel(b,
                                        intr->def.bit_size,
                                        .interp_mode = interp_mode);
   } else if (src_is_vec2_sample_pos_minus_half(offset_src)) {
      repl = nir_load_barycentric_sample(b,
                                         intr->def.bit_size,
                                         .interp_mode = interp_mode);
   }

   /* Non-zero offsets handled in lower_interp. */
   if (!repl)
      return false;

   nir_def_replace(&intr->def, repl);
   nir_instr_free(&intr->instr);
   return true;
}
/**
 * \brief Intrinsic-pass callback dispatching barycentric lowering.
 *
 * \param[in] b NIR builder.
 * \param[in] intr The intrinsic being visited.
 * \param[in] cb_data Unused callback data.
 * \return True if the intrinsic was lowered.
 */
static bool
lower_bary(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *cb_data)
{
   b->cursor = nir_before_instr(&intr->instr);

   if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
      return lower_bary_at_sample(b, intr);

   if (intr->intrinsic == nir_intrinsic_load_barycentric_at_offset)
      return lower_bary_at_offset(b, intr);

   return false;
}
/**
 * \brief Emits ALU code evaluating an iterator plane equation at a coordinate.
 *
 * Loads the (a, b, c) coefficient set for the given input component and
 * computes a * x + b * y + c at the provided coordinates.
 *
 * \param[in] b NIR builder.
 * \param[in] coords vec2 of coordinates to evaluate at.
 * \param[in] component Input component to fetch coefficients for.
 * \param[in] io_semantics IO semantics identifying the input.
 * \return The evaluated scalar.
 */
static nir_def *alu_iter(nir_builder *b,
                         nir_def *coords,
                         unsigned component,
                         struct nir_io_semantics io_semantics)
{
   nir_def *coeffs = nir_load_fs_coeffs_pco(b,
                                            .component = component,
                                            .io_semantics = io_semantics);

   nir_def *coeff_a = nir_channel(b, coeffs, 0);
   nir_def *coeff_b = nir_channel(b, coeffs, 1);
   nir_def *coeff_c = nir_channel(b, coeffs, 2);
   nir_def *x = nir_channel(b, coords, 0);
   nir_def *y = nir_channel(b, coords, 1);

   /* a * x + (b * y + c), as two fused multiply-adds. */
   return nir_ffma(b, coeff_a, x, nir_ffma(b, coeff_b, y, coeff_c));
}
/**
 * \brief Lowers load_sample_pos/load_sample_pos_from_id to a table lookup.
 *
 * Sample locations are packed as one byte per sample (4-bit x in the low
 * nibble, 4-bit y in the high nibble, both in 1/16ths of a pixel), with the
 * table for N samples starting at byte offset N - so 2x occupies bytes 2-3
 * of dword 0, 4x bytes 4-7 (dword 1), 8x bytes 8-15 (dwords 2-3).
 *
 * \param[in] b NIR builder.
 * \param[in] intr The intrinsic to lower.
 * \param[in,out] fs Fragment shader data; uses.sample_locations is set.
 * \return Always true (the intrinsic is always replaced).
 */
static bool
lower_sample_pos(nir_builder *b, nir_intrinsic_instr *intr, pco_fs_data *fs)
{
   b->cursor = nir_before_instr(&intr->instr);

   /* Sample count, recovered as the popcount of a full coverage mask. */
   nir_def *msaa_samples = nir_bit_count(
      b,
      nir_u2u32(b, nir_alpha_to_coverage(b, nir_imm_float(b, 1.0f))));

   /* load_sample_pos uses the current sample; _from_id takes it as a src. */
   nir_def *sample_id = intr->intrinsic == nir_intrinsic_load_sample_pos
                           ? nir_load_sample_id(b)
                           : intr->src[0].ssa;

   /* Byte offset into the table is (sample_count + sample_id); split it into
    * a dword index for the load and a byte index for the extract.
    */
   nir_def *dword_index =
      nir_ishr_imm(b, nir_iadd(b, msaa_samples, sample_id), 2);
   nir_def *packed_sample_location =
      nir_load_packed_sample_location_pco(b, dword_index);
   fs->uses.sample_locations = true;

   /* NOTE: sample_id & 3 (rather than (count + id) & 3) matches the table
    * layout above, since each per-count table starts dword-aligned or at an
    * offset equal to its sample count.
    */
   nir_def *byte_index = nir_iand_imm(b, sample_id, 0b11);
   packed_sample_location =
      nir_extract_u8(b, packed_sample_location, byte_index);

   /* Unpack the two 4-bit sub-pixel coordinates. */
   nir_def *sample_location =
      nir_vec2(b,
               nir_ubitfield_extract_imm(b, packed_sample_location, 0, 4),
               nir_ubitfield_extract_imm(b, packed_sample_location, 4, 4));

   /* Convert from 1/16th-of-a-pixel fixed point to float [0, 1). */
   sample_location = nir_u2f32(b, sample_location);
   sample_location = nir_fdiv_imm(b, sample_location, 16.0f);

   /* Single-sampled: pixel center, bypassing the table. */
   sample_location = nir_bcsel(b,
                               nir_ieq_imm(b, msaa_samples, 1),
                               nir_imm_vec2(b, 0.5f, 0.5f),
                               sample_location);

   nir_def_replace(&intr->def, sample_location);
   nir_instr_free(&intr->instr);

   return true;
}
/**
 * \brief Lowers sample-position loads and offset-interpolated inputs.
 *
 * load_interpolated_input fed by load_barycentric_at_offset is replaced by
 * manual ALU interpolation: the plane equations are evaluated at
 * (tile coord + offset), with a perspective divide by the iterated
 * position-w term unless the interpolation is noperspective.
 *
 * \param[in] b NIR builder.
 * \param[in] intr The intrinsic being visited.
 * \param[in] cb_data The pco_fs_data for this shader.
 * \return True if the intrinsic was lowered.
 */
static bool
lower_interp(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data)
{
   pco_fs_data *fs = cb_data;
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_from_id:
      return lower_sample_pos(b, intr, fs);

   case nir_intrinsic_load_interpolated_input:
      break;

   default:
      return false;
   }

   nir_intrinsic_instr *bary = nir_src_as_intrinsic(intr->src[0]);
   assert(bary);

   /* Skip cases that don't need handling. */
   if (bary->intrinsic != nir_intrinsic_load_barycentric_at_offset)
      return false;

   /* Indirect input offsets are not supported here. */
   assert(nir_src_as_uint(intr->src[1]) == 0);

   /* Evaluation point = tile coordinate + interpolation offset.
    * NOTE(review): assumes load_tile_coord_pco yields float-compatible
    * values for the fadd - confirm against the backend translation.
    */
   nir_def *coords = nir_load_tile_coord_pco(b, 2);
   coords = nir_fadd(b, coords, bary->src[0].ssa);

   enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(bary);

   /* Iterated position-w term (coefficients of VARYING_SLOT_POS.w),
    * used as the perspective-correction divisor below.
    */
   nir_def *rhw = alu_iter(b,
                           coords,
                           3,
                           (struct nir_io_semantics){
                              .location = VARYING_SLOT_POS,
                              .num_slots = 1,
                           });

   nir_def *comps[4];
   for (unsigned u = 0; u < intr->def.num_components; ++u) {
      comps[u] = alu_iter(b, coords, u, nir_intrinsic_io_semantics(intr));
      /* Perspective-correct modes divide by the iterated w term. */
      if (interp_mode != INTERP_MODE_NOPERSPECTIVE)
         comps[u] = nir_fdiv(b, comps[u], rhw);
   }

   nir_def *repl = nir_vec(b, comps, intr->def.num_components);

   nir_def_replace(&intr->def, repl);
   nir_instr_free(&intr->instr);

   return true;
}
/**
 * \brief Lowers interpolation-related intrinsics for the PCO backend.
 *
 * Runs the barycentric simplification pass first, then the interpolation
 * lowering pass; both always run regardless of earlier progress.
 *
 * \param[in] shader The shader to lower.
 * \param[in,out] fs Fragment shader data.
 * \return True if any lowering was performed.
 */
bool pco_nir_lower_interpolation(nir_shader *shader, pco_fs_data *fs)
{
   bool progress = nir_shader_intrinsics_pass(shader,
                                              lower_bary,
                                              nir_metadata_control_flow,
                                              NULL);

   progress |= nir_shader_intrinsics_pass(shader,
                                          lower_interp,
                                          nir_metadata_control_flow,
                                          fs);

   return progress;
}

View file

@ -343,8 +343,10 @@ static inline pco_instr *build_itr(pco_builder *b,
* \param[in] dest Instruction destination.
* \return The translated PCO instruction.
*/
static pco_instr *
trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
static pco_instr *trans_load_input_fs(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
bool interp)
{
pco_fs_data *fs_data = &tctx->shader->data.fs;
UNUSED unsigned base = nir_intrinsic_base(intr);
@ -352,7 +354,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
unsigned component = nir_intrinsic_component(intr);
unsigned chans = pco_ref_get_chans(dest);
const nir_src offset = intr->src[0];
const nir_src offset = interp ? intr->src[1] : intr->src[0];
assert(nir_src_as_uint(offset) == 0);
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
@ -362,6 +364,9 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
nir_var_shader_in,
location);
enum glsl_interp_mode interp_mode = var->data.interpolation;
/* Setup iteration mode. */
enum pco_itr_mode itr_mode = PCO_ITR_MODE_PIXEL;
assert(!(var->data.sample && var->data.centroid));
if (var->data.sample)
@ -369,6 +374,31 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
else if (var->data.centroid)
itr_mode = PCO_ITR_MODE_CENTROID;
/* Override if load_input_interpolated. */
if (interp) {
nir_intrinsic_instr *bary = nir_src_as_intrinsic(intr->src[0]);
assert(bary);
interp_mode = nir_intrinsic_interp_mode(bary);
switch (bary->intrinsic) {
case nir_intrinsic_load_barycentric_pixel:
itr_mode = PCO_ITR_MODE_PIXEL;
break;
case nir_intrinsic_load_barycentric_sample:
itr_mode = PCO_ITR_MODE_SAMPLE;
break;
case nir_intrinsic_load_barycentric_centroid:
itr_mode = PCO_ITR_MODE_CENTROID;
break;
default:
UNREACHABLE("");
}
}
if (location == VARYING_SLOT_POS) {
/* Only scalar supported for now. */
/* TODO: support vector for zw. */
@ -377,7 +407,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
/* TODO: support packing/partial vars. */
assert(!var->data.location_frac);
assert(var->data.interpolation == INTERP_MODE_NOPERSPECTIVE);
assert(interp_mode == INTERP_MODE_NOPERSPECTIVE);
/* Special case: x and y are loaded from special registers. */
switch (component) {
@ -423,7 +453,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
bool usc_itrsmp_enhanced =
PVR_HAS_FEATURE(tctx->pco_ctx->dev_info, usc_itrsmp_enhanced);
switch (var->data.interpolation) {
switch (interp_mode) {
case INTERP_MODE_SMOOTH: {
assert(fs_data->uses.w);
@ -469,6 +499,51 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
}
}
/**
 * \brief Translates a NIR load_fs_coeffs_pco intrinsic into PCO.
 *
 * Returns the (a, b, c) iterator coefficient set for an input component as a
 * move from coefficient registers.
 *
 * \param[in] tctx Translation context.
 * \param[in] intr The intrinsic to translate.
 * \param[in] dest Instruction destination.
 * \return The translated PCO instruction.
 */
static pco_instr *
trans_load_fs_coeffs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
{
   pco_fs_data *fs_data = &tctx->shader->data.fs;

   unsigned component = nir_intrinsic_component(intr);
   unsigned chans = pco_ref_get_chans(dest);

   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
   gl_varying_slot location = io_semantics.location;

   /* Special case, get z/w coeffs. */
   if (location == VARYING_SLOT_POS) {
      if (component == 2) {
         assert(fs_data->uses.z);
         /* z coefficients live at the start of the coefficient registers. */
         pco_ref zcoeffs = pco_ref_hwreg_vec(0, PCO_REG_CLASS_COEFF, chans);
         return pco_mov(&tctx->b, dest, zcoeffs, .rpt = chans);
      } else if (component == 3) {
         assert(fs_data->uses.w);
         /* w coefficients follow the z set if z is in use, else come first. */
         unsigned wcoeffs_index =
            fs_data->uses.z ? ROGUE_USC_COEFFICIENT_SET_SIZE : 0;
         pco_ref wcoeffs =
            pco_ref_hwreg_vec(wcoeffs_index, PCO_REG_CLASS_COEFF, chans);
         return pco_mov(&tctx->b, dest, wcoeffs, .rpt = chans);
      }

      /* Only z/w coefficient loads are expected for the position slot. */
      UNREACHABLE("");
   }

   const pco_range *range = &fs_data->varyings[location];
   assert(component + chans <= range->count);

   /* One coefficient set per component within the varying's range. */
   unsigned coeffs_index =
      range->start + (ROGUE_USC_COEFFICIENT_SET_SIZE * component);

   pco_ref coeffs = pco_ref_hwreg_vec(coeffs_index, PCO_REG_CLASS_COEFF, chans);
   return pco_mov(&tctx->b, dest, coeffs, .rpt = chans);
}
/**
* \brief Translates a NIR fs store_output intrinsic into PCO.
*
@ -1389,6 +1464,29 @@ trans_load_sysval(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
return pco_mov(&tctx->b, dest, src, .rpt = chans);
}
static pco_instr *trans_load_packed_sample_location(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
pco_ref offset_src)
{
unsigned idx_reg_num = 0;
pco_ref idx_reg =
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
pco_mov(&tctx->b, idx_reg, offset_src);
const pco_range *range = &tctx->shader->data.fs.sample_locations;
assert(range->count > 0);
/* TODO: add the start onto the offset src instead? */
assert(range->start < 256);
pco_ref src = pco_ref_hwreg(range->start, PCO_REG_CLASS_SHARED);
src = pco_ref_hwreg_idx_from(idx_reg_num, src);
return pco_mov(&tctx->b, dest, src);
}
static bool desc_set_binding_is_comb_img_smp(unsigned desc_set,
unsigned binding,
const pco_common_data *common)
@ -1691,11 +1789,22 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
if (tctx->stage == MESA_SHADER_VERTEX)
instr = trans_load_input_vs(tctx, intr, dest);
else if (tctx->stage == MESA_SHADER_FRAGMENT)
instr = trans_load_input_fs(tctx, intr, dest);
instr = trans_load_input_fs(tctx, intr, dest, false);
else
UNREACHABLE("Unsupported stage for \"nir_intrinsic_load_input\".");
break;
case nir_intrinsic_load_interpolated_input:
assert(tctx->stage == MESA_SHADER_FRAGMENT);
instr = trans_load_input_fs(tctx, intr, dest, true);
break;
/* Just consume. */
case nir_intrinsic_load_barycentric_pixel:
case nir_intrinsic_load_barycentric_sample:
case nir_intrinsic_load_barycentric_centroid:
return NULL;
case nir_intrinsic_store_output:
if (tctx->stage == MESA_SHADER_VERTEX)
instr = trans_store_output_vs(tctx, intr, src[0]);
@ -1705,6 +1814,10 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
UNREACHABLE("Unsupported stage for \"nir_intrinsic_store_output\".");
break;
case nir_intrinsic_load_fs_coeffs_pco:
instr = trans_load_fs_coeffs(tctx, intr, dest);
break;
case nir_intrinsic_uvsw_write_pco:
instr = trans_uvsw_write(tctx, intr, src[0], src[1]);
break;
@ -1799,6 +1912,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
&tctx->shader->data.fs.blend_consts);
break;
case nir_intrinsic_load_packed_sample_location_pco:
assert(tctx->stage == MESA_SHADER_FRAGMENT);
instr = trans_load_packed_sample_location(tctx, intr, dest, src[0]);
break;
case nir_intrinsic_load_shared:
assert(tctx->stage == MESA_SHADER_COMPUTE);
instr = trans_load_common_store(tctx,
@ -2096,6 +2214,22 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
pco_ref_hwreg(PCO_SR_FACE_ORIENT, PCO_REG_CLASS_SPEC));
break;
case nir_intrinsic_load_tile_coord_pco: {
assert(tctx->stage == MESA_SHADER_FRAGMENT);
pco_ref xy[] = {
pco_ref_hwreg(PCO_SR_TILE_X_P, PCO_REG_CLASS_SPEC),
pco_ref_hwreg(PCO_SR_TILE_Y_P, PCO_REG_CLASS_SPEC),
};
unsigned component = nir_intrinsic_component(intr);
unsigned chans = pco_ref_get_chans(dest);
assert(component + chans <= ARRAY_SIZE(xy));
instr = pco_vec(&tctx->b, dest, chans, &xy[component]);
break;
}
case nir_intrinsic_load_savmsk_vm_pco:
instr = pco_savmsk(&tctx->b,
dest,

View file

@ -909,6 +909,7 @@ struct pvr_pds_descriptor_set {
#define PVR_BUFFER_TYPE_TILE_BUFFERS (11)
#define PVR_BUFFER_TYPE_SPILL_INFO (12)
#define PVR_BUFFER_TYPE_SCRATCH_INFO (13)
#define PVR_BUFFER_TYPE_SAMPLE_LOCATIONS (14)
#define PVR_BUFFER_TYPE_INVALID (~0)
struct pvr_pds_buffer {

View file

@ -1584,7 +1584,8 @@ void pvr_pds_generate_descriptor_upload_program(
case PVR_BUFFER_TYPE_FS_META:
case PVR_BUFFER_TYPE_TILE_BUFFERS:
case PVR_BUFFER_TYPE_SPILL_INFO:
case PVR_BUFFER_TYPE_SCRATCH_INFO: {
case PVR_BUFFER_TYPE_SCRATCH_INFO:
case PVR_BUFFER_TYPE_SAMPLE_LOCATIONS: {
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
special_buffer_entry =

View file

@ -4127,6 +4127,33 @@ static VkResult pvr_setup_descriptor_mappings(
pds_info->data_size_in_dwords);
break;
}
case PVR_BUFFER_TYPE_SAMPLE_LOCATIONS: {
/* Standard sample locations. */
uint32_t packed_sample_locations[] = {
0x000044cc,
0xeaa26e26,
0x359db759,
0x1ffb71d3,
};
struct pvr_suballoc_bo *sample_locations_bo;
result =
pvr_cmd_buffer_upload_general(cmd_buffer,
&packed_sample_locations,
sizeof(packed_sample_locations),
&sample_locations_bo);
if (result != VK_SUCCESS)
return result;
PVR_WRITE(qword_buffer,
sample_locations_bo->dev_addr.addr,
special_buff_entry->const_offset,
pds_info->data_size_in_dwords);
break;
}
default:
UNREACHABLE("Unsupported special buffer type.");
}

View file

@ -656,6 +656,14 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
};
}
if (stage == MESA_SHADER_FRAGMENT && data->fs.sample_locations.count > 0) {
program.buffers[program.buffer_count++] = (struct pvr_pds_buffer){
.type = PVR_BUFFER_TYPE_SAMPLE_LOCATIONS,
.size_in_dwords = data->fs.sample_locations.count,
.destination = data->fs.sample_locations.start,
};
}
pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
@ -1835,6 +1843,16 @@ static void pvr_alloc_fs_sysvals(pco_data *data, nir_shader *nir)
gl_system_value builtin_sys_vals[] = {
SYSTEM_VALUE_SAMPLE_ID,
SYSTEM_VALUE_LAYER_ID,
SYSTEM_VALUE_SAMPLE_POS,
SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID,
SYSTEM_VALUE_BARYCENTRIC_LINEAR_COORD,
SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL,
SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE,
SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID,
SYSTEM_VALUE_BARYCENTRIC_PERSP_COORD,
SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE,
SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL,
};
for (unsigned u = 0; u < ARRAY_SIZE(builtin_sys_vals); ++u)
@ -2173,6 +2191,16 @@ static void pvr_setup_fs_tile_buffers(pco_data *data)
data->common.shareds += data->fs.tile_buffers.count;
}
static void pvr_setup_fs_sample_locations(pco_data *data)
{
if (!data->fs.uses.sample_locations)
return;
data->fs.sample_locations.start = data->common.shareds;
data->fs.sample_locations.count = 4; /* TODO */
data->common.shareds += data->fs.sample_locations.count;
}
static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir)
{
BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
@ -2544,6 +2572,7 @@ static void pvr_postprocess_shader_data(pco_data *data,
pvr_setup_fs_input_attachments(data, nir, subpass, hw_subpass);
pvr_setup_fs_blend(data);
pvr_setup_fs_tile_buffers(data);
pvr_setup_fs_sample_locations(data);
/* TODO: push consts, blend consts, dynamic state, etc. */
break;