mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
pvr, pco: implement prerequisites for sampleRateShading
- Implement load_interpolated_input and friends.
- Optimize load_barycentric_* cases that can be simplified.
- Initial support for non-standard sample locations.

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37540>
This commit is contained in:
parent
83aecc8f3f
commit
b111b8a844
11 changed files with 506 additions and 8 deletions
|
|
@ -2790,3 +2790,10 @@ intrinsic("dma_ld_shregs_pco", src_comp=[2, 1, 1], bit_sizes=[32])
|
|||
|
||||
# dma_st_shregs_pco(address, burst_len, shreg_offset, emit_idf)
intrinsic(
    "dma_st_shregs_pco",
    src_comp=[2, 1, 1],
    indices=[FLAGS],
    bit_sizes=[32],
)

# load_tile_coord_pco()
# Tile x/y coordinate; COMPONENT selects the first channel returned
# (0 = x, 1 = y) and the destination width selects how many follow.
intrinsic(
    "load_tile_coord_pco",
    dest_comp=0,
    flags=[CAN_ELIMINATE, CAN_REORDER],
    indices=[COMPONENT],
    bit_sizes=[32],
)

# load_fs_coeffs_pco()
# Three coefficients for one component (COMPONENT) of the fragment shader
# input identified by IO_SEMANTICS.
intrinsic(
    "load_fs_coeffs_pco",
    dest_comp=3,
    flags=[CAN_ELIMINATE, CAN_REORDER],
    indices=[COMPONENT, IO_SEMANTICS],
    bit_sizes=[32],
)

# load_packed_sample_location_pco(dword_index)
intrinsic(
    "load_packed_sample_location_pco",
    src_comp=[1],
    dest_comp=1,
    flags=[CAN_ELIMINATE, CAN_REORDER],
    bit_sizes=[32],
)
|
||||
|
|
|
|||
|
|
@ -1446,6 +1446,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
case nir_intrinsic_load_input_vertex:
|
||||
case nir_intrinsic_load_coefficients_agx:
|
||||
case nir_intrinsic_load_attribute_pan:
|
||||
case nir_intrinsic_load_fs_coeffs_pco:
|
||||
mode = nir_var_shader_in;
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -104,6 +104,8 @@ typedef struct _pco_fs_data {
|
|||
|
||||
pco_range tile_buffers;
|
||||
|
||||
pco_range sample_locations;
|
||||
|
||||
struct {
|
||||
bool w; /** Whether the shader uses pos.w. */
|
||||
bool z; /** Whether the shader uses pos.z */
|
||||
|
|
@ -114,6 +116,7 @@ typedef struct _pco_fs_data {
|
|||
bool discard;
|
||||
bool early_frag;
|
||||
bool sample_shading;
|
||||
bool sample_locations;
|
||||
bool alpha_to_coverage;
|
||||
bool olchk_skip;
|
||||
} uses;
|
||||
|
|
|
|||
|
|
@ -1764,6 +1764,7 @@ bool pco_nir_lower_clip_cull_vars(nir_shader *shader);
|
|||
bool pco_nir_lower_demote_samples(nir_shader *shader);
|
||||
bool pco_nir_lower_fs_intrinsics(nir_shader *shader);
|
||||
bool pco_nir_lower_images(nir_shader *shader, pco_data *data);
|
||||
bool pco_nir_lower_interpolation(nir_shader *shader, pco_fs_data *fs);
|
||||
bool pco_nir_lower_io(nir_shader *shader);
|
||||
bool pco_nir_lower_tex(nir_shader *shader, pco_data *data);
|
||||
bool pco_nir_lower_variables(nir_shader *shader, bool inputs, bool outputs);
|
||||
|
|
|
|||
|
|
@ -64,6 +64,8 @@ static const nir_shader_compiler_options nir_options = {
|
|||
.lower_pack_64_2x32_split = true,
|
||||
.lower_unpack_64_2x32_split = true,
|
||||
|
||||
.lower_interpolate_at = true,
|
||||
|
||||
.max_unroll_iterations = 16,
|
||||
|
||||
.io_options = nir_io_vectorizer_ignores_types,
|
||||
|
|
@ -199,7 +201,8 @@ static bool gather_fs_data_pass(struct nir_builder *b,
|
|||
|
||||
switch (intr->intrinsic) {
|
||||
/* Check whether the shader accesses z/w. */
|
||||
case nir_intrinsic_load_input: {
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_interpolated_input: {
|
||||
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
|
||||
if (io_semantics.location != VARYING_SLOT_POS)
|
||||
return false;
|
||||
|
|
@ -213,6 +216,19 @@ static bool gather_fs_data_pass(struct nir_builder *b,
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_fs_coeffs_pco: {
|
||||
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
|
||||
b->shader->info.inputs_read |= BITFIELD64_BIT(io_semantics.location);
|
||||
|
||||
if (io_semantics.location != VARYING_SLOT_POS)
|
||||
return false;
|
||||
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
data->fs.uses.z |= (component == 2);
|
||||
data->fs.uses.w |= (component == 3);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_blend_const_color_rgba:
|
||||
data->fs.blend_consts_needed |= PIPE_MASK_RGBA;
|
||||
break;
|
||||
|
|
@ -860,6 +876,7 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
|
|||
.discard_ok = true,
|
||||
};
|
||||
NIR_PASS(_, nir, nir_opt_peephole_select, &peep_opts);
|
||||
NIR_PASS(_, nir, pco_nir_lower_interpolation, &data->fs);
|
||||
NIR_PASS(_, nir, pco_nir_pfo, &data->fs);
|
||||
NIR_PASS(_, nir, pco_nir_lower_fs_intrinsics);
|
||||
} else if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
|
|
|
|||
|
|
@ -1257,3 +1257,280 @@ bool pco_nir_link_clip_cull_vars(nir_shader *producer, nir_shader *consumer)
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Lowers load_barycentric_at_sample.
 *
 * The intrinsic is always replaced:
 * - with load_barycentric_sample when the interpolation mode is flat (the
 *   sample number is irrelevant) or when the sample number comes straight
 *   from load_sample_id (i.e. it is the current sample);
 * - otherwise with load_barycentric_at_offset, using the position of the
 *   requested sample minus 0.5 as the offset.
 *
 * \param[in,out] b NIR builder; its cursor must already be positioned.
 * \param[in] intr The load_barycentric_at_sample intrinsic.
 * \return Always true (progress was made).
 */
static bool lower_bary_at_sample(nir_builder *b, nir_intrinsic_instr *intr)
{
   const enum glsl_interp_mode mode = nir_intrinsic_interp_mode(intr);
   const unsigned bit_size = intr->def.bit_size;

   nir_intrinsic_instr *id_producer = nir_src_as_intrinsic(intr->src[0]);
   const bool is_flat = mode == INTERP_MODE_FLAT;
   const bool is_current_sample =
      id_producer && id_producer->intrinsic == nir_intrinsic_load_sample_id;

   nir_def *new_bary;
   if (is_flat || is_current_sample) {
      /* Simple replacement, no position lookup required. */
      new_bary = nir_load_barycentric_sample(b, bit_size, .interp_mode = mode);
   } else {
      /* Convert the sample id into a position, then into an offset. */
      nir_def *sample_pos =
         nir_load_sample_pos_from_id(b, bit_size, intr->src[0].ssa);
      nir_def *sample_offset = nir_fadd_imm(b, sample_pos, -0.5f);

      new_bary = nir_load_barycentric_at_offset(b,
                                                bit_size,
                                                sample_offset,
                                                .interp_mode = mode);
   }

   nir_def_replace(&intr->def, new_bary);
   nir_instr_free(&intr->instr);

   return true;
}
|
||||
|
||||
static bool src_is_vec2_sample_pos_minus_half(nir_src src)
|
||||
{
|
||||
nir_alu_instr *alu = nir_src_as_alu_instr(src);
|
||||
if (!alu || alu->op != nir_op_vec2)
|
||||
return false;
|
||||
|
||||
/* Check both vec2 components. */
|
||||
for (unsigned u = 0; u < 2; ++u) {
|
||||
nir_scalar comp = nir_get_scalar(&alu->def, u);
|
||||
comp = nir_scalar_chase_movs(comp);
|
||||
|
||||
if (!nir_scalar_is_alu(comp))
|
||||
return false;
|
||||
|
||||
/* Look for fadd(sample_pos.x/y, -0.5f) or fsub(sample_pos.x/y, +0.5f) */
|
||||
nir_op op = nir_scalar_alu_op(comp);
|
||||
if (op != nir_op_fadd && op != nir_op_fsub)
|
||||
return false;
|
||||
|
||||
float half_val = op == nir_op_fadd ? -0.5f : +0.5f;
|
||||
unsigned sample_pos_srcn = ~0U;
|
||||
unsigned half_srcn = ~0U;
|
||||
|
||||
/* Check both fadd/fsub sources. */
|
||||
for (unsigned n = 0; n < 2; ++n) {
|
||||
nir_scalar src = nir_scalar_chase_alu_src(comp, n);
|
||||
|
||||
if (nir_scalar_is_intrinsic(src) &&
|
||||
nir_scalar_intrinsic_op(src) == nir_intrinsic_load_sample_pos) {
|
||||
sample_pos_srcn = n;
|
||||
} else if (nir_scalar_is_const(src) &&
|
||||
nir_scalar_as_const_value(src).f32 == half_val) {
|
||||
half_srcn = n;
|
||||
}
|
||||
}
|
||||
|
||||
/* One or more operands not found. */
|
||||
if (sample_pos_srcn == ~0U || half_srcn == ~0U)
|
||||
return false;
|
||||
|
||||
/* fsub is not commutative. */
|
||||
if (op == nir_op_fsub && (sample_pos_srcn != 0 || half_srcn != 1))
|
||||
return false;
|
||||
|
||||
/* vec2.{x,y} needs to be referencing load_sample_pos.{x,y}. */
|
||||
nir_scalar sample_pos_src =
|
||||
nir_scalar_chase_alu_src(comp, sample_pos_srcn);
|
||||
if (sample_pos_src.comp != u)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Lowers the load_barycentric_at_offset cases that can be simplified.
 *
 * Simple replacement cases:
 * - Flat interpolation: the offset is irrelevant -> load_barycentric_pixel.
 * - Constant zero offset -> load_barycentric_pixel.
 * - Offset is (sample_pos - 0.5f) -> load_barycentric_sample.
 *
 * Any other offset is left in place; it is handled when the consuming
 * load_interpolated_input is lowered (see lower_interp).
 *
 * \param[in,out] b NIR builder; its cursor must already be positioned.
 * \param[in] intr The load_barycentric_at_offset intrinsic.
 * \return Whether the intrinsic was replaced.
 */
static bool lower_bary_at_offset(nir_builder *b, nir_intrinsic_instr *intr)
{
   enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(intr);
   nir_src src = intr->src[0];

   /* The offset is a floating-point vec2, so compare it as float: this
    * treats -0.0 the same as +0.0, which a test on the raw bits would not.
    */
   bool zero_offset = nir_src_is_const(src) &&
                      nir_src_comp_as_float(src, 0) == 0.0 &&
                      nir_src_comp_as_float(src, 1) == 0.0;

   nir_def *repl;
   if (interp_mode == INTERP_MODE_FLAT || zero_offset) {
      repl = nir_load_barycentric_pixel(b,
                                        intr->def.bit_size,
                                        .interp_mode = interp_mode);
   } else if (src_is_vec2_sample_pos_minus_half(src)) {
      repl = nir_load_barycentric_sample(b,
                                         intr->def.bit_size,
                                         .interp_mode = interp_mode);
   } else {
      /* Non-zero offsets handled in lower_interp. */
      return false;
   }

   nir_def_replace(&intr->def, repl);
   nir_instr_free(&intr->instr);

   return true;
}
|
||||
|
||||
static bool
|
||||
lower_bary(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *cb_data)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_at_sample:
|
||||
return lower_bary_at_sample(b, intr);
|
||||
|
||||
case nir_intrinsic_load_barycentric_at_offset:
|
||||
return lower_bary_at_offset(b, intr);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * \brief Evaluates one input component from its coefficients using ALU ops.
 *
 * Loads the three coefficients (c0, c1, c2) for the given component and
 * computes c0 * coords.x + (c1 * coords.y + c2) with two fused
 * multiply-adds.
 *
 * \param[in,out] b NIR builder.
 * \param[in] coords Coordinates to evaluate at (x and y are used).
 * \param[in] component Input component whose coefficients are loaded.
 * \param[in] io_semantics I/O semantics identifying the input.
 * \return The evaluated scalar.
 */
static nir_def *alu_iter(nir_builder *b,
                         nir_def *coords,
                         unsigned component,
                         struct nir_io_semantics io_semantics)
{
   nir_def *coeffs = nir_load_fs_coeffs_pco(b,
                                            .component = component,
                                            .io_semantics = io_semantics);

   /* Inner term: c1 * y + c2. */
   nir_def *c1 = nir_channel(b, coeffs, 1);
   nir_def *y = nir_channel(b, coords, 1);
   nir_def *c2 = nir_channel(b, coeffs, 2);
   nir_def *inner = nir_ffma(b, c1, y, c2);

   /* Outer term: c0 * x + inner. */
   nir_def *c0 = nir_channel(b, coeffs, 0);
   nir_def *x = nir_channel(b, coords, 0);

   return nir_ffma(b, c0, x, inner);
}
|
||||
|
||||
static bool
|
||||
lower_sample_pos(nir_builder *b, nir_intrinsic_instr *intr, pco_fs_data *fs)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_def *msaa_samples = nir_bit_count(
|
||||
b,
|
||||
nir_u2u32(b, nir_alpha_to_coverage(b, nir_imm_float(b, 1.0f))));
|
||||
|
||||
nir_def *sample_id = intr->intrinsic == nir_intrinsic_load_sample_pos
|
||||
? nir_load_sample_id(b)
|
||||
: intr->src[0].ssa;
|
||||
|
||||
nir_def *dword_index =
|
||||
nir_ishr_imm(b, nir_iadd(b, msaa_samples, sample_id), 2);
|
||||
|
||||
nir_def *packed_sample_location =
|
||||
nir_load_packed_sample_location_pco(b, dword_index);
|
||||
fs->uses.sample_locations = true;
|
||||
|
||||
nir_def *byte_index = nir_iand_imm(b, sample_id, 0b11);
|
||||
|
||||
packed_sample_location =
|
||||
nir_extract_u8(b, packed_sample_location, byte_index);
|
||||
|
||||
nir_def *sample_location =
|
||||
nir_vec2(b,
|
||||
nir_ubitfield_extract_imm(b, packed_sample_location, 0, 4),
|
||||
nir_ubitfield_extract_imm(b, packed_sample_location, 4, 4));
|
||||
|
||||
sample_location = nir_u2f32(b, sample_location);
|
||||
sample_location = nir_fdiv_imm(b, sample_location, 16.0f);
|
||||
sample_location = nir_bcsel(b,
|
||||
nir_ieq_imm(b, msaa_samples, 1),
|
||||
nir_imm_vec2(b, 0.5f, 0.5f),
|
||||
sample_location);
|
||||
|
||||
nir_def_replace(&intr->def, sample_location);
|
||||
nir_instr_free(&intr->instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_interp(nir_builder *b, nir_intrinsic_instr *intr, void *cb_data)
|
||||
{
|
||||
pco_fs_data *fs = cb_data;
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_sample_pos:
|
||||
case nir_intrinsic_load_sample_pos_from_id:
|
||||
return lower_sample_pos(b, intr, fs);
|
||||
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *bary = nir_src_as_intrinsic(intr->src[0]);
|
||||
assert(bary);
|
||||
|
||||
/* Skip cases that don't need handling. */
|
||||
if (bary->intrinsic != nir_intrinsic_load_barycentric_at_offset)
|
||||
return false;
|
||||
|
||||
assert(nir_src_as_uint(intr->src[1]) == 0);
|
||||
|
||||
nir_def *coords = nir_load_tile_coord_pco(b, 2);
|
||||
coords = nir_fadd(b, coords, bary->src[0].ssa);
|
||||
|
||||
enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(bary);
|
||||
nir_def *rhw = alu_iter(b,
|
||||
coords,
|
||||
3,
|
||||
(struct nir_io_semantics){
|
||||
.location = VARYING_SLOT_POS,
|
||||
.num_slots = 1,
|
||||
});
|
||||
|
||||
nir_def *comps[4];
|
||||
for (unsigned u = 0; u < intr->def.num_components; ++u) {
|
||||
comps[u] = alu_iter(b, coords, u, nir_intrinsic_io_semantics(intr));
|
||||
if (interp_mode != INTERP_MODE_NOPERSPECTIVE)
|
||||
comps[u] = nir_fdiv(b, comps[u], rhw);
|
||||
}
|
||||
|
||||
nir_def *repl = nir_vec(b, comps, intr->def.num_components);
|
||||
nir_def_replace(&intr->def, repl);
|
||||
nir_instr_free(&intr->instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Lowers barycentric/interpolation intrinsics for fragment shaders.
 *
 * Runs two intrinsic passes in order: lower_bary first simplifies the
 * barycentric loads, then lower_interp lowers sample positions and the
 * remaining at_offset interpolated input loads. Both passes always run.
 *
 * \param[in,out] shader NIR shader.
 * \param[in,out] fs Fragment shader data, passed on to lower_interp.
 * \return True if either pass made progress.
 */
bool pco_nir_lower_interpolation(nir_shader *shader, pco_fs_data *fs)
{
   const bool bary_progress =
      nir_shader_intrinsics_pass(shader,
                                 lower_bary,
                                 nir_metadata_control_flow,
                                 NULL);

   const bool interp_progress =
      nir_shader_intrinsics_pass(shader,
                                 lower_interp,
                                 nir_metadata_control_flow,
                                 fs);

   return bary_progress || interp_progress;
}
|
||||
|
|
|
|||
|
|
@ -343,8 +343,10 @@ static inline pco_instr *build_itr(pco_builder *b,
|
|||
* \param[in] dest Instruction destination.
|
||||
* \return The translated PCO instruction.
|
||||
*/
|
||||
static pco_instr *
|
||||
trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
||||
static pco_instr *trans_load_input_fs(trans_ctx *tctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
pco_ref dest,
|
||||
bool interp)
|
||||
{
|
||||
pco_fs_data *fs_data = &tctx->shader->data.fs;
|
||||
UNUSED unsigned base = nir_intrinsic_base(intr);
|
||||
|
|
@ -352,7 +354,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
unsigned component = nir_intrinsic_component(intr);
|
||||
unsigned chans = pco_ref_get_chans(dest);
|
||||
|
||||
const nir_src offset = intr->src[0];
|
||||
const nir_src offset = interp ? intr->src[1] : intr->src[0];
|
||||
assert(nir_src_as_uint(offset) == 0);
|
||||
|
||||
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
|
||||
|
|
@ -362,6 +364,9 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
nir_var_shader_in,
|
||||
location);
|
||||
|
||||
enum glsl_interp_mode interp_mode = var->data.interpolation;
|
||||
|
||||
/* Setup iteration mode. */
|
||||
enum pco_itr_mode itr_mode = PCO_ITR_MODE_PIXEL;
|
||||
assert(!(var->data.sample && var->data.centroid));
|
||||
if (var->data.sample)
|
||||
|
|
@ -369,6 +374,31 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
else if (var->data.centroid)
|
||||
itr_mode = PCO_ITR_MODE_CENTROID;
|
||||
|
||||
/* Override if load_interpolated_input. */
|
||||
if (interp) {
|
||||
nir_intrinsic_instr *bary = nir_src_as_intrinsic(intr->src[0]);
|
||||
assert(bary);
|
||||
|
||||
interp_mode = nir_intrinsic_interp_mode(bary);
|
||||
|
||||
switch (bary->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
itr_mode = PCO_ITR_MODE_PIXEL;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
itr_mode = PCO_ITR_MODE_SAMPLE;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
itr_mode = PCO_ITR_MODE_CENTROID;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE("");
|
||||
}
|
||||
}
|
||||
|
||||
if (location == VARYING_SLOT_POS) {
|
||||
/* Only scalar supported for now. */
|
||||
/* TODO: support vector for zw. */
|
||||
|
|
@ -377,7 +407,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
/* TODO: support packing/partial vars. */
|
||||
assert(!var->data.location_frac);
|
||||
|
||||
assert(var->data.interpolation == INTERP_MODE_NOPERSPECTIVE);
|
||||
assert(interp_mode == INTERP_MODE_NOPERSPECTIVE);
|
||||
|
||||
/* Special case: x and y are loaded from special registers. */
|
||||
switch (component) {
|
||||
|
|
@ -423,7 +453,7 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
bool usc_itrsmp_enhanced =
|
||||
PVR_HAS_FEATURE(tctx->pco_ctx->dev_info, usc_itrsmp_enhanced);
|
||||
|
||||
switch (var->data.interpolation) {
|
||||
switch (interp_mode) {
|
||||
case INTERP_MODE_SMOOTH: {
|
||||
assert(fs_data->uses.w);
|
||||
|
||||
|
|
@ -469,6 +499,51 @@ trans_load_input_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
}
|
||||
}
|
||||
|
||||
static pco_instr *
|
||||
trans_load_fs_coeffs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
||||
{
|
||||
pco_fs_data *fs_data = &tctx->shader->data.fs;
|
||||
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
unsigned chans = pco_ref_get_chans(dest);
|
||||
|
||||
struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
|
||||
gl_varying_slot location = io_semantics.location;
|
||||
|
||||
/* Special case, get z/w coeffs. */
|
||||
if (location == VARYING_SLOT_POS) {
|
||||
if (component == 2) {
|
||||
assert(fs_data->uses.z);
|
||||
|
||||
pco_ref zcoeffs = pco_ref_hwreg_vec(0, PCO_REG_CLASS_COEFF, chans);
|
||||
|
||||
return pco_mov(&tctx->b, dest, zcoeffs, .rpt = chans);
|
||||
} else if (component == 3) {
|
||||
assert(fs_data->uses.w);
|
||||
|
||||
unsigned wcoeffs_index =
|
||||
fs_data->uses.z ? ROGUE_USC_COEFFICIENT_SET_SIZE : 0;
|
||||
|
||||
pco_ref wcoeffs =
|
||||
pco_ref_hwreg_vec(wcoeffs_index, PCO_REG_CLASS_COEFF, chans);
|
||||
|
||||
return pco_mov(&tctx->b, dest, wcoeffs, .rpt = chans);
|
||||
}
|
||||
|
||||
UNREACHABLE("");
|
||||
}
|
||||
|
||||
const pco_range *range = &fs_data->varyings[location];
|
||||
assert(component + chans <= range->count);
|
||||
|
||||
unsigned coeffs_index =
|
||||
range->start + (ROGUE_USC_COEFFICIENT_SET_SIZE * component);
|
||||
|
||||
pco_ref coeffs = pco_ref_hwreg_vec(coeffs_index, PCO_REG_CLASS_COEFF, chans);
|
||||
|
||||
return pco_mov(&tctx->b, dest, coeffs, .rpt = chans);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Translates a NIR fs store_output intrinsic into PCO.
|
||||
*
|
||||
|
|
@ -1389,6 +1464,29 @@ trans_load_sysval(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
|
|||
return pco_mov(&tctx->b, dest, src, .rpt = chans);
|
||||
}
|
||||
|
||||
static pco_instr *trans_load_packed_sample_location(trans_ctx *tctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
pco_ref dest,
|
||||
pco_ref offset_src)
|
||||
{
|
||||
unsigned idx_reg_num = 0;
|
||||
pco_ref idx_reg =
|
||||
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
|
||||
|
||||
pco_mov(&tctx->b, idx_reg, offset_src);
|
||||
|
||||
const pco_range *range = &tctx->shader->data.fs.sample_locations;
|
||||
assert(range->count > 0);
|
||||
|
||||
/* TODO: add the start onto the offset src instead? */
|
||||
assert(range->start < 256);
|
||||
|
||||
pco_ref src = pco_ref_hwreg(range->start, PCO_REG_CLASS_SHARED);
|
||||
src = pco_ref_hwreg_idx_from(idx_reg_num, src);
|
||||
|
||||
return pco_mov(&tctx->b, dest, src);
|
||||
}
|
||||
|
||||
static bool desc_set_binding_is_comb_img_smp(unsigned desc_set,
|
||||
unsigned binding,
|
||||
const pco_common_data *common)
|
||||
|
|
@ -1691,11 +1789,22 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
if (tctx->stage == MESA_SHADER_VERTEX)
|
||||
instr = trans_load_input_vs(tctx, intr, dest);
|
||||
else if (tctx->stage == MESA_SHADER_FRAGMENT)
|
||||
instr = trans_load_input_fs(tctx, intr, dest);
|
||||
instr = trans_load_input_fs(tctx, intr, dest, false);
|
||||
else
|
||||
UNREACHABLE("Unsupported stage for \"nir_intrinsic_load_input\".");
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
assert(tctx->stage == MESA_SHADER_FRAGMENT);
|
||||
instr = trans_load_input_fs(tctx, intr, dest, true);
|
||||
break;
|
||||
|
||||
/* Just consume. */
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
case nir_intrinsic_load_barycentric_sample:
|
||||
case nir_intrinsic_load_barycentric_centroid:
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
if (tctx->stage == MESA_SHADER_VERTEX)
|
||||
instr = trans_store_output_vs(tctx, intr, src[0]);
|
||||
|
|
@ -1705,6 +1814,10 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
UNREACHABLE("Unsupported stage for \"nir_intrinsic_store_output\".");
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_fs_coeffs_pco:
|
||||
instr = trans_load_fs_coeffs(tctx, intr, dest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_uvsw_write_pco:
|
||||
instr = trans_uvsw_write(tctx, intr, src[0], src[1]);
|
||||
break;
|
||||
|
|
@ -1799,6 +1912,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
&tctx->shader->data.fs.blend_consts);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_packed_sample_location_pco:
|
||||
assert(tctx->stage == MESA_SHADER_FRAGMENT);
|
||||
instr = trans_load_packed_sample_location(tctx, intr, dest, src[0]);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_shared:
|
||||
assert(tctx->stage == MESA_SHADER_COMPUTE);
|
||||
instr = trans_load_common_store(tctx,
|
||||
|
|
@ -2096,6 +2214,22 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
|
|||
pco_ref_hwreg(PCO_SR_FACE_ORIENT, PCO_REG_CLASS_SPEC));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_tile_coord_pco: {
|
||||
assert(tctx->stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
pco_ref xy[] = {
|
||||
pco_ref_hwreg(PCO_SR_TILE_X_P, PCO_REG_CLASS_SPEC),
|
||||
pco_ref_hwreg(PCO_SR_TILE_Y_P, PCO_REG_CLASS_SPEC),
|
||||
};
|
||||
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
unsigned chans = pco_ref_get_chans(dest);
|
||||
assert(component + chans <= ARRAY_SIZE(xy));
|
||||
|
||||
instr = pco_vec(&tctx->b, dest, chans, &xy[component]);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_savmsk_vm_pco:
|
||||
instr = pco_savmsk(&tctx->b,
|
||||
dest,
|
||||
|
|
|
|||
|
|
@ -909,6 +909,7 @@ struct pvr_pds_descriptor_set {
|
|||
#define PVR_BUFFER_TYPE_TILE_BUFFERS (11)
|
||||
#define PVR_BUFFER_TYPE_SPILL_INFO (12)
|
||||
#define PVR_BUFFER_TYPE_SCRATCH_INFO (13)
|
||||
#define PVR_BUFFER_TYPE_SAMPLE_LOCATIONS (14)
|
||||
#define PVR_BUFFER_TYPE_INVALID (~0)
|
||||
|
||||
struct pvr_pds_buffer {
|
||||
|
|
|
|||
|
|
@ -1584,7 +1584,8 @@ void pvr_pds_generate_descriptor_upload_program(
|
|||
case PVR_BUFFER_TYPE_FS_META:
|
||||
case PVR_BUFFER_TYPE_TILE_BUFFERS:
|
||||
case PVR_BUFFER_TYPE_SPILL_INFO:
|
||||
case PVR_BUFFER_TYPE_SCRATCH_INFO: {
|
||||
case PVR_BUFFER_TYPE_SCRATCH_INFO:
|
||||
case PVR_BUFFER_TYPE_SAMPLE_LOCATIONS: {
|
||||
struct pvr_const_map_entry_special_buffer *special_buffer_entry;
|
||||
|
||||
special_buffer_entry =
|
||||
|
|
|
|||
|
|
@ -4127,6 +4127,33 @@ static VkResult pvr_setup_descriptor_mappings(
|
|||
pds_info->data_size_in_dwords);
|
||||
break;
|
||||
}
|
||||
|
||||
case PVR_BUFFER_TYPE_SAMPLE_LOCATIONS: {
|
||||
/* Standard sample locations. */
|
||||
uint32_t packed_sample_locations[] = {
|
||||
0x000044cc,
|
||||
0xeaa26e26,
|
||||
0x359db759,
|
||||
0x1ffb71d3,
|
||||
};
|
||||
|
||||
struct pvr_suballoc_bo *sample_locations_bo;
|
||||
result =
|
||||
pvr_cmd_buffer_upload_general(cmd_buffer,
|
||||
&packed_sample_locations,
|
||||
sizeof(packed_sample_locations),
|
||||
&sample_locations_bo);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
PVR_WRITE(qword_buffer,
|
||||
sample_locations_bo->dev_addr.addr,
|
||||
special_buff_entry->const_offset,
|
||||
pds_info->data_size_in_dwords);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unsupported special buffer type.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -656,6 +656,14 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
};
|
||||
}
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT && data->fs.sample_locations.count > 0) {
|
||||
program.buffers[program.buffer_count++] = (struct pvr_pds_buffer){
|
||||
.type = PVR_BUFFER_TYPE_SAMPLE_LOCATIONS,
|
||||
.size_in_dwords = data->fs.sample_locations.count,
|
||||
.destination = data->fs.sample_locations.start,
|
||||
};
|
||||
}
|
||||
|
||||
pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
|
||||
|
||||
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
|
||||
|
|
@ -1835,6 +1843,16 @@ static void pvr_alloc_fs_sysvals(pco_data *data, nir_shader *nir)
|
|||
gl_system_value builtin_sys_vals[] = {
|
||||
SYSTEM_VALUE_SAMPLE_ID,
|
||||
SYSTEM_VALUE_LAYER_ID,
|
||||
SYSTEM_VALUE_SAMPLE_POS,
|
||||
SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID,
|
||||
SYSTEM_VALUE_BARYCENTRIC_LINEAR_COORD,
|
||||
SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL,
|
||||
SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE,
|
||||
SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID,
|
||||
SYSTEM_VALUE_BARYCENTRIC_PERSP_COORD,
|
||||
SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
|
||||
SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE,
|
||||
SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL,
|
||||
};
|
||||
|
||||
for (unsigned u = 0; u < ARRAY_SIZE(builtin_sys_vals); ++u)
|
||||
|
|
@ -2173,6 +2191,16 @@ static void pvr_setup_fs_tile_buffers(pco_data *data)
|
|||
data->common.shareds += data->fs.tile_buffers.count;
|
||||
}
|
||||
|
||||
/**
 * \brief Reserves shared registers for the packed sample locations.
 *
 * Does nothing unless the fragment shader was flagged as using sample
 * locations. Otherwise the range is placed at the current end of the shared
 * register allocation, which is then advanced past it.
 *
 * \param[in,out] data Shader data.
 */
static void pvr_setup_fs_sample_locations(pco_data *data)
{
   if (data->fs.uses.sample_locations) {
      pco_range *locations = &data->fs.sample_locations;

      locations->start = data->common.shareds;
      locations->count = 4; /* TODO */

      data->common.shareds += locations->count;
   }
}
|
||||
|
||||
static void pvr_alloc_cs_sysvals(pco_data *data, nir_shader *nir)
|
||||
{
|
||||
BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
|
||||
|
|
@ -2544,6 +2572,7 @@ static void pvr_postprocess_shader_data(pco_data *data,
|
|||
pvr_setup_fs_input_attachments(data, nir, subpass, hw_subpass);
|
||||
pvr_setup_fs_blend(data);
|
||||
pvr_setup_fs_tile_buffers(data);
|
||||
pvr_setup_fs_sample_locations(data);
|
||||
|
||||
/* TODO: push consts, blend consts, dynamic state, etc. */
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue