mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-16 10:30:46 +01:00
nir,pan: Rework the panfrost tile load intrinsic
Instead of making it explicitly about outputs, this switches it to being a NIR version of LD_TILE. It means we have to do a bit of work in NIR and add a builder helper but the end result is something much more versatile. Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39367>
This commit is contained in:
parent
592963e941
commit
11b6cd2f2c
8 changed files with 91 additions and 62 deletions
|
|
@ -1011,8 +1011,8 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_bvh_stack_rtn_amd:
|
||||
case nir_intrinsic_cmat_load_shared_nv:
|
||||
case nir_intrinsic_cmat_mov_transpose_nv:
|
||||
case nir_intrinsic_load_converted_output_pan:
|
||||
case nir_intrinsic_load_readonly_output_pan:
|
||||
case nir_intrinsic_load_tile_pan:
|
||||
case nir_intrinsic_load_tile_res_pan:
|
||||
case nir_intrinsic_load_cumulative_coverage_pan:
|
||||
case nir_intrinsic_load_blend_input_pan:
|
||||
case nir_intrinsic_atest_pan:
|
||||
|
|
|
|||
|
|
@ -1671,18 +1671,22 @@ intrinsic("load_frag_coord_zw_pan", [2], dest_comp=1, indices=[COMPONENT], flags
|
|||
# src[] = { sampler_index }
|
||||
load("sampler_lod_parameters", [1], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Like load_output but using a specified render target and conversion descriptor
|
||||
# src[] = { target, sample, conversion }
|
||||
# target must be in the [0..7] range when io_semantics.location is FRAG_RESULT_DATA0
|
||||
# and is ignored otherwise
|
||||
load("converted_output_pan", [1, 1, 1], indices=[ACCESS, DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE])
|
||||
# Maps to LD_TILE
|
||||
#
|
||||
# rt must be in the [0..7] range when io_semantics.location is not
|
||||
# GL_FRAG_RESULT_DEPTH or GL_FRAG_RESULT_STENCIL
|
||||
#
|
||||
# src[] = { rt_sample_pixel, coverage_offset, conversion }
|
||||
load("tile_pan", [1, 1, 1], indices=[ACCESS, DEST_TYPE, IO_SEMANTICS],
|
||||
flags=[CAN_ELIMINATE])
|
||||
|
||||
# Like converted_output_pan but for case where the output is never written by the shader
|
||||
# This is used to relax waits on tile-buffer accesses and the target is read-only
|
||||
# src[] = { target, sample, conversion }
|
||||
# target must be in the [0..7] range when io_semantics.location is FRAG_RESULT_DATA0
|
||||
# and is ignored otherwise
|
||||
load("readonly_output_pan", [1, 1, 1], indices=[ACCESS, DEST_TYPE, IO_SEMANTICS], flags=[CAN_ELIMINATE])
|
||||
# Like load_tile_pan except it relies on resource tracking through
|
||||
# resource_read/write_mask for dependencies instead of ensuring absolute
|
||||
# pixel ordering like load_tile_pan does.
|
||||
#
|
||||
# src[] = { rt_sample_pixel, coverage_offset, conversion }
|
||||
load("tile_res_pan", [1, 1, 1], indices=[ACCESS, DEST_TYPE, IO_SEMANTICS],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Load converted memory given an address and a conversion descriptor
|
||||
# src[] = { address, conversion }
|
||||
|
|
|
|||
|
|
@ -412,8 +412,8 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
|||
progress = true;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_readonly_output_pan:
|
||||
case nir_intrinsic_load_converted_output_pan:
|
||||
case nir_intrinsic_load_tile_pan:
|
||||
case nir_intrinsic_load_tile_res_pan:
|
||||
/* render target can be nonuniform, but not conversion descriptor */
|
||||
if ((options->types & nir_lower_non_uniform_image_access) &&
|
||||
lower_non_uniform_access_intrin(&state, intrin, 2, nir_lower_non_uniform_image_access))
|
||||
|
|
|
|||
|
|
@ -1475,8 +1475,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
|
||||
case nir_intrinsic_load_output:
|
||||
case nir_intrinsic_load_per_vertex_output:
|
||||
case nir_intrinsic_load_converted_output_pan:
|
||||
case nir_intrinsic_load_readonly_output_pan:
|
||||
case nir_intrinsic_load_tile_pan:
|
||||
case nir_intrinsic_load_tile_res_pan:
|
||||
case nir_intrinsic_load_per_primitive_output:
|
||||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_store_per_primitive_output:
|
||||
|
|
|
|||
|
|
@ -1928,40 +1928,19 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
bi_index dest = bi_def_index(&instr->def);
|
||||
nir_alu_type T = nir_intrinsic_dest_type(instr);
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
bool is_zs = bi_is_zs(sem.location);
|
||||
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
|
||||
unsigned size = instr->def.bit_size;
|
||||
unsigned nr = instr->num_components;
|
||||
unsigned target = 0, sample = 0;
|
||||
|
||||
if (sem.location == FRAG_RESULT_DEPTH) {
|
||||
target = 255;
|
||||
} else if (sem.location == FRAG_RESULT_STENCIL) {
|
||||
target = 254;
|
||||
} else if (nir_src_is_const(instr->src[0])) {
|
||||
target = nir_src_as_uint(instr->src[0]);
|
||||
assert(target < 8);
|
||||
}
|
||||
bi_index pi = bi_src_index(&instr->src[0]);
|
||||
bi_index coverage = bi_src_index(&instr->src[1]);
|
||||
bi_index conversion = bi_src_index(&instr->src[2]);
|
||||
|
||||
if (nir_src_is_const(instr->src[1]))
|
||||
sample = nir_src_as_uint(instr->src[1]);
|
||||
bi_instr *I = bi_ld_tile_to(b, dest, pi, coverage, conversion,
|
||||
regfmt, nr - 1);
|
||||
I->z_stencil = bi_is_zs(sem.location);
|
||||
|
||||
bi_index pi = bi_pixel_indices(b, target, sample);
|
||||
|
||||
if (!is_zs && !nir_src_is_const(instr->src[0]))
|
||||
pi = bi_lshift_or(b, 32, bi_src_index(&instr->src[0]), pi, bi_imm_u8(8));
|
||||
|
||||
if (!nir_src_is_const(instr->src[1])) {
|
||||
pi = bi_mux_i32(b, bi_src_index(&instr->src[1]), pi,
|
||||
bi_imm_u32(0x1f), BI_MUX_BIT);
|
||||
}
|
||||
|
||||
bi_instr *I = bi_ld_tile_to(b, dest, pi, bi_coverage(b),
|
||||
bi_src_index(&instr->src[2]), regfmt, nr - 1);
|
||||
if (is_zs)
|
||||
I->z_stencil = true;
|
||||
|
||||
if (instr->intrinsic == nir_intrinsic_load_readonly_output_pan)
|
||||
if (instr->intrinsic == nir_intrinsic_load_tile_res_pan)
|
||||
I->wait_resource = true;
|
||||
|
||||
bi_emit_cached_split(b, dest, size * nr);
|
||||
|
|
@ -2378,8 +2357,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
bi_emit_store_converted_mem(b, instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_converted_output_pan:
|
||||
case nir_intrinsic_load_readonly_output_pan:
|
||||
case nir_intrinsic_load_tile_pan:
|
||||
case nir_intrinsic_load_tile_res_pan:
|
||||
bi_emit_ld_tile(b, instr);
|
||||
break;
|
||||
|
||||
|
|
@ -6184,9 +6163,11 @@ bi_lower_load_output(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
nir_def *conversion = nir_load_rt_conversion_pan(
|
||||
b, .base = rt, .src_type = nir_intrinsic_dest_type(intr));
|
||||
|
||||
nir_def *lowered = nir_load_converted_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, rt),
|
||||
nir_imm_int(b, 0), conversion, .dest_type = nir_intrinsic_dest_type(intr),
|
||||
nir_def *lowered = nir_load_tile_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size,
|
||||
pan_nir_tile_location_sample(b, loc, nir_imm_int(b, 0)),
|
||||
pan_nir_tile_default_coverage(b),
|
||||
conversion, .dest_type = nir_intrinsic_dest_type(intr),
|
||||
.io_semantics = nir_intrinsic_io_semantics(intr));
|
||||
|
||||
nir_def_rewrite_uses(&intr->def, lowered);
|
||||
|
|
|
|||
|
|
@ -26,10 +26,44 @@
|
|||
#define __PAN_NIR_H__
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "pan_compiler.h"
|
||||
|
||||
struct util_format_description;
|
||||
|
||||
/* Builds the packed 32-bit value that callers pass as the first source
 * (rt_sample_pixel) of load_tile_pan / load_tile_res_pan.
 *
 * `rt` and `sample` are each converted to 8 bits with nir_u2u8 and handed to
 * nir_pack_32_4x8_split together with the 8-bit constants 0 and 255, in the
 * argument order: sample, rt, 0, 255.
 */
static inline nir_def *
|
||||
pan_nir_tile_rt_sample(nir_builder *b, nir_def *rt, nir_def *sample)
|
||||
{
|
||||
/* y = 255 means "current pixel" */
|
||||
return nir_pack_32_4x8_split(b, nir_u2u8(b, sample),
|
||||
nir_u2u8(b, rt),
|
||||
nir_imm_intN_t(b, 0, 8),
|
||||
nir_imm_intN_t(b, 255, 8));
|
||||
}
|
||||
|
||||
/* Same as pan_nir_tile_rt_sample() but selects the render target from a
 * gl_frag_result location instead of an explicit rt value.
 *
 * FRAG_RESULT_DEPTH maps to rt 255 and FRAG_RESULT_STENCIL to rt 254; any
 * other location is asserted to be >= FRAG_RESULT_DATA0 and maps to
 * (location - FRAG_RESULT_DATA0).
 */
static inline nir_def *
|
||||
pan_nir_tile_location_sample(nir_builder *b, gl_frag_result location,
|
||||
nir_def *sample)
|
||||
{
|
||||
uint8_t rt;
|
||||
if (location == FRAG_RESULT_DEPTH) {
|
||||
rt = 255;
|
||||
} else if (location == FRAG_RESULT_STENCIL) {
|
||||
rt = 254;
|
||||
} else {
|
||||
/* NOTE(review): only the lower bound is checked here; the intrinsic
 * comment asks for a color rt in the [0..7] range - confirm callers
 * guarantee that. */
assert(location >= FRAG_RESULT_DATA0);
|
||||
rt = location - FRAG_RESULT_DATA0;
|
||||
}
|
||||
|
||||
return pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample);
|
||||
}
|
||||
|
||||
/* Returns the value callers pass as the second source (coverage_offset) of
 * load_tile_pan / load_tile_res_pan: the result of
 * load_cumulative_coverage_pan masked with 0x1f.
 */
static inline nir_def *
|
||||
pan_nir_tile_default_coverage(nir_builder *b)
|
||||
{
|
||||
return nir_iand_imm(b, nir_load_cumulative_coverage_pan(b), 0x1f);
|
||||
}
|
||||
|
||||
bool pan_nir_lower_store_component(nir_shader *shader);
|
||||
|
||||
bool pan_nir_lower_vertex_id(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -812,10 +812,13 @@ lower_rt_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
nir_def *lowered = nir_load_converted_output_pan(
|
||||
nir_def *sample_id =
|
||||
nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0);
|
||||
|
||||
nir_def *lowered = nir_load_tile_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size,
|
||||
nir_imm_int(b, rt),
|
||||
nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0),
|
||||
pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample_id),
|
||||
pan_nir_tile_default_coverage(b),
|
||||
nir_imm_int(b, blend_desc >> 32),
|
||||
.dest_type = dest_type,
|
||||
.io_semantics = io);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "pan_nir.h"
|
||||
|
||||
struct panvk_lower_input_attachment_load_ctx {
|
||||
uint32_t ro_color_mask;
|
||||
|
|
@ -161,16 +162,20 @@ lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
iosem.location = FRAG_RESULT_DATA0;
|
||||
nir_push_if(b, is_read_only);
|
||||
{
|
||||
load_ro_color = nir_load_readonly_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, target,
|
||||
intr->src[2].ssa, conversion, .dest_type = dest_type,
|
||||
load_ro_color = nir_load_tile_res_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size,
|
||||
pan_nir_tile_rt_sample(b, target, intr->src[2].ssa),
|
||||
pan_nir_tile_default_coverage(b),
|
||||
conversion, .dest_type = dest_type,
|
||||
.access = nir_intrinsic_access(intr), .io_semantics = iosem);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
load_rw_color = nir_load_converted_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, target,
|
||||
intr->src[2].ssa, conversion, .dest_type = dest_type,
|
||||
load_rw_color = nir_load_tile_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size,
|
||||
pan_nir_tile_rt_sample(b, target, intr->src[2].ssa),
|
||||
pan_nir_tile_default_coverage(b),
|
||||
conversion, .dest_type = dest_type,
|
||||
.access = nir_intrinsic_access(intr), .io_semantics = iosem);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
|
@ -201,9 +206,11 @@ lower_input_attachment_load(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
iosem.location = dest_type == nir_type_float32 ? FRAG_RESULT_DEPTH
|
||||
: FRAG_RESULT_STENCIL;
|
||||
target = nir_imm_int(b, 0);
|
||||
load_zs = nir_load_converted_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size, target,
|
||||
intr->src[2].ssa, conversion, .dest_type = dest_type,
|
||||
load_zs = nir_load_tile_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size,
|
||||
pan_nir_tile_location_sample(b, iosem.location, intr->src[2].ssa),
|
||||
pan_nir_tile_default_coverage(b),
|
||||
conversion, .dest_type = dest_type,
|
||||
.access = nir_intrinsic_access(intr), .io_semantics = iosem);
|
||||
|
||||
/* If we loaded the stencil value, the upper 24 bits might contain
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue