mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-03 17:38:25 +02:00
radv: select frag_coord_xy and pixel_coord conditionally based on dynamic state
The code explains it.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> (shader parts)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41689>
This commit is contained in:
parent
22e40edfb9
commit
1b45a8aee2
9 changed files with 92 additions and 13 deletions
|
|
@ -441,6 +441,9 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
/* Load the primitive topology from a user SGPR when it's unknown at compile time (GPL). */
|
||||
replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_RAST_PRIM);
|
||||
break;
|
||||
case nir_intrinsic_load_use_float_frag_coord_xy_amd:
|
||||
replacement = nir_ine_imm(b, GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_USE_FLOAT_FRAG_COORD_XY), 0);
|
||||
break;
|
||||
default:
|
||||
progress = false;
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -9,6 +9,20 @@
|
|||
* not both.
|
||||
*
|
||||
* sample_pos counts as a frag_coord_xy use and is lowered to frag_coord_xy here.
|
||||
*
|
||||
* If frag_coord_xy survives and both components are used and sample_pos isn't used, it becomes:
|
||||
* load_use_float_frag_coord_xy_amd() ? load_frag_coord_xy() : u2f32(load_pixel_coord()) + 0.5;
|
||||
*
|
||||
* If load_use_float_frag_coord_xy_amd==false, load_frag_coord_xy() returns uninitialized values.
|
||||
* If load_use_float_frag_coord_xy_amd==true, load_pixel_coord() returns uninitialized values.
|
||||
*
|
||||
* SPI_PS_INPUT_ENA is used to disable VGPR initialization for frag_coord_xy (POS_X_FLOAT,
|
||||
* POS_Y_FLOAT) or pixel_coord (POS_FIXED_PT) while SPI_PS_INPUT_ADDR keeps them at the same
|
||||
* VGPR locations. Reducing the number of initialized VGPRs increases the PS wave launch rate,
|
||||
* which increases observed pixel throughput depending on other states.
|
||||
*
|
||||
* load_use_float_frag_coord_xy_amd() comes from a user SGPR, and determines which VGPRs are
|
||||
* initialized at PS wave launch.
|
||||
*/
|
||||
|
||||
#include "nir_builder.h"
|
||||
|
|
@ -21,10 +35,12 @@ typedef struct {
|
|||
bool has_frag_coord_xy_float_use;
|
||||
bool has_pixel_coord;
|
||||
bool has_sample_pos;
|
||||
uint8_t comp_usage_mask;
|
||||
|
||||
/* lower_frag_coord_and_pixel_coord */
|
||||
bool lower_to_pixel_coord;
|
||||
bool lower_to_frag_coord_xy;
|
||||
bool select_frag_coord_xy_dynamically;
|
||||
} opt_fs_frag_coord_and_pixel_coord_state;
|
||||
|
||||
static bool
|
||||
|
|
@ -45,10 +61,12 @@ gather_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
|
||||
state->has_frag_coord_xy |= intr->intrinsic == nir_intrinsic_load_frag_coord_xy;
|
||||
state->has_sample_pos |= intr->intrinsic == nir_intrinsic_load_sample_pos;
|
||||
state->comp_usage_mask |= nir_def_components_read(&intr->def);
|
||||
return false;
|
||||
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
state->has_pixel_coord = true;
|
||||
state->comp_usage_mask |= nir_def_components_read(&intr->def);
|
||||
return false;
|
||||
|
||||
default:
|
||||
|
|
@ -68,6 +86,10 @@ lower_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
if (state->lower_to_pixel_coord) {
|
||||
nir_def_replace(&intr->def, nir_fadd_imm(b, nir_u2f32(b, nir_load_pixel_coord(b)), 0.5));
|
||||
return true;
|
||||
} else if (state->select_frag_coord_xy_dynamically) {
|
||||
nir_def_replace(&intr->def, nir_bcsel(b, nir_load_use_float_frag_coord_xy_amd(b), nir_load_frag_coord_xy(b),
|
||||
nir_fadd_imm(b, nir_u2f32(b, nir_load_pixel_coord(b)), 0.5)));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
|
|
@ -82,7 +104,12 @@ lower_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
if (state->lower_to_frag_coord_xy) {
|
||||
nir_def_replace(&intr->def, nir_f2u16(b, nir_load_frag_coord_xy(b)));
|
||||
if (state->select_frag_coord_xy_dynamically) {
|
||||
nir_def_replace(&intr->def, nir_bcsel(b, nir_load_use_float_frag_coord_xy_amd(b),
|
||||
nir_f2u16(b, nir_load_frag_coord_xy(b)), nir_load_pixel_coord(b)));
|
||||
} else {
|
||||
nir_def_replace(&intr->def, nir_f2u16(b, nir_load_frag_coord_xy(b)));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -109,8 +136,10 @@ radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord)
|
|||
(state.has_frag_coord_xy || state.has_sample_pos) && !state.has_frag_coord_xy_float_use;
|
||||
state.lower_to_frag_coord_xy =
|
||||
(state.has_pixel_coord || state.has_sample_pos) && state.has_frag_coord_xy_float_use;
|
||||
state.select_frag_coord_xy_dynamically =
|
||||
state.has_frag_coord_xy_float_use && !state.has_sample_pos && state.comp_usage_mask == 0x3;
|
||||
|
||||
if (!state.lower_to_pixel_coord && !state.lower_to_frag_coord_xy)
|
||||
if (!state.lower_to_pixel_coord && !state.lower_to_frag_coord_xy && !state.select_frag_coord_xy_dynamically)
|
||||
return false;
|
||||
|
||||
return nir_shader_intrinsics_pass(shader, lower_fs_frag_pos, nir_metadata_control_flow, &state);
|
||||
|
|
|
|||
|
|
@ -8786,6 +8786,16 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT] &&
|
||||
cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]->info.ps.selects_frag_coord_xy_dynamically &&
|
||||
(!cmd_buffer->state.last_vgt_shader ||
|
||||
/* We just want to know whether the VRS output changes between enabled and disabled. */
|
||||
(cmd_buffer->state.last_vgt_shader->info.outinfo.writes_primitive_shading_rate ||
|
||||
cmd_buffer->state.last_vgt_shader->info.outinfo.writes_primitive_shading_rate_per_primitive) !=
|
||||
(shader->info.outinfo.writes_primitive_shading_rate ||
|
||||
shader->info.outinfo.writes_primitive_shading_rate_per_primitive)))
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PS_STATE;
|
||||
|
||||
cmd_buffer->state.last_vgt_shader = (struct radv_shader *)shader;
|
||||
}
|
||||
}
|
||||
|
|
@ -11691,7 +11701,29 @@ radv_emit_ps_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (!ps)
|
||||
return;
|
||||
|
||||
const uint32_t spi_ps_input_ena = ps->config.spi_ps_input_ena;
|
||||
uint32_t spi_ps_input_ena = ps->config.spi_ps_input_ena;
|
||||
bool use_float_frag_coord_xy = false;
|
||||
|
||||
if (ps->info.ps.selects_frag_coord_xy_dynamically) {
|
||||
/* The shader selects frag_coord_xy/pixel_coord dynamically depending on a flag in PS_STATE
|
||||
* that depends on the following dynamic state while preferring pixel_coord (POS_FIXED_PT)
|
||||
* if possible due to lower VGPR initialization cost.
|
||||
*/
|
||||
use_float_frag_coord_xy =
|
||||
/* Whether VRS can be other than 1x1. */
|
||||
cmd_buffer->state.dynamic.vk.fsr.fragment_size.width != 1 ||
|
||||
cmd_buffer->state.dynamic.vk.fsr.fragment_size.height != 1 || cmd_buffer->state.render.vrs_att.iview ||
|
||||
cmd_buffer->state.last_vgt_shader->info.outinfo.writes_primitive_shading_rate ||
|
||||
cmd_buffer->state.last_vgt_shader->info.outinfo.writes_primitive_shading_rate_per_primitive ||
|
||||
radv_is_sample_shading_enabled(cmd_buffer, NULL);
|
||||
|
||||
/* Disable the initialized PS VGPRs that the shader doesn't use. */
|
||||
if (use_float_frag_coord_xy)
|
||||
spi_ps_input_ena &= C_0286CC_POS_FIXED_PT_ENA;
|
||||
else
|
||||
spi_ps_input_ena &= C_0286CC_POS_X_FLOAT_ENA & C_0286CC_POS_Y_FLOAT_ENA;
|
||||
}
|
||||
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
||||
radeon_begin(cs);
|
||||
|
|
@ -11711,7 +11743,8 @@ radv_emit_ps_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
const unsigned ps_state = SET_SGPR_FIELD(PS_STATE_NUM_SAMPLES, rasterization_samples) |
|
||||
SET_SGPR_FIELD(PS_STATE_PS_ITER_MASK, ps_iter_mask) |
|
||||
SET_SGPR_FIELD(PS_STATE_LINE_RAST_MODE, line_rast_mode) |
|
||||
SET_SGPR_FIELD(PS_STATE_RAST_PRIM, vgt_outprim_type);
|
||||
SET_SGPR_FIELD(PS_STATE_RAST_PRIM, vgt_outprim_type) |
|
||||
SET_SGPR_FIELD(PS_STATE_USE_FLOAT_FRAG_COORD_XY, use_float_frag_coord_xy);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(ps_state_offset, ps_state);
|
||||
|
|
@ -12830,7 +12863,7 @@ radv_validate_dynamic_states(struct radv_cmd_buffer *cmd_buffer, uint64_t dynami
|
|||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DB_SHADER_CONTROL;
|
||||
|
||||
if (dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FSR_STATE;
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FSR_STATE | RADV_CMD_DIRTY_PS_STATE;
|
||||
|
||||
if (dynamic_states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RAST_SAMPLES_STATE;
|
||||
|
|
|
|||
|
|
@ -237,14 +237,16 @@ struct radv_llvm_compiler_options {
|
|||
#define NGG_STATE_QUERY__SHIFT 6
|
||||
#define NGG_STATE_QUERY__MASK 0x7
|
||||
|
||||
#define PS_STATE_NUM_SAMPLES__SHIFT 0
|
||||
#define PS_STATE_NUM_SAMPLES__MASK 0xf
|
||||
#define PS_STATE_LINE_RAST_MODE__SHIFT 4
|
||||
#define PS_STATE_LINE_RAST_MODE__MASK 0x3
|
||||
#define PS_STATE_PS_ITER_MASK__SHIFT 6
|
||||
#define PS_STATE_PS_ITER_MASK__MASK 0xffff
|
||||
#define PS_STATE_RAST_PRIM__SHIFT 22
|
||||
#define PS_STATE_RAST_PRIM__MASK 0x3
|
||||
#define PS_STATE_NUM_SAMPLES__SHIFT 0
|
||||
#define PS_STATE_NUM_SAMPLES__MASK 0xf
|
||||
#define PS_STATE_LINE_RAST_MODE__SHIFT 4
|
||||
#define PS_STATE_LINE_RAST_MODE__MASK 0x3
|
||||
#define PS_STATE_PS_ITER_MASK__SHIFT 6
|
||||
#define PS_STATE_PS_ITER_MASK__MASK 0xffff
|
||||
#define PS_STATE_RAST_PRIM__SHIFT 22
|
||||
#define PS_STATE_RAST_PRIM__MASK 0x3
|
||||
#define PS_STATE_USE_FLOAT_FRAG_COORD_XY__SHIFT 24
|
||||
#define PS_STATE_USE_FLOAT_FRAG_COORD_XY__MASK 0x1
|
||||
|
||||
struct radv_shader_layout {
|
||||
uint32_t num_sets;
|
||||
|
|
|
|||
|
|
@ -420,6 +420,9 @@ radv_ps_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_
|
|||
if (info->ps.load_rasterization_prim && gfx_state->unknown_rast_prim)
|
||||
return true;
|
||||
|
||||
if (info->ps.selects_frag_coord_xy_dynamically)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -312,6 +312,9 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
|
|||
case nir_intrinsic_begin_invocation_interlock:
|
||||
info->ps.pops = true;
|
||||
break;
|
||||
case nir_intrinsic_load_use_float_frag_coord_xy_amd:
|
||||
info->ps.selects_frag_coord_xy_dynamically = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -213,6 +213,7 @@ struct radv_shader_info {
|
|||
bool load_rasterization_prim : 1;
|
||||
bool force_sample_iter_shading_rate : 1;
|
||||
bool allow_flat_shading : 1;
|
||||
bool selects_frag_coord_xy_dynamically : 1;
|
||||
|
||||
bool has_epilog : 1;
|
||||
} ps;
|
||||
|
|
|
|||
|
|
@ -351,6 +351,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_fbfetch_image_fmask_desc_amd:
|
||||
case nir_intrinsic_load_fbfetch_image_desc_amd:
|
||||
case nir_intrinsic_load_polygon_stipple_buffer_amd:
|
||||
case nir_intrinsic_load_use_float_frag_coord_xy_amd:
|
||||
case nir_intrinsic_load_tcs_mem_attrib_stride:
|
||||
case nir_intrinsic_load_printf_buffer_address:
|
||||
case nir_intrinsic_load_printf_buffer_size:
|
||||
|
|
|
|||
|
|
@ -1143,6 +1143,10 @@ system_value("blend_const_color_aaaa8888_unorm", 1)
|
|||
# System value for internal compute shaders in radeonsi.
|
||||
system_value("user_data_amd", 8)
|
||||
|
||||
# Whether to use load_frag_coord_xy() (true) or load_pixel_coord() (false), based on dynamic states.
|
||||
intrinsic("load_use_float_frag_coord_xy_amd", dest_comp=1, bit_sizes=[1],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Loads for gl_Color, for radeonsi which interpolates these in the shader
|
||||
# prolog to handle flatshading and front/back color selection without
|
||||
# recompiles and therefore doesn't handle them like normal varyings.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue