radeonsi: inline shader_info in si_shader_info, keep only what's used

This reduces the si_shader_info size by 244 B.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
Marek Olšák 2024-12-02 23:59:04 -05:00 committed by Marge Bot
parent dc5e0e2b73
commit e478410466
7 changed files with 145 additions and 14 deletions

View file

@ -767,7 +767,7 @@ static void si_check_render_feedback(struct si_context *sctx)
si_check_render_feedback_images(sctx, &sctx->images[i],
u_bit_consecutive(0, info->base.num_images));
si_check_render_feedback_textures(sctx, &sctx->samplers[i],
info->base.textures_used[0]);
info->base.textures_used);
}
si_check_render_feedback_resident_images(sctx);

View file

@ -334,7 +334,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
const struct ac_shader_config *config = &shader->config;
unsigned rsrc2;
unsigned stage = shader->selector->info.base.stage;
unsigned stage = shader->selector->stage;
*prefetch = false;
@ -849,7 +849,7 @@ static bool si_check_needs_implicit_sync(struct si_context *sctx, uint32_t usage
*/
struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info;
struct si_samplers *samplers = &sctx->samplers[PIPE_SHADER_COMPUTE];
unsigned mask = samplers->enabled_mask & info->base.textures_used[0];
unsigned mask = samplers->enabled_mask & info->base.textures_used;
while (mask) {
int i = u_bit_scan(&mask);

View file

@ -777,7 +777,7 @@ static void si_dump_descriptors(struct si_context *sctx, gl_shader_stage stage,
if (info) {
enabled_constbuf = u_bit_consecutive(0, info->base.num_ubos);
enabled_shaderbuf = u_bit_consecutive(0, info->base.num_ssbos);
enabled_samplers = info->base.textures_used[0];
enabled_samplers = info->base.textures_used;
enabled_images = u_bit_consecutive(0, info->base.num_images);
} else {
enabled_constbuf =

View file

@ -2442,7 +2442,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
unsigned num_sgprs = 8;
/* Image buffers are in desc[4..7]. */
if (BITSET_TEST(shader->info.base.image_buffers, i))
if (shader->info.base.image_buffers & BITFIELD_BIT(i))
num_sgprs = 4;
radeon_emit_array(&desc->list[desc_offset], num_sgprs);
@ -3003,7 +3003,7 @@ bool si_gfx_resources_check_encrypted(struct si_context *sctx)
si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[i]);
use_encrypted_bo |=
si_sampler_views_check_encrypted(sctx, &sctx->samplers[i],
current_shader->cso->info.base.textures_used[0]);
current_shader->cso->info.base.textures_used);
use_encrypted_bo |= si_image_views_check_encrypted(sctx, &sctx->images[i],
u_bit_consecutive(0, current_shader->cso->info.base.num_images));
}
@ -3088,7 +3088,7 @@ bool si_compute_resources_check_encrypted(struct si_context *sctx)
* or all writable buffers are encrypted.
*/
return si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[sh]) ||
si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh], info->base.textures_used[0]) ||
si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh], info->base.textures_used) ||
si_image_views_check_encrypted(sctx, &sctx->images[sh], u_bit_consecutive(0, info->base.num_images)) ||
si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);
}

View file

@ -467,7 +467,71 @@ struct si_vs_tcs_input_info {
};
struct si_shader_info {
shader_info base;
struct {
blake3_hash source_blake3;
bool use_aco_amd:1;
bool writes_memory:1;
enum gl_subgroup_size subgroup_size;
uint64_t outputs_read;
uint64_t outputs_written;
uint32_t patch_outputs_read;
uint32_t patch_outputs_written;
uint8_t num_ubos;
uint8_t num_ssbos;
uint8_t num_images;
uint32_t textures_used;
uint32_t image_buffers;
uint32_t msaa_images;
unsigned shared_size;
uint16_t workgroup_size[3];
bool workgroup_size_variable:1;
enum gl_derivative_group derivative_group:2;
uint8_t xfb_stride[MAX_XFB_BUFFERS];
uint8_t num_inlinable_uniforms:4;
union {
struct {
uint8_t blit_sgprs_amd:4;
bool window_space_position:1;
} vs;
struct {
enum tess_primitive_mode _primitive_mode;
enum gl_tess_spacing spacing;
uint8_t tcs_vertices_out;
bool ccw:1;
bool point_mode:1;
} tess;
struct {
enum mesa_prim output_primitive;
enum mesa_prim input_primitive;
uint16_t vertices_out;
uint8_t invocations;
uint8_t active_stream_mask:4;
} gs;
struct {
bool uses_discard:1;
bool uses_fbfetch_output:1;
bool needs_coarse_quad_helper_invocations:1;
bool uses_sample_shading:1;
bool early_fragment_tests:1;
bool post_depth_coverage:1;
bool pixel_center_integer:1;
enum gl_frag_depth_layout depth_layout:3;
} fs;
struct {
uint8_t user_data_components_amd:4;
} cs;
};
} base;
uint32_t options; /* bitmask of SI_PROFILE_* */

View file

@ -371,6 +371,7 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
struct si_shader_info *info, bool colors_lowered)
{
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
nir_divergence_analysis(nir);
#if AMD_LLVM_AVAILABLE
@ -400,7 +401,74 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
}
memset(info, 0, sizeof(*info));
info->base = nir->info;
memcpy(info->base.source_blake3, nir->info.source_blake3, sizeof(nir->info.source_blake3));
info->base.use_aco_amd = nir->info.use_aco_amd;
info->base.writes_memory = nir->info.writes_memory;
info->base.subgroup_size = nir->info.subgroup_size;
info->base.outputs_read = nir->info.outputs_read;
info->base.outputs_written = nir->info.outputs_written;
info->base.patch_outputs_read = nir->info.patch_outputs_read;
info->base.patch_outputs_written = nir->info.patch_outputs_written;
info->base.num_ubos = nir->info.num_ubos;
info->base.num_ssbos = nir->info.num_ssbos;
info->base.num_images = nir->info.num_images;
info->base.textures_used = nir->info.textures_used[0];
info->base.image_buffers = nir->info.image_buffers[0];
info->base.msaa_images = nir->info.msaa_images[0];
info->base.shared_size = nir->info.shared_size;
memcpy(info->base.workgroup_size, nir->info.workgroup_size, sizeof(nir->info.workgroup_size));
info->base.workgroup_size_variable = nir->info.workgroup_size_variable;
info->base.derivative_group = nir->info.derivative_group;
memcpy(info->base.xfb_stride, nir->info.xfb_stride, sizeof(nir->info.xfb_stride));
info->base.num_inlinable_uniforms = nir->info.num_inlinable_uniforms;
switch (nir->info.stage) {
case MESA_SHADER_VERTEX:
info->base.vs.blit_sgprs_amd = nir->info.vs.blit_sgprs_amd;
info->base.vs.window_space_position = nir->info.vs.window_space_position;
break;
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
info->base.tess._primitive_mode = nir->info.tess._primitive_mode;
info->base.tess.spacing = nir->info.tess.spacing;
info->base.tess.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
info->base.tess.ccw = nir->info.tess.ccw;
info->base.tess.point_mode = nir->info.tess.point_mode;
break;
case MESA_SHADER_GEOMETRY:
info->base.gs.output_primitive = nir->info.gs.output_primitive;
info->base.gs.input_primitive = nir->info.gs.input_primitive;
info->base.gs.vertices_out = nir->info.gs.vertices_out;
info->base.gs.invocations = nir->info.gs.invocations;
info->base.gs.active_stream_mask = nir->info.gs.active_stream_mask;
break;
case MESA_SHADER_FRAGMENT:
info->base.fs.uses_discard = nir->info.fs.uses_discard;
info->base.fs.uses_fbfetch_output = nir->info.fs.uses_fbfetch_output;
info->base.fs.needs_coarse_quad_helper_invocations = nir->info.fs.needs_coarse_quad_helper_invocations;
info->base.fs.uses_sample_shading = nir->info.fs.uses_sample_shading;
info->base.fs.early_fragment_tests = nir->info.fs.early_fragment_tests;
info->base.fs.post_depth_coverage = nir->info.fs.post_depth_coverage;
info->base.fs.pixel_center_integer = nir->info.fs.pixel_center_integer;
info->base.fs.depth_layout = nir->info.fs.depth_layout;
break;
case MESA_SHADER_COMPUTE:
case MESA_SHADER_KERNEL:
info->base.cs.user_data_components_amd = nir->info.cs.user_data_components_amd;
break;
default:
unreachable("unexpected shader stage");
}
/* Get options from shader profiles. */
for (unsigned i = 0; i < ARRAY_SIZE(si_shader_profiles); i++) {

View file

@ -3469,11 +3469,10 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
/* Compile the shader if it hasn't been loaded from the cache. */
if (!si_compile_shader(sscreen, *compiler, shader, debug)) {
fprintf(stderr,
"radeonsi: can't compile a main shader part (type: %s, name: %s).\n"
"radeonsi: can't compile a main shader part (type: %s).\n"
"This is probably a driver bug, please report "
"it to https://gitlab.freedesktop.org/mesa/mesa/-/issues.\n",
gl_shader_stage_name(shader->selector->stage),
shader->selector->info.base.name);
gl_shader_stage_name(shader->selector->stage));
FREE(shader);
return;
}
@ -3565,8 +3564,8 @@ void si_get_active_slot_masks(struct si_screen *sscreen, const struct si_shader_
num_constbufs = info->base.num_ubos;
/* two 8-byte images share one 16-byte slot */
num_images = align(info->base.num_images, 2);
num_msaa_images = align(BITSET_LAST_BIT(info->base.msaa_images), 2);
num_samplers = BITSET_LAST_BIT(info->base.textures_used);
num_msaa_images = align(util_last_bit(info->base.msaa_images), 2);
num_samplers = util_last_bit(info->base.textures_used);
/* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */
start = si_get_shaderbuf_slot(num_shaderbufs - 1);