mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-21 01:38:23 +02:00
jay: cache message headers locally
This is a little spicier than CSE, but well, the stats speak for themselves.

SIMD16:
Totals from 1150 (43.45% of 2647) affected shaders:
Instrs: 1752063 -> 1671121 (-4.62%); split: -4.62%, +0.00%
CodeSize: 24366528 -> 23326992 (-4.27%); split: -4.28%, +0.01%

SIMD32:
Totals from 1152 (43.52% of 2647) affected shaders:
Instrs: 2008124 -> 1922714 (-4.25%); split: -4.27%, +0.02%
CodeSize: 28563184 -> 27442624 (-3.92%); split: -3.95%, +0.02%
Number of spill instructions: 12562 -> 12600 (+0.30%); split: -0.02%, +0.32%
Number of fill instructions: 31496 -> 31545 (+0.16%); split: -0.01%, +0.16%

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42097>
This commit is contained in:
parent
592022f989
commit
7dc69f747f
1 changed file with 43 additions and 17 deletions
|
|
@ -95,6 +95,10 @@ struct nir_to_jay_state {
|
|||
*/
|
||||
jay_def active_lane_mask, active_lane, active_lane_x4;
|
||||
|
||||
/* Likewise we cache a message header */
|
||||
jay_def msg_header[16];
|
||||
jay_def msg_header_unmoved[16];
|
||||
|
||||
/* These defs contain the extracted payload. They are only valid while
|
||||
* translating NIR->Jay since they aren't maintained by Jay passes.
|
||||
*/
|
||||
|
|
@ -135,6 +139,36 @@ emit_active_lane_mask(struct nir_to_jay_state *nj)
|
|||
return nj->active_lane_mask;
|
||||
}
|
||||
|
||||
static jay_def
|
||||
build_msg_header(struct nir_to_jay_state *nj, jay_def *desired)
|
||||
{
|
||||
jay_builder *b = &nj->bld;
|
||||
|
||||
/* Vectorized zeroing of the header when we first construct it */
|
||||
if (jay_is_null(nj->msg_header[0])) {
|
||||
jay_def zeroes = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader));
|
||||
jay_MOV(b, zeroes, 0);
|
||||
|
||||
jay_foreach_comp(zeroes, i) {
|
||||
nj->msg_header[i] = jay_extract(zeroes, i);
|
||||
nj->msg_header_unmoved[i] = jay_imm(0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Set all fields to what they should be */
|
||||
for (unsigned i = 0; i < jay_ugpr_per_grf(b->shader); ++i) {
|
||||
jay_def d = jay_is_null(desired[i]) ? jay_imm(0) : desired[i];
|
||||
|
||||
if (!jay_defs_equivalent(nj->msg_header_unmoved[i], d)) {
|
||||
nj->msg_header_unmoved[i] = desired[i];
|
||||
nj->msg_header[i] = jay_MOV_u32(b, desired[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Zip it all up into a vector of UGPRs which will RA to a single GRF */
|
||||
return jay_collect_vectors(b, nj->msg_header, jay_ugpr_per_grf(b->shader));
|
||||
}
|
||||
|
||||
static jay_def
|
||||
emit_active_lane(struct nir_to_jay_state *nj)
|
||||
{
|
||||
|
|
@ -2284,19 +2318,7 @@ jay_emit_texture(struct nir_to_jay_state *nj, nir_tex_instr *tex)
|
|||
}
|
||||
}
|
||||
|
||||
/* Vectorized zeroing of the header. TODO: This can be optimized more. */
|
||||
jay_def zeroes = jay_alloc_def(b, UGPR, jay_ugpr_per_grf(b->shader));
|
||||
jay_MOV(b, zeroes, 0);
|
||||
|
||||
jay_def ugprs[JAY_MAX_DEF_LENGTH];
|
||||
jay_foreach_comp(zeroes, i) {
|
||||
ugprs[i] = jay_extract(zeroes, i);
|
||||
}
|
||||
|
||||
/* Set the main immediate part of the header */
|
||||
if (header2 != 0) {
|
||||
ugprs[2] = jay_MOV_u32(b, header2);
|
||||
}
|
||||
jay_def header_builder[16] = { [2] = jay_imm(header2) };
|
||||
|
||||
if (sampler_bindless) {
|
||||
/* Bindless sampler handles aren't relative to the sampler state
|
||||
|
|
@ -2311,7 +2333,7 @@ jay_emit_texture(struct nir_to_jay_state *nj, nir_tex_instr *tex)
|
|||
* address space but means we can do something more efficient in the
|
||||
* shader.
|
||||
*/
|
||||
ugprs[3] = sampler;
|
||||
header_builder[3] = sampler;
|
||||
} else {
|
||||
/* Select the default dynamic state base address + offset */
|
||||
jay_def sampler_ptr = nj->payload.sampler_state_pointer;
|
||||
|
|
@ -2338,10 +2360,10 @@ jay_emit_texture(struct nir_to_jay_state *nj, nir_tex_instr *tex)
|
|||
}
|
||||
}
|
||||
|
||||
ugprs[3] = sampler_ptr;
|
||||
header_builder[3] = sampler_ptr;
|
||||
}
|
||||
/* Zip it all up into a vector of UGPRs which will RA to a single GRF */
|
||||
header = jay_collect_vectors(b, ugprs, jay_num_values(zeroes));
|
||||
|
||||
header = build_msg_header(nj, header_builder);
|
||||
}
|
||||
|
||||
assert(payload_type_bit_size == 16 || payload_type_bit_size == 32);
|
||||
|
|
@ -2627,6 +2649,10 @@ jay_emit_block(struct nir_to_jay_state *nj, nir_block *nb)
|
|||
{
|
||||
jay_builder *b = &nj->bld;
|
||||
|
||||
/* Reset per block state */
|
||||
memset(nj->msg_header, 0, sizeof(nj->msg_header));
|
||||
memset(nj->msg_header_unmoved, 0, sizeof(nj->msg_header_unmoved));
|
||||
|
||||
if (nj->after_block) {
|
||||
nj->current_block = nj->after_block;
|
||||
nj->after_block = NULL;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue