mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 22:20:09 +01:00
anv/xe3+: Set RegistersPerThread for bindless shader dispatch.
v2: Use MOV and wrap in conditional during BTD spawn header setup
(Lionel). Remove references to SIMD8 (Tapani).
v3: Update brw_bsr() to specify number of registers per thread, don't
initialize Registers Per Thread on BTD spawn header (Lionel).
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32664>
This commit is contained in:
parent
b25d0f899b
commit
dd1712515b
4 changed files with 45 additions and 7 deletions
|
|
@ -18,13 +18,15 @@
|
|||
|
||||
static uint64_t
|
||||
brw_bsr(const struct intel_device_info *devinfo,
|
||||
uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset)
|
||||
uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset,
|
||||
uint8_t grf_used)
|
||||
{
|
||||
assert(offset % 64 == 0);
|
||||
assert(simd_size == 8 || simd_size == 16);
|
||||
assert(local_arg_offset % 8 == 0);
|
||||
|
||||
return offset |
|
||||
return ((uint64_t)ptl_register_blocks(grf_used) << 60) |
|
||||
offset |
|
||||
SET_BITS(simd_size == 8, 4, 4) |
|
||||
SET_BITS(local_arg_offset / 8, 2, 0);
|
||||
}
|
||||
|
|
@ -69,7 +71,8 @@ compile_single_bs(const struct brw_compiler *compiler,
|
|||
nir_shader *shader,
|
||||
brw_generator *g,
|
||||
struct brw_compile_stats *stats,
|
||||
int *prog_offset)
|
||||
int *prog_offset,
|
||||
uint64_t *bsr)
|
||||
{
|
||||
const bool debug_enabled = brw_should_print_shader(shader, DEBUG_RT);
|
||||
|
||||
|
|
@ -147,7 +150,10 @@ compile_single_bs(const struct brw_compiler *compiler,
|
|||
else
|
||||
assert(offset == 0);
|
||||
|
||||
if (!prog_offset)
|
||||
if (bsr)
|
||||
*bsr = brw_bsr(compiler->devinfo, offset, dispatch_width, 0,
|
||||
selected->grf_used);
|
||||
else
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
selected->grf_used);
|
||||
|
||||
|
|
@ -185,7 +191,7 @@ brw_compile_bs(const struct brw_compiler *compiler,
|
|||
|
||||
prog_data->simd_size =
|
||||
compile_single_bs(compiler, params, params->key, prog_data,
|
||||
shader, &g, params->base.stats, NULL);
|
||||
shader, &g, params->base.stats, NULL, NULL);
|
||||
if (prog_data->simd_size == 0)
|
||||
return NULL;
|
||||
|
||||
|
|
@ -206,12 +212,12 @@ brw_compile_bs(const struct brw_compiler *compiler,
|
|||
int offset = 0;
|
||||
uint8_t simd_size =
|
||||
compile_single_bs(compiler, params, params->key,
|
||||
prog_data, resume_shaders[i], &g, NULL, &offset);
|
||||
prog_data, resume_shaders[i], &g, NULL, &offset,
|
||||
&resume_sbt[i]);
|
||||
if (simd_size == 0)
|
||||
return NULL;
|
||||
|
||||
assert(offset > 0);
|
||||
resume_sbt[i] = brw_bsr(compiler->devinfo, offset, simd_size, 0);
|
||||
}
|
||||
|
||||
/* We only have one constant data so we want to make sure they're all the
|
||||
|
|
|
|||
|
|
@ -2214,6 +2214,14 @@ lower_btd_logical_send(const brw_builder &bld, fs_inst *inst)
|
|||
global_addr.type = BRW_TYPE_UD;
|
||||
global_addr.stride = 1;
|
||||
ubld.group(2, 0).MOV(header, global_addr);
|
||||
|
||||
/* XXX - There is a Registers Per Thread field in the BTD spawn
|
||||
* header starting on Xe3. It doesn't appear to be needed
|
||||
* by the hardware, so we don't set it. If it's ever
|
||||
* needed, though, we will need some sort of reloc, since
|
||||
* we'll have to initialize it based on the prog_data
|
||||
* structure of the callee.
|
||||
*/
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,12 @@
|
|||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<genxml name="RT" gen="30">
|
||||
<import name="gen200_rt.xml" />
|
||||
<struct name="BINDLESS_SHADER_RECORD" length="2">
|
||||
<field name="Offset To Local Arguments" start="0" end="2" type="uint" />
|
||||
<field name="Bindless Shader Dispatch Mode" start="4" end="4" type="uint">
|
||||
<value name="RT_SIMD16" value="0" />
|
||||
</field>
|
||||
<field name="Kernel Start Pointer" start="6" end="31" type="offset" />
|
||||
<field name="Registers Per Thread" start="60" end="63" type="uint" />
|
||||
</struct>
|
||||
</genxml>
|
||||
|
|
|
|||
|
|
@ -276,6 +276,21 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline);
|
|||
void
|
||||
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);
|
||||
|
||||
#if GFX_VERx10 >= 300
|
||||
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
|
||||
assert((local_arg_offset) % 8 == 0); \
|
||||
const struct brw_bs_prog_data *prog_data = \
|
||||
brw_bs_prog_data_const(bin->prog_data); \
|
||||
assert(prog_data->simd_size == 16); \
|
||||
\
|
||||
(struct GENX(BINDLESS_SHADER_RECORD)) { \
|
||||
.OffsetToLocalArguments = (local_arg_offset) / 8, \
|
||||
.BindlessShaderDispatchMode = RT_SIMD16, \
|
||||
.KernelStartPointer = bin->kernel.offset, \
|
||||
.RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used), \
|
||||
}; \
|
||||
})
|
||||
#else
|
||||
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
|
||||
assert((local_arg_offset) % 8 == 0); \
|
||||
const struct brw_bs_prog_data *prog_data = \
|
||||
|
|
@ -289,6 +304,7 @@ genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);
|
|||
.KernelStartPointer = bin->kernel.offset, \
|
||||
}; \
|
||||
})
|
||||
#endif
|
||||
|
||||
void
|
||||
genX(batch_set_preemption)(struct anv_batch *batch,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue