iris: Fill out scratch base address dynamically

Now that shaders are shared between contexts, we can't pre-bake the
shader scratch address into the derived 3DSTATE_XS packets.  Scratch
buffers are, and must remain, per-context, as multiple contexts could
be executing shaders that use scratch at the same time.

So instead, we leave the scratch base address field blank when
pre-filling those packets up front, and merge in the actual per-context
address when emitting them.  This adds a little overhead, but only in
the case where scratch is used.

Fixes: 84a38ec133 ("iris: Enable PIPE_CAP_SHAREABLE_SHADERS.")
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8922>
This commit is contained in:
Kenneth Graunke 2021-02-03 01:41:42 -08:00 committed by Marge Bot
parent 564a9e18a7
commit 4256f7ed58
3 changed files with 43 additions and 43 deletions

View file

@ -124,6 +124,7 @@ iris_upload_shader(struct iris_context *ice,
struct hash_table *cache = ice->shaders.cache; struct hash_table *cache = ice->shaders.cache;
void *mem_ctx = ish ? NULL : (void *) cache; void *mem_ctx = ish ? NULL : (void *) cache;
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
const struct gen_device_info *devinfo = &screen->devinfo;
struct iris_compiled_shader *shader = struct iris_compiled_shader *shader =
rzalloc_size(mem_ctx, sizeof(struct iris_compiled_shader) + rzalloc_size(mem_ctx, sizeof(struct iris_compiled_shader) +
screen->vtbl.derived_program_state_size(cache_id)); screen->vtbl.derived_program_state_size(cache_id));
@ -170,7 +171,7 @@ iris_upload_shader(struct iris_context *ice,
ralloc_steal(shader, shader->system_values); ralloc_steal(shader, shader->system_values);
/* Store the 3DSTATE shader packets and other derived state. */ /* Store the 3DSTATE shader packets and other derived state. */
screen->vtbl.store_derived_program_state(ice, cache_id, shader); screen->vtbl.store_derived_program_state(devinfo, cache_id, shader);
if (ish) { if (ish) {
assert(key_size <= sizeof(union iris_any_prog_key)); assert(key_size <= sizeof(union iris_any_prog_key));

View file

@ -110,7 +110,7 @@ struct iris_vtable {
uint32_t report_id); uint32_t report_id);
unsigned (*derived_program_state_size)(enum iris_program_cache_id id); unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
void (*store_derived_program_state)(struct iris_context *ice, void (*store_derived_program_state)(const struct gen_device_info *devinfo,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
struct iris_compiled_shader *shader); struct iris_compiled_shader *shader);
uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol, uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,

View file

@ -4307,20 +4307,24 @@ KSP(const struct iris_compiled_shader *shader)
pkt.Enable = true; \ pkt.Enable = true; \
\ \
if (prog_data->total_scratch) { \ if (prog_data->total_scratch) { \
struct iris_bo *bo = \
iris_get_scratch_space(ice, prog_data->total_scratch, stage); \
uint32_t scratch_addr = bo->gtt_offset; \
pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \ pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \
pkt.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr, \
IRIS_DOMAIN_NONE); \
} }
/**
 * Emit a pre-packed 3DSTATE_XS command with the scratch base address
 * merged in at emit time.
 *
 * Packs a temporary copy of the GENX(name) command (pkt2) in which the only
 * field set explicitly is ScratchSpaceBasePointer, pointing at offset 0 of
 * scratch_bo, and then hands both the pre-packed packet and the temporary
 * one to iris_emit_merge() to be combined and emitted into the batch.
 *
 * This macro is not self-contained: it expects the following names to be
 * in scope at the expansion site:
 *   - batch:      the batch being emitted into
 *   - scratch_bo: the scratch buffer to point the packet at
 *   - pkt:        the pre-packed command dwords (uint32_t *) for this stage
 *
 * \param name  Command name without the GENX() wrapper (e.g. 3DSTATE_VS);
 *              also used to form the name##_length dword count.
 */
#define MERGE_SCRATCH_ADDR(name) \
{ \
uint32_t pkt2[GENX(name##_length)] = {0}; \
_iris_pack_command(batch, GENX(name), pkt2, p) { \
p.ScratchSpaceBasePointer = rw_bo(scratch_bo, 0, IRIS_DOMAIN_NONE); \
} \
iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \
}
/** /**
* Encode most of 3DSTATE_VS based on the compiled shader. * Encode most of 3DSTATE_VS based on the compiled shader.
*/ */
static void static void
iris_store_vs_state(struct iris_context *ice, iris_store_vs_state(const struct gen_device_info *devinfo,
const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_stage_prog_data *prog_data = shader->prog_data;
@ -4339,8 +4343,7 @@ iris_store_vs_state(struct iris_context *ice,
* Encode most of 3DSTATE_HS based on the compiled shader. * Encode most of 3DSTATE_HS based on the compiled shader.
*/ */
static void static void
iris_store_tcs_state(struct iris_context *ice, iris_store_tcs_state(const struct gen_device_info *devinfo,
const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_stage_prog_data *prog_data = shader->prog_data;
@ -4384,8 +4387,7 @@ iris_store_tcs_state(struct iris_context *ice,
* Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader. * Encode 3DSTATE_TE and most of 3DSTATE_DS based on the compiled shader.
*/ */
static void static void
iris_store_tes_state(struct iris_context *ice, iris_store_tes_state(const struct gen_device_info *devinfo,
const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_stage_prog_data *prog_data = shader->prog_data;
@ -4422,8 +4424,7 @@ iris_store_tes_state(struct iris_context *ice,
* Encode most of 3DSTATE_GS based on the compiled shader. * Encode most of 3DSTATE_GS based on the compiled shader.
*/ */
static void static void
iris_store_gs_state(struct iris_context *ice, iris_store_gs_state(const struct gen_device_info *devinfo,
const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_stage_prog_data *prog_data = shader->prog_data;
@ -4470,8 +4471,7 @@ iris_store_gs_state(struct iris_context *ice,
* Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader. * Encode most of 3DSTATE_PS and 3DSTATE_PS_EXTRA based on the shader.
*/ */
static void static void
iris_store_fs_state(struct iris_context *ice, iris_store_fs_state(const struct gen_device_info *devinfo,
const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_stage_prog_data *prog_data = shader->prog_data;
@ -4504,15 +4504,8 @@ iris_store_fs_state(struct iris_context *ice,
ps.PositionXYOffsetSelect = ps.PositionXYOffsetSelect =
wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
if (prog_data->total_scratch) { if (prog_data->total_scratch)
struct iris_bo *bo =
iris_get_scratch_space(ice, prog_data->total_scratch,
MESA_SHADER_FRAGMENT);
uint32_t scratch_addr = bo->gtt_offset;
ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
ps.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr,
IRIS_DOMAIN_NONE);
}
} }
iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
@ -4538,8 +4531,7 @@ iris_store_fs_state(struct iris_context *ice,
* This must match the data written by the iris_store_xs_state() functions. * This must match the data written by the iris_store_xs_state() functions.
*/ */
static void static void
iris_store_cs_state(struct iris_context *ice, iris_store_cs_state(const struct gen_device_info *devinfo,
const struct gen_device_info *devinfo,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data; struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data;
@ -4596,31 +4588,28 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id)
* get most of the state packet without having to reconstruct it. * get most of the state packet without having to reconstruct it.
*/ */
static void static void
iris_store_derived_program_state(struct iris_context *ice, iris_store_derived_program_state(const struct gen_device_info *devinfo,
enum iris_program_cache_id cache_id, enum iris_program_cache_id cache_id,
struct iris_compiled_shader *shader) struct iris_compiled_shader *shader)
{ {
struct iris_screen *screen = (void *) ice->ctx.screen;
const struct gen_device_info *devinfo = &screen->devinfo;
switch (cache_id) { switch (cache_id) {
case IRIS_CACHE_VS: case IRIS_CACHE_VS:
iris_store_vs_state(ice, devinfo, shader); iris_store_vs_state(devinfo, shader);
break; break;
case IRIS_CACHE_TCS: case IRIS_CACHE_TCS:
iris_store_tcs_state(ice, devinfo, shader); iris_store_tcs_state(devinfo, shader);
break; break;
case IRIS_CACHE_TES: case IRIS_CACHE_TES:
iris_store_tes_state(ice, devinfo, shader); iris_store_tes_state(devinfo, shader);
break; break;
case IRIS_CACHE_GS: case IRIS_CACHE_GS:
iris_store_gs_state(ice, devinfo, shader); iris_store_gs_state(devinfo, shader);
break; break;
case IRIS_CACHE_FS: case IRIS_CACHE_FS:
iris_store_fs_state(ice, devinfo, shader); iris_store_fs_state(devinfo, shader);
break; break;
case IRIS_CACHE_CS: case IRIS_CACHE_CS:
iris_store_cs_state(ice, devinfo, shader); iris_store_cs_state(devinfo, shader);
case IRIS_CACHE_BLORP: case IRIS_CACHE_BLORP:
break; break;
default: default:
@ -5855,18 +5844,15 @@ iris_upload_dirty_render_state(struct iris_context *ice,
struct iris_resource *cache = (void *) shader->assembly.res; struct iris_resource *cache = (void *) shader->assembly.res;
iris_use_pinned_bo(batch, cache->bo, false, IRIS_DOMAIN_NONE); iris_use_pinned_bo(batch, cache->bo, false, IRIS_DOMAIN_NONE);
if (prog_data->total_scratch > 0) { struct iris_bo *scratch_bo = prog_data->total_scratch == 0 ? NULL :
struct iris_bo *bo =
iris_get_scratch_space(ice, prog_data->total_scratch, stage); iris_get_scratch_space(ice, prog_data->total_scratch, stage);
iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE);
}
if (stage == MESA_SHADER_FRAGMENT) { if (stage == MESA_SHADER_FRAGMENT) {
UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast; UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast;
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) {
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
@ -5898,6 +5884,11 @@ iris_upload_dirty_render_state(struct iris_context *ice,
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
ps.KernelStartPointer2 = KSP(shader) + ps.KernelStartPointer2 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
if (scratch_bo) {
ps.ScratchSpaceBasePointer =
rw_bo(scratch_bo, 0, IRIS_DOMAIN_NONE);
}
} }
uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0};
@ -5924,6 +5915,14 @@ iris_upload_dirty_render_state(struct iris_context *ice,
GENX(3DSTATE_PS_length)); GENX(3DSTATE_PS_length));
iris_emit_merge(batch, shader_psx, psx_state, iris_emit_merge(batch, shader_psx, psx_state,
GENX(3DSTATE_PS_EXTRA_length)); GENX(3DSTATE_PS_EXTRA_length));
} else if (scratch_bo) {
uint32_t *pkt = (uint32_t *) shader->derived_data;
switch (stage) {
case MESA_SHADER_VERTEX: MERGE_SCRATCH_ADDR(3DSTATE_VS); break;
case MESA_SHADER_TESS_CTRL: MERGE_SCRATCH_ADDR(3DSTATE_HS); break;
case MESA_SHADER_TESS_EVAL: MERGE_SCRATCH_ADDR(3DSTATE_DS); break;
case MESA_SHADER_GEOMETRY: MERGE_SCRATCH_ADDR(3DSTATE_GS); break;
}
} else { } else {
iris_batch_emit(batch, shader->derived_data, iris_batch_emit(batch, shader->derived_data,
iris_derived_program_state_size(stage)); iris_derived_program_state_size(stage));