diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index ff139800c98..b4824de264d 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -57,9 +57,8 @@ anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer) state->compute.z_pass_async_compute_thread_limit = UINT8_MAX; state->compute.np_z_async_throttle_settings = UINT8_MAX; - memcpy(state->gfx.dyn_state.dirty, - cmd_buffer->device->gfx_dirty_state, - sizeof(state->gfx.dyn_state.dirty)); + BITSET_COPY(state->gfx.dyn_state.pack_dirty, + cmd_buffer->device->gfx_dirty_state); } static void @@ -516,25 +515,25 @@ anv_cmd_buffer_flush_pipeline_hw_state(struct anv_cmd_buffer *cmd_buffer, assert(old_pipeline == NULL || \ old_pipeline->name.len == new_pipeline->name.len); \ /* Don't bother memcmp if the state is already dirty */ \ - if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \ + if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \ (old_pipeline == NULL || \ memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \ &new_pipeline->batch_data[new_pipeline->name.offset], \ 4 * new_pipeline->name.len) != 0)) \ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \ + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \ } while (0) #define diff_var_state(bit, name) \ do { \ /* Don't bother memcmp if the state is already dirty */ \ /* Also if the new state is empty, avoid marking dirty */ \ - if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \ + if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \ new_pipeline->name.len != 0 && \ (old_pipeline == NULL || \ old_pipeline->name.len != new_pipeline->name.len || \ memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \ &new_pipeline->batch_data[new_pipeline->name.offset], \ 4 * new_pipeline->name.len) != 0)) \ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \ + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \ } while (0) #define assert_identical(bit, name) \ do { \ diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 3c34ee41bdb..7e71bed2a8b 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -131,7 +131,7 @@ genX(cmd_buffer_ensure_wa_14018283232)(struct anv_cmd_buffer *cmd_buffer, if (intel_needs_workaround(cmd_buffer->device->info, 14018283232) && hw_state->wa_14018283232_toggle != toggle) { hw_state->wa_14018283232_toggle = toggle; - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WA_14018283232); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WA_14018283232); genX(batch_emit_wa_14018283232)(&cmd_buffer->batch); } } @@ -164,7 +164,7 @@ genX(cmd_buffer_set_coarse_pixel_active)(struct anv_cmd_buffer *cmd_buffer, if (intel_needs_workaround(cmd_buffer->device->info, 18038825448) && gfx->dyn_state.coarse_state != state) { gfx->dyn_state.coarse_state = state; - BITSET_SET(gfx->dyn_state.dirty, ANV_GFX_STATE_COARSE_STATE); + BITSET_SET(gfx->dyn_state.emit_dirty, ANV_GFX_STATE_PS_EXTRA); return true; } return false; diff --git a/src/intel/vulkan/anv_measure.c b/src/intel/vulkan/anv_measure.c index 048def6459b..3e16de39dae 100644 --- a/src/intel/vulkan/anv_measure.c +++ b/src/intel/vulkan/anv_measure.c @@ -154,16 +154,13 @@ anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer, snapshot->renderpass = (type == INTEL_SNAPSHOT_COMPUTE) ? 0 : measure->base.renderpass; - if (type == INTEL_SNAPSHOT_COMPUTE && cmd_buffer->state.compute.base.pipeline) { - const struct anv_compute_pipeline *pipeline = - anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline); - snapshot->cs = pipeline->cs->prog_data->source_hash; - } else if (type == INTEL_SNAPSHOT_DRAW && cmd_buffer->state.gfx.base.pipeline) { - const struct anv_graphics_pipeline *pipeline = - anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline); -#define SHADER_SOURCE_HASH(_name) \ - (pipeline->base.shaders[MESA_SHADER_##VERTEX] ? \ - pipeline->base.shaders[MESA_SHADER_##VERTEX]->prog_data->source_hash : 0) + if (type == INTEL_SNAPSHOT_COMPUTE && cmd_buffer->state.compute.shader) { + snapshot->cs = cmd_buffer->state.compute.shader->prog_data->source_hash; + } else if (type == INTEL_SNAPSHOT_DRAW) { + const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; +#define SHADER_SOURCE_HASH(_name) \ + (gfx->shaders[MESA_SHADER_##_name] ? \ + gfx->shaders[MESA_SHADER_##_name]->prog_data->source_hash : 0) snapshot->vs = SHADER_SOURCE_HASH(VERTEX); snapshot->tcs = SHADER_SOURCE_HASH(TESS_CTRL); snapshot->tes = SHADER_SOURCE_HASH(TESS_EVAL); @@ -220,17 +217,12 @@ state_changed(struct anv_cmd_buffer *cmd_buffer, return false; if (type == INTEL_SNAPSHOT_COMPUTE) { - const struct anv_compute_pipeline *cs_pipe = - anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline); - assert(cs_pipe); - cs = cs_pipe->cs->prog_data->source_hash; + cs = cmd_buffer->state.compute.shader->prog_data->source_hash; } else if (type == INTEL_SNAPSHOT_DRAW) { - const struct anv_graphics_pipeline *gfx = - anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline); - assert(gfx); + const struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; #define SHADER_SOURCE_HASH(_name) \ - (gfx->base.shaders[MESA_SHADER_##VERTEX] ? \ - gfx->base.shaders[MESA_SHADER_##VERTEX]->prog_data->source_hash : 0) + (gfx->shaders[MESA_SHADER_##_name] ? \ + gfx->shaders[MESA_SHADER_##_name]->prog_data->source_hash : 0) vs = SHADER_SOURCE_HASH(VERTEX); tcs = SHADER_SOURCE_HASH(TESS_CTRL); tes = SHADER_SOURCE_HASH(TESS_EVAL); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 49b7fc8bf45..2e9229049a8 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1534,11 +1534,9 @@ enum anv_gfx_state_bits { ANV_GFX_STATE_TASK_SHADER, ANV_GFX_STATE_TASK_REDISTRIB, /* Dynamic states */ - ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */ - ANV_GFX_STATE_BLEND_STATE_PTR, /* The pointer to the dynamic state */ + ANV_GFX_STATE_BLEND_STATE, ANV_GFX_STATE_CLIP, ANV_GFX_STATE_CC_STATE, - ANV_GFX_STATE_CC_STATE_PTR, ANV_GFX_STATE_COARSE_PIXEL, ANV_GFX_STATE_CPS, ANV_GFX_STATE_DEPTH_BOUNDS, @@ -1558,7 +1556,6 @@ enum anv_gfx_state_bits { ANV_GFX_STATE_VF_TOPOLOGY, ANV_GFX_STATE_VFG, ANV_GFX_STATE_VIEWPORT_CC, - ANV_GFX_STATE_VIEWPORT_CC_PTR, ANV_GFX_STATE_VIEWPORT_SF_CLIP, ANV_GFX_STATE_WM, ANV_GFX_STATE_WM_DEPTH_STENCIL, @@ -1569,7 +1566,6 @@ enum anv_gfx_state_bits { ANV_GFX_STATE_TBIMR_TILE_PASS_INFO, ANV_GFX_STATE_FS_MSAA_FLAGS, ANV_GFX_STATE_TCS_INPUT_VERTICES, - ANV_GFX_STATE_COARSE_STATE, ANV_GFX_STATE_MESH_PROVOKING_VERTEX, ANV_GFX_STATE_MAX, @@ -1995,7 +1991,64 @@ struct anv_gfx_dynamic_state { */ enum anv_coarse_pixel_state coarse_state; - BITSET_DECLARE(dirty, ANV_GFX_STATE_MAX); + /** Dirty bits of what needs to be repacked */ + BITSET_DECLARE(pack_dirty, ANV_GFX_STATE_MAX); + + struct { + uint32_t vf[2]; + uint32_t vft[2]; + uint32_t vfs[1]; + uint32_t vfg[4]; + uint32_t vf_sgvs[2]; + uint32_t vf_sgvs_2[3]; + uint32_t vf_sgvs_instancing[6]; + uint32_t vf_sgvs_instancing_len; + uint32_t vf_component_packing[5]; + uint32_t ib[5]; + uint32_t so[5]; + uint32_t so_decl_list[3 + 2 * 128]; + uint32_t so_decl_list_len; + uint32_t clip[4]; + uint32_t clip_mesh[2]; + uint32_t sf_clip[2]; + uint32_t cc_viewport[2]; + uint32_t scissor[2]; + uint32_t mesh_control[3]; + uint32_t task_control[3]; + uint32_t mesh_shader[8]; + uint32_t task_shader[7]; + uint32_t mesh_distrib[2]; + uint32_t task_redistrib[2]; + uint32_t vs[9]; + uint32_t hs[9]; + uint32_t te[5]; + uint32_t ds[11]; + uint32_t gs[10]; + uint32_t sf[4]; + uint32_t ms[2]; + uint32_t sm[2]; + uint32_t sp[9]; + uint32_t raster[5]; + uint32_t cps[9]; + uint32_t ls[3]; + uint32_t db[4]; + uint32_t wm_ds[4]; + uint32_t wm[2]; + uint32_t pr[6]; + uint32_t sbe[6]; + uint32_t sbe_swiz[11]; + uint32_t sbe_mesh[2]; + uint32_t ps[12]; + uint32_t ps_extra[2]; + uint32_t ps_extra_dep[2]; + uint32_t ps_blend[2]; + uint32_t blend_state[2]; + uint32_t cc_state[2]; + uint32_t tbimr[4]; + } packed; + + /** Dirty bits of what needs to be reemitted */ + BITSET_DECLARE(emit_dirty, ANV_GFX_STATE_MAX); }; enum anv_internal_kernel_name { @@ -2768,28 +2821,6 @@ _anv_combine_address(struct anv_batch *batch, void *location, _dst = NULL; \ })) -#define anv_batch_emit_merge_protected(batch, cmd, pipeline, state, \ - name, protected) \ - for (struct cmd name = { 0 }, \ - *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ - __builtin_expect(_dst != NULL, 1); \ - ({ struct anv_gfx_state_ptr *_cmd_state = protected ? \ - &(pipeline)->state##_protected : \ - &(pipeline)->state; \ - uint32_t _partial[__anv_cmd_length(cmd)]; \ - assert(_cmd_state->len == __anv_cmd_length(cmd)); \ - __anv_cmd_pack(cmd)(batch, _partial, &name); \ - for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \ - assert((_partial[i] & \ - (pipeline)->batch_data[ \ - (pipeline)->state.offset + i]) == 0); \ - ((uint32_t *)_dst)[i] = _partial[i] | \ - (pipeline)->batch_data[_cmd_state->offset + i]; \ - } \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ - _dst = NULL; \ - })) - #define anv_batch_emit(batch, cmd, name) \ for (struct cmd name = { __anv_cmd_header(cmd) }, \ *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index 4483a3a6b79..c75805f0010 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -159,10 +159,8 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state) NAME(TASK_CONTROL); NAME(TASK_SHADER); NAME(TASK_REDISTRIB); - NAME(BLEND_STATE_PTR); NAME(CLIP); NAME(CC_STATE); - NAME(CC_STATE_PTR); NAME(CPS); NAME(DEPTH_BOUNDS); NAME(INDEX_BUFFER); @@ -180,7 +178,6 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state) NAME(VF_TOPOLOGY); NAME(VFG); NAME(VIEWPORT_CC); - NAME(VIEWPORT_CC_PTR); NAME(VIEWPORT_SF_CLIP); NAME(WM); NAME(WM_DEPTH_STENCIL); @@ -190,7 +187,6 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state) NAME(TBIMR_TILE_PASS_INFO); NAME(FS_MSAA_FLAGS); NAME(TCS_INPUT_VERTICES); - NAME(COARSE_STATE); NAME(MESH_PROVOKING_VERTEX); default: UNREACHABLE("invalid state"); } diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index d10100fe956..4a75f77b621 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -328,7 +328,7 @@ blorp_exec_on_render(struct blorp_batch *batch, * will trigger a PIPE_CONTROL too. */ hw_state->ds_write_state = blorp_ds_state; - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WA_18019816803); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WA_18019816803); /* Add the stall that will flush prior to the blorp operation by * genX(cmd_buffer_apply_pipe_flushes) @@ -388,45 +388,45 @@ blorp_exec_on_render(struct blorp_batch *batch, #endif /* Flag all the instructions emitted by BLORP. */ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_URB); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_STATISTICS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_TOPOLOGY); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VERTEX_INPUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS); #if GFX_VER >= 11 - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS_2); #endif #if GFX_VER >= 12 - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); #endif - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_CC); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_RASTER); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_CLIP); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SAMPLE_MASK); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MULTISAMPLE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SBE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SBE_SWIZ); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_DEPTH_BOUNDS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WM); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_HS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_DS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_GS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS_EXTRA); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_BLEND_STATE); if (batch->blorp->config.use_mesh_shading) { - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MESH_CONTROL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TASK_CONTROL); } if (params->wm_prog_data) { - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_BLEND); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_CC_STATE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS_BLEND); } anv_cmd_dirty_mask_t dirty = ~(ANV_CMD_DIRTY_INDEX_BUFFER | diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7d2f979ed14..83331827fd7 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3688,9 +3688,9 @@ genX(CmdExecuteCommands)( memset(&container->state.gfx.urb_cfg, 0, sizeof(struct intel_urb_config)); /* Reemit all GFX instructions in container */ - memcpy(container->state.gfx.dyn_state.dirty, - device->gfx_dirty_state, - sizeof(container->state.gfx.dyn_state.dirty)); + BITSET_OR(container->state.gfx.dyn_state.emit_dirty, + container->state.gfx.dyn_state.emit_dirty, + device->gfx_dirty_state); if (container->device->vk.enabled_extensions.KHR_fragment_shading_rate) { /* Also recompute the CPS_STATE offset */ struct vk_dynamic_graphics_state *dyn = diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c index 28010ce9254..aedcc1a3618 100644 --- a/src/intel/vulkan/genX_cmd_draw.c +++ b/src/intel/vulkan/genX_cmd_draw.c @@ -874,7 +874,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_flush_gfx_runtime_state)(cmd_buffer); /* Flush the HW state into the commmand buffer */ - if (!BITSET_IS_EMPTY(cmd_buffer->state.gfx.dyn_state.dirty)) + if (!BITSET_IS_EMPTY(cmd_buffer->state.gfx.dyn_state.emit_dirty)) genX(cmd_buffer_flush_gfx_hw_state)(cmd_buffer); /* If the pipeline changed, we may need to re-allocate push constant space diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index dd94c922205..ba238e39f60 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -38,30 +38,16 @@ #include "genX_mi_builder.h" -#define anv_gfx_emit(_gfx, _bit, _field, cmd, name) \ - for (struct cmd name = { __anv_cmd_header(cmd) }, *_dst = &name; \ - __builtin_expect(_dst != NULL, 1); \ - ({ assert(ARRAY_SIZE((_gfx)->_field) >= __anv_cmd_length(cmd)); \ - uint32_t __packed[__anv_cmd_length(cmd)]; \ - __anv_cmd_pack(cmd)(NULL, __packed, &name); \ - if (memcmp(__packed, (_gfx)->_field, \ - 4 * __anv_cmd_length(cmd))) { \ - memcpy((_gfx)->_field, __packed, \ - 4 * __anv_cmd_length(cmd)); \ - BITSET_SET((_gfx)->dirty, ANV_GFX_STATE_##_bit); \ - } \ - _dst = NULL; \ - })) - -#define anv_batch_emit_gfx_state(batch, _packed, cmd) \ - ({ \ - assert((_packed)[0] != 0); \ - void *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ - if (_dst != NULL) { \ - memcpy(_dst, _packed, 4 * __anv_cmd_length(cmd)); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \ - } \ - }) +#define anv_gfx_pack(field, cmd, name) \ + for (struct cmd name = { __anv_cmd_header(cmd) }, \ + *_dst = (struct cmd *)hw_state->packed.field; \ + __builtin_expect(_dst != NULL, 1); \ + ({ \ + assert(sizeof(hw_state->packed.field) >= \ + 4 * __anv_cmd_length(cmd)); \ + __anv_cmd_pack(cmd)(NULL, _dst, &name); \ + _dst = NULL; \ + })) static const uint32_t vk_to_intel_blend[] = { [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, @@ -751,7 +737,8 @@ calculate_tile_dimensions(const struct anv_device *device, __typeof(hw_state->field) __v = value; \ if (hw_state->field != __v) { \ hw_state->field = __v; \ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \ + BITSET_SET(hw_state->pack_dirty, \ + ANV_GFX_STATE_##bit); \ } \ } while (0) #define SET_STAGE(bit, field, value, stage) \ @@ -763,7 +750,8 @@ calculate_tile_dimensions(const struct anv_device *device, } \ if (hw_state->field != __v) { \ hw_state->field = __v; \ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \ + BITSET_SET(hw_state->pack_dirty, \ + ANV_GFX_STATE_##bit); \ } \ } while (0) #define SETUP_PROVOKING_VERTEX(bit, cmd, mode) \ @@ -2009,7 +1997,7 @@ update_viewports(struct anv_gfx_dynamic_state *hw_state, do { \ if (hw_state->state.field != sfv.field) { \ hw_state->state.field = sfv.field; \ - BITSET_SET(hw_state->dirty, \ + BITSET_SET(hw_state->pack_dirty, \ ANV_GFX_STATE_##bit); \ } \ } while (0) @@ -2056,15 +2044,15 @@ update_viewports(struct anv_gfx_dynamic_state *hw_state, * number of viewport programmed previously was larger than what we need * now, no need to reemit we can just keep the old programmed values. */ - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) || + if (BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) || hw_state->vp_sf_clip.count < dyn->vp.viewport_count) { hw_state->vp_sf_clip.count = dyn->vp.viewport_count; - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP); + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) || + if (BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_VIEWPORT_CC) || hw_state->vp_cc.count < dyn->vp.viewport_count) { hw_state->vp_cc.count = dyn->vp.viewport_count; - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC); + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_VIEWPORT_CC); } } @@ -2130,10 +2118,10 @@ update_scissors(struct anv_gfx_dynamic_state *hw_state, * programmed previously was larger than what we need now, no need to * reemit we can just keep the old programmed values. */ - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) || + if (BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_SCISSOR) || hw_state->scissor.count < dyn->vp.scissor_count) { hw_state->scissor.count = dyn->vp.scissor_count; - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR); + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_SCISSOR); } } @@ -2233,7 +2221,7 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, update_urb_config(hw_state, gfx, device); if ((gfx->dirty & ANV_CMD_DIRTY_PS) || - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) { + BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) { update_ps(hw_state, device, dyn, gfx); update_ps_extra_wm(hw_state, gfx); } @@ -2272,7 +2260,7 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_VERTEX_INPUT); #if GFX_VER >= 11 if (device->vk.enabled_extensions.KHR_fragment_shading_rate && @@ -2368,7 +2356,7 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, if ((gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER) || (gfx->dirty & ANV_CMD_DIRTY_INDEX_TYPE)) - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER); + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_INDEX_BUFFER); #if GFX_VERx10 >= 125 if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS) @@ -2381,7 +2369,7 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, if (device->vk.enabled_extensions.EXT_sample_locations && (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE))) - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN); + BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_SAMPLE_PATTERN); if ((gfx->dirty & ANV_CMD_DIRTY_PS) || (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) || @@ -2465,6 +2453,779 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state, #undef SET_STAGE #undef SETUP_PROVOKING_VERTEX +static void +cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state, + struct anv_cmd_buffer *cmd_buffer, + const struct anv_cmd_graphics_state *gfx, + const struct anv_graphics_pipeline *pipeline) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_instance *instance = device->physical->instance; + +#define INIT(category, name) \ + .name = hw_state->category.name +#define SET(s, category, name) \ + s.name = hw_state->category.name +#define SET_ARRAY(s, category, name) \ + do { \ + assert(sizeof(s.name) == \ + sizeof(hw_state->category.name)); \ + memcpy(&s.name, \ + &hw_state->category.name, \ + sizeof(s.name)); \ + } while (0) +#define IS_DIRTY(name) BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##name) + +#define anv_gfx_copy(field, cmd, source) ({ \ + assert(sizeof(hw_state->packed.field) >= \ + 4 * __anv_cmd_length(cmd)); \ + assert((source).len == __anv_cmd_length(cmd)); \ + memcpy(&hw_state->packed.field, \ + &pipeline->batch_data[(source).offset], \ + 4 * __anv_cmd_length(cmd)); \ + }) +#define anv_gfx_copy_variable(field, source) ({ \ + assert(sizeof(hw_state->packed.field) >= \ + 4 * (source).len); \ + memcpy(&hw_state->packed.field, \ + &pipeline->batch_data[(source).offset], \ + 4 * (source).len); \ + hw_state->packed.field##_len = (source).len; \ + }) +#define anv_gfx_copy_protected(field, cmd, source) ({ \ + const bool __protected = (cmd_buffer->vk.pool->flags & \ + VK_COMMAND_POOL_CREATE_PROTECTED_BIT); \ + assert(sizeof(hw_state->packed.field) >= \ + 4 * __anv_cmd_length(cmd)); \ + assert((source).len == __anv_cmd_length(cmd)); \ + memcpy(&hw_state->packed.field, \ + &pipeline->batch_data[ \ + __protected ? \ + (source##_protected).offset : \ + (source).offset], \ + 4 * __anv_cmd_length(cmd)); \ + }) +#define anv_gfx_pack_merge(field, cmd, prepacked, name) \ + for (struct cmd name = { 0 }, \ + *_dst = (struct cmd *)hw_state->packed.field; \ + __builtin_expect(_dst != NULL, 1); \ + ({ const struct anv_gfx_state_ptr *_cmd_state = &prepacked; \ + uint32_t _partial[__anv_cmd_length(cmd)]; \ + assert(_cmd_state->len == __anv_cmd_length(cmd)); \ + assert(sizeof(hw_state->packed.field) >= \ + 4 * __anv_cmd_length(cmd)); \ + __anv_cmd_pack(cmd)(NULL, _partial, &name); \ + for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \ + assert((_partial[i] & \ + (pipeline)->batch_data[ \ + (prepacked).offset + i]) == 0); \ + ((uint32_t *)_dst)[i] = _partial[i] | \ + (pipeline)->batch_data[_cmd_state->offset + i]; \ + } \ + _dst = NULL; \ + })) +#define anv_gfx_pack_merge_protected(field, cmd, prepacked, name) \ + for (struct cmd name = { 0 }, \ + *_dst = (struct cmd *)hw_state->packed.field; \ + __builtin_expect(_dst != NULL, 1); \ + ({ const struct anv_gfx_state_ptr *_cmd_state = \ + (cmd_buffer->vk.pool->flags & \ + VK_COMMAND_POOL_CREATE_PROTECTED_BIT) ? \ + &prepacked##_protected : &prepacked; \ + uint32_t _partial[__anv_cmd_length(cmd)]; \ + assert(_cmd_state->len == __anv_cmd_length(cmd)); \ + assert(sizeof(hw_state->packed.field) >= \ + 4 * __anv_cmd_length(cmd)); \ + __anv_cmd_pack(cmd)(NULL, _partial, &name); \ + for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \ + assert((_partial[i] & \ + (pipeline)->batch_data[ \ + (prepacked).offset + i]) == 0); \ + ((uint32_t *)_dst)[i] = _partial[i] | \ + (pipeline)->batch_data[_cmd_state->offset + i]; \ + } \ + _dst = NULL; \ + })) + + + if (IS_DIRTY(VF)) { + anv_gfx_pack(vf, GENX(3DSTATE_VF), vf) { +#if GFX_VERx10 >= 125 + vf.GeometryDistributionEnable = instance->enable_vf_distribution; +#endif + vf.ComponentPackingEnable = instance->vf_component_packing; + SET(vf, vf, IndexedDrawCutIndexEnable); + SET(vf, vf, CutIndex); + } + } + + if (IS_DIRTY(VF_TOPOLOGY)) { + anv_gfx_pack(vft, GENX(3DSTATE_VF_TOPOLOGY), vft) { + SET(vft, vft, PrimitiveTopologyType); + } + } + + if (IS_DIRTY(VF_STATISTICS)) { + anv_gfx_pack(vfs, GENX(3DSTATE_VF_STATISTICS), vfs) { + vfs.StatisticsEnable = true; + } + } + +#if GFX_VERx10 >= 125 + if (IS_DIRTY(VFG)) { + anv_gfx_pack(vfg, GENX(3DSTATE_VFG), vfg) { + /* 192 vertices for TRILIST_ADJ */ + vfg.ListNBatchSizeScale = 0; + /* Batch size of 384 vertices */ + vfg.List3BatchSizeScale = 2; + /* Batch size of 128 vertices */ + vfg.List2BatchSizeScale = 1; + /* Batch size of 128 vertices */ + vfg.List1BatchSizeScale = 2; + /* Batch size of 256 vertices for STRIP topologies */ + vfg.StripBatchSizeScale = 3; + /* 192 control points for PATCHLIST_3 */ + vfg.PatchBatchSizeScale = 1; + /* 192 control points for PATCHLIST_3 */ + vfg.PatchBatchSizeMultiplier = 31; + + SET(vfg, vfg, DistributionGranularity); + SET(vfg, vfg, DistributionMode); + SET(vfg, vfg, GranularityThresholdDisable); + SET(vfg, vfg, ListCutIndexEnable); + } + } +#endif + + if (IS_DIRTY(VF_SGVS)) + anv_gfx_copy(vf_sgvs, GENX(3DSTATE_VF_SGVS), pipeline->final.vf_sgvs); + +#if GFX_VER >= 11 + if (IS_DIRTY(VF_SGVS_2)) + anv_gfx_copy(vf_sgvs_2, GENX(3DSTATE_VF_SGVS_2), pipeline->final.vf_sgvs_2); +#endif + + if (IS_DIRTY(VF_SGVS_INSTANCING)) + anv_gfx_copy_variable(vf_sgvs_instancing, pipeline->final.vf_sgvs_instancing); + + if (instance->vf_component_packing && IS_DIRTY(VF_COMPONENT_PACKING)) { + anv_gfx_copy(vf_component_packing, GENX(3DSTATE_VF_COMPONENT_PACKING), + pipeline->final.vf_component_packing); + } + + if (IS_DIRTY(INDEX_BUFFER)) { + anv_gfx_pack(ib, GENX(3DSTATE_INDEX_BUFFER), ib) { + ib.IndexFormat = vk_to_intel_index_type(gfx->index_type); + ib.MOCS = gfx->index_addr == 0 ? + anv_mocs(device, NULL, ISL_SURF_USAGE_INDEX_BUFFER_BIT) : + gfx->index_mocs; +#if GFX_VER >= 12 + ib.L3BypassDisable = true; +#endif + ib.BufferStartingAddress = anv_address_from_u64(gfx->index_addr); + ib.BufferSize = gfx->index_size; + } + } + + if (IS_DIRTY(STREAMOUT)) { + anv_gfx_pack_merge(so, GENX(3DSTATE_STREAMOUT), + pipeline->partial.so, so) { + SET(so, so, RenderingDisable); + SET(so, so, RenderStreamSelect); + SET(so, so, ReorderMode); + SET(so, so, ForceRendering); + } + } + + if (IS_DIRTY(SO_DECL_LIST)) + anv_gfx_copy_variable(so_decl_list, pipeline->final.so_decl_list); + + if (IS_DIRTY(CLIP)) { + anv_gfx_pack(clip, GENX(3DSTATE_CLIP), clip) { + clip.ClipEnable = true; + clip.StatisticsEnable = true; + clip.EarlyCullEnable = true; + clip.GuardbandClipTestEnable = true; + + clip.VertexSubPixelPrecisionSelect = _8Bit; + clip.ClipMode = CLIPMODE_NORMAL; + + clip.MinimumPointWidth = 0.125; + clip.MaximumPointWidth = 255.875; + + SET(clip, clip, APIMode); + SET(clip, clip, ViewportXYClipTestEnable); + SET(clip, clip, TriangleStripListProvokingVertexSelect); + SET(clip, clip, LineStripListProvokingVertexSelect); + SET(clip, clip, TriangleFanProvokingVertexSelect); +#if GFX_VERx10 >= 200 + SET(clip, clip, TriangleStripOddProvokingVertexSelect); +#endif + SET(clip, clip, MaximumVPIndex); + SET(clip, clip, ForceZeroRTAIndexEnable); + SET(clip, clip, NonPerspectiveBarycentricEnable); + } + } + + if (IS_DIRTY(VIEWPORT_SF_CLIP)) { + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + hw_state->vp_sf_clip.count * 64, 64); + + for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) { + struct GENX(SF_CLIP_VIEWPORT) sfv = { + INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00), + INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11), + INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22), + INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30), + INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31), + INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32), + INIT(vp_sf_clip.elem[i], XMinClipGuardband), + INIT(vp_sf_clip.elem[i], XMaxClipGuardband), + INIT(vp_sf_clip.elem[i], YMinClipGuardband), + INIT(vp_sf_clip.elem[i], YMaxClipGuardband), + INIT(vp_sf_clip.elem[i], XMinViewPort), + INIT(vp_sf_clip.elem[i], XMaxViewPort), + INIT(vp_sf_clip.elem[i], YMinViewPort), + INIT(vp_sf_clip.elem[i], YMaxViewPort), + }; + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv); + } + + anv_gfx_pack(sf_clip, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { + clip.SFClipViewportPointer = sf_clip_state.offset; + } + } + + if (IS_DIRTY(VIEWPORT_CC)) { + hw_state->vp_cc.state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + hw_state->vp_cc.count * 8, 32); + + for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) { + struct GENX(CC_VIEWPORT) cc_viewport = { + INIT(vp_cc.elem[i], MinimumDepth), + INIT(vp_cc.elem[i], MaximumDepth), + }; + GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8, + &cc_viewport); + } + + anv_gfx_pack(cc_viewport, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { + cc.CCViewportPointer = hw_state->vp_cc.state.offset; + } + } + + if (IS_DIRTY(SCISSOR)) { + /* Wa_1409725701: + * + * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is + * stored as an array of up to 16 elements. The location of first + * element of the array, as specified by Pointer to SCISSOR_RECT, + * should be aligned to a 64-byte boundary. + */ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + hw_state->scissor.count * 8, 64); + + for (uint32_t i = 0; i < hw_state->scissor.count; i++) { + struct GENX(SCISSOR_RECT) scissor = { + INIT(scissor.elem[i], ScissorRectangleYMin), + INIT(scissor.elem[i], ScissorRectangleXMin), + INIT(scissor.elem[i], ScissorRectangleYMax), + INIT(scissor.elem[i], ScissorRectangleXMax), + }; + GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor); + } + + anv_gfx_pack(scissor, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) { + ssp.ScissorRectPointer = scissor_state.offset; + } + } + +#if GFX_VER >= 30 + if (IS_DIRTY(CPS)) { + anv_gfx_pack(cps, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) { + coarse_pixel.DisableCPSPointers = true; + SET(coarse_pixel, coarse_pixel, CPSizeX); + SET(coarse_pixel, coarse_pixel, CPSizeY); + SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode); + SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode); + } + } +#else + if (IS_DIRTY(CPS)) { +#if GFX_VER == 11 + anv_gfx_pack(cps, GENX(3DSTATE_CPS), cps) { + SET(cps, cps, CoarsePixelShadingMode); + SET(cps, cps, MinCPSizeX); + SET(cps, cps, MinCPSizeY); + } +#elif GFX_VER >= 12 + anv_gfx_pack(cps, GENX(3DSTATE_CPS_POINTERS), cps) { + SET(cps, cps, CoarsePixelShadingStateArrayPointer); + } +#endif + } +#endif /* GFX_VER >= 30 */ + + if (IS_DIRTY(SF)) { + anv_gfx_pack(sf, GENX(3DSTATE_SF), sf) { + /* Fixed values */ + sf.ViewportTransformEnable = true; + sf.StatisticsEnable = true; + sf.VertexSubPixelPrecisionSelect = _8Bit; + sf.AALineDistanceMode = true; + sf.PointWidth = 1.0; + +#if GFX_VER >= 12 + SET(sf, sf, DerefBlockSize); +#endif + SET(sf, sf, PointWidthSource); + SET(sf, sf, LineWidth); + SET(sf, sf, TriangleStripListProvokingVertexSelect); + SET(sf, sf, LineStripListProvokingVertexSelect); + SET(sf, sf, TriangleFanProvokingVertexSelect); +#if GFX_VERx10 >= 200 + SET(sf, sf, TriangleStripOddProvokingVertexSelect); +#endif + SET(sf, sf, LegacyGlobalDepthBiasEnable); + } + } + + if (BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_RASTER)) { + anv_gfx_pack(raster, GENX(3DSTATE_RASTER), raster) { + /* For details on 3DSTATE_RASTER multisample state, see the BSpec + * table "Multisample Modes State". + * + * NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix + * computations. If we ever set this bit to a different value, they + * will need to be updated accordingly. + */ + raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0; + raster.ForceMultisampling = false; + raster.ScissorRectangleEnable = true; + + SET(raster, raster, APIMode); + SET(raster, raster, DXMultisampleRasterizationEnable); + SET(raster, raster, AntialiasingEnable); + SET(raster, raster, CullMode); + SET(raster, raster, FrontWinding); + SET(raster, raster, GlobalDepthOffsetEnableSolid); + SET(raster, raster, GlobalDepthOffsetEnableWireframe); + SET(raster, raster, GlobalDepthOffsetEnablePoint); + SET(raster, raster, GlobalDepthOffsetConstant); + SET(raster, raster, GlobalDepthOffsetScale); + SET(raster, raster, GlobalDepthOffsetClamp); + SET(raster, raster, FrontFaceFillMode); + SET(raster, raster, BackFaceFillMode); + SET(raster, raster, ViewportZFarClipTestEnable); + SET(raster, raster, ViewportZNearClipTestEnable); + SET(raster, raster, ConservativeRasterizationEnable); +#if GFX_VER >= 20 + SET(raster, raster, LegacyBaryAssignmentDisable); +#endif + } + } + + if (IS_DIRTY(LINE_STIPPLE)) { + anv_gfx_pack(ls, GENX(3DSTATE_LINE_STIPPLE), ls) { + SET(ls, ls, LineStipplePattern); + SET(ls, ls, LineStippleInverseRepeatCount); + SET(ls, ls, LineStippleRepeatCount); + } + } + + if (IS_DIRTY(MULTISAMPLE)) { + anv_gfx_pack(ms, GENX(3DSTATE_MULTISAMPLE), ms) { + ms.PixelLocation = CENTER; + + /* The PRM says that this bit is valid only for DX9: + * + * SW can choose to set this bit only for DX9 API. DX10/OGL API's + * should not have any effect by setting or not setting this bit. + */ + ms.PixelPositionOffsetEnable = false; + + SET(ms, ms, NumberofMultisamples); + } + } + + if (IS_DIRTY(SAMPLE_MASK)) { + anv_gfx_pack(sm, GENX(3DSTATE_SAMPLE_MASK), sm) { + SET(sm, sm, SampleMask); + } + } + + if (IS_DIRTY(TE)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) { + anv_gfx_pack_merge(te, GENX(3DSTATE_TE), pipeline->partial.te, te) { + SET(te, te, OutputTopology); +#if GFX_VERx10 >= 125 + SET(te, te, TessellationDistributionMode); +#endif + } + } else { + anv_gfx_pack(te, GENX(3DSTATE_TE), te); + } + } + + if (IS_DIRTY(WM_DEPTH_STENCIL)) { + anv_gfx_pack(wm_ds, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { + SET(ds, ds, DoubleSidedStencilEnable); + SET(ds, ds, StencilTestMask); + SET(ds, ds, StencilWriteMask); + SET(ds, ds, BackfaceStencilTestMask); + SET(ds, ds, BackfaceStencilWriteMask); + SET(ds, ds, StencilReferenceValue); + SET(ds, ds, BackfaceStencilReferenceValue); + SET(ds, ds, DepthTestEnable); + SET(ds, ds, DepthBufferWriteEnable); + SET(ds, ds, DepthTestFunction); + SET(ds, ds, StencilTestEnable); + SET(ds, ds, StencilBufferWriteEnable); + SET(ds, ds, StencilFailOp); + SET(ds, ds, StencilPassDepthPassOp); + SET(ds, ds, StencilPassDepthFailOp); + SET(ds, ds, StencilTestFunction); + SET(ds, ds, BackfaceStencilFailOp); + SET(ds, ds, BackfaceStencilPassDepthPassOp); + SET(ds, ds, BackfaceStencilPassDepthFailOp); + SET(ds, ds, BackfaceStencilTestFunction); + } + } + +#if GFX_VER >= 12 + if (IS_DIRTY(DEPTH_BOUNDS)) { + anv_gfx_pack(db, GENX(3DSTATE_DEPTH_BOUNDS), db) { + SET(db, db, DepthBoundsTestEnable); + SET(db, db, DepthBoundsTestMinValue); + SET(db, db, DepthBoundsTestMaxValue); + } + } +#endif + +#if GFX_VER >= 12 + if (IS_DIRTY(PRIMITIVE_REPLICATION)) { + anv_gfx_pack(pr, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) { + SET(pr, pr, ReplicaMask); + SET(pr, pr, ReplicationCount); + SET_ARRAY(pr, pr, RTAIOffset); + } + } +#endif + + if (IS_DIRTY(SBE)) { + anv_gfx_pack(sbe, GENX(3DSTATE_SBE), sbe) { + for (unsigned i = 0; i < 32; i++) + sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; + sbe.ForceVertexURBEntryReadOffset = true; + sbe.ForceVertexURBEntryReadLength = true; + + SET(sbe, sbe, AttributeSwizzleEnable); + SET(sbe, sbe, PointSpriteTextureCoordinateEnable); + SET(sbe, sbe, PointSpriteTextureCoordinateOrigin); + SET(sbe, sbe, NumberofSFOutputAttributes); + SET(sbe, sbe, ConstantInterpolationEnable); + SET(sbe, sbe, VertexURBEntryReadOffset); + SET(sbe, sbe, VertexURBEntryReadLength); +#if GFX_VER >= 20 + SET(sbe, sbe, VertexAttributesBypass); +#endif + SET(sbe, sbe, PrimitiveIDOverrideAttributeSelect); + SET(sbe, sbe, PrimitiveIDOverrideComponentX); + SET(sbe, sbe, PrimitiveIDOverrideComponentY); + SET(sbe, sbe, PrimitiveIDOverrideComponentZ); + SET(sbe, sbe, PrimitiveIDOverrideComponentW); + } + } + +#if GFX_VERx10 >= 125 + if (IS_DIRTY(SBE_MESH)) { + anv_gfx_pack(sbe_mesh, GENX(3DSTATE_SBE_MESH), sbe_mesh) { + SET(sbe_mesh, sbe_mesh, PerVertexURBEntryOutputReadOffset); + SET(sbe_mesh, sbe_mesh, PerVertexURBEntryOutputReadLength); + SET(sbe_mesh, sbe_mesh, PerPrimitiveURBEntryOutputReadOffset); + SET(sbe_mesh, sbe_mesh, PerPrimitiveURBEntryOutputReadLength); + } + } +#endif + + if (IS_DIRTY(SBE_SWIZ)) { + anv_gfx_pack(sbe_swiz, GENX(3DSTATE_SBE_SWIZ), sbe_swiz) { + for (unsigned i = 0; i < 16; i++) + SET(sbe_swiz, sbe_swiz, Attribute[i].SourceAttribute); + } + } + + if (IS_DIRTY(WM)) { + anv_gfx_pack_merge(wm, GENX(3DSTATE_WM), pipeline->partial.wm, wm) { + SET(wm, wm, LineStippleEnable); + SET(wm, wm, BarycentricInterpolationMode); + } + } + + if (IS_DIRTY(PS_BLEND)) { + anv_gfx_pack(ps_blend, GENX(3DSTATE_PS_BLEND), blend) { + SET(blend, ps_blend, HasWriteableRT); + SET(blend, ps_blend, ColorBufferBlendEnable); + SET(blend, ps_blend, SourceAlphaBlendFactor); + SET(blend, ps_blend, DestinationAlphaBlendFactor); + SET(blend, ps_blend, SourceBlendFactor); + SET(blend, ps_blend, DestinationBlendFactor); + SET(blend, ps_blend, AlphaTestEnable); + SET(blend, ps_blend, IndependentAlphaBlendEnable); + SET(blend, ps_blend, AlphaToCoverageEnable); + } + } + + if (IS_DIRTY(CC_STATE)) { + hw_state->cc.state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GENX(COLOR_CALC_STATE_length) * 4, + 64); + struct GENX(COLOR_CALC_STATE) cc = { + INIT(cc, BlendConstantColorRed), + INIT(cc, BlendConstantColorGreen), + INIT(cc, BlendConstantColorBlue), + INIT(cc, BlendConstantColorAlpha), + }; + GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc); + + anv_gfx_pack(cc_state, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { + ccp.ColorCalcStatePointer = hw_state->cc.state.offset; + ccp.ColorCalcStatePointerValid = true; + } + } + + if (IS_DIRTY(BLEND_STATE)) { + const uint32_t num_dwords = GENX(BLEND_STATE_length) + + GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; + hw_state->blend.state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + num_dwords * 4, + 64); + + uint32_t *dws = hw_state->blend.state.map; + + struct GENX(BLEND_STATE) blend_state = { + INIT(blend, AlphaToCoverageEnable), + INIT(blend, AlphaToOneEnable), + INIT(blend, IndependentAlphaBlendEnable), + INIT(blend, ColorDitherEnable), + }; + GENX(BLEND_STATE_pack)(NULL, dws, &blend_state); + + /* Jump to blend entries. */ + dws += GENX(BLEND_STATE_length); + for (uint32_t i = 0; i < MAX_RTS; i++) { + struct GENX(BLEND_STATE_ENTRY) entry = { + INIT(blend.rts[i], WriteDisableAlpha), + INIT(blend.rts[i], WriteDisableRed), + INIT(blend.rts[i], WriteDisableGreen), + INIT(blend.rts[i], WriteDisableBlue), + INIT(blend.rts[i], LogicOpFunction), + INIT(blend.rts[i], LogicOpEnable), + INIT(blend.rts[i], ColorBufferBlendEnable), + INIT(blend.rts[i], ColorClampRange), +#if GFX_VER >= 30 + INIT(blend.rts[i], SimpleFloatBlendEnable), +#endif + INIT(blend.rts[i], PreBlendColorClampEnable), + INIT(blend.rts[i], PostBlendColorClampEnable), + INIT(blend.rts[i], SourceBlendFactor), + INIT(blend.rts[i], DestinationBlendFactor), + INIT(blend.rts[i], ColorBlendFunction), + INIT(blend.rts[i], SourceAlphaBlendFactor), + INIT(blend.rts[i], DestinationAlphaBlendFactor), + INIT(blend.rts[i], AlphaBlendFunction), + }; + + GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry); + dws += GENX(BLEND_STATE_ENTRY_length); + } + + anv_gfx_pack(blend_state, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { + bsp.BlendStatePointer = hw_state->blend.state.offset; + bsp.BlendStatePointerValid = true; + } + } + +#if GFX_VERx10 >= 125 + if (device->vk.enabled_features.meshShader) { + if (IS_DIRTY(MESH_CONTROL)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_MESH)) { + anv_gfx_copy_protected(mesh_control, + GENX(3DSTATE_MESH_CONTROL), + pipeline->final.mesh_control); + } else { + anv_gfx_pack(mesh_control, GENX(3DSTATE_MESH_CONTROL), mc); + } + } + + if (IS_DIRTY(TASK_CONTROL)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_TASK)) { + anv_gfx_copy_protected(task_control, GENX(3DSTATE_TASK_CONTROL), + pipeline->final.task_control); + } else { + anv_gfx_pack(task_control, GENX(3DSTATE_TASK_CONTROL), tc); + } + } + + if (IS_DIRTY(MESH_SHADER)) { + anv_gfx_copy(mesh_shader, GENX(3DSTATE_MESH_SHADER), + pipeline->final.mesh_shader); + } + + if (IS_DIRTY(MESH_DISTRIB)) { + anv_gfx_copy(mesh_distrib, GENX(3DSTATE_MESH_DISTRIB), + pipeline->final.mesh_distrib); + } + + if (IS_DIRTY(CLIP_MESH)) { + anv_gfx_copy(clip_mesh, GENX(3DSTATE_CLIP_MESH), + pipeline->final.clip_mesh); + } + + if (IS_DIRTY(TASK_SHADER)) { + anv_gfx_copy(task_shader, GENX(3DSTATE_TASK_SHADER), + pipeline->final.task_shader); + } + + if (IS_DIRTY(TASK_REDISTRIB)) { + anv_gfx_copy(task_redistrib, GENX(3DSTATE_TASK_REDISTRIB), + pipeline->final.task_redistrib); + } + } +#endif /* GFX_VERx10 >= 125 */ + + if (IS_DIRTY(VS)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_VERTEX)) { + anv_gfx_copy_protected(vs, GENX(3DSTATE_VS), pipeline->final.vs); + } else { + anv_gfx_pack(vs, GENX(3DSTATE_VS), vs); + } + } + + if (IS_DIRTY(HS)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_CTRL)) { + anv_gfx_copy_protected(hs, GENX(3DSTATE_HS), pipeline->final.hs); + } else { + anv_gfx_pack(hs, GENX(3DSTATE_HS), hs); + } + } + + if (IS_DIRTY(DS)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) { + anv_gfx_copy_protected(ds, GENX(3DSTATE_DS), pipeline->final.ds); + } else { + anv_gfx_pack(ds, GENX(3DSTATE_DS), ds); + } + } + + if (IS_DIRTY(GS)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_GEOMETRY)) { + anv_gfx_pack_merge_protected(gs, GENX(3DSTATE_GS), + pipeline->partial.gs, gs) { + SET(gs, gs, ReorderMode); + } + } else { + anv_gfx_pack(gs, GENX(3DSTATE_GS), gs); + } + } + + if (IS_DIRTY(PS)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT)) { + anv_gfx_pack_merge_protected(ps, GENX(3DSTATE_PS), + pipeline->partial.ps, ps) { + SET(ps, ps, KernelStartPointer0); + SET(ps, ps, KernelStartPointer1); + SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0); + SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1); + +#if GFX_VER < 20 + SET(ps, ps, KernelStartPointer2); + SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2); + + SET(ps, ps, _8PixelDispatchEnable); + SET(ps, ps, _16PixelDispatchEnable); + SET(ps, ps, _32PixelDispatchEnable); +#else + SET(ps, ps, Kernel0Enable); + SET(ps, ps, Kernel1Enable); + SET(ps, ps, Kernel0SIMDWidth); + SET(ps, ps, Kernel1SIMDWidth); + SET(ps, ps, Kernel0PolyPackingPolicy); + SET(ps, ps, Kernel0MaximumPolysperThread); +#endif + SET(ps, ps, PositionXYOffsetSelect); + } + } else { + anv_gfx_pack(ps, GENX(3DSTATE_PS), ps); + } + } + + if (IS_DIRTY(PS_EXTRA)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT)) { + anv_gfx_pack_merge(ps_extra, GENX(3DSTATE_PS_EXTRA), + pipeline->partial.ps_extra, pse) { + SET(pse, ps_extra, PixelShaderHasUAV); + SET(pse, ps_extra, PixelShaderIsPerSample); +#if GFX_VER >= 11 + SET(pse, ps_extra, PixelShaderIsPerCoarsePixel); +#endif + SET(pse, ps_extra, PixelShaderKillsPixel); + +#if GFX_VERx10 >= 125 + SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange); +#endif + } +#if INTEL_WA_18038825448_GFX_VER + /* Add a dependency if easier the shader needs it (because of runtime + * change through pre-rasterization shader) or if we notice a change. + */ + anv_gfx_pack_merge(ps_extra_dep, GENX(3DSTATE_PS_EXTRA), + pipeline->partial.ps_extra, pse) { + SET(pse, ps_extra, PixelShaderHasUAV); + SET(pse, ps_extra, PixelShaderIsPerSample); +#if GFX_VER >= 11 + SET(pse, ps_extra, PixelShaderIsPerCoarsePixel); +#endif + SET(pse, ps_extra, PixelShaderKillsPixel); + +#if GFX_VERx10 >= 125 && INTEL_WA_18038825448_GFX_VER + pse.EnablePSDependencyOnCPsizeChange = true; +#endif + } +#endif /* INTEL_WA_18038825448_GFX_VER */ + } else { + anv_gfx_pack(ps_extra, GENX(3DSTATE_PS_EXTRA), ps_extra); + anv_gfx_pack(ps_extra_dep, GENX(3DSTATE_PS_EXTRA), ps_extra); + } + } + +#if GFX_VERx10 >= 125 + if (hw_state->use_tbimr && IS_DIRTY(TBIMR_TILE_PASS_INFO)) { + anv_gfx_pack(tbimr, GENX(3DSTATE_TBIMR_TILE_PASS_INFO), tbimr) { + SET(tbimr, tbimr, TileRectangleHeight); + SET(tbimr, tbimr, TileRectangleWidth); + SET(tbimr, tbimr, VerticalTileCount); + SET(tbimr, tbimr, HorizontalTileCount); + SET(tbimr, tbimr, TBIMRBatchSize); + SET(tbimr, tbimr, TileBoxCheck); + } + } +#endif + +#undef IS_DIRTY +#undef GET +#undef SET + + BITSET_OR(hw_state->emit_dirty, hw_state->emit_dirty, hw_state->pack_dirty); + BITSET_ZERO(hw_state->pack_dirty); +} + /** * This function takes the vulkan runtime values & dirty states and updates * the values in anv_gfx_dynamic_state, flagging HW instructions for @@ -2484,6 +3245,11 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->vk.level); vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state); + + cmd_buffer_repack_gfx_state(&cmd_buffer->state.gfx.dyn_state, + cmd_buffer, + &cmd_buffer->state.gfx, + anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline)); } static void @@ -2644,17 +3410,14 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) struct anv_push_constants *push_consts = &cmd_buffer->state.gfx.base.push_constants; struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state; - const bool protected = cmd_buffer->vk.pool->flags & - VK_COMMAND_POOL_CREATE_PROTECTED_BIT; #define DEBUG_SHADER_HASH(stage) do { \ if (unlikely( \ (instance->debug & ANV_DEBUG_SHADER_HASH) && \ - anv_pipeline_has_stage(pipeline, stage))) { \ + anv_gfx_has_stage(gfx, stage))) { \ mi_store(&b, \ mi_mem32(device->workaround_address), \ - mi_imm(pipeline->base.shaders[stage]-> \ - prog_data->source_hash)); \ + mi_imm(gfx->shaders[stage]->prog_data->source_hash)); \ } \ } while (0) @@ -2668,32 +3431,34 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) /* Will be emitted in front of every draw instead */ if (intel_needs_workaround(device->info, 16011107343) && anv_cmd_buffer_has_gfx_stage(cmd_buffer, MESA_SHADER_TESS_CTRL)) - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_HS); #endif #if INTEL_WA_22018402687_GFX_VER /* Will be emitted in front of every draw instead */ if (intel_needs_workaround(device->info, 22018402687) && anv_cmd_buffer_has_gfx_stage(cmd_buffer, MESA_SHADER_TESS_EVAL)) - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_DS); #endif +#define IS_DIRTY(name) BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_##name) + /* * Values provided by push constants */ - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TCS_INPUT_VERTICES)) { + if (IS_DIRTY(TCS_INPUT_VERTICES)) { push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points; cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; gfx->base.push_constants_data_dirty = true; } #if INTEL_WA_18019110168_GFX_VER - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_PROVOKING_VERTEX)) + if (IS_DIRTY(MESH_PROVOKING_VERTEX)) cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_MESH_BIT_EXT; #endif - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) { + if (IS_DIRTY(FS_MSAA_FLAGS)) { push_consts->gfx.fs_msaa_flags = hw_state->fs_msaa_flags; const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx); @@ -2707,20 +3472,25 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) gfx->base.push_constants_data_dirty = true; } -#define INIT(category, name) \ - .name = hw_state->category.name -#define SET(s, category, name) \ - s.name = hw_state->category.name -#define SET_ARRAY(s, category, name) \ - do { \ - assert(sizeof(s.name) == \ - sizeof(hw_state->category.name)); \ - memcpy(&s.name, \ - &hw_state->category.name, \ - sizeof(s.name)); \ +#define anv_batch_emit_gfx(batch, cmd, name) ({ \ + void *__dst = anv_batch_emit_dwords( \ + batch, __anv_cmd_length(cmd)); \ + memcpy(__dst, hw_state->packed.name, \ + 4 * __anv_cmd_length(cmd)); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED( \ + __dst, __anv_cmd_length(cmd) * 4)); \ + __dst; \ + }) +#define anv_batch_emit_gfx_variable(batch, name) do { \ + void *__dst = anv_batch_emit_dwords( \ + batch, hw_state->packed.name##_len); \ + memcpy(__dst, hw_state->packed.name, \ + 4 * hw_state->packed.name##_len); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED( \ + __dst, 4 * hw_state->packed.name##_len)); \ } while (0) - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) { + if (IS_DIRTY(URB)) { #if INTEL_NEEDS_WA_16014912113 if (genX(need_wa_16014912113)( &cmd_buffer->state.gfx.urb_cfg, &hw_state->urb_cfg)) @@ -2734,48 +3504,42 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) genX(emit_urb_setup)(batch, device, &hw_state->urb_cfg); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING)) - anv_batch_emit_pipeline_state(batch, pipeline, final.vf_sgvs_instancing); + if (IS_DIRTY(VF_SGVS_INSTANCING)) + anv_batch_emit_gfx_variable(batch, vf_sgvs_instancing); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS)) - anv_batch_emit_pipeline_state(batch, pipeline, final.vf_sgvs); + if (IS_DIRTY(VF_SGVS)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_SGVS), vf_sgvs); #if GFX_VER >= 11 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2)) - anv_batch_emit_pipeline_state(batch, pipeline, final.vf_sgvs_2); + if (IS_DIRTY(VF_SGVS_2)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_SGVS_2), vf_sgvs_2); #endif if (device->physical->instance->vf_component_packing && - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_COMPONENT_PACKING)) { - anv_batch_emit_pipeline_state(batch, pipeline, - final.vf_component_packing); + IS_DIRTY(VF_COMPONENT_PACKING)) { + anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_COMPONENT_PACKING), + vf_component_packing); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) { + if (IS_DIRTY(VS)) { DEBUG_SHADER_HASH(MESA_SHADER_VERTEX); - anv_batch_emit_pipeline_state_protected(batch, pipeline, - final.vs, protected); + anv_batch_emit_gfx(batch, GENX(3DSTATE_VS), vs); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) { + if (IS_DIRTY(HS)) { DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL); - anv_batch_emit_pipeline_state_protected(batch, pipeline, - final.hs, protected); + anv_batch_emit_gfx(batch, GENX(3DSTATE_HS), hs); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) { + if (IS_DIRTY(DS)) { DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL); - anv_batch_emit_pipeline_state_protected(batch, pipeline, - final.ds, protected); + anv_batch_emit_gfx(batch, GENX(3DSTATE_DS), ds); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS)) { - anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vfs) { - vfs.StatisticsEnable = true; - } - } + if (IS_DIRTY(VF_STATISTICS)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_STATISTICS), vfs); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) { + if (IS_DIRTY(SO_DECL_LIST)) { /* Wa_16011773973: * If SOL is enabled and SO_DECL state has to be programmed, * 1. Send 3D State SOL state with SOL disabled @@ -2786,8 +3550,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) pipeline->uses_xfb) anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so); - anv_batch_emit_pipeline_state(batch, pipeline, - final.so_decl_list); + anv_batch_emit_gfx_variable(batch, so_decl_list); #if GFX_VER >= 11 && GFX_VER < 20 /* ICL PRMs, Volume 2a - Command Reference: Instructions, @@ -2805,317 +3568,95 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) } #if GFX_VERx10 >= 125 - if (device->vk.enabled_extensions.EXT_mesh_shader) { - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL)) { - anv_batch_emit_pipeline_state_protected(batch, pipeline, - final.mesh_control, protected); - } + if (device->vk.enabled_features.meshShader) { + if (IS_DIRTY(MESH_CONTROL)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_MESH_CONTROL), mesh_control); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER)) - anv_batch_emit_pipeline_state(batch, pipeline, final.mesh_shader); + if (IS_DIRTY(MESH_SHADER)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_MESH_SHADER), mesh_shader); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB)) - anv_batch_emit_pipeline_state(batch, pipeline, final.mesh_distrib); + if (IS_DIRTY(MESH_DISTRIB)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_MESH_DISTRIB), mesh_distrib); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL)) { - anv_batch_emit_pipeline_state_protected(batch, pipeline, - final.task_control, protected); - } + if (IS_DIRTY(TASK_CONTROL)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_TASK_CONTROL), task_control); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER)) - anv_batch_emit_pipeline_state(batch, pipeline, final.task_shader); + if (IS_DIRTY(TASK_SHADER)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_TASK_SHADER), task_shader); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB)) - anv_batch_emit_pipeline_state(batch, pipeline, final.task_redistrib); + if (IS_DIRTY(TASK_REDISTRIB)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_TASK_REDISTRIB), task_redistrib); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH)) { - anv_batch_emit(batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) { - SET(sbe_mesh, sbe_mesh, PerVertexURBEntryOutputReadOffset); - SET(sbe_mesh, sbe_mesh, PerVertexURBEntryOutputReadLength); - SET(sbe_mesh, sbe_mesh, PerPrimitiveURBEntryOutputReadOffset); - SET(sbe_mesh, sbe_mesh, PerPrimitiveURBEntryOutputReadLength); - } - } + if (IS_DIRTY(SBE_MESH)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_SBE_MESH), sbe_mesh); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH)) - anv_batch_emit_pipeline_state(batch, pipeline, final.clip_mesh); - } else + if (IS_DIRTY(CLIP_MESH)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_CLIP_MESH), clip_mesh); + } #endif - { - assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) && - !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH)); - } - /* Now the potentially dynamic instructions */ + if (IS_DIRTY(SBE)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_SBE), sbe); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE)) { - anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) { - for (unsigned i = 0; i < 32; i++) - sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; - sbe.ForceVertexURBEntryReadOffset = true; - sbe.ForceVertexURBEntryReadLength = true; + if (IS_DIRTY(SBE_SWIZ)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_SBE_SWIZ), sbe_swiz); - SET(sbe, sbe, AttributeSwizzleEnable); - SET(sbe, sbe, PointSpriteTextureCoordinateEnable); - SET(sbe, sbe, PointSpriteTextureCoordinateOrigin); - SET(sbe, sbe, NumberofSFOutputAttributes); - SET(sbe, sbe, ConstantInterpolationEnable); - SET(sbe, sbe, VertexURBEntryReadOffset); - SET(sbe, sbe, VertexURBEntryReadLength); -#if GFX_VER >= 20 - SET(sbe, sbe, VertexAttributesBypass); -#endif - SET(sbe, sbe, PrimitiveIDOverrideAttributeSelect); - SET(sbe, sbe, PrimitiveIDOverrideComponentX); - SET(sbe, sbe, PrimitiveIDOverrideComponentY); - SET(sbe, sbe, PrimitiveIDOverrideComponentZ); - SET(sbe, sbe, PrimitiveIDOverrideComponentW); - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ)) { - anv_batch_emit(batch, GENX(3DSTATE_SBE_SWIZ), sbe_swiz) { - for (unsigned i = 0; i < 16; i++) - SET(sbe_swiz, sbe_swiz, Attribute[i].SourceAttribute); - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) { + if (IS_DIRTY(PS)) { DEBUG_SHADER_HASH(MESA_SHADER_FRAGMENT); - anv_batch_emit_merge_protected(batch, GENX(3DSTATE_PS), - pipeline, partial.ps, ps, protected) { - SET(ps, ps, KernelStartPointer0); - SET(ps, ps, KernelStartPointer1); - SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0); - SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1); - -#if GFX_VER < 20 - SET(ps, ps, KernelStartPointer2); - SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2); - - SET(ps, ps, _8PixelDispatchEnable); - SET(ps, ps, _16PixelDispatchEnable); - SET(ps, ps, _32PixelDispatchEnable); -#else - SET(ps, ps, Kernel0Enable); - SET(ps, ps, Kernel1Enable); - SET(ps, ps, Kernel0SIMDWidth); - SET(ps, ps, Kernel1SIMDWidth); - SET(ps, ps, Kernel0PolyPackingPolicy); - SET(ps, ps, Kernel0MaximumPolysperThread); -#endif - SET(ps, ps, PositionXYOffsetSelect); - } + anv_batch_emit_gfx(batch, GENX(3DSTATE_PS), ps); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA) || - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE)) { - anv_batch_emit_merge(batch, GENX(3DSTATE_PS_EXTRA), - pipeline, partial.ps_extra, pse) { - SET(pse, ps_extra, PixelShaderHasUAV); - SET(pse, ps_extra, PixelShaderIsPerSample); -#if GFX_VER >= 11 - SET(pse, ps_extra, PixelShaderIsPerCoarsePixel); -#endif - SET(pse, ps_extra, PixelShaderKillsPixel); - #if INTEL_WA_18038825448_GFX_VER - /* Add a dependency if easier the shader needs it (because of runtime - * change through pre-rasterization shader) or if we notice a change. - */ - pse.EnablePSDependencyOnCPsizeChange = - hw_state->ps_extra.EnablePSDependencyOnCPsizeChange || - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE); -#elif GFX_VERx10 >= 125 - SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange); -#endif - } + if (IS_DIRTY(PS_EXTRA) || IS_DIRTY(CPS)) { + if (IS_DIRTY(CPS)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_PS_EXTRA), ps_extra_dep); + else + anv_batch_emit_gfx(batch, GENX(3DSTATE_PS_EXTRA), ps_extra); } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) { - anv_batch_emit(batch, GENX(3DSTATE_CLIP), clip) { - clip.ClipEnable = true; - clip.StatisticsEnable = true; - clip.EarlyCullEnable = true; - clip.GuardbandClipTestEnable = true; - - clip.VertexSubPixelPrecisionSelect = _8Bit; - clip.ClipMode = CLIPMODE_NORMAL; - - clip.MinimumPointWidth = 0.125; - clip.MaximumPointWidth = 255.875; - - SET(clip, clip, APIMode); - SET(clip, clip, ViewportXYClipTestEnable); - SET(clip, clip, TriangleStripListProvokingVertexSelect); - SET(clip, clip, LineStripListProvokingVertexSelect); - SET(clip, clip, TriangleFanProvokingVertexSelect); -#if GFX_VERx10 >= 200 - SET(clip, clip, TriangleStripOddProvokingVertexSelect); +#else + if (IS_DIRTY(PS_EXTRA)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_PS_EXTRA), ps_extra); #endif - SET(clip, clip, MaximumVPIndex); - SET(clip, clip, ForceZeroRTAIndexEnable); - SET(clip, clip, NonPerspectiveBarycentricEnable); - } - } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) { + if (IS_DIRTY(CLIP)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_CLIP), clip); + + if (IS_DIRTY(STREAMOUT)) { genX(streamout_prologue)(cmd_buffer, gfx); - - anv_batch_emit_merge(batch, GENX(3DSTATE_STREAMOUT), - pipeline, partial.so, so) { - SET(so, so, RenderingDisable); - SET(so, so, RenderStreamSelect); - SET(so, so, ReorderMode); - SET(so, so, ForceRendering); - } + anv_batch_emit_gfx(batch, GENX(3DSTATE_STREAMOUT), so); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) { - struct anv_state sf_clip_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - hw_state->vp_sf_clip.count * 64, 64); + if (IS_DIRTY(VIEWPORT_SF_CLIP)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), sf_clip); - for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) { - struct GENX(SF_CLIP_VIEWPORT) sfv = { - INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00), - INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11), - INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22), - INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30), - INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31), - INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32), - INIT(vp_sf_clip.elem[i], XMinClipGuardband), - INIT(vp_sf_clip.elem[i], XMaxClipGuardband), - INIT(vp_sf_clip.elem[i], YMinClipGuardband), - INIT(vp_sf_clip.elem[i], YMaxClipGuardband), - INIT(vp_sf_clip.elem[i], XMinViewPort), - INIT(vp_sf_clip.elem[i], XMaxViewPort), - INIT(vp_sf_clip.elem[i], YMinViewPort), - INIT(vp_sf_clip.elem[i], YMaxViewPort), - }; - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv); - } - - anv_batch_emit(batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { - clip.SFClipViewportPointer = sf_clip_state.offset; - } - } - - /* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming - * 3DSTATE_VIEWPORT_STATE_POINTERS_CC : - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647 - */ - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) || - (GFX_VER == 9 && - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) { - hw_state->vp_cc.state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - hw_state->vp_cc.count * 8, 32); - - for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) { - struct GENX(CC_VIEWPORT) cc_viewport = { - INIT(vp_cc.elem[i], MinimumDepth), - INIT(vp_cc.elem[i], MaximumDepth), - }; - GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8, - &cc_viewport); - } - - /* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC below - */ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR); - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) { - anv_batch_emit(batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { - cc.CCViewportPointer = hw_state->vp_cc.state.offset; - } + if (IS_DIRTY(VIEWPORT_CC)) { + anv_batch_emit_gfx(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc_viewport); cmd_buffer->state.gfx.viewport_set = true; } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) { - /* Wa_1409725701: - * - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements. The location of first - * element of the array, as specified by Pointer to SCISSOR_RECT, - * should be aligned to a 64-byte boundary. - */ - struct anv_state scissor_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - hw_state->scissor.count * 8, 64); + if (IS_DIRTY(SCISSOR)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), scissor); - for (uint32_t i = 0; i < hw_state->scissor.count; i++) { - struct GENX(SCISSOR_RECT) scissor = { - INIT(scissor.elem[i], ScissorRectangleYMin), - INIT(scissor.elem[i], ScissorRectangleXMin), - INIT(scissor.elem[i], ScissorRectangleYMax), - INIT(scissor.elem[i], ScissorRectangleXMax), - }; - GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor); - } + if (IS_DIRTY(VF_TOPOLOGY)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_TOPOLOGY), vft); - anv_batch_emit(batch, - GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) { - ssp.ScissorRectPointer = scissor_state.offset; - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) { - anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), vft) { - SET(vft, vft, PrimitiveTopologyType); - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) { + if (IS_DIRTY(VERTEX_INPUT)) { genX(batch_emit_pipeline_vertex_input)(batch, device, pipeline, dyn->vi); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) { - anv_batch_emit_merge(batch, GENX(3DSTATE_TE), - pipeline, partial.te, te) { - SET(te, te, OutputTopology); -#if GFX_VERx10 >= 125 - SET(te, te, TessellationDistributionMode); -#endif - } - } + if (IS_DIRTY(TE)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_TE), te); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) { + if (IS_DIRTY(GS)) { DEBUG_SHADER_HASH(MESA_SHADER_GEOMETRY); - anv_batch_emit_merge_protected(batch, GENX(3DSTATE_GS), - pipeline, partial.gs, gs, protected) { - SET(gs, gs, ReorderMode); - } + anv_batch_emit_gfx(batch, GENX(3DSTATE_GS), gs); } +#if GFX_VER >= 11 + if (IS_DIRTY(CPS)) { #if GFX_VER >= 30 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_PIXEL)) { - anv_batch_emit(batch, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) { - coarse_pixel.DisableCPSPointers = true; - SET(coarse_pixel, coarse_pixel, CPSizeX); - SET(coarse_pixel, coarse_pixel, CPSizeY); - SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode); - SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode); - } - } -#else - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) { -#if GFX_VER == 11 - anv_batch_emit(batch, GENX(3DSTATE_CPS), cps) { - SET(cps, cps, CoarsePixelShadingMode); - SET(cps, cps, MinCPSizeX); - SET(cps, cps, MinCPSizeY); - } + anv_batch_emit_gfx(batch, GENX(3DSTATE_COARSE_PIXEL), cps); #elif GFX_VER >= 12 /* TODO: we can optimize this flush in the following cases: * @@ -3134,160 +3675,38 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) pc.PSDSyncEnable = true; #endif } - - anv_batch_emit(batch, GENX(3DSTATE_CPS_POINTERS), cps) { - SET(cps, cps, CoarsePixelShadingStateArrayPointer); - } + anv_batch_emit_gfx(batch, GENX(3DSTATE_CPS_POINTERS), cps); +#else + anv_batch_emit_gfx(batch, GENX(3DSTATE_CPS), cps); #endif } -#endif /* GFX_VER >= 30 */ +#endif /* GFX_VER >= 11 */ - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) { - anv_batch_emit(batch, GENX(3DSTATE_SF), sf) { - /* Fixed values */ - sf.ViewportTransformEnable = true; - sf.StatisticsEnable = true; - sf.VertexSubPixelPrecisionSelect = _8Bit; - sf.AALineDistanceMode = true; - sf.PointWidth = 1.0; + if (IS_DIRTY(SF)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_SF), sf); + + if (IS_DIRTY(RASTER)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_RASTER), raster); + + if (IS_DIRTY(MULTISAMPLE)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_MULTISAMPLE), ms); + + if (IS_DIRTY(CC_STATE)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc_state); + + if (IS_DIRTY(SAMPLE_MASK)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_SAMPLE_MASK), sm); + + if (IS_DIRTY(WM_DEPTH_STENCIL)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm_ds); #if GFX_VER >= 12 - SET(sf, sf, DerefBlockSize); -#endif - SET(sf, sf, PointWidthSource); - SET(sf, sf, LineWidth); - SET(sf, sf, TriangleStripListProvokingVertexSelect); - SET(sf, sf, LineStripListProvokingVertexSelect); - SET(sf, sf, TriangleFanProvokingVertexSelect); -#if GFX_VERx10 >= 200 - SET(sf, sf, TriangleStripOddProvokingVertexSelect); -#endif - SET(sf, sf, LegacyGlobalDepthBiasEnable); - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) { - anv_batch_emit(batch, GENX(3DSTATE_RASTER), raster) { - /* For details on 3DSTATE_RASTER multisample state, see the BSpec - * table "Multisample Modes State". - * - * NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix - * computations. If we ever set this bit to a different value, they - * will need to be updated accordingly. - */ - raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0; - raster.ForceMultisampling = false; - raster.ScissorRectangleEnable = true; - - SET(raster, raster, APIMode); - SET(raster, raster, DXMultisampleRasterizationEnable); - SET(raster, raster, AntialiasingEnable); - SET(raster, raster, CullMode); - SET(raster, raster, FrontWinding); - SET(raster, raster, GlobalDepthOffsetEnableSolid); - SET(raster, raster, GlobalDepthOffsetEnableWireframe); - SET(raster, raster, GlobalDepthOffsetEnablePoint); - SET(raster, raster, GlobalDepthOffsetConstant); - SET(raster, raster, GlobalDepthOffsetScale); - SET(raster, raster, GlobalDepthOffsetClamp); - SET(raster, raster, FrontFaceFillMode); - SET(raster, raster, BackFaceFillMode); - SET(raster, raster, ViewportZFarClipTestEnable); - SET(raster, raster, ViewportZNearClipTestEnable); - SET(raster, raster, ConservativeRasterizationEnable); -#if GFX_VERx10 >= 200 - SET(raster, raster, LegacyBaryAssignmentDisable); -#endif - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE)) { - anv_batch_emit(batch, GENX(3DSTATE_MULTISAMPLE), ms) { - ms.PixelLocation = CENTER; - - /* The PRM says that this bit is valid only for DX9: - * - * SW can choose to set this bit only for DX9 API. DX10/OGL API's - * should not have any effect by setting or not setting this bit. - */ - ms.PixelPositionOffsetEnable = false; - - SET(ms, ms, NumberofMultisamples); - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) { - hw_state->cc.state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GENX(COLOR_CALC_STATE_length) * 4, - 64); - struct GENX(COLOR_CALC_STATE) cc = { - INIT(cc, BlendConstantColorRed), - INIT(cc, BlendConstantColorGreen), - INIT(cc, BlendConstantColorBlue), - INIT(cc, BlendConstantColorAlpha), - }; - GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc); - - /* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below - */ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR); - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) { - anv_batch_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { - ccp.ColorCalcStatePointer = hw_state->cc.state.offset; - ccp.ColorCalcStatePointerValid = true; - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) { - anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_MASK), sm) { - SET(sm, sm, SampleMask); - } - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) { - anv_batch_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) { - SET(ds, ds, DoubleSidedStencilEnable); - SET(ds, ds, StencilTestMask); - SET(ds, ds, StencilWriteMask); - SET(ds, ds, BackfaceStencilTestMask); - SET(ds, ds, BackfaceStencilWriteMask); - SET(ds, ds, StencilReferenceValue); - SET(ds, ds, BackfaceStencilReferenceValue); - SET(ds, ds, DepthTestEnable); - SET(ds, ds, DepthBufferWriteEnable); - SET(ds, ds, DepthTestFunction); - SET(ds, ds, StencilTestEnable); - SET(ds, ds, StencilBufferWriteEnable); - SET(ds, ds, StencilFailOp); - SET(ds, ds, StencilPassDepthPassOp); - SET(ds, ds, StencilPassDepthFailOp); - SET(ds, ds, StencilTestFunction); - SET(ds, ds, BackfaceStencilFailOp); - SET(ds, ds, BackfaceStencilPassDepthPassOp); - SET(ds, ds, BackfaceStencilPassDepthFailOp); - SET(ds, ds, BackfaceStencilTestFunction); - } - } - -#if GFX_VER >= 12 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) { - anv_batch_emit(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) { - SET(db, db, DepthBoundsTestEnable); - SET(db, db, DepthBoundsTestMinValue); - SET(db, db, DepthBoundsTestMaxValue); - } - } + if (IS_DIRTY(DEPTH_BOUNDS)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_DEPTH_BOUNDS), db); #endif - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) { - anv_batch_emit(batch, GENX(3DSTATE_LINE_STIPPLE), ls) { - SET(ls, ls, LineStipplePattern); - SET(ls, ls, LineStippleInverseRepeatCount); - SET(ls, ls, LineStippleRepeatCount); - } + if (IS_DIRTY(LINE_STIPPLE)) { + anv_batch_emit_gfx(batch, GENX(3DSTATE_LINE_STIPPLE), ls); #if GFX_VER >= 11 /* ICL PRMs, Volume 2a - Command Reference: Instructions, * 3DSTATE_LINE_STIPPLE: @@ -3301,157 +3720,39 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) #endif } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) { - anv_batch_emit(batch, GENX(3DSTATE_VF), vf) { -#if GFX_VERx10 >= 125 - vf.GeometryDistributionEnable = - device->physical->instance->enable_vf_distribution; -#endif - vf.ComponentPackingEnable = - device->physical->instance->vf_component_packing; - SET(vf, vf, IndexedDrawCutIndexEnable); - SET(vf, vf, CutIndex); - } - } + if (IS_DIRTY(VF)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VF), vf); #if GFX_VER >= 12 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION)) { - anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) { - SET(pr, pr, ReplicaMask); - SET(pr, pr, ReplicationCount); - SET_ARRAY(pr, pr, RTAIOffset); - } - } + if (IS_DIRTY(PRIMITIVE_REPLICATION)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr); #endif - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) { - anv_batch_emit(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { - ib.IndexFormat = vk_to_intel_index_type(gfx->index_type); - ib.MOCS = gfx->index_addr == 0 ? - anv_mocs(cmd_buffer->device, NULL, ISL_SURF_USAGE_INDEX_BUFFER_BIT) : - gfx->index_mocs; -#if GFX_VER >= 12 - ib.L3BypassDisable = true; -#endif - ib.BufferStartingAddress = anv_address_from_u64(gfx->index_addr); - ib.BufferSize = gfx->index_size; - } - } + if (IS_DIRTY(INDEX_BUFFER)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_INDEX_BUFFER), ib); #if GFX_VERx10 >= 125 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) { - anv_batch_emit(batch, GENX(3DSTATE_VFG), vfg) { - /* 192 vertices for TRILIST_ADJ */ - vfg.ListNBatchSizeScale = 0; - /* Batch size of 384 vertices */ - vfg.List3BatchSizeScale = 2; - /* Batch size of 128 vertices */ - vfg.List2BatchSizeScale = 1; - /* Batch size of 128 vertices */ - vfg.List1BatchSizeScale = 2; - /* Batch size of 256 vertices for STRIP topologies */ - vfg.StripBatchSizeScale = 3; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeScale = 1; - /* 192 control points for PATCHLIST_3 */ - vfg.PatchBatchSizeMultiplier = 31; - - SET(vfg, vfg, DistributionGranularity); - SET(vfg, vfg, DistributionMode); - SET(vfg, vfg, GranularityThresholdDisable); - SET(vfg, vfg, ListCutIndexEnable); - } - } + if (IS_DIRTY(VFG)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_VFG), vfg); #endif - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) { + if (IS_DIRTY(SAMPLE_PATTERN)) { genX(emit_sample_pattern)(batch, dyn->ms.sample_locations_enable ? dyn->ms.sample_locations : NULL); } - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) { - anv_batch_emit_merge(batch, GENX(3DSTATE_WM), - pipeline, partial.wm, wm) { - SET(wm, wm, LineStippleEnable); - SET(wm, wm, BarycentricInterpolationMode); - } - } + if (IS_DIRTY(WM)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_WM), wm); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) { - anv_batch_emit(batch, GENX(3DSTATE_PS_BLEND), blend) { - SET(blend, ps_blend, HasWriteableRT); - SET(blend, ps_blend, ColorBufferBlendEnable); - SET(blend, ps_blend, SourceAlphaBlendFactor); - SET(blend, ps_blend, DestinationAlphaBlendFactor); - SET(blend, ps_blend, SourceBlendFactor); - SET(blend, ps_blend, DestinationBlendFactor); - SET(blend, ps_blend, AlphaTestEnable); - SET(blend, ps_blend, IndependentAlphaBlendEnable); - SET(blend, ps_blend, AlphaToCoverageEnable); - } - } + if (IS_DIRTY(PS_BLEND)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_PS_BLEND), ps_blend); - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) { - const uint32_t num_dwords = GENX(BLEND_STATE_length) + - GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; - hw_state->blend.state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - num_dwords * 4, - 64); - - uint32_t *dws = hw_state->blend.state.map; - - struct GENX(BLEND_STATE) blend_state = { - INIT(blend, AlphaToCoverageEnable), - INIT(blend, AlphaToOneEnable), - INIT(blend, IndependentAlphaBlendEnable), - INIT(blend, ColorDitherEnable), - }; - GENX(BLEND_STATE_pack)(NULL, dws, &blend_state); - - /* Jump to blend entries. */ - dws += GENX(BLEND_STATE_length); - for (uint32_t i = 0; i < MAX_RTS; i++) { - struct GENX(BLEND_STATE_ENTRY) entry = { - INIT(blend.rts[i], WriteDisableAlpha), - INIT(blend.rts[i], WriteDisableRed), - INIT(blend.rts[i], WriteDisableGreen), - INIT(blend.rts[i], WriteDisableBlue), - INIT(blend.rts[i], LogicOpFunction), - INIT(blend.rts[i], LogicOpEnable), - INIT(blend.rts[i], ColorBufferBlendEnable), - INIT(blend.rts[i], ColorClampRange), -#if GFX_VER >= 30 - INIT(blend.rts[i], SimpleFloatBlendEnable), -#endif - INIT(blend.rts[i], PreBlendColorClampEnable), - INIT(blend.rts[i], PostBlendColorClampEnable), - INIT(blend.rts[i], SourceBlendFactor), - INIT(blend.rts[i], DestinationBlendFactor), - INIT(blend.rts[i], ColorBlendFunction), - INIT(blend.rts[i], SourceAlphaBlendFactor), - INIT(blend.rts[i], DestinationAlphaBlendFactor), - INIT(blend.rts[i], AlphaBlendFunction), - }; - - GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry); - dws += GENX(BLEND_STATE_ENTRY_length); - } - - /* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR); - } - - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) { - anv_batch_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { - bsp.BlendStatePointer = hw_state->blend.state.offset; - bsp.BlendStatePointerValid = true; - } - } + if (IS_DIRTY(BLEND_STATE)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), blend_state); #if INTEL_WA_18019816803_GFX_VER - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) { + if (IS_DIRTY(WA_18019816803)) { genx_batch_emit_pipe_control(batch, device->info, cmd_buffer->state.current_pipeline, ANV_PIPE_PSS_STALL_SYNC_BIT); @@ -3459,36 +3760,29 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer) #endif #if INTEL_WA_14018283232_GFX_VER - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_14018283232)) + if (IS_DIRTY(WA_14018283232)) genX(batch_emit_wa_14018283232)(batch); #endif #if GFX_VER == 9 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX)) + if (IS_DIRTY(PMA_FIX)) genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix); #endif #if GFX_VERx10 >= 125 - if (hw_state->use_tbimr && - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) { - anv_batch_emit(batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO), - tbimr) { - SET(tbimr, tbimr, TileRectangleHeight); - SET(tbimr, tbimr, TileRectangleWidth); - SET(tbimr, tbimr, VerticalTileCount); - SET(tbimr, tbimr, HorizontalTileCount); - SET(tbimr, tbimr, TBIMRBatchSize); - SET(tbimr, tbimr, TileBoxCheck); - } - } + if (hw_state->use_tbimr && IS_DIRTY(TBIMR_TILE_PASS_INFO)) + anv_batch_emit_gfx(batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO), tbimr); #endif +#undef anv_batch_emit_gfx +#undef anv_batch_emit_gfx_variable #undef INIT #undef SET #undef SET_ARRAY +#undef IS_DIRTY #undef DEBUG_SHADER_HASH - BITSET_ZERO(hw_state->dirty); + BITSET_ZERO(hw_state->emit_dirty); } /** @@ -3505,7 +3799,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state; if (INTEL_DEBUG(DEBUG_REEMIT)) { - BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty, + BITSET_OR(gfx->dyn_state.emit_dirty, + gfx->dyn_state.emit_dirty, device->gfx_dirty_state); } @@ -3522,17 +3817,17 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) * also takes care of Wa_14016820455 which requires SF_CLIP to be * reprogrammed whenever 3DSTATE_CLIP::ClipEnable is enabled. */ - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) || - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) { - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR); + if (BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) || + BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_CC)) { + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_CC); } /* Wa_16012775297 - Emit dummy VF statistics before each 3DSTATE_VF. */ #if INTEL_WA_16012775297_GFX_VER if (intel_needs_workaround(device->info, 16012775297) && - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); + BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_VF)) + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_STATISTICS); #endif /* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit @@ -3540,8 +3835,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) */ if (intel_needs_workaround(device->info, 16011773973) && pipeline->uses_xfb && - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) { - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); + BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_SO_DECL_LIST)) { + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT); } #if INTEL_WA_18038825448_GFX_VER @@ -3557,40 +3852,39 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781 */ #if GFX_VER == 11 - if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE); + if (BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_BLEND_STATE)) + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MULTISAMPLE); #endif /* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */ if (intel_needs_workaround(device->info, 18020335297) && - (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) || - BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) && + BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_CC) && cmd_buffer->state.gfx.viewport_set) { /* For mesh, we implement the WA using CS stall. This is for * simplicity and takes care of possible interaction with Wa_16014390852. */ - if (anv_pipeline_is_mesh(pipeline)) { + if (anv_gfx_has_stage(gfx, MESA_SHADER_MESH)) { genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT); } else { /* Mask off all instructions that we program. */ - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VFG); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VF); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_RASTER); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VF_STATISTICS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS_2); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_CLIP); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VERTEX_INPUT); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VF_TOPOLOGY); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE); - BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_VS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_GS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_HS); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_TE); + BITSET_CLEAR(hw_state->emit_dirty, ANV_GFX_STATE_DS); cmd_buffer_gfx_state_emission(cmd_buffer); @@ -3599,23 +3893,23 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) /* Dirty all emitted WA state to make sure that current real * state is restored. */ - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VFG); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_RASTER); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_STATISTICS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS_2); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_CLIP); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VERTEX_INPUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_TOPOLOGY); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_GS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_HS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_DS); } } diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index b665c511d4f..14f3828e8dc 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -333,33 +333,33 @@ genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state) genX(cmd_buffer_ensure_wa_14018283232)(state->cmd_buffer, false); #endif - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_URB); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_STATISTICS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_TOPOLOGY); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VERTEX_INPUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS); #if GFX_VER >= 11 - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS_2); #endif #if GFX_VER >= 12 - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); #endif - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SO_DECL_LIST); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SAMPLE_MASK); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MULTISAMPLE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SBE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_HS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_DS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_GS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS); if (state->cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) { - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MESH_CONTROL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TASK_CONTROL); } state->cmd_buffer->state.gfx.dirty |= diff --git a/src/intel/vulkan/genX_simple_shader.c b/src/intel/vulkan/genX_simple_shader.c index 3c3f95af6f4..fb39d3fce97 100644 --- a/src/intel/vulkan/genX_simple_shader.c +++ b/src/intel/vulkan/genX_simple_shader.c @@ -346,40 +346,40 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state) struct anv_gfx_dynamic_state *hw_state = &state->cmd_buffer->state.gfx.dyn_state; - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_URB); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_STATISTICS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_TOPOLOGY); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VERTEX_INPUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS); #if GFX_VER >= 11 - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VF_SGVS_2); #endif #if GFX_VER >= 12 - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION); #endif - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SF); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SBE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_BLEND); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VIEWPORT_CC); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_CLIP); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_RASTER); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SAMPLE_MASK); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MULTISAMPLE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_DEPTH_BOUNDS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WM); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SF); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_SBE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_VS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_HS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_DS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TE); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_GS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS_EXTRA); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_PS_BLEND); if (device->vk.enabled_extensions.EXT_mesh_shader) { - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL); - BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_MESH_CONTROL); + BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_TASK_CONTROL); } /* Update urb config after simple shader. */