anv: switch over to runtime pipelines

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>
Authored by Lionel Landwerlin on 2024-08-08 14:42:07 +03:00; committed by Marge Bot
parent 4d9dd5c3a2
commit e76ed91d3f
13 changed files with 697 additions and 529 deletions


@ -5,6 +5,8 @@
#include "anv_private.h"
#include "vk_common_entrypoints.h"
#include "compiler/nir/nir_builder.h"
static void
@ -293,7 +295,8 @@ astc_emu_flush_denorm_slice(struct anv_cmd_buffer *cmd_buffer,
set_writes);
VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE,
vk_common_CmdBindPipeline(cmd_buffer_,
VK_PIPELINE_BIND_POINT_COMPUTE,
astc_emu->pipeline);
VkPushConstantsInfoKHR push_info = {
@ -351,7 +354,9 @@ astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer,
return;
}
anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
vk_common_CmdBindPipeline(cmd_buffer_,
VK_PIPELINE_BIND_POINT_COMPUTE,
pipeline);
struct vk_texcompress_astc_write_descriptor_set writes;
vk_texcompress_astc_fill_write_descriptor_sets(astc_emu->texcompress,


@ -30,6 +30,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
/** \file anv_cmd_buffer.c
@ -435,17 +436,16 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
}
static void
anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
anv_cmd_buffer_set_rt_query_buffer(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipeline_state,
struct anv_pipeline *pipeline,
uint32_t ray_queries,
VkShaderStageFlags stages)
{
struct anv_device *device = cmd_buffer->device;
uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
uint64_t ray_shadow_size =
align64(brw_rt_ray_queries_shadow_stacks_size(device->info,
pipeline->ray_queries),
align64(brw_rt_ray_queries_shadow_stacks_size(device->info, ray_queries),
4096);
if (ray_shadow_size > 0 &&
(!cmd_buffer->state.ray_query_shadow_bo ||
@ -497,112 +497,6 @@ anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
pipeline_state->push_constants_data_dirty = true;
}
/**
* This function computes the changes between two pipelines and flags the
* dirty HW state appropriately.
*/
static void
anv_cmd_buffer_flush_pipeline_hw_state(struct anv_cmd_buffer *cmd_buffer,
struct anv_graphics_pipeline *old_pipeline,
struct anv_graphics_pipeline *new_pipeline)
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
#define diff_fix_state(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_pipeline == NULL || \
old_pipeline->name.len == new_pipeline->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \
(old_pipeline == NULL || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state(bit, name) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \
new_pipeline->name.len != 0 && \
(old_pipeline == NULL || \
old_pipeline->name.len != new_pipeline->name.len || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define assert_identical(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_pipeline == NULL || \
old_pipeline->name.len == new_pipeline->name.len); \
assert(old_pipeline == NULL || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) == 0); \
} while (0)
#define assert_empty(name) assert(new_pipeline->name.len == 0)
/* Compare all states, including partially packed ones; the dynamic part is
* left at 0 but the static part could still change.
*
* We avoid comparing protected packets as all the fields but the scratch
* surface are identical. We just need to select the right one at emission.
*/
diff_fix_state(VF_SGVS, final.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);
diff_fix_state(VF_COMPONENT_PACKING, final.vf_component_packing);
diff_fix_state(VS, final.vs);
diff_fix_state(HS, final.hs);
diff_fix_state(DS, final.ds);
diff_fix_state(WM, partial.wm);
diff_fix_state(STREAMOUT, partial.so);
diff_fix_state(GS, partial.gs);
diff_fix_state(TE, partial.te);
diff_fix_state(PS, partial.ps);
diff_fix_state(PS_EXTRA, partial.ps_extra);
if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
diff_fix_state(TASK_CONTROL, final.task_control);
diff_fix_state(TASK_SHADER, final.task_shader);
diff_fix_state(TASK_REDISTRIB, final.task_redistrib);
diff_fix_state(MESH_CONTROL, final.mesh_control);
diff_fix_state(MESH_SHADER, final.mesh_shader);
diff_fix_state(MESH_DISTRIB, final.mesh_distrib);
diff_fix_state(CLIP_MESH, final.clip_mesh);
} else {
assert_empty(final.task_control);
assert_empty(final.task_shader);
assert_empty(final.task_redistrib);
assert_empty(final.mesh_control);
assert_empty(final.mesh_shader);
assert_empty(final.mesh_distrib);
assert_empty(final.clip_mesh);
}
/* States that can vary in length */
diff_var_state(VF_SGVS_INSTANCING, final.vf_sgvs_instancing);
diff_var_state(SO_DECL_LIST, final.so_decl_list);
#undef diff_fix_state
#undef diff_var_state
#undef assert_identical
#undef assert_empty
/* We're not diffing the following:
* - anv_graphics_pipeline::vertex_input_data
* - anv_graphics_pipeline::final::vf_instancing
*
* since they are tracked by the runtime.
*/
}
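
The diffing above only re-emits a packed HW state when its dwords actually changed between the old and the new pipeline. A minimal standalone sketch of that idea, with hypothetical types rather than the anv ones:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical span of pre-packed dwords inside a pipeline/shader blob. */
struct packed_span {
   uint32_t offset; /* in dwords */
   uint32_t len;    /* in dwords */
};

/* Return true when the packed dwords differ between the previously bound
 * object and the new one (or when there was no previous object), i.e. when
 * the corresponding HW state needs to be re-emitted.
 */
static bool
needs_reemit(const uint32_t *old_data, const struct packed_span *old_span,
             const uint32_t *new_data, const struct packed_span *new_span)
{
   if (old_data == NULL)
      return true;

   /* Variable-length states may change size; fixed ones never do. */
   if (old_span->len != new_span->len)
      return true;

   return memcmp(&old_data[old_span->offset],
                 &new_data[new_span->offset],
                 4 * new_span->len) != 0;
}

int main(void)
{
   const uint32_t old_data[] = { 0x1, 0x2, 0x3 };
   const uint32_t new_data[] = { 0x1, 0x2, 0x4 };
   const struct packed_span span = { .offset = 0, .len = 3 };

   /* Last dword differs, so the state would be flagged dirty. */
   return needs_reemit(old_data, &span, new_data, &span) ? 0 : 1;
}
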
static enum anv_cmd_dirty_bits
get_pipeline_dirty_stages(struct anv_device *device,
struct anv_graphics_pipeline *old_pipeline,
@ -636,7 +530,7 @@ get_pipeline_dirty_stages(struct anv_device *device,
static void
update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
struct anv_shader_bin **shaders,
struct anv_shader ** const shaders,
uint32_t shader_count)
{
state->push_buffer_stages = 0;
@ -646,7 +540,7 @@ update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
if (shaders[i] == NULL)
continue;
VkShaderStageFlags stage = mesa_to_vk_shader_stage(shaders[i]->stage);
VkShaderStageFlags stage = mesa_to_vk_shader_stage(shaders[i]->vk.stage);
if (shaders[i]->push_desc_info.used_descriptors)
state->push_descriptor_stages |= stage;
@ -656,145 +550,6 @@ update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
}
}
void anv_CmdBindPipeline(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
struct anv_cmd_pipeline_state *state;
VkShaderStageFlags stages = 0;
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE: {
if (cmd_buffer->state.compute.base.pipeline == pipeline)
return;
struct anv_compute_pipeline *compute_pipeline =
anv_pipeline_to_compute(pipeline);
cmd_buffer->state.compute.shader = compute_pipeline->cs;
cmd_buffer->state.compute.pipeline_dirty = true;
set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
&compute_pipeline->cs->bind_map);
state = &cmd_buffer->state.compute.base;
stages = VK_SHADER_STAGE_COMPUTE_BIT;
update_push_descriptor_flags(state, &compute_pipeline->cs, 1);
break;
}
case VK_PIPELINE_BIND_POINT_GRAPHICS: {
struct anv_graphics_pipeline *new_pipeline =
anv_pipeline_to_graphics(pipeline);
/* Apply the non-dynamic state from the pipeline */
vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
&new_pipeline->dynamic_state);
if (cmd_buffer->state.gfx.base.pipeline == pipeline)
return;
struct anv_graphics_pipeline *old_pipeline =
cmd_buffer->state.gfx.base.pipeline == NULL ? NULL :
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
cmd_buffer->state.gfx.dirty |=
get_pipeline_dirty_stages(cmd_buffer->device,
old_pipeline, new_pipeline);
STATIC_ASSERT(sizeof(cmd_buffer->state.gfx.shaders) ==
sizeof(new_pipeline->base.shaders));
memcpy(cmd_buffer->state.gfx.shaders,
new_pipeline->base.shaders,
sizeof(cmd_buffer->state.gfx.shaders));
cmd_buffer->state.gfx.active_stages = pipeline->active_stages;
anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
set_dirty_for_bind_map(cmd_buffer, stage,
&new_pipeline->base.shaders[stage]->bind_map);
}
state = &cmd_buffer->state.gfx.base;
stages = new_pipeline->base.base.active_stages;
update_push_descriptor_flags(state,
new_pipeline->base.shaders,
ARRAY_SIZE(new_pipeline->base.shaders));
/* When the pipeline is using independent states and dynamic buffers,
* this will trigger an update of anv_push_constants::dynamic_base_index
* & anv_push_constants::dynamic_offsets.
*/
struct anv_push_constants *push =
&cmd_buffer->state.gfx.base.push_constants;
struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
bool modified = false;
for (uint32_t s = 0; s < layout->num_sets; s++) {
if (layout->set_layouts[s] == NULL)
continue;
assert(layout->dynamic_offset_start[s] < MAX_DYNAMIC_BUFFERS);
if (layout->set_layouts[s]->vk.dynamic_descriptor_count > 0 &&
(push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
layout->dynamic_offset_start[s]) {
push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
push->desc_surface_offsets[s] |= (layout->dynamic_offset_start[s] &
ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
modified = true;
}
}
if (modified) {
cmd_buffer->state.push_constants_dirty |= stages;
state->push_constants_data_dirty = true;
}
}
cmd_buffer->state.gfx.vs_source_hash = new_pipeline->vs_source_hash;
cmd_buffer->state.gfx.fs_source_hash = new_pipeline->fs_source_hash;
cmd_buffer->state.gfx.instance_multiplier = new_pipeline->instance_multiplier;
anv_cmd_buffer_flush_pipeline_hw_state(cmd_buffer, old_pipeline, new_pipeline);
break;
}
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
if (cmd_buffer->state.rt.base.pipeline == pipeline)
return;
cmd_buffer->state.rt.pipeline_dirty = true;
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
if (rt_pipeline->stack_size > 0) {
anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
rt_pipeline->stack_size);
}
state = &cmd_buffer->state.rt.base;
state->push_buffer_stages = pipeline->use_push_descriptor_buffer;
state->push_descriptor_stages = pipeline->use_push_descriptor_buffer;
state->push_descriptor_index = pipeline->layout.push_descriptor_set_index;
break;
}
default:
UNREACHABLE("invalid bind point");
break;
}
state->pipeline = pipeline;
if (pipeline->ray_queries > 0)
anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
}
static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
@ -1519,20 +1274,37 @@ void anv_CmdPushDescriptorSetWithTemplate2KHR(
NULL, NULL);
}
void anv_CmdSetRayTracingPipelineStackSizeKHR(
VkCommandBuffer commandBuffer,
uint32_t pipelineStackSize)
void
anv_cmd_buffer_set_rt_state(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize scratch_size,
uint32_t ray_queries)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
rt->scratch_size = MAX2(rt->scratch_size, scratch_size);
if (ray_queries > 0) {
anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &rt->base, ray_queries,
ANV_RT_STAGE_BITS);
}
}
void
anv_cmd_buffer_set_stack_size(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize stack_size)
{
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
uint32_t stack_ids_per_dss = 2048; /* TODO */
unsigned stack_size_log2 = util_logbase2_ceil(pipelineStackSize);
unsigned stack_size_log2 = util_logbase2_ceil(stack_size);
if (stack_size_log2 < 10)
stack_size_log2 = 10;
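
The clamp above rounds the requested stack size up to a power of two of at least 1KB. A small standalone sketch of that rounding, using a plain log2-ceil helper (Mesa's util_logbase2_ceil() behaves the same for non-zero inputs):

#include <stdint.h>
#include <stdio.h>

/* ceil(log2(x)) for x > 0 */
static unsigned
log2_ceil(uint32_t x)
{
   unsigned bits = 0;
   while ((1u << bits) < x)
      bits++;
   return bits;
}

int main(void)
{
   const uint32_t requested[] = { 600, 1024, 1500, 4096 };

   for (unsigned i = 0; i < 4; i++) {
      unsigned log2sz = log2_ceil(requested[i]);
      if (log2sz < 10)   /* never allocate less than 1KB per stack */
         log2sz = 10;
      printf("requested %u bytes -> %u bytes per stack\n",
             (unsigned)requested[i], 1u << log2sz);
   }
   return 0;
}
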
@ -1585,7 +1357,7 @@ anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->state.compute.base;
if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
state->pipeline = pipe_state->pipeline;
state->shader = &cmd_buffer->state.compute.shader->vk;
if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
state->descriptor_set[0] = pipe_state->descriptors[0];
@ -1614,11 +1386,11 @@ anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
if (state->pipeline) {
anv_CmdBindPipeline(cmd_buffer_, bind_point,
anv_pipeline_to_handle(state->pipeline));
if (state->shader) {
mesa_shader_stage stage = MESA_SHADER_COMPUTE;
anv_cmd_buffer_bind_shaders(&cmd_buffer->vk, 1, &stage, &state->shader);
} else {
pipe_state->pipeline = NULL;
cmd_buffer->state.compute.shader = NULL;
}
}
@ -1693,3 +1465,285 @@ anv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer,
anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
(commandBuffer, invocations_x, invocations_y, invocations_z);
}
static void
bind_compute_shader(struct anv_cmd_buffer *cmd_buffer,
struct anv_shader *shader)
{
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
cmd_buffer->state.compute.shader = shader;
if (shader == NULL)
return;
cmd_buffer->state.compute.pipeline_dirty = true;
set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE, &shader->bind_map);
update_push_descriptor_flags(&comp_state->base,
&cmd_buffer->state.compute.shader, 1);
if (shader->vk.ray_queries > 0) {
assert(cmd_buffer->device->info->verx10 >= 125);
anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &comp_state->base,
shader->vk.ray_queries,
VK_SHADER_STAGE_COMPUTE_BIT);
}
}
static void
bind_graphics_shaders(struct anv_cmd_buffer *cmd_buffer,
struct anv_shader *new_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT])
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
uint32_t ray_queries = 0;
static const enum anv_cmd_dirty_bits mesa_stage_to_dirty_bit[] = {
[MESA_SHADER_VERTEX] = ANV_CMD_DIRTY_VS,
[MESA_SHADER_TESS_CTRL] = ANV_CMD_DIRTY_HS,
[MESA_SHADER_TESS_EVAL] = ANV_CMD_DIRTY_DS,
[MESA_SHADER_GEOMETRY] = ANV_CMD_DIRTY_GS,
[MESA_SHADER_TASK] = ANV_CMD_DIRTY_TASK,
[MESA_SHADER_MESH] = ANV_CMD_DIRTY_MESH,
[MESA_SHADER_FRAGMENT] = ANV_CMD_DIRTY_PS,
};
gfx->active_stages = 0;
gfx->instance_multiplier = 0;
mesa_shader_stage new_streamout_stage = -1;
/* Find the last pre-rasterization stage */
for (uint32_t i = 0; i < ANV_GRAPHICS_SHADER_STAGE_COUNT; i++) {
mesa_shader_stage s = ANV_GRAPHICS_SHADER_STAGE_COUNT - i - 1;
if (new_shaders[s] == NULL)
continue;
assert(gfx->instance_multiplier == 0 ||
gfx->instance_multiplier == new_shaders[s]->instance_multiplier);
gfx->active_stages |= mesa_to_vk_shader_stage(s);
gfx->instance_multiplier = new_shaders[s]->instance_multiplier;
if (s == MESA_SHADER_FRAGMENT ||
s == MESA_SHADER_TASK ||
s == MESA_SHADER_TESS_CTRL)
continue;
new_streamout_stage = MAX2(new_streamout_stage, s);
}
for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
struct anv_shader *shader = new_shaders[s];
if (shader != NULL) {
gfx->active_stages |= mesa_to_vk_shader_stage(s);
ray_queries = MAX2(ray_queries, shader->vk.ray_queries);
if (gfx->shaders[s] != shader)
set_dirty_for_bind_map(cmd_buffer, s, &shader->bind_map);
}
if (gfx->shaders[s] != shader)
gfx->dirty |= mesa_stage_to_dirty_bit[s];
else
continue;
#define diff_fix_state(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(gfx->shaders[s] == NULL || \
gfx->shaders[s]->name.len == shader->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
(gfx->shaders[s] == NULL || \
memcmp(&gfx->shaders[s]->cmd_data[ \
gfx->shaders[s]->name.offset], \
&shader->cmd_data[ \
shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state(bit, name) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
shader->name.len != 0 && \
(gfx->shaders[s] == NULL || \
gfx->shaders[s]->name.len != shader->name.len || \
memcmp(&gfx->shaders[s]->cmd_data[ \
gfx->shaders[s]->name.offset], \
&shader->cmd_data[shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_fix_state_stage(bit, name, old_stage) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
gfx->shaders[old_stage]->name.len == shader->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
memcmp(&gfx->shaders[old_stage]->cmd_data[ \
gfx->shaders[old_stage]->name.offset], \
&shader->cmd_data[ \
shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state_stage(bit, name, old_stage) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
shader->name.len != 0 && \
(gfx->shaders[old_stage] == NULL || \
gfx->shaders[old_stage]->name.len != shader->name.len || \
memcmp(&gfx->shaders[old_stage]->cmd_data[ \
gfx->shaders[old_stage]->name.offset], \
&shader->cmd_data[shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
switch (s) {
case MESA_SHADER_VERTEX:
if (shader != NULL) {
diff_fix_state(VS, vs.vs);
diff_fix_state(VF_SGVS, vs.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, vs.vf_sgvs_2);
diff_fix_state(VF_COMPONENT_PACKING, vs.vf_component_packing);
diff_var_state(VF_SGVS_INSTANCING, vs.vf_sgvs_instancing);
gfx->vs_source_hash = shader->prog_data->source_hash;
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_VS);
}
break;
case MESA_SHADER_TESS_CTRL:
if (shader != NULL)
diff_fix_state(HS, hs.hs);
else
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_HS);
break;
case MESA_SHADER_TESS_EVAL:
if (shader != NULL) {
diff_fix_state(DS, ds.ds);
diff_fix_state(TE, ds.te);
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_DS);
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_TE);
}
break;
case MESA_SHADER_GEOMETRY:
if (shader != NULL)
diff_fix_state(GS, gs.gs);
else
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_GS);
break;
case MESA_SHADER_MESH:
if (shader != NULL) {
diff_fix_state(MESH_CONTROL, ms.control);
diff_fix_state(MESH_SHADER, ms.shader);
diff_fix_state(MESH_DISTRIB, ms.distrib);
diff_fix_state(CLIP_MESH, ms.clip);
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_MESH_CONTROL);
}
break;
case MESA_SHADER_TASK:
if (shader != NULL) {
diff_fix_state(TASK_CONTROL, ts.control);
diff_fix_state(TASK_SHADER, ts.shader);
diff_fix_state(TASK_REDISTRIB, ts.redistrib);
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_TASK_CONTROL);
}
break;
case MESA_SHADER_FRAGMENT:
if (shader != NULL) {
diff_fix_state(WM, ps.wm);
diff_fix_state(PS, ps.ps);
diff_fix_state(PS_EXTRA, ps.ps_extra);
gfx->fs_source_hash = shader->prog_data->source_hash;
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_PS_EXTRA);
}
break;
default:
UNREACHABLE("Invalid shader stage");
}
/* Only diff these fields on the streamout stage */
if (s == new_streamout_stage) {
diff_fix_state_stage(STREAMOUT, so, gfx->streamout_stage);
diff_var_state_stage(SO_DECL_LIST, so_decl_list, gfx->streamout_stage);
}
gfx->shaders[s] = shader;
}
gfx->streamout_stage = new_streamout_stage;
#undef diff_fix_state
#undef diff_var_state
#undef diff_fix_state_stage
#undef diff_var_state_stage
update_push_descriptor_flags(&gfx->base,
cmd_buffer->state.gfx.shaders,
ARRAY_SIZE(cmd_buffer->state.gfx.shaders));
if (ray_queries > 0) {
assert(cmd_buffer->device->info->verx10 >= 125);
anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &gfx->base, ray_queries,
cmd_buffer->state.gfx.active_stages);
}
}
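
The reverse scan at the top of bind_graphics_shaders() picks the last bound pre-rasterization geometry stage as the owner of the streamout state. A standalone sketch of that selection, with a hypothetical stage enum rather than mesa_shader_stage:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stage ordering, lowest to highest. */
enum stage {
   STAGE_VERTEX,
   STAGE_TESS_CTRL,
   STAGE_TESS_EVAL,
   STAGE_GEOMETRY,
   STAGE_TASK,
   STAGE_MESH,
   STAGE_FRAGMENT,
   STAGE_COUNT,
};

/* Return the last bound pre-rasterization stage that can own streamout
 * state, or -1 if none is bound.  Fragment, task and tessellation control
 * shaders never drive streamout.
 */
static int
find_streamout_stage(const bool bound[STAGE_COUNT])
{
   int result = -1;
   for (int s = STAGE_COUNT - 1; s >= 0; s--) {
      if (!bound[s])
         continue;
      if (s == STAGE_FRAGMENT || s == STAGE_TASK || s == STAGE_TESS_CTRL)
         continue;
      if (result < s)
         result = s;
   }
   return result;
}

int main(void)
{
   bool bound[STAGE_COUNT] = { false };
   bound[STAGE_VERTEX] = true;
   bound[STAGE_GEOMETRY] = true;
   bound[STAGE_FRAGMENT] = true;
   printf("streamout stage = %d (expect %d)\n",
          find_streamout_stage(bound), STAGE_GEOMETRY);
   return 0;
}
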
void
anv_cmd_buffer_bind_shaders(struct vk_command_buffer *vk_cmd_buffer,
uint32_t stage_count,
const mesa_shader_stage *stages,
struct vk_shader ** const vk_shaders)
{
struct anv_shader ** const shaders = (struct anv_shader ** const)vk_shaders;
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
/* Append any scratch surface used by the shaders */
for (uint32_t i = 0; i < stage_count; i++) {
if (shaders[i] != NULL) {
anv_reloc_list_append(cmd_buffer->batch.relocs,
&shaders[i]->relocs);
}
}
struct anv_shader *cs_shader = cmd_buffer->state.compute.shader;
struct anv_shader *gfx_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
memcpy(gfx_shaders, cmd_buffer->state.gfx.shaders, sizeof(gfx_shaders));
for (uint32_t i = 0; i < stage_count; i++) {
if (mesa_shader_stage_is_compute(stages[i]))
cs_shader = shaders[i];
else
gfx_shaders[stages[i]] = shaders[i];
}
if (cs_shader != cmd_buffer->state.compute.shader)
bind_compute_shader(cmd_buffer, cs_shader);
if (memcmp(gfx_shaders, cmd_buffer->state.gfx.shaders, sizeof(gfx_shaders)))
bind_graphics_shaders(cmd_buffer, gfx_shaders);
}
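
anv_cmd_buffer_bind_shaders() folds the incoming (stage, shader) pairs into the tracked compute slot and graphics array, and only takes the rebind paths when something actually changed. A standalone sketch of that dispatch, with hypothetical types:

#include <stdio.h>
#include <string.h>

#define GFX_STAGE_COUNT 7
#define STAGE_COMPUTE   GFX_STAGE_COUNT  /* compute tracked outside the array */

struct shader { int id; };               /* stand-in for the driver shader */

struct cmd_state {
   struct shader *cs;                    /* bound compute shader */
   struct shader *gfx[GFX_STAGE_COUNT];  /* bound graphics shaders per stage */
};

static void
bind_shaders(struct cmd_state *state, unsigned count,
             const unsigned *stages, struct shader **shaders)
{
   struct shader *new_cs = state->cs;
   struct shader *new_gfx[GFX_STAGE_COUNT];
   memcpy(new_gfx, state->gfx, sizeof(new_gfx));

   /* Fold the incoming binds into local copies of the tracked state. */
   for (unsigned i = 0; i < count; i++) {
      if (stages[i] == STAGE_COMPUTE)
         new_cs = shaders[i];
      else
         new_gfx[stages[i]] = shaders[i];
   }

   /* Only take the (expensive) rebind paths when something changed. */
   if (new_cs != state->cs) {
      state->cs = new_cs;
      printf("rebinding compute shader %d\n", new_cs ? new_cs->id : -1);
   }
   if (memcmp(new_gfx, state->gfx, sizeof(new_gfx)) != 0) {
      memcpy(state->gfx, new_gfx, sizeof(state->gfx));
      printf("rebinding graphics shaders\n");
   }
}

int main(void)
{
   struct cmd_state state = { 0 };
   struct shader vs = { .id = 1 }, fs = { .id = 2 }, cs = { .id = 3 };

   unsigned stages[] = { 0 /* vertex */, 6 /* fragment */, STAGE_COMPUTE };
   struct shader *shaders[] = { &vs, &fs, &cs };
   bind_shaders(&state, 3, stages, shaders);

   /* Binding the same shaders again changes nothing and rebinds nothing. */
   bind_shaders(&state, 3, stages, shaders);
   return 0;
}
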


@ -31,6 +31,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "anv_shader.h"
#include "anv_slab_bo.h"
#include "util/u_debug.h"
#include "util/os_file.h"
@ -380,6 +381,8 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_alloc;
device->vk.shader_ops = &anv_device_shader_ops;
if (INTEL_DEBUG(DEBUG_BATCH) || INTEL_DEBUG(DEBUG_BATCH_STATS)) {
for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
struct intel_batch_decode_ctx *decoder = &device->decoder[i];


@ -223,7 +223,7 @@ uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const VkShaderStageFlags dirty,
const struct anv_shader_bin **shaders,
const struct anv_shader **shaders,
uint32_t num_shaders);
void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer);


@ -173,17 +173,29 @@ anv_pipeline_finish(struct anv_pipeline *pipeline,
vk_object_base_finish(&pipeline->vk.base);
}
VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyPipeline(VkDevice _device,
VkPipeline _pipeline,
const VkAllocationCallbacks *pAllocator);
void anv_DestroyPipeline(
VkDevice _device,
VkPipeline _pipeline,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
VK_FROM_HANDLE(vk_pipeline, vk_pipeline, _pipeline);
if (!pipeline)
if (!vk_pipeline)
return;
if (vk_pipeline->ops != NULL) {
vk_common_DestroyPipeline(_device, _pipeline, pAllocator);
return;
}
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
ANV_RMV(resource_destroy, device, pipeline);
switch (pipeline->type) {
@ -2851,6 +2863,7 @@ anv_compute_pipeline_create(struct anv_device *device,
return pipeline->base.batch.status;
}
#if 0
VkResult anv_CreateComputePipelines(
VkDevice _device,
VkPipelineCache pipelineCache,
@ -2885,6 +2898,7 @@ VkResult anv_CreateComputePipelines(
return result;
}
#endif
static uint32_t
get_vs_input_elements(const struct brw_vs_prog_data *vs_prog_data)
@ -3343,6 +3357,7 @@ anv_graphics_pipeline_create(struct anv_device *device,
return pipeline->base.base.batch.status;
}
#if 0
VkResult anv_CreateGraphicsPipelines(
VkDevice _device,
VkPipelineCache pipelineCache,
@ -3388,6 +3403,7 @@ VkResult anv_CreateGraphicsPipelines(
return result;
}
#endif
static bool
should_remat_cb(nir_instr *instr, void *data)
@ -4083,6 +4099,7 @@ anv_ray_tracing_pipeline_create(
return pipeline->base.batch.status;
}
#if 0
VkResult
anv_CreateRayTracingPipelinesKHR(
VkDevice _device,
@ -4491,3 +4508,4 @@ anv_GetRayTracingShaderGroupStackSizeKHR(
return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}
#endif


@ -1224,7 +1224,6 @@ struct anv_shader {
struct anv_state kernel;
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
struct brw_compile_stats stats[3];
uint32_t num_stats;
@ -2186,6 +2185,11 @@ struct anv_gfx_dynamic_state {
uint32_t PrimitiveTopologyType;
} vft;
/* 3DSTATE_VS */
struct {
bool VertexCacheDisable;
} vs;
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
struct {
uint32_t count;
@ -4422,7 +4426,7 @@ struct anv_cmd_graphics_state {
struct anv_cmd_pipeline_state base;
/* Shaders bound */
struct anv_shader_bin *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
struct anv_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
/* Bitfield of valid entries in the shaders array */
VkShaderStageFlags active_stages;
@ -4436,6 +4440,9 @@ struct anv_cmd_graphics_state {
bool kill_pixel;
bool uses_xfb;
/* Shader stage in base.shaders[] responsible for streamout */
mesa_shader_stage streamout_stage;
/* Render pass information */
VkRenderingFlags rendering_flags;
VkRect2D render_area;
@ -4530,7 +4537,7 @@ struct anv_cmd_graphics_state {
struct anv_cmd_compute_state {
struct anv_cmd_pipeline_state base;
struct anv_shader_bin *shader;
struct anv_shader *shader;
bool pipeline_dirty;
@ -4551,6 +4558,8 @@ struct anv_cmd_ray_tracing_state {
struct brw_rt_scratch_layout layout;
} scratch;
VkDeviceSize scratch_size;
uint32_t debug_marker_count;
uint32_t num_tlas;
uint32_t num_blas;
@ -5022,6 +5031,12 @@ void
anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
enum anv_pipe_bits flushed_bits);
void
anv_cmd_buffer_bind_shaders(struct vk_command_buffer *cmd_buffer,
uint32_t stage_count,
const mesa_shader_stage *stages,
struct vk_shader ** const shaders);
/**
* An allocation tied to a command buffer.
*
@ -5083,7 +5098,7 @@ enum anv_cmd_saved_state_flags {
struct anv_cmd_saved_state {
uint32_t flags;
struct anv_pipeline *pipeline;
struct vk_shader *shader;
struct anv_descriptor_set *descriptor_set[MAX_SETS];
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
};
@ -5444,7 +5459,6 @@ struct anv_graphics_pipeline {
4 * _cmd_state->len); \
} while (0)
struct anv_compute_pipeline {
struct anv_pipeline base;
@ -6484,6 +6498,15 @@ anv_cmd_flush_buffer_write_cp(VkCommandBuffer cmd_buffer);
VkResult
anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
void
anv_cmd_buffer_set_rt_state(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize scratch_size,
uint32_t ray_queries);
void
anv_cmd_buffer_set_stack_size(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize stack_size);
bool
anv_can_hiz_clear_image(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,


@ -1886,5 +1886,8 @@ struct vk_device_shader_ops anv_device_shader_ops = {
.deserialize = anv_shader_deserialize,
.write_rt_shader_group = anv_write_rt_shader_group,
.write_rt_shader_group_replay_handle = anv_write_rt_shader_group_replay_handle,
.cmd_bind_shaders = anv_cmd_buffer_bind_shaders,
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
.cmd_set_rt_state = anv_cmd_buffer_set_rt_state,
.cmd_set_stack_size = anv_cmd_buffer_set_stack_size,
};
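
The common runtime now drives the driver entirely through this ops table. A generic standalone sketch of the pattern (hypothetical types, not the vk_device_shader_ops definition):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical ops table: the common code calls through these hooks and the
 * driver fills them in with its implementations at device creation.
 */
struct shader_ops {
   void (*cmd_bind_shaders)(void *cmd_buffer, uint32_t count);
   void (*cmd_set_stack_size)(void *cmd_buffer, uint64_t stack_size);
};

static void
drv_bind_shaders(void *cmd_buffer, uint32_t count)
{
   (void)cmd_buffer;
   printf("driver: binding %u shader(s)\n", count);
}

static void
drv_set_stack_size(void *cmd_buffer, uint64_t stack_size)
{
   (void)cmd_buffer;
   printf("driver: RT stack size %llu\n", (unsigned long long)stack_size);
}

static const struct shader_ops drv_shader_ops = {
   .cmd_bind_shaders = drv_bind_shaders,
   .cmd_set_stack_size = drv_set_stack_size,
};

int main(void)
{
   /* The runtime only ever sees the table. */
   const struct shader_ops *ops = &drv_shader_ops;
   ops->cmd_bind_shaders(NULL, 2);
   ops->cmd_set_stack_size(NULL, 4096);
   return 0;
}
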


@ -2121,7 +2121,7 @@ emit_direct_descriptor_binding_table_entry(struct anv_cmd_buffer *cmd_buffer,
static VkResult
emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
struct anv_state *bt_state)
{
uint32_t state_offset;
@ -2153,7 +2153,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
/* Color attachment binding */
assert(shader->stage == MESA_SHADER_FRAGMENT);
assert(shader->vk.stage == MESA_SHADER_FRAGMENT);
uint32_t index = binding->index < MAX_RTS ?
cmd_buffer->state.gfx.color_output_mapping[binding->index] :
binding->index;
@ -2268,7 +2268,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
static VkResult
emit_samplers(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
struct anv_state *state)
{
const struct anv_pipeline_bind_map *map = &shader->bind_map;
@ -2312,7 +2312,7 @@ uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const VkShaderStageFlags dirty,
const struct anv_shader_bin **shaders,
const struct anv_shader **shaders,
uint32_t num_shaders)
{
VkShaderStageFlags flushed = 0;
@ -2322,7 +2322,7 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
if (!shaders[i])
continue;
mesa_shader_stage stage = shaders[i]->stage;
mesa_shader_stage stage = shaders[i]->vk.stage;
VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(stage);
if ((vk_stage & dirty) == 0)
continue;
@ -2361,7 +2361,7 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
if (!shaders[i])
continue;
mesa_shader_stage stage = shaders[i]->stage;
mesa_shader_stage stage = shaders[i]->vk.stage;
result = emit_samplers(cmd_buffer, pipe_state, shaders[i],
&cmd_buffer->state.samplers[stage]);


@ -105,13 +105,11 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const UNUSED struct intel_device_info *devinfo = cmd_buffer->device->info;
struct anv_compute_pipeline *pipeline =
anv_pipeline_to_compute(comp_state->base.pipeline);
assert(comp_state->shader);
genX(cmd_buffer_config_l3)(cmd_buffer,
pipeline->cs->prog_data->total_shared > 0 ?
comp_state->shader->prog_data->total_shared > 0 ?
device->l3_slm_config : device->l3_config);
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
@ -127,7 +125,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
*/
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
if (cmd_buffer->state.compute.pipeline_dirty) {
if (comp_state->pipeline_dirty) {
#if GFX_VERx10 < 125
/* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
*
@ -143,13 +141,28 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch);
#define anv_batch_emit_cs(batch, cmd, field) ({ \
void *__dst = anv_batch_emit_dwords( \
batch, __anv_cmd_length(cmd)); \
memcpy(__dst, \
&comp_state->shader->cmd_data[ \
comp_state->shader->field.offset], \
4 * __anv_cmd_length(cmd)); \
VG(VALGRIND_CHECK_MEM_IS_DEFINED( \
__dst, __anv_cmd_length(cmd) * 4)); \
__dst; \
})
#if GFX_VERx10 >= 125
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state);
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, prog_data->base.total_scratch);
#else
anv_batch_emit_cs(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), cs.gfx9.vfe);
#endif
#undef anv_batch_emit_cs
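
With runtime pipelines the compute HW commands are pre-packed into the shader's cmd_data and copied straight into the batch at flush time instead of replaying a pipeline batch. A standalone sketch of such a copy, with a hypothetical batch/shader layout:

#include <stdint.h>
#include <string.h>

struct batch {
   uint32_t *next; /* next free dword in the command buffer */
};

struct packed_cmd {
   uint32_t offset; /* dword offset into cmd_data */
   uint32_t len;    /* length in dwords */
};

/* Copy a command that was packed at shader compile/bind time straight into
 * the batch; nothing is re-packed at dispatch time.
 */
static uint32_t *
emit_prepacked(struct batch *batch, const uint32_t *cmd_data,
               const struct packed_cmd *cmd)
{
   uint32_t *dst = batch->next;
   memcpy(dst, &cmd_data[cmd->offset], 4 * cmd->len);
   batch->next += cmd->len;
   return dst;
}

int main(void)
{
   const uint32_t cmd_data[] = { 0x0, 0x1, 0x2, 0x3, 0x4 };
   uint32_t batch_mem[16];
   struct batch batch = { .next = batch_mem };
   const struct packed_cmd vfe = { .offset = 1, .len = 4 };

   emit_prepacked(&batch, cmd_data, &vfe);
   return (batch.next - batch_mem) == 4 ? 0 : 1;
}
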
/* Changing the pipeline affects the push constants layout (different
* amount of cross/per thread allocations). The allocation is also
* bounded to just the amount consumed by the pipeline (see
@ -179,7 +192,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer,
&cmd_buffer->state.compute.base,
VK_SHADER_STAGE_COMPUTE_BIT,
(const struct anv_shader_bin **)&comp_state->shader, 1);
(const struct anv_shader **)&comp_state->shader, 1);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
#if GFX_VERx10 < 125
@ -194,7 +207,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
struct anv_state state =
anv_cmd_buffer_merge_dynamic(cmd_buffer, iface_desc_data_dw,
pipeline->gfx9.interface_descriptor_data,
comp_state->shader->cs.gfx9.idd,
GENX(INTERFACE_DESCRIPTOR_DATA_length),
64);
@ -439,7 +452,7 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->batch,
GENX(EXECUTE_INDIRECT_DISPATCH_length),
GENX(EXECUTE_INDIRECT_DISPATCH_body_start) / 32,
anv_pipeline_to_compute(comp_state->base.pipeline)->gfx125.compute_walker_body,
comp_state->shader->cs.gfx125.compute_walker_body,
GENX(EXECUTE_INDIRECT_DISPATCH),
.PredicateEnable = predicate,
.MaxCount = 1,
@ -520,7 +533,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->batch,
GENX(COMPUTE_WALKER_length),
GENX(COMPUTE_WALKER_body_start) / 32,
anv_pipeline_to_compute(comp_state->base.pipeline)->gfx125.compute_walker_body,
comp_state->shader->cs.gfx125.compute_walker_body,
GENX(COMPUTE_WALKER),
.IndirectParameterEnable = !anv_address_is_null(indirect_addr),
.PredicateEnable = predicate,
@ -1051,8 +1064,6 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
struct anv_ray_tracing_pipeline *pipeline =
anv_pipeline_to_ray_tracing(rt->base.pipeline);
if (INTEL_DEBUG(DEBUG_RT_NO_TRACE))
return;
@ -1211,18 +1222,18 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
*/
btd.PerDSSMemoryBackedBufferSize = 6;
btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo };
if (pipeline->base.scratch_size > 0) {
if (rt->scratch_size > 0) {
struct anv_bo *scratch_bo =
anv_scratch_pool_alloc(device,
&device->scratch_pool,
MESA_SHADER_COMPUTE,
pipeline->base.scratch_size);
rt->scratch_size);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
scratch_bo);
uint32_t scratch_surf =
anv_scratch_pool_get_surf(cmd_buffer->device,
&device->scratch_pool,
pipeline->base.scratch_size);
rt->scratch_size);
btd.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT(GFX_VER);
}
#if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436
@ -1234,7 +1245,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
#endif
}
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, pipeline->base.scratch_size);
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, rt->scratch_size);
const struct brw_cs_prog_data *cs_prog_data =
brw_cs_prog_data_const(device->rt_trampoline->prog_data);
@ -1273,7 +1284,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
.ThreadGroupIDZDimension = global_size[2],
.ExecutionMask = 0xff,
.EmitInlineParameter = true,
.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
.PostSync.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
#if GFX_VER >= 30
/* HSD 14016252163 */
.DispatchWalkOrder = cs_prog_data->uses_sampler ? MortonWalk : LinearWalk,


@ -162,7 +162,7 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
static struct anv_address
get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
const struct anv_push_range *range)
{
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
@ -242,10 +242,10 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
*/
static uint32_t
get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
const struct anv_push_range *range)
{
assert(shader->stage != MESA_SHADER_COMPUTE);
assert(shader->vk.stage != MESA_SHADER_COMPUTE);
const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
switch (range->set) {
case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
@ -443,7 +443,7 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
if (!anv_gfx_has_stage(gfx, stage))
continue;
const struct anv_shader_bin *shader = gfx->shaders[stage];
const struct anv_shader *shader = gfx->shaders[stage];
if (shader->prog_data->robust_ubo_ranges) {
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
struct anv_push_constants *push = &gfx->base.push_constants;
@ -509,7 +509,7 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
struct anv_address buffers[4] = {};
if (anv_gfx_has_stage(gfx, stage)) {
const struct anv_shader_bin *shader = gfx->shaders[stage];
const struct anv_shader *shader = gfx->shaders[stage];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
/* We have to gather buffer addresses as a second step because the
@ -593,7 +593,7 @@ get_mesh_task_push_addr64(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_graphics_state *gfx,
mesa_shader_stage stage)
{
const struct anv_shader_bin *shader = gfx->shaders[stage];
const struct anv_shader *shader = gfx->shaders[stage];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
if (bind_map->push_ranges[0].length == 0)
return 0;
@ -645,31 +645,50 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
ALWAYS_INLINE static void
cmd_buffer_maybe_flush_rt_writes(struct anv_cmd_buffer *cmd_buffer,
const struct anv_graphics_pipeline *pipeline)
struct anv_cmd_graphics_state *gfx,
const struct vk_dynamic_graphics_state *dyn)
{
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
if (!anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT))
return;
UNUSED bool need_rt_flush = false;
for (uint32_t rt = 0; rt < pipeline->num_color_outputs; rt++) {
/* No writes going to this render target so it won't affect the RT cache
*/
if (pipeline->color_output_mapping[rt] == ANV_COLOR_OUTPUT_UNUSED)
continue;
/* Count the number of color attachments in the binding table */
const struct anv_pipeline_bind_map *bind_map =
&gfx->shaders[MESA_SHADER_FRAGMENT]->bind_map;
/* No change */
if (cmd_buffer->state.gfx.color_output_mapping[rt] ==
pipeline->color_output_mapping[rt])
continue;
cmd_buffer->state.gfx.color_output_mapping[rt] =
pipeline->color_output_mapping[rt];
need_rt_flush = true;
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
/* Build a map of fragment color output to attachment */
uint8_t rt_to_att[MAX_RTS];
memset(rt_to_att, ANV_COLOR_OUTPUT_DISABLED, MAX_RTS);
for (uint32_t i = 0; i < MAX_RTS; i++) {
if (dyn->cal.color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
rt_to_att[dyn->cal.color_map[i]] = i;
}
/* For each fragment shader output that is not unused, apply the remapping
* to pipeline->color_output_mapping
*/
UNUSED bool need_rt_flush = false;
for (unsigned rt = 0; rt < MIN2(bind_map->surface_count, MAX_RTS); rt++) {
if (bind_map->surface_to_descriptor[rt].set !=
ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
break;
uint32_t index = bind_map->surface_to_descriptor[rt].index;
if (index == ANV_COLOR_OUTPUT_UNUSED)
continue;
if (index == ANV_COLOR_OUTPUT_DISABLED &&
gfx->color_output_mapping[rt] != index) {
gfx->color_output_mapping[rt] = index;
need_rt_flush = true;
} else if (gfx->color_output_mapping[rt] != rt_to_att[rt]) {
gfx->color_output_mapping[rt] = rt_to_att[rt];
need_rt_flush = true;
}
}
#if GFX_VER >= 11
if (need_rt_flush) {
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
@ -689,8 +708,8 @@ cmd_buffer_maybe_flush_rt_writes(struct anv_cmd_buffer *cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"change RT due to shader outputs");
}
#endif
}
}
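
The remapping above first builds an inverse of the dynamic color attachment map so each fragment output can be pointed at the right attachment slot. A standalone sketch of inverting such a small index map, with hypothetical constants:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_RTS     8
#define UNUSED_SLOT 0xff

/* Invert a small index map: given map[i] = j (or UNUSED_SLOT), produce
 * inverse[j] = i so lookups can go the other way in O(1).
 */
static void
invert_map(const uint8_t map[MAX_RTS], uint8_t inverse[MAX_RTS])
{
   memset(inverse, UNUSED_SLOT, MAX_RTS);
   for (unsigned i = 0; i < MAX_RTS; i++) {
      if (map[i] != UNUSED_SLOT)
         inverse[map[i]] = (uint8_t)i;
   }
}

int main(void)
{
   /* e.g. slot 0 -> 2, slot 1 -> 0, everything else unused */
   uint8_t map[MAX_RTS];
   memset(map, UNUSED_SLOT, MAX_RTS);
   map[0] = 2;
   map[1] = 0;

   uint8_t inverse[MAX_RTS];
   invert_map(map, inverse);
   for (unsigned i = 0; i < 3; i++)
      printf("inverse[%u] = %u\n", i, (unsigned)inverse[i]);
   return 0;
}
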
ALWAYS_INLINE static void
@ -750,8 +769,6 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
@ -772,16 +789,16 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
*
* Apply task URB workaround when switching from task to primitive.
*/
if (anv_pipeline_is_primitive(pipeline)) {
if (!anv_gfx_has_stage(gfx, MESA_SHADER_MESH)) {
genX(apply_task_urb_workaround)(cmd_buffer);
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
} else if (anv_gfx_has_stage(gfx, MESA_SHADER_TASK)) {
cmd_buffer->state.gfx.used_task_shader = true;
}
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP) ||
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PS))
cmd_buffer_maybe_flush_rt_writes(cmd_buffer, pipeline);
cmd_buffer_maybe_flush_rt_writes(cmd_buffer, gfx, dyn);
/* Apply any pending pipeline flushes we may have. We want to apply them
* now because, if any of those flushes are for things like push constants,
@ -887,17 +904,29 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
/* If the pipeline changed, we may need to re-allocate push constant space
* in the URB.
*/
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PUSH_CONSTANT_SHADERS) {
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PUSH_CONSTANT_SHADERS)
cmd_buffer_alloc_gfx_push_constants(cmd_buffer);
#if GFX_VERx10 < 125
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_VS |
ANV_CMD_DIRTY_HS |
ANV_CMD_DIRTY_DS |
ANV_CMD_DIRTY_GS |
ANV_CMD_DIRTY_PS)) {
for (unsigned s = 0; s <= MESA_SHADER_FRAGMENT; s++) {
if (gfx->shaders[s] == NULL)
continue;
/* Also add the relocations (scratch buffers) */
VkResult result = anv_reloc_list_append(cmd_buffer->batch.relocs,
pipeline->base.base.batch.relocs);
&gfx->shaders[s]->relocs);
if (result != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
}
}
#endif
/* Render targets live in the same binding table as fragment descriptors */
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS)
@ -916,7 +945,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer,
&cmd_buffer->state.gfx.base,
descriptors_dirty,
(const struct anv_shader_bin **)gfx->shaders,
(const struct anv_shader **)gfx->shaders,
ARRAY_SIZE(gfx->shaders));
cmd_buffer->state.descriptors_dirty &= ~dirty;
}
@ -989,23 +1018,13 @@ anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count)
ALWAYS_INLINE static void
cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
{
UNUSED const struct anv_device *device = cmd_buffer->device;
UNUSED const struct anv_instance *instance =
device->physical->instance;
UNUSED const bool protected = cmd_buffer->vk.pool->flags &
VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
UNUSED struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
UNUSED struct anv_device *device = cmd_buffer->device;
UNUSED struct anv_instance *instance = device->physical->instance;
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely( \
(instance->debug & ANV_DEBUG_SHADER_HASH) && \
anv_pipeline_has_stage(pipeline, stage))) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(pipeline->base.shaders[stage]-> \
prog_data->source_hash)); \
} \
} while (0)
UNUSED struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
UNUSED struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
struct mi_builder b;
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) {
@ -1013,18 +1032,35 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
}
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(gfx->shaders[stage]->prog_data->source_hash)); \
} \
} while (0)
#define anv_batch_emit_gfx(batch, cmd, name) ({ \
void *__dst = anv_batch_emit_dwords( \
batch, __anv_cmd_length(cmd)); \
memcpy(__dst, hw_state->packed.name, \
4 * __anv_cmd_length(cmd)); \
VG(VALGRIND_CHECK_MEM_IS_DEFINED( \
__dst, __anv_cmd_length(cmd) * 4)); \
__dst; \
})
#if INTEL_WA_16011107343_GFX_VER
if (intel_needs_workaround(cmd_buffer->device->info, 16011107343) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
anv_gfx_has_stage(gfx, MESA_SHADER_TESS_CTRL)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.hs, protected);
anv_batch_emit_gfx(&cmd_buffer->batch, GENX(3DSTATE_HS), hs);
}
#endif
#if INTEL_WA_22018402687_GFX_VER
if (intel_needs_workaround(cmd_buffer->device->info, 22018402687) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL);
/* Wa_22018402687:
* In any 3D enabled context, just before any Tessellation enabled
@ -1038,13 +1074,13 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
* said switch, as it matters at the HW level, and can be triggered even
* across processes, so we apply the Wa at all times.
*/
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.ds, protected);
anv_batch_emit_gfx(&cmd_buffer->batch, GENX(3DSTATE_DS), ds);
}
#endif
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
#undef anv_batch_emit_gfx
#undef DEBUG_SHADER_HASH
}


@ -96,18 +96,10 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
ANV_STATE_NULL;
UNUSED uint32_t wa_insts_offset = 0;
#if INTEL_WA_16011107343_GFX_VER || INTEL_WA_22018402687_GFX_VER
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
#endif
#if INTEL_WA_16011107343_GFX_VER
if (wa_16011107343) {
memcpy(wa_insts_state.map + wa_insts_offset,
&pipeline->batch_data[
protected ?
pipeline->final.hs_protected.offset :
pipeline->final.hs.offset],
gfx->dyn_state.packed.hs,
GENX(3DSTATE_HS_length) * 4);
wa_insts_offset += GENX(3DSTATE_HS_length) * 4;
}
@ -116,10 +108,7 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
#if INTEL_WA_22018402687_GFX_VER
if (wa_22018402687) {
memcpy(wa_insts_state.map + wa_insts_offset,
&pipeline->batch_data[
protected ?
pipeline->final.ds_protected.offset :
pipeline->final.ds.offset],
gfx->dyn_state.packed.ds,
GENX(3DSTATE_DS_length) * 4);
wa_insts_offset += GENX(3DSTATE_DS_length) * 4;
}


@ -209,7 +209,7 @@ genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer,
if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
return;
if (gfx->uses_xfb) {
if (gfx->shaders[gfx->streamout_stage]->xfb_info != NULL) {
genX(cmd_buffer_set_preemption)(cmd_buffer, false);
return;
}
@ -417,10 +417,10 @@ want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
*/
struct anv_shader_bin *fs_bin = gfx->shaders[MESA_SHADER_FRAGMENT];
struct anv_shader *fs = gfx->shaders[MESA_SHADER_FRAGMENT];
return kill_pixel(wm_prog_data, dyn) ||
has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
has_ds_feedback_loop(&fs->bind_map, dyn) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
@ -1012,21 +1012,21 @@ update_ps(struct anv_gfx_dynamic_state *hw_state,
return;
}
const struct anv_shader_bin *fs_bin = gfx->shaders[MESA_SHADER_FRAGMENT];
const struct anv_shader *fs = gfx->shaders[MESA_SHADER_FRAGMENT];
struct GENX(3DSTATE_PS) ps = {};
intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
MAX2(dyn->ms.rasterization_samples, 1),
hw_state->fs_msaa_flags);
SET(PS, ps.KernelStartPointer0,
fs_bin->kernel.offset +
fs->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
SET(PS, ps.KernelStartPointer1,
fs_bin->kernel.offset +
fs->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
#if GFX_VER < 20
SET(PS, ps.KernelStartPointer2,
fs_bin->kernel.offset +
fs->kernel.offset +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
#endif
@ -1124,12 +1124,12 @@ update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
const struct vk_dynamic_graphics_state *dyn,
const struct anv_cmd_graphics_state *gfx)
{
struct anv_shader_bin *fs_bin = gfx->shaders[MESA_SHADER_FRAGMENT];
struct anv_shader *fs = gfx->shaders[MESA_SHADER_FRAGMENT];
const struct brw_wm_prog_data *wm_prog_data = get_gfx_wm_prog_data(gfx);
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
wm_prog_data &&
(has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
(has_ds_feedback_loop(&fs->bind_map, dyn) ||
wm_prog_data->uses_kill),
FRAGMENT);
}
@ -2174,6 +2174,35 @@ update_tbimr_info(struct anv_gfx_dynamic_state *hw_state,
}
#endif
#if GFX_VERx10 == 90
ALWAYS_INLINE static void
update_vs(struct anv_gfx_dynamic_state *hw_state,
const struct anv_cmd_graphics_state *gfx,
const struct anv_device *device)
{
if (device->info->gt < 4)
return;
/* On Sky Lake GT4, we have experienced some hangs related to the VS cache
* and tessellation. It is unknown exactly what is happening but the
* Haswell docs for the "VS Reference Count Full Force Miss Enable" field
* of the "Thread Mode" register refer to a HSW bug in which the VUE handle
* reference count would overflow resulting in internal reference counting
* bugs. My (Faith's) best guess is that this bug cropped back up on SKL
* GT4 when we suddenly had more threads in play than any previous gfx9
* hardware.
*
* What we do know for sure is that setting this bit when tessellation
* shaders are in use fixes a GPU hang in Batman: Arkham City when playing
* with DXVK (https://bugs.freedesktop.org/107280). Disabling the vertex
* cache with tessellation shaders should only have a minor performance
* impact as the tessellation shaders are likely generating and processing
* far more geometry than the vertex stage.
*/
SET(VS, vs.VertexCacheDisable, anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL));
}
#endif
#if INTEL_WA_18019110168_GFX_VER
static inline unsigned
compute_mesh_provoking_vertex(const struct brw_mesh_prog_data *mesh_prog_data,
@ -2215,11 +2244,13 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
const struct anv_device *device,
const struct vk_dynamic_graphics_state *dyn,
struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline,
VkCommandBufferLevel cmd_buffer_level)
{
UNUSED bool fs_msaa_changed = false;
assert(gfx->shaders[gfx->streamout_stage] != NULL);
assert(gfx->instance_multiplier != 0);
/* Do this before update_fs_msaa_flags() for primitive_id_index */
if (gfx->dirty & ANV_CMD_DIRTY_ALL_SHADERS(device))
update_sbe(hw_state, gfx, device);
@ -2234,6 +2265,11 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
update_urb_config(hw_state, gfx, device);
#if GFX_VERx10 == 90
if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
update_vs(hw_state, gfx, device);
#endif
if ((gfx->dirty & ANV_CMD_DIRTY_PS) ||
BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
update_ps(hw_state, device, dyn, gfx);
@ -2482,8 +2518,7 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
static void
cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
struct anv_cmd_buffer *cmd_buffer,
const struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline)
const struct anv_cmd_graphics_state *gfx)
{
struct anv_device *device = cmd_buffer->device;
struct anv_instance *instance = device->physical->instance;
@ -2502,73 +2537,107 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
} while (0)
#define IS_DIRTY(name) BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##name)
#define anv_gfx_copy(field, cmd, source) ({ \
#define anv_gfx_copy(field, cmd, stage, source) ({ \
if (gfx->shaders[stage] != NULL) { \
assert(sizeof(hw_state->packed.field) >= \
4 * __anv_cmd_length(cmd)); \
assert((source).len == __anv_cmd_length(cmd)); \
assert((gfx->shaders[stage]->source).len == \
__anv_cmd_length(cmd)); \
memcpy(&hw_state->packed.field, \
&pipeline->batch_data[(source).offset], \
&gfx->shaders[stage]->cmd_data[ \
(gfx->shaders[stage]->source).offset], \
4 * __anv_cmd_length(cmd)); \
} else { \
anv_gfx_pack(field, cmd, __unused_name); \
} \
})
#define anv_gfx_copy_variable(field, source) ({ \
#define anv_gfx_copy_variable(field, stage, source) ({ \
if (gfx->shaders[stage] != NULL) { \
assert(sizeof(hw_state->packed.field) >= \
4 * (source).len); \
4 * gfx->shaders[stage]->source.len); \
memcpy(&hw_state->packed.field, \
&pipeline->batch_data[(source).offset], \
4 * (source).len); \
hw_state->packed.field##_len = (source).len; \
&gfx->shaders[stage]->cmd_data[ \
(gfx->shaders[stage]->source).offset], \
4 * gfx->shaders[stage]->source.len); \
hw_state->packed.field##_len = \
gfx->shaders[stage]->source.len; \
} \
})
#define anv_gfx_copy_protected(field, cmd, source) ({ \
#define anv_gfx_copy_protected(field, cmd, stage, source) ({ \
const bool __protected = (cmd_buffer->vk.pool->flags & \
VK_COMMAND_POOL_CREATE_PROTECTED_BIT); \
assert(sizeof(hw_state->packed.field) >= \
4 * __anv_cmd_length(cmd)); \
assert((source).len == __anv_cmd_length(cmd)); \
if (gfx->shaders[stage] != NULL) { \
assert((gfx->shaders[stage]->source).len == \
__anv_cmd_length(cmd)); \
memcpy(&hw_state->packed.field, \
&pipeline->batch_data[ \
&gfx->shaders[stage]->cmd_data[ \
__protected ? \
(source##_protected).offset : \
(source).offset], \
gfx->shaders[stage]->source##_protected.offset : \
gfx->shaders[stage]->source.offset], \
4 * __anv_cmd_length(cmd)); \
} else { \
memcpy(&hw_state->packed.field, \
device->physical->gfx_default.field, \
4 * __anv_cmd_length(cmd)); \
} \
})
#define anv_gfx_pack_merge(field, cmd, prepacked, name) \
for (struct cmd name = { 0 }, \
#define anv_gfx_pack_merge(field, cmd, stage, source, name) \
for (struct cmd name = (struct cmd) { 0 }, \
*_dst = (struct cmd *)hw_state->packed.field; \
__builtin_expect(_dst != NULL, 1); \
({ const struct anv_gfx_state_ptr *_cmd_state = &prepacked; \
({ \
uint32_t _partial[__anv_cmd_length(cmd)]; \
assert(_cmd_state->len == __anv_cmd_length(cmd)); \
assert(sizeof(hw_state->packed.field) >= \
4 * __anv_cmd_length(cmd)); \
__anv_cmd_pack(cmd)(NULL, _partial, &name); \
if (gfx->shaders[stage] != NULL) { \
const struct anv_gfx_state_ptr *_cmd_state = \
&gfx->shaders[stage]->source; \
assert(_cmd_state->len == __anv_cmd_length(cmd)); \
for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \
assert((_partial[i] & \
(pipeline)->batch_data[ \
(prepacked).offset + i]) == 0); \
gfx->shaders[stage]->cmd_data[ \
_cmd_state->offset + i]) == 0); \
((uint32_t *)_dst)[i] = _partial[i] | \
(pipeline)->batch_data[_cmd_state->offset + i]; \
gfx->shaders[stage]->cmd_data[_cmd_state->offset + i]; \
} \
} else { \
for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \
assert((_partial[i] & \
device->physical->gfx_default.field[i]) == 0); \
((uint32_t *)_dst)[i] = _partial[i] | \
device->physical->gfx_default.field[i]; \
} \
} \
_dst = NULL; \
}))
#define anv_gfx_pack_merge_protected(field, cmd, prepacked, name) \
for (struct cmd name = { 0 }, \
#define anv_gfx_pack_merge_protected(field, cmd, stage, source, name) \
for (struct cmd name = (struct cmd) { 0 }, \
*_dst = (struct cmd *)hw_state->packed.field; \
__builtin_expect(_dst != NULL, 1); \
({ const struct anv_gfx_state_ptr *_cmd_state = \
(cmd_buffer->vk.pool->flags & \
VK_COMMAND_POOL_CREATE_PROTECTED_BIT) ? \
&prepacked##_protected : &prepacked; \
({ \
uint32_t _partial[__anv_cmd_length(cmd)]; \
assert(_cmd_state->len == __anv_cmd_length(cmd)); \
assert(sizeof(hw_state->packed.field) >= \
4 * __anv_cmd_length(cmd)); \
__anv_cmd_pack(cmd)(NULL, _partial, &name); \
const struct anv_gfx_state_ptr *_cmd_state = \
gfx->shaders[stage] != NULL ? \
((cmd_buffer->vk.pool->flags & \
VK_COMMAND_POOL_CREATE_PROTECTED_BIT) ? \
&gfx->shaders[stage]->source##_protected : \
&gfx->shaders[stage]->source) : \
NULL; \
assert(_cmd_state == NULL || \
_cmd_state->len == __anv_cmd_length(cmd)); \
const uint32_t *_inst_data = \
gfx->shaders[stage] != NULL ? \
&gfx->shaders[stage]->cmd_data[_cmd_state->offset] : \
device->physical->gfx_default.field; \
for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) { \
assert((_partial[i] & \
(pipeline)->batch_data[ \
(prepacked).offset + i]) == 0); \
((uint32_t *)_dst)[i] = _partial[i] | \
(pipeline)->batch_data[_cmd_state->offset + i]; \
assert((_partial[i] & _inst_data[i]) == 0); \
((uint32_t *)_dst)[i] = _partial[i] | _inst_data[i]; \
} \
_dst = NULL; \
}))
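
These merge macros rely on the shader's pre-packed dwords and the dynamically packed dwords never setting the same bits, so the final command is simply the bitwise OR of the two, with an assert guarding the invariant. A standalone sketch of that merge, for a hypothetical 4-dword command:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CMD_LEN 4

/* Merge the fields packed at bind time (shader-dependent) with the fields
 * packed at draw time (dynamic state).  The two halves must not overlap;
 * the assert catches a field accidentally packed on both sides.
 */
static void
merge_packed(uint32_t out[CMD_LEN],
             const uint32_t prepacked[CMD_LEN],
             const uint32_t dynamic[CMD_LEN])
{
   for (unsigned i = 0; i < CMD_LEN; i++) {
      assert((prepacked[i] & dynamic[i]) == 0);
      out[i] = prepacked[i] | dynamic[i];
   }
}

int main(void)
{
   const uint32_t prepacked[CMD_LEN] = { 0x70000000, 0x0000ffff, 0, 0 };
   const uint32_t dynamic[CMD_LEN]   = { 0x00000003, 0xffff0000, 0, 1 };
   uint32_t cmd[CMD_LEN];

   merge_packed(cmd, prepacked, dynamic);
   for (unsigned i = 0; i < CMD_LEN; i++)
      printf("dw%u = 0x%08x\n", i, cmd[i]);
   return 0;
}
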
@ -2624,19 +2693,19 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
#endif
if (IS_DIRTY(VF_SGVS))
anv_gfx_copy(vf_sgvs, GENX(3DSTATE_VF_SGVS), pipeline->final.vf_sgvs);
anv_gfx_copy(vf_sgvs, GENX(3DSTATE_VF_SGVS), MESA_SHADER_VERTEX, vs.vf_sgvs);
#if GFX_VER >= 11
if (IS_DIRTY(VF_SGVS_2))
anv_gfx_copy(vf_sgvs_2, GENX(3DSTATE_VF_SGVS_2), pipeline->final.vf_sgvs_2);
anv_gfx_copy(vf_sgvs_2, GENX(3DSTATE_VF_SGVS_2), MESA_SHADER_VERTEX, vs.vf_sgvs_2);
#endif
if (IS_DIRTY(VF_SGVS_INSTANCING))
anv_gfx_copy_variable(vf_sgvs_instancing, pipeline->final.vf_sgvs_instancing);
anv_gfx_copy_variable(vf_sgvs_instancing, MESA_SHADER_VERTEX, vs.vf_sgvs_instancing);
if (instance->vf_component_packing && IS_DIRTY(VF_COMPONENT_PACKING)) {
anv_gfx_copy(vf_component_packing, GENX(3DSTATE_VF_COMPONENT_PACKING),
pipeline->final.vf_component_packing);
MESA_SHADER_VERTEX, vs.vf_component_packing);
}
if (IS_DIRTY(INDEX_BUFFER)) {
@ -2655,7 +2724,7 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
if (IS_DIRTY(STREAMOUT)) {
anv_gfx_pack_merge(so, GENX(3DSTATE_STREAMOUT),
pipeline->partial.so, so) {
gfx->streamout_stage, so, so) {
SET(so, so, RenderingDisable);
SET(so, so, RenderStreamSelect);
SET(so, so, ReorderMode);
@ -2664,7 +2733,7 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
}
if (IS_DIRTY(SO_DECL_LIST))
anv_gfx_copy_variable(so_decl_list, pipeline->final.so_decl_list);
anv_gfx_copy_variable(so_decl_list, gfx->streamout_stage, so_decl_list);
if (IS_DIRTY(CLIP)) {
anv_gfx_pack(clip, GENX(3DSTATE_CLIP), clip) {
@ -2886,7 +2955,8 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
if (IS_DIRTY(TE)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
anv_gfx_pack_merge(te, GENX(3DSTATE_TE), pipeline->partial.te, te) {
anv_gfx_pack_merge(te, GENX(3DSTATE_TE),
MESA_SHADER_TESS_EVAL, ds.te, te) {
SET(te, te, OutputTopology);
#if GFX_VERx10 >= 125
SET(te, te, TessellationDistributionMode);
@ -2986,7 +3056,8 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
}
if (IS_DIRTY(WM)) {
anv_gfx_pack_merge(wm, GENX(3DSTATE_WM), pipeline->partial.wm, wm) {
anv_gfx_pack_merge(wm, GENX(3DSTATE_WM),
MESA_SHADER_FRAGMENT, ps.wm, wm) {
SET(wm, wm, LineStippleEnable);
SET(wm, wm, BarycentricInterpolationMode);
}
@ -3079,12 +3150,12 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_features.meshShader) {
if (device->vk.enabled_extensions.EXT_mesh_shader) {
if (IS_DIRTY(MESH_CONTROL)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_MESH)) {
anv_gfx_copy_protected(mesh_control,
GENX(3DSTATE_MESH_CONTROL),
pipeline->final.mesh_control);
MESA_SHADER_MESH, ms.control);
} else {
anv_gfx_pack(mesh_control, GENX(3DSTATE_MESH_CONTROL), mc);
}
@ -3092,8 +3163,9 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
if (IS_DIRTY(TASK_CONTROL)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_TASK)) {
anv_gfx_copy_protected(task_control, GENX(3DSTATE_TASK_CONTROL),
pipeline->final.task_control);
anv_gfx_copy_protected(task_control,
GENX(3DSTATE_TASK_CONTROL),
MESA_SHADER_TASK, ts.control);
} else {
anv_gfx_pack(task_control, GENX(3DSTATE_TASK_CONTROL), tc);
}
@ -3101,70 +3173,58 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
if (IS_DIRTY(MESH_SHADER)) {
anv_gfx_copy(mesh_shader, GENX(3DSTATE_MESH_SHADER),
pipeline->final.mesh_shader);
MESA_SHADER_MESH, ms.shader);
}
if (IS_DIRTY(MESH_DISTRIB)) {
anv_gfx_copy(mesh_distrib, GENX(3DSTATE_MESH_DISTRIB),
pipeline->final.mesh_distrib);
MESA_SHADER_MESH, ms.distrib);
}
if (IS_DIRTY(CLIP_MESH)) {
anv_gfx_copy(clip_mesh, GENX(3DSTATE_CLIP_MESH),
pipeline->final.clip_mesh);
MESA_SHADER_MESH, ms.clip);
}
if (IS_DIRTY(TASK_SHADER)) {
anv_gfx_copy(task_shader, GENX(3DSTATE_TASK_SHADER),
pipeline->final.task_shader);
MESA_SHADER_TASK, ts.shader);
}
if (IS_DIRTY(TASK_REDISTRIB)) {
anv_gfx_copy(task_redistrib, GENX(3DSTATE_TASK_REDISTRIB),
pipeline->final.task_redistrib);
MESA_SHADER_TASK, ts.redistrib);
}
}
#endif /* GFX_VERx10 >= 125 */
if (IS_DIRTY(VS)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_VERTEX)) {
anv_gfx_copy_protected(vs, GENX(3DSTATE_VS), pipeline->final.vs);
} else {
anv_gfx_pack(vs, GENX(3DSTATE_VS), vs);
#if GFX_VERx10 == 90
anv_gfx_pack_merge_protected(vs, GENX(3DSTATE_VS),
MESA_SHADER_VERTEX, vs.vs, vs) {
SET(vs, vs, VertexCacheDisable);
}
#else
anv_gfx_copy_protected(vs, GENX(3DSTATE_VS), MESA_SHADER_VERTEX, vs.vs);
#endif
}
if (IS_DIRTY(HS)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_CTRL)) {
anv_gfx_copy_protected(hs, GENX(3DSTATE_HS), pipeline->final.hs);
} else {
anv_gfx_pack(hs, GENX(3DSTATE_HS), hs);
}
}
if (IS_DIRTY(HS))
anv_gfx_copy_protected(hs, GENX(3DSTATE_HS), MESA_SHADER_TESS_CTRL, hs.hs);
if (IS_DIRTY(DS)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
anv_gfx_copy_protected(ds, GENX(3DSTATE_DS), pipeline->final.ds);
} else {
anv_gfx_pack(ds, GENX(3DSTATE_DS), ds);
}
}
if (IS_DIRTY(DS))
anv_gfx_copy_protected(ds, GENX(3DSTATE_DS), MESA_SHADER_TESS_EVAL, ds.ds);
if (IS_DIRTY(GS)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_GEOMETRY)) {
anv_gfx_pack_merge_protected(gs, GENX(3DSTATE_GS),
pipeline->partial.gs, gs) {
MESA_SHADER_GEOMETRY, gs.gs, gs) {
SET(gs, gs, ReorderMode);
}
} else {
anv_gfx_pack(gs, GENX(3DSTATE_GS), gs);
}
}
if (IS_DIRTY(PS)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT)) {
anv_gfx_pack_merge_protected(ps, GENX(3DSTATE_PS),
pipeline->partial.ps, ps) {
MESA_SHADER_FRAGMENT, ps.ps, ps) {
SET(ps, ps, KernelStartPointer0);
SET(ps, ps, KernelStartPointer1);
SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
@ -3187,15 +3247,12 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
#endif
SET(ps, ps, PositionXYOffsetSelect);
}
} else {
anv_gfx_pack(ps, GENX(3DSTATE_PS), ps);
}
}
if (IS_DIRTY(PS_EXTRA)) {
if (anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT)) {
anv_gfx_pack_merge(ps_extra, GENX(3DSTATE_PS_EXTRA),
pipeline->partial.ps_extra, pse) {
MESA_SHADER_FRAGMENT, ps.ps_extra, pse) {
SET(pse, ps_extra, PixelShaderHasUAV);
SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11
@ -3213,7 +3270,7 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
* change through pre-rasterization shader) or if we notice a change.
*/
anv_gfx_pack_merge(ps_extra_dep, GENX(3DSTATE_PS_EXTRA),
pipeline->partial.ps_extra, pse) {
MESA_SHADER_FRAGMENT, ps.ps_extra, pse) {
SET(pse, ps_extra, PixelShaderHasUAV);
SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11
@ -3269,15 +3326,13 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->device,
&cmd_buffer->vk.dynamic_graphics_state,
&cmd_buffer->state.gfx,
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline),
cmd_buffer->vk.level);
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
cmd_buffer_repack_gfx_state(&cmd_buffer->state.gfx.dyn_state,
cmd_buffer,
&cmd_buffer->state.gfx,
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline));
&cmd_buffer->state.gfx);
}
static void
@ -3431,8 +3486,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
struct anv_device *device = cmd_buffer->device;
struct anv_instance *instance = device->physical->instance;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
struct anv_push_constants *push_consts =
@ -3493,7 +3546,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
if (mesh_prog_data) {
push_consts->gfx.fs_per_prim_remap_offset =
pipeline->base.shaders[MESA_SHADER_MESH]->kernel.offset +
gfx->shaders[MESA_SHADER_MESH]->kernel.offset +
mesh_prog_data->wa_18019110168_mapping_offset;
}
@ -3576,7 +3629,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
* 3. Send 3D State SOL with SOL Enabled
*/
if (intel_needs_workaround(device->info, 16011773973) &&
pipeline->uses_xfb)
gfx->shaders[gfx->streamout_stage]->xfb_info != NULL)
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so);
anv_batch_emit_gfx_variable(batch, so_decl_list);
@ -3597,7 +3650,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
}
#if GFX_VERx10 >= 125
if (device->vk.enabled_features.meshShader) {
if (device->vk.enabled_extensions.EXT_mesh_shader) {
if (IS_DIRTY(MESH_CONTROL))
anv_batch_emit_gfx(batch, GENX(3DSTATE_MESH_CONTROL), mesh_control);
@ -3670,8 +3723,8 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_TOPOLOGY), vft);
if (IS_DIRTY(VERTEX_INPUT)) {
genX(batch_emit_pipeline_vertex_input)(batch, device,
pipeline, dyn->vi);
genX(batch_emit_vertex_input)(batch, device,
gfx->shaders[MESA_SHADER_VERTEX], dyn->vi);
}
if (IS_DIRTY(TE))
@ -3823,8 +3876,6 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
if (INTEL_DEBUG(DEBUG_REEMIT)) {
@ -3863,7 +3914,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
* it after.
*/
if (intel_needs_workaround(device->info, 16011773973) &&
pipeline->uses_xfb &&
gfx->shaders[gfx->streamout_stage]->xfb_info != NULL &&
BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT);
}
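(Editor's note on the Wa_16011773973 handling just above: with runtime pipelines the check consults the bound streamout stage's shader, gfx->shaders[gfx->streamout_stage]->xfb_info, instead of a pipeline-level uses_xfb flag. A minimal hedged sketch of that condition, with hypothetical stand-in types:)

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical stand-in: only the field the check cares about. */
struct xfb_shader {
   const void *xfb_info;   /* non-NULL when the stage writes transform feedback */
};

/* When the workaround applies, a dirty SO_DECL_LIST forces 3DSTATE_STREAMOUT
 * to be re-emitted as well (disabled before the declaration list, enabled
 * after it). */
static bool
needs_streamout_reemit(bool wa_16011773973_needed,
                       bool so_decl_list_dirty,
                       const struct xfb_shader *streamout_shader)
{
   return wa_16011773973_needed &&
          so_decl_list_dirty &&
          streamout_shader != NULL &&
          streamout_shader->xfb_info != NULL;
}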

View file

@ -569,31 +569,6 @@ emit_vs_shader(struct anv_batch *batch,
vs.SoftwareExceptionEnable = false;
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
#if 0
/* TODO: move to shader binding */
if (GFX_VER == 9 && devinfo->gt == 4 &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
/* On Sky Lake GT4, we have experienced some hangs related to the VS
* cache and tessellation. It is unknown exactly what is happening
* but the Haswell docs for the "VS Reference Count Full Force Miss
* Enable" field of the "Thread Mode" register refer to a HSW bug in
* which the VUE handle reference count would overflow resulting in
* internal reference counting bugs. My (Faith's) best guess is that
* this bug cropped back up on SKL GT4 when we suddenly had more
* threads in play than any previous gfx9 hardware.
*
* What we do know for sure is that setting this bit when
* tessellation shaders are in use fixes a GPU hang in Batman: Arkham
* City when playing with DXVK (https://bugs.freedesktop.org/107280).
* Disabling the vertex cache with tessellation shaders should only
* have a minor performance impact as the tessellation shaders are
* likely generating and processing far more geometry than the vertex
* stage.
*/
vs.VertexCacheDisable = true;
}
#endif
vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
vs.VertexURBEntryReadOffset = 0;
vs.DispatchGRFStartRegisterForURBData =