mirror of https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 11:40:10 +01:00

anv: switch over to runtime pipelines

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34872>

parent 4d9dd5c3a2
commit e76ed91d3f

13 changed files with 697 additions and 529 deletions
@@ -5,6 +5,8 @@
#include "anv_private.h"
#include "vk_common_entrypoints.h"
#include "compiler/nir/nir_builder.h"
static void
@@ -293,7 +295,8 @@ astc_emu_flush_denorm_slice(struct anv_cmd_buffer *cmd_buffer,
set_writes);
VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE,
vk_common_CmdBindPipeline(cmd_buffer_,
VK_PIPELINE_BIND_POINT_COMPUTE,
astc_emu->pipeline);
VkPushConstantsInfoKHR push_info = {
@@ -351,7 +354,9 @@ astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer,
return;
}
anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
vk_common_CmdBindPipeline(cmd_buffer_,
VK_PIPELINE_BIND_POINT_COMPUTE,
pipeline);
struct vk_texcompress_astc_write_descriptor_set writes;
vk_texcompress_astc_fill_write_descriptor_sets(astc_emu->texcompress,
@@ -30,6 +30,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "vk_common_entrypoints.h"
#include "vk_util.h"
/** \file anv_cmd_buffer.c
@@ -435,17 +436,16 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
}
static void
anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
anv_cmd_buffer_set_rt_query_buffer(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipeline_state,
struct anv_pipeline *pipeline,
uint32_t ray_queries,
VkShaderStageFlags stages)
{
struct anv_device *device = cmd_buffer->device;
uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
uint64_t ray_shadow_size =
align64(brw_rt_ray_queries_shadow_stacks_size(device->info,
pipeline->ray_queries),
align64(brw_rt_ray_queries_shadow_stacks_size(device->info, ray_queries),
4096);
if (ray_shadow_size > 0 &&
(!cmd_buffer->state.ray_query_shadow_bo ||
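Editor's note: the shadow-stack size computed above is rounded up to a 4 KiB page boundary. A minimal, runnable sketch of that rounding follows; the align64 body here is an illustrative stand-in for Mesa's helper, not code from this commit.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for Mesa's align64(): round v up to the next multiple of a
 * power-of-two alignment. */
static uint64_t align64(uint64_t v, uint64_t a)
{
   return (v + a - 1) & ~(a - 1);
}

int main(void)
{
   /* Shadow-stack sizes are rounded up to a 4096-byte page boundary,
    * e.g. a 5000-byte requirement becomes 8192 bytes. */
   printf("%llu\n", (unsigned long long)align64(5000, 4096)); /* prints 8192 */
   return 0;
}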
@@ -497,112 +497,6 @@ anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
pipeline_state->push_constants_data_dirty = true;
}
/**
* This function computes changes between 2 pipelines and flags the dirty HW
* state appropriately.
*/
static void
anv_cmd_buffer_flush_pipeline_hw_state(struct anv_cmd_buffer *cmd_buffer,
struct anv_graphics_pipeline *old_pipeline,
struct anv_graphics_pipeline *new_pipeline)
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
#define diff_fix_state(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_pipeline == NULL || \
old_pipeline->name.len == new_pipeline->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \
(old_pipeline == NULL || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state(bit, name) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##bit) && \
new_pipeline->name.len != 0 && \
(old_pipeline == NULL || \
old_pipeline->name.len != new_pipeline->name.len || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define assert_identical(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_pipeline == NULL || \
old_pipeline->name.len == new_pipeline->name.len); \
assert(old_pipeline == NULL || \
memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
&new_pipeline->batch_data[new_pipeline->name.offset], \
4 * new_pipeline->name.len) == 0); \
} while (0)
#define assert_empty(name) assert(new_pipeline->name.len == 0)
/* Compare all states, including partial packed ones, the dynamic part is
* left at 0 but the static part could still change.
*
* We avoid comparing protected packets as all the fields but the scratch
* surface are identical. We just need to select the right one at emission.
*/
diff_fix_state(VF_SGVS, final.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);
diff_fix_state(VF_COMPONENT_PACKING, final.vf_component_packing);
diff_fix_state(VS, final.vs);
diff_fix_state(HS, final.hs);
diff_fix_state(DS, final.ds);
diff_fix_state(WM, partial.wm);
diff_fix_state(STREAMOUT, partial.so);
diff_fix_state(GS, partial.gs);
diff_fix_state(TE, partial.te);
diff_fix_state(PS, partial.ps);
diff_fix_state(PS_EXTRA, partial.ps_extra);
if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
diff_fix_state(TASK_CONTROL, final.task_control);
diff_fix_state(TASK_SHADER, final.task_shader);
diff_fix_state(TASK_REDISTRIB, final.task_redistrib);
diff_fix_state(MESH_CONTROL, final.mesh_control);
diff_fix_state(MESH_SHADER, final.mesh_shader);
diff_fix_state(MESH_DISTRIB, final.mesh_distrib);
diff_fix_state(CLIP_MESH, final.clip_mesh);
} else {
assert_empty(final.task_control);
assert_empty(final.task_shader);
assert_empty(final.task_redistrib);
assert_empty(final.mesh_control);
assert_empty(final.mesh_shader);
assert_empty(final.mesh_distrib);
assert_empty(final.clip_mesh);
}
/* States that can vary in length */
diff_var_state(VF_SGVS_INSTANCING, final.vf_sgvs_instancing);
diff_var_state(SO_DECL_LIST, final.so_decl_list);
#undef diff_fix_state
#undef diff_var_state
#undef assert_identical
#undef assert_empty
/* We're not diffing the following:
* - anv_graphics_pipeline::vertex_input_data
* - anv_graphics_pipeline::final::vf_instancing
*
* since they are tracked by the runtime.
*/
}
static enum anv_cmd_dirty_bits
get_pipeline_dirty_stages(struct anv_device *device,
struct anv_graphics_pipeline *old_pipeline,
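Editor's note: the diff_fix_state/diff_var_state macros deleted above (and re-introduced per shader stage later in this commit) boil down to one idea: compare the pre-packed dwords of each HW state between the old and new object and only flag the state dirty on a mismatch. A self-contained sketch of that pattern follows, with illustrative types rather than ANV's real ones.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in: each packed HW state is a span (offset/len, in
 * dwords) inside a blob of pre-packed command dwords. */
struct span { uint32_t offset, len; };

/* Mark a state dirty only when the packed dwords actually changed. */
static bool
needs_repack(const uint32_t *old_data, struct span old_span,
             const uint32_t *new_data, struct span new_span)
{
   if (old_data == NULL)
      return true;                       /* no previous object: must emit */
   if (old_span.len != new_span.len)
      return true;                       /* variable-length state resized */
   return memcmp(&old_data[old_span.offset],
                 &new_data[new_span.offset],
                 4 * new_span.len) != 0; /* dword-for-dword compare */
}

int main(void)
{
   uint32_t old_blob[] = { 0x1, 0x2, 0x3 };
   uint32_t new_blob[] = { 0x1, 0x2, 0x4 };
   struct span s = { .offset = 0, .len = 3 };
   printf("dirty: %d\n", needs_repack(old_blob, s, new_blob, s)); /* 1 */
   return 0;
}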
@@ -636,7 +530,7 @@ get_pipeline_dirty_stages(struct anv_device *device,
static void
update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
struct anv_shader_bin **shaders,
struct anv_shader ** const shaders,
uint32_t shader_count)
{
state->push_buffer_stages = 0;
@@ -646,7 +540,7 @@ update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
if (shaders[i] == NULL)
continue;
VkShaderStageFlags stage = mesa_to_vk_shader_stage(shaders[i]->stage);
VkShaderStageFlags stage = mesa_to_vk_shader_stage(shaders[i]->vk.stage);
if (shaders[i]->push_desc_info.used_descriptors)
state->push_descriptor_stages |= stage;
@@ -656,145 +550,6 @@ update_push_descriptor_flags(struct anv_cmd_pipeline_state *state,
}
}
void anv_CmdBindPipeline(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
struct anv_cmd_pipeline_state *state;
VkShaderStageFlags stages = 0;
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE: {
if (cmd_buffer->state.compute.base.pipeline == pipeline)
return;
struct anv_compute_pipeline *compute_pipeline =
anv_pipeline_to_compute(pipeline);
cmd_buffer->state.compute.shader = compute_pipeline->cs;
cmd_buffer->state.compute.pipeline_dirty = true;
set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
&compute_pipeline->cs->bind_map);
state = &cmd_buffer->state.compute.base;
stages = VK_SHADER_STAGE_COMPUTE_BIT;
update_push_descriptor_flags(state, &compute_pipeline->cs, 1);
break;
}
case VK_PIPELINE_BIND_POINT_GRAPHICS: {
struct anv_graphics_pipeline *new_pipeline =
anv_pipeline_to_graphics(pipeline);
/* Apply the non-dynamic state from the pipeline */
vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
&new_pipeline->dynamic_state);
if (cmd_buffer->state.gfx.base.pipeline == pipeline)
return;
struct anv_graphics_pipeline *old_pipeline =
cmd_buffer->state.gfx.base.pipeline == NULL ? NULL :
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
cmd_buffer->state.gfx.dirty |=
get_pipeline_dirty_stages(cmd_buffer->device,
old_pipeline, new_pipeline);
STATIC_ASSERT(sizeof(cmd_buffer->state.gfx.shaders) ==
sizeof(new_pipeline->base.shaders));
memcpy(cmd_buffer->state.gfx.shaders,
new_pipeline->base.shaders,
sizeof(cmd_buffer->state.gfx.shaders));
cmd_buffer->state.gfx.active_stages = pipeline->active_stages;
anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
set_dirty_for_bind_map(cmd_buffer, stage,
&new_pipeline->base.shaders[stage]->bind_map);
}
state = &cmd_buffer->state.gfx.base;
stages = new_pipeline->base.base.active_stages;
update_push_descriptor_flags(state,
new_pipeline->base.shaders,
ARRAY_SIZE(new_pipeline->base.shaders));
/* When the pipeline is using independent states and dynamic buffers,
* this will trigger an update of anv_push_constants::dynamic_base_index
* & anv_push_constants::dynamic_offsets.
*/
struct anv_push_constants *push =
&cmd_buffer->state.gfx.base.push_constants;
struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
bool modified = false;
for (uint32_t s = 0; s < layout->num_sets; s++) {
if (layout->set_layouts[s] == NULL)
continue;
assert(layout->dynamic_offset_start[s] < MAX_DYNAMIC_BUFFERS);
if (layout->set_layouts[s]->vk.dynamic_descriptor_count > 0 &&
(push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
layout->dynamic_offset_start[s]) {
push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
push->desc_surface_offsets[s] |= (layout->dynamic_offset_start[s] &
ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
modified = true;
}
}
if (modified) {
cmd_buffer->state.push_constants_dirty |= stages;
state->push_constants_data_dirty = true;
}
}
cmd_buffer->state.gfx.vs_source_hash = new_pipeline->vs_source_hash;
cmd_buffer->state.gfx.fs_source_hash = new_pipeline->fs_source_hash;
cmd_buffer->state.gfx.instance_multiplier = new_pipeline->instance_multiplier;
anv_cmd_buffer_flush_pipeline_hw_state(cmd_buffer, old_pipeline, new_pipeline);
break;
}
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
if (cmd_buffer->state.rt.base.pipeline == pipeline)
return;
cmd_buffer->state.rt.pipeline_dirty = true;
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
if (rt_pipeline->stack_size > 0) {
anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
rt_pipeline->stack_size);
}
state = &cmd_buffer->state.rt.base;
state->push_buffer_stages = pipeline->use_push_descriptor_buffer;
state->push_descriptor_stages = pipeline->use_push_descriptor_buffer;
state->push_descriptor_index = pipeline->layout.push_descriptor_set_index;
break;
}
default:
UNREACHABLE("invalid bind point");
break;
}
state->pipeline = pipeline;
if (pipeline->ray_queries > 0)
anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
}
static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
@@ -1519,20 +1274,37 @@ void anv_CmdPushDescriptorSetWithTemplate2KHR(
NULL, NULL);
}
void anv_CmdSetRayTracingPipelineStackSizeKHR(
VkCommandBuffer commandBuffer,
uint32_t pipelineStackSize)
void
anv_cmd_buffer_set_rt_state(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize scratch_size,
uint32_t ray_queries)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
rt->scratch_size = MAX2(rt->scratch_size, scratch_size);
if (ray_queries > 0) {
anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &rt->base, ray_queries,
ANV_RT_STAGE_BITS);
}
}
void
anv_cmd_buffer_set_stack_size(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize stack_size)
{
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
uint32_t stack_ids_per_dss = 2048; /* TODO */
unsigned stack_size_log2 = util_logbase2_ceil(pipelineStackSize);
unsigned stack_size_log2 = util_logbase2_ceil(stack_size);
if (stack_size_log2 < 10)
stack_size_log2 = 10;
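Editor's note: anv_cmd_buffer_set_stack_size rounds the per-stack allocation up to a power of two and clamps it to at least 2^10 bytes. A runnable sketch of that computation follows; logbase2_ceil here is a stand-in for Mesa's util_logbase2_ceil, not the actual implementation.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for util_logbase2_ceil(): smallest n with 2^n >= v. */
static unsigned logbase2_ceil(uint64_t v)
{
   unsigned n = 0;
   while ((1ull << n) < v)
      n++;
   return n;
}

int main(void)
{
   uint64_t sizes[] = { 200, 1024, 3000 };
   for (int i = 0; i < 3; i++) {
      unsigned log2sz = logbase2_ceil(sizes[i]);
      if (log2sz < 10)   /* clamp to a minimum of 2^10 = 1024 bytes */
         log2sz = 10;
      printf("stack %llu -> 2^%u = %llu bytes\n",
             (unsigned long long)sizes[i], log2sz, 1ull << log2sz);
   }
   return 0; /* 200 -> 1024, 1024 -> 1024, 3000 -> 4096 */
}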
@@ -1585,7 +1357,7 @@ anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->state.compute.base;
if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
state->pipeline = pipe_state->pipeline;
state->shader = &cmd_buffer->state.compute.shader->vk;
if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
state->descriptor_set[0] = pipe_state->descriptors[0];
@@ -1614,11 +1386,11 @@ anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
if (state->pipeline) {
anv_CmdBindPipeline(cmd_buffer_, bind_point,
anv_pipeline_to_handle(state->pipeline));
if (state->shader) {
mesa_shader_stage stage = MESA_SHADER_COMPUTE;
anv_cmd_buffer_bind_shaders(&cmd_buffer->vk, 1, &stage, &state->shader);
} else {
pipe_state->pipeline = NULL;
cmd_buffer->state.compute.shader = NULL;
}
}
@@ -1693,3 +1465,285 @@ anv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer,
anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
(commandBuffer, invocations_x, invocations_y, invocations_z);
}
static void
bind_compute_shader(struct anv_cmd_buffer *cmd_buffer,
struct anv_shader *shader)
{
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
cmd_buffer->state.compute.shader = shader;
if (shader == NULL)
return;
cmd_buffer->state.compute.pipeline_dirty = true;
set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE, &shader->bind_map);
update_push_descriptor_flags(&comp_state->base,
&cmd_buffer->state.compute.shader, 1);
if (shader->vk.ray_queries > 0) {
assert(cmd_buffer->device->info->verx10 >= 125);
anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &comp_state->base,
shader->vk.ray_queries,
VK_SHADER_STAGE_COMPUTE_BIT);
}
}
static void
bind_graphics_shaders(struct anv_cmd_buffer *cmd_buffer,
struct anv_shader *new_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT])
{
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
uint32_t ray_queries = 0;
static const enum anv_cmd_dirty_bits mesa_stage_to_dirty_bit[] = {
[MESA_SHADER_VERTEX] = ANV_CMD_DIRTY_VS,
[MESA_SHADER_TESS_CTRL] = ANV_CMD_DIRTY_HS,
[MESA_SHADER_TESS_EVAL] = ANV_CMD_DIRTY_DS,
[MESA_SHADER_GEOMETRY] = ANV_CMD_DIRTY_GS,
[MESA_SHADER_TASK] = ANV_CMD_DIRTY_TASK,
[MESA_SHADER_MESH] = ANV_CMD_DIRTY_MESH,
[MESA_SHADER_FRAGMENT] = ANV_CMD_DIRTY_PS,
};
gfx->active_stages = 0;
gfx->instance_multiplier = 0;
mesa_shader_stage new_streamout_stage = -1;
/* Find the last pre-rasterization stage */
for (uint32_t i = 0; i < ANV_GRAPHICS_SHADER_STAGE_COUNT; i++) {
mesa_shader_stage s = ANV_GRAPHICS_SHADER_STAGE_COUNT - i - 1;
if (new_shaders[s] == NULL)
continue;
assert(gfx->instance_multiplier == 0 ||
gfx->instance_multiplier == new_shaders[s]->instance_multiplier);
gfx->active_stages |= mesa_to_vk_shader_stage(s);
gfx->instance_multiplier = new_shaders[s]->instance_multiplier;
if (s == MESA_SHADER_FRAGMENT ||
s == MESA_SHADER_TASK ||
s == MESA_SHADER_TESS_CTRL)
continue;
new_streamout_stage = MAX2(new_streamout_stage, s);
}
for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
struct anv_shader *shader = new_shaders[s];
if (shader != NULL) {
gfx->active_stages |= mesa_to_vk_shader_stage(s);
ray_queries = MAX2(ray_queries, shader->vk.ray_queries);
if (gfx->shaders[s] != shader)
set_dirty_for_bind_map(cmd_buffer, s, &shader->bind_map);
}
if (gfx->shaders[s] != shader)
gfx->dirty |= mesa_stage_to_dirty_bit[s];
else
continue;
#define diff_fix_state(bit, name) \
do { \
/* Fixed states should always have matching sizes */ \
assert(gfx->shaders[s] == NULL || \
gfx->shaders[s]->name.len == shader->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
(gfx->shaders[s] == NULL || \
memcmp(&gfx->shaders[s]->cmd_data[ \
gfx->shaders[s]->name.offset], \
&shader->cmd_data[ \
shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state(bit, name) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
shader->name.len != 0 && \
(gfx->shaders[s] == NULL || \
gfx->shaders[s]->name.len != shader->name.len || \
memcmp(&gfx->shaders[s]->cmd_data[ \
gfx->shaders[s]->name.offset], \
&shader->cmd_data[shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_fix_state_stage(bit, name, old_stage) \
do { \
/* Fixed states should always have matching sizes */ \
assert(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
gfx->shaders[old_stage]->name.len == shader->name.len); \
/* Don't bother memcmp if the state is already dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
(old_stage == MESA_SHADER_NONE || \
gfx->shaders[old_stage] == NULL || \
memcmp(&gfx->shaders[old_stage]->cmd_data[ \
gfx->shaders[old_stage]->name.offset], \
&shader->cmd_data[ \
shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
#define diff_var_state_stage(bit, name, old_stage) \
do { \
/* Don't bother memcmp if the state is already dirty */ \
/* Also if the new state is empty, avoid marking dirty */ \
if (!BITSET_TEST(hw_state->pack_dirty, \
ANV_GFX_STATE_##bit) && \
shader->name.len != 0 && \
(gfx->shaders[old_stage] == NULL || \
gfx->shaders[old_stage]->name.len != shader->name.len || \
memcmp(&gfx->shaders[old_stage]->cmd_data[ \
gfx->shaders[old_stage]->name.offset], \
&shader->cmd_data[shader->name.offset], \
4 * shader->name.len) != 0)) \
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_##bit); \
} while (0)
switch (s) {
case MESA_SHADER_VERTEX:
if (shader != NULL) {
diff_fix_state(VS, vs.vs);
diff_fix_state(VF_SGVS, vs.vf_sgvs);
if (cmd_buffer->device->info->ver >= 11)
diff_fix_state(VF_SGVS_2, vs.vf_sgvs_2);
diff_fix_state(VF_COMPONENT_PACKING, vs.vf_component_packing);
diff_var_state(VF_SGVS_INSTANCING, vs.vf_sgvs_instancing);
gfx->vs_source_hash = shader->prog_data->source_hash;
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_VS);
}
break;
case MESA_SHADER_TESS_CTRL:
if (shader != NULL)
diff_fix_state(HS, hs.hs);
else
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_HS);
break;
case MESA_SHADER_TESS_EVAL:
if (shader != NULL) {
diff_fix_state(DS, ds.ds);
diff_fix_state(TE, ds.te);
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_DS);
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_TE);
}
break;
case MESA_SHADER_GEOMETRY:
if (shader != NULL)
diff_fix_state(GS, gs.gs);
else
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_GS);
break;
case MESA_SHADER_MESH:
if (shader != NULL) {
diff_fix_state(MESH_CONTROL, ms.control);
diff_fix_state(MESH_SHADER, ms.shader);
diff_fix_state(MESH_DISTRIB, ms.distrib);
diff_fix_state(CLIP_MESH, ms.clip);
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_MESH_CONTROL);
}
break;
case MESA_SHADER_TASK:
if (shader != NULL) {
diff_fix_state(TASK_CONTROL, ts.control);
diff_fix_state(TASK_SHADER, ts.shader);
diff_fix_state(TASK_REDISTRIB, ts.redistrib);
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_TASK_CONTROL);
}
break;
case MESA_SHADER_FRAGMENT:
if (shader != NULL) {
diff_fix_state(WM, ps.wm);
diff_fix_state(PS, ps.ps);
diff_fix_state(PS_EXTRA, ps.ps_extra);
gfx->fs_source_hash = shader->prog_data->source_hash;
} else {
BITSET_SET(hw_state->pack_dirty, ANV_GFX_STATE_PS_EXTRA);
}
break;
default:
UNREACHABLE("Invalid shader stage");
}
/* Only diff those fields on the streamout stage */
if (s == new_streamout_stage) {
diff_fix_state_stage(STREAMOUT, so, gfx->streamout_stage);
diff_var_state_stage(SO_DECL_LIST, so_decl_list, gfx->streamout_stage);
}
gfx->shaders[s] = shader;
}
gfx->streamout_stage = new_streamout_stage;
#undef diff_fix_state
#undef diff_var_state
#undef diff_fix_state_stage
#undef diff_var_state_stage
update_push_descriptor_flags(&gfx->base,
cmd_buffer->state.gfx.shaders,
ARRAY_SIZE(cmd_buffer->state.gfx.shaders));
if (ray_queries > 0) {
assert(cmd_buffer->device->info->verx10 >= 125);
anv_cmd_buffer_set_rt_query_buffer(cmd_buffer, &gfx->base, ray_queries,
cmd_buffer->state.gfx.active_stages);
}
}
void
anv_cmd_buffer_bind_shaders(struct vk_command_buffer *vk_cmd_buffer,
uint32_t stage_count,
const mesa_shader_stage *stages,
struct vk_shader ** const vk_shaders)
{
struct anv_shader ** const shaders = (struct anv_shader ** const)vk_shaders;
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
/* Append any scratch surface used by the shaders */
for (uint32_t i = 0; i < stage_count; i++) {
if (shaders[i] != NULL) {
anv_reloc_list_append(cmd_buffer->batch.relocs,
&shaders[i]->relocs);
}
}
struct anv_shader *cs_shader = cmd_buffer->state.compute.shader;
struct anv_shader *gfx_shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
memcpy(gfx_shaders, cmd_buffer->state.gfx.shaders, sizeof(gfx_shaders));
for (uint32_t i = 0; i < stage_count; i++) {
if (mesa_shader_stage_is_compute(stages[i]))
cs_shader = shaders[i];
else
gfx_shaders[stages[i]] = shaders[i];
}
if (cs_shader != cmd_buffer->state.compute.shader)
bind_compute_shader(cmd_buffer, cs_shader);
if (memcmp(gfx_shaders, cmd_buffer->state.gfx.shaders, sizeof(gfx_shaders)))
bind_graphics_shaders(cmd_buffer, gfx_shaders);
}
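Editor's note: anv_cmd_buffer_bind_shaders above overlays the incoming (stage, shader) pairs onto a copy of the currently bound set and rebinds only when something actually changed. A self-contained sketch of that overlay-and-compare pattern follows, with illustrative types rather than ANV's.

#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins for the stage indices and shader handles. */
enum stage { STAGE_VS, STAGE_FS, STAGE_COUNT };
typedef struct shader shader_t;

/* Overlay incoming pairs onto a copy of the bound set, then rebind only on
 * an actual change. */
static void
bind_shaders(shader_t *bound[STAGE_COUNT],
             unsigned count, const enum stage *stages, shader_t **shaders)
{
   shader_t *updated[STAGE_COUNT];
   memcpy(updated, bound, sizeof(updated));
   for (unsigned i = 0; i < count; i++)
      updated[stages[i]] = shaders[i];

   if (memcmp(updated, bound, sizeof(updated)) != 0) {
      memcpy(bound, updated, sizeof(updated));
      printf("rebinding graphics shaders\n");
   } else {
      printf("no change, skipping rebind\n");
   }
}

int main(void)
{
   shader_t *bound[STAGE_COUNT] = { 0 };
   shader_t *vs = (shader_t *)0x1;
   enum stage s = STAGE_VS;
   bind_shaders(bound, 1, &s, &vs); /* rebinding graphics shaders */
   bind_shaders(bound, 1, &s, &vs); /* no change, skipping rebind */
   return 0;
}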
@@ -31,6 +31,7 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "anv_shader.h"
#include "anv_slab_bo.h"
#include "util/u_debug.h"
#include "util/os_file.h"
@@ -380,6 +381,8 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_alloc;
device->vk.shader_ops = &anv_device_shader_ops;
if (INTEL_DEBUG(DEBUG_BATCH) || INTEL_DEBUG(DEBUG_BATCH_STATS)) {
for (unsigned i = 0; i < physical_device->queue.family_count; i++) {
struct intel_batch_decode_ctx *decoder = &device->decoder[i];
@@ -223,7 +223,7 @@ uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const VkShaderStageFlags dirty,
const struct anv_shader_bin **shaders,
const struct anv_shader **shaders,
uint32_t num_shaders);
void genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer);
@@ -173,17 +173,29 @@ anv_pipeline_finish(struct anv_pipeline *pipeline,
vk_object_base_finish(&pipeline->vk.base);
}
VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyPipeline(VkDevice _device,
VkPipeline _pipeline,
const VkAllocationCallbacks *pAllocator);
void anv_DestroyPipeline(
VkDevice _device,
VkPipeline _pipeline,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
VK_FROM_HANDLE(vk_pipeline, vk_pipeline, _pipeline);
if (!pipeline)
if (!vk_pipeline)
return;
if (vk_pipeline->ops != NULL) {
vk_common_DestroyPipeline(_device, _pipeline, pAllocator);
return;
}
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
ANV_RMV(resource_destroy, device, pipeline);
switch (pipeline->type) {
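Editor's note: anv_DestroyPipeline now has to handle two kinds of handles: runtime-owned pipelines (vk_pipeline::ops != NULL), forwarded to vk_common_DestroyPipeline, and legacy ANV objects. A minimal sketch of that dual-path dispatch follows, with stand-in types only.

#include <stdio.h>

/* Stand-in types: a pipeline handle is either runtime-owned (ops != NULL)
 * or a legacy driver object (ops == NULL). */
struct pipeline_ops { int unused; };
struct pipeline { const struct pipeline_ops *ops; };

static void runtime_destroy(struct pipeline *p) { (void)p; printf("runtime path\n"); }
static void legacy_destroy(struct pipeline *p)  { (void)p; printf("legacy path\n"); }

static void destroy_pipeline(struct pipeline *p)
{
   if (p == NULL)
      return;               /* destroying a NULL handle is a no-op */
   if (p->ops != NULL) {    /* object created by the common runtime */
      runtime_destroy(p);
      return;
   }
   legacy_destroy(p);       /* driver-specific teardown */
}

int main(void)
{
   const struct pipeline_ops ops = { 0 };
   struct pipeline rt = { .ops = &ops }, legacy = { .ops = NULL };
   destroy_pipeline(&rt);     /* runtime path */
   destroy_pipeline(&legacy); /* legacy path */
   return 0;
}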
@@ -2851,6 +2863,7 @@ anv_compute_pipeline_create(struct anv_device *device,
return pipeline->base.batch.status;
}
#if 0
VkResult anv_CreateComputePipelines(
VkDevice _device,
VkPipelineCache pipelineCache,
@@ -2885,6 +2898,7 @@ VkResult anv_CreateComputePipelines(
return result;
}
#endif
static uint32_t
get_vs_input_elements(const struct brw_vs_prog_data *vs_prog_data)
@@ -3343,6 +3357,7 @@ anv_graphics_pipeline_create(struct anv_device *device,
return pipeline->base.base.batch.status;
}
#if 0
VkResult anv_CreateGraphicsPipelines(
VkDevice _device,
VkPipelineCache pipelineCache,
@@ -3388,6 +3403,7 @@ VkResult anv_CreateGraphicsPipelines(
return result;
}
#endif
static bool
should_remat_cb(nir_instr *instr, void *data)
@@ -4083,6 +4099,7 @@ anv_ray_tracing_pipeline_create(
return pipeline->base.batch.status;
}
#if 0
VkResult
anv_CreateRayTracingPipelinesKHR(
VkDevice _device,
@@ -4491,3 +4508,4 @@ anv_GetRayTracingShaderGroupStackSizeKHR(
return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}
#endif
@@ -1224,7 +1224,6 @@ struct anv_shader {
struct anv_state kernel;
const struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
struct brw_compile_stats stats[3];
uint32_t num_stats;
@@ -2186,6 +2185,11 @@ struct anv_gfx_dynamic_state {
uint32_t PrimitiveTopologyType;
} vft;
/* 3DSTATE_VS */
struct {
bool VertexCacheDisable;
} vs;
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC */
struct {
uint32_t count;
@@ -4422,7 +4426,7 @@ struct anv_cmd_graphics_state {
struct anv_cmd_pipeline_state base;
/* Shaders bound */
struct anv_shader_bin *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
struct anv_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
/* Bitfield of valid entries in the shaders array */
VkShaderStageFlags active_stages;
@@ -4436,6 +4440,9 @@ struct anv_cmd_graphics_state {
bool kill_pixel;
bool uses_xfb;
/* Shader stage in base.shaders[] responsible for streamout */
mesa_shader_stage streamout_stage;
/* Render pass information */
VkRenderingFlags rendering_flags;
VkRect2D render_area;
@@ -4530,7 +4537,7 @@ struct anv_cmd_graphics_state {
struct anv_cmd_compute_state {
struct anv_cmd_pipeline_state base;
struct anv_shader_bin *shader;
struct anv_shader *shader;
bool pipeline_dirty;
@@ -4551,6 +4558,8 @@ struct anv_cmd_ray_tracing_state {
struct brw_rt_scratch_layout layout;
} scratch;
VkDeviceSize scratch_size;
uint32_t debug_marker_count;
uint32_t num_tlas;
uint32_t num_blas;
@@ -5022,6 +5031,12 @@ void
anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
enum anv_pipe_bits flushed_bits);
void
anv_cmd_buffer_bind_shaders(struct vk_command_buffer *cmd_buffer,
uint32_t stage_count,
const mesa_shader_stage *stages,
struct vk_shader ** const shaders);
/**
* An allocation tied to a command buffer.
*
@@ -5083,7 +5098,7 @@ enum anv_cmd_saved_state_flags {
struct anv_cmd_saved_state {
uint32_t flags;
struct anv_pipeline *pipeline;
struct vk_shader *shader;
struct anv_descriptor_set *descriptor_set[MAX_SETS];
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
};
@@ -5444,7 +5459,6 @@ struct anv_graphics_pipeline {
4 * _cmd_state->len); \
} while (0)
struct anv_compute_pipeline {
struct anv_pipeline base;
@@ -6484,6 +6498,15 @@ anv_cmd_flush_buffer_write_cp(VkCommandBuffer cmd_buffer);
VkResult
anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer);
void
anv_cmd_buffer_set_rt_state(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize scratch_size,
uint32_t ray_queries);
void
anv_cmd_buffer_set_stack_size(struct vk_command_buffer *vk_cmd_buffer,
VkDeviceSize stack_size);
bool
anv_can_hiz_clear_image(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
@@ -1886,5 +1886,8 @@ struct vk_device_shader_ops anv_device_shader_ops = {
.deserialize = anv_shader_deserialize,
.write_rt_shader_group = anv_write_rt_shader_group,
.write_rt_shader_group_replay_handle = anv_write_rt_shader_group_replay_handle,
.cmd_bind_shaders = anv_cmd_buffer_bind_shaders,
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
.cmd_set_rt_state = anv_cmd_buffer_set_rt_state,
.cmd_set_stack_size = anv_cmd_buffer_set_stack_size,
};
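Editor's note: the vk_device_shader_ops table above is how the common runtime calls back into the driver; anv_CreateDevice registers it via device->vk.shader_ops earlier in this commit. A self-contained sketch of the ops-table pattern follows; the field names are illustrative stand-ins, not the actual vk_device_shader_ops layout.

#include <stdio.h>

struct device;
struct shader_ops {
   void (*bind_shaders)(struct device *dev, unsigned count);
   void (*set_stack_size)(struct device *dev, unsigned long long size);
};

struct device { const struct shader_ops *shader_ops; };

static void drv_bind_shaders(struct device *dev, unsigned count)
{ (void)dev; printf("driver binds %u shaders\n", count); }
static void drv_set_stack_size(struct device *dev, unsigned long long size)
{ (void)dev; printf("driver sets stack size %llu\n", size); }

/* The driver fills the table once... */
static const struct shader_ops drv_ops = {
   .bind_shaders = drv_bind_shaders,
   .set_stack_size = drv_set_stack_size,
};

int main(void)
{
   /* ...and registers it, like device->vk.shader_ops = &anv_device_shader_ops. */
   struct device dev = { .shader_ops = &drv_ops };
   dev.shader_ops->bind_shaders(&dev, 2);     /* runtime -> driver callback */
   dev.shader_ops->set_stack_size(&dev, 4096);
   return 0;
}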
@@ -2121,7 +2121,7 @@ emit_direct_descriptor_binding_table_entry(struct anv_cmd_buffer *cmd_buffer,
static VkResult
emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
struct anv_state *bt_state)
{
uint32_t state_offset;
@@ -2153,7 +2153,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
/* Color attachment binding */
assert(shader->stage == MESA_SHADER_FRAGMENT);
assert(shader->vk.stage == MESA_SHADER_FRAGMENT);
uint32_t index = binding->index < MAX_RTS ?
cmd_buffer->state.gfx.color_output_mapping[binding->index] :
binding->index;
@@ -2268,7 +2268,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
static VkResult
emit_samplers(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
struct anv_state *state)
{
const struct anv_pipeline_bind_map *map = &shader->bind_map;
@@ -2312,7 +2312,7 @@ uint32_t
genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state,
const VkShaderStageFlags dirty,
const struct anv_shader_bin **shaders,
const struct anv_shader **shaders,
uint32_t num_shaders)
{
VkShaderStageFlags flushed = 0;
@@ -2322,7 +2322,7 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
if (!shaders[i])
continue;
mesa_shader_stage stage = shaders[i]->stage;
mesa_shader_stage stage = shaders[i]->vk.stage;
VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(stage);
if ((vk_stage & dirty) == 0)
continue;
@@ -2361,7 +2361,7 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer,
if (!shaders[i])
continue;
mesa_shader_stage stage = shaders[i]->stage;
mesa_shader_stage stage = shaders[i]->vk.stage;
result = emit_samplers(cmd_buffer, pipe_state, shaders[i],
&cmd_buffer->state.samplers[stage]);
@@ -105,13 +105,11 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const UNUSED struct intel_device_info *devinfo = cmd_buffer->device->info;
struct anv_compute_pipeline *pipeline =
anv_pipeline_to_compute(comp_state->base.pipeline);
assert(comp_state->shader);
genX(cmd_buffer_config_l3)(cmd_buffer,
pipeline->cs->prog_data->total_shared > 0 ?
comp_state->shader->prog_data->total_shared > 0 ?
device->l3_slm_config : device->l3_config);
genX(cmd_buffer_update_color_aux_op(cmd_buffer, ISL_AUX_OP_NONE));
@@ -127,7 +125,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
*/
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
if (cmd_buffer->state.compute.pipeline_dirty) {
if (comp_state->pipeline_dirty) {
#if GFX_VERx10 < 125
/* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE:
*
@@ -143,13 +141,28 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch);
#define anv_batch_emit_cs(batch, cmd, field) ({ \
void *__dst = anv_batch_emit_dwords( \
batch, __anv_cmd_length(cmd)); \
memcpy(__dst, \
&comp_state->shader->cmd_data[ \
comp_state->shader->field.offset], \
4 * __anv_cmd_length(cmd)); \
VG(VALGRIND_CHECK_MEM_IS_DEFINED( \
__dst, __anv_cmd_length(cmd) * 4)); \
__dst; \
})
#if GFX_VERx10 >= 125
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(comp_state);
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, prog_data->base.total_scratch);
#else
anv_batch_emit_cs(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), cs.gfx9.vfe);
#endif
#undef anv_batch_emit_cs
/* Changing the pipeline affects the push constants layout (different
* amount of cross/per thread allocations). The allocation is also
* bounded to just the amount consumed by the pipeline (see
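Editor's note: the anv_batch_emit_cs macro above replaces whole-pipeline batch replay with a memcpy of a pre-packed dword span owned by the shader. A runnable sketch of that emission pattern follows, with stand-in types rather than ANV's batch machinery.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins: a batch is a dword buffer with a write cursor;
 * each shader owns a blob of pre-packed command dwords. */
struct batch  { uint32_t dwords[64]; unsigned next; };
struct span   { uint32_t offset, len; };          /* in dwords */
struct shader { uint32_t cmd_data[32]; struct span vfe; };

static uint32_t *batch_emit_dwords(struct batch *b, unsigned len)
{
   uint32_t *dst = &b->dwords[b->next];
   b->next += len;
   return dst;
}

/* Emission is just a copy of the shader's pre-packed span into the batch. */
static void emit_packed(struct batch *b, const struct shader *sh, struct span s)
{
   memcpy(batch_emit_dwords(b, s.len), &sh->cmd_data[s.offset], 4 * s.len);
}

int main(void)
{
   struct batch b = { .next = 0 };
   struct shader sh = { .cmd_data = { 0xdead, 0xbeef }, .vfe = { 0, 2 } };
   emit_packed(&b, &sh, sh.vfe);
   printf("emitted %u dwords: 0x%x 0x%x\n", b.next, b.dwords[0], b.dwords[1]);
   return 0;
}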
@@ -179,7 +192,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer,
&cmd_buffer->state.compute.base,
VK_SHADER_STAGE_COMPUTE_BIT,
(const struct anv_shader_bin **)&comp_state->shader, 1);
(const struct anv_shader **)&comp_state->shader, 1);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
#if GFX_VERx10 < 125
@@ -194,7 +207,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
struct anv_state state =
anv_cmd_buffer_merge_dynamic(cmd_buffer, iface_desc_data_dw,
pipeline->gfx9.interface_descriptor_data,
comp_state->shader->cs.gfx9.idd,
GENX(INTERFACE_DESCRIPTOR_DATA_length),
64);
@@ -439,7 +452,7 @@ emit_indirect_compute_walker(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->batch,
GENX(EXECUTE_INDIRECT_DISPATCH_length),
GENX(EXECUTE_INDIRECT_DISPATCH_body_start) / 32,
anv_pipeline_to_compute(comp_state->base.pipeline)->gfx125.compute_walker_body,
comp_state->shader->cs.gfx125.compute_walker_body,
GENX(EXECUTE_INDIRECT_DISPATCH),
.PredicateEnable = predicate,
.MaxCount = 1,
@@ -520,7 +533,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
&cmd_buffer->batch,
GENX(COMPUTE_WALKER_length),
GENX(COMPUTE_WALKER_body_start) / 32,
anv_pipeline_to_compute(comp_state->base.pipeline)->gfx125.compute_walker_body,
comp_state->shader->cs.gfx125.compute_walker_body,
GENX(COMPUTE_WALKER),
.IndirectParameterEnable = !anv_address_is_null(indirect_addr),
.PredicateEnable = predicate,
@@ -1051,8 +1064,6 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
struct anv_ray_tracing_pipeline *pipeline =
anv_pipeline_to_ray_tracing(rt->base.pipeline);
if (INTEL_DEBUG(DEBUG_RT_NO_TRACE))
return;
@@ -1211,18 +1222,18 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
*/
btd.PerDSSMemoryBackedBufferSize = 6;
btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo };
if (pipeline->base.scratch_size > 0) {
if (rt->scratch_size > 0) {
struct anv_bo *scratch_bo =
anv_scratch_pool_alloc(device,
&device->scratch_pool,
MESA_SHADER_COMPUTE,
pipeline->base.scratch_size);
rt->scratch_size);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
scratch_bo);
uint32_t scratch_surf =
anv_scratch_pool_get_surf(cmd_buffer->device,
&device->scratch_pool,
pipeline->base.scratch_size);
rt->scratch_size);
btd.ScratchSpaceBuffer = scratch_surf >> ANV_SCRATCH_SPACE_SHIFT(GFX_VER);
}
#if INTEL_NEEDS_WA_14017794102 || INTEL_NEEDS_WA_14023061436
@@ -1234,7 +1245,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
#endif
}
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, pipeline->base.scratch_size);
genX(cmd_buffer_ensure_cfe_state)(cmd_buffer, rt->scratch_size);
const struct brw_cs_prog_data *cs_prog_data =
brw_cs_prog_data_const(device->rt_trampoline->prog_data);
@@ -1273,7 +1284,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
.ThreadGroupIDZDimension = global_size[2],
.ExecutionMask = 0xff,
.EmitInlineParameter = true,
.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
.PostSync.MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
#if GFX_VER >= 30
/* HSD 14016252163 */
.DispatchWalkOrder = cs_prog_data->uses_sampler ? MortonWalk : LinearWalk,
@@ -162,7 +162,7 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
static struct anv_address
get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
const struct anv_push_range *range)
{
struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
@@ -242,10 +242,10 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
*/
static uint32_t
get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
const struct anv_shader_bin *shader,
const struct anv_shader *shader,
const struct anv_push_range *range)
{
assert(shader->stage != MESA_SHADER_COMPUTE);
assert(shader->vk.stage != MESA_SHADER_COMPUTE);
const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
switch (range->set) {
case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
@@ -443,7 +443,7 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
if (!anv_gfx_has_stage(gfx, stage))
continue;
const struct anv_shader_bin *shader = gfx->shaders[stage];
const struct anv_shader *shader = gfx->shaders[stage];
if (shader->prog_data->robust_ubo_ranges) {
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
struct anv_push_constants *push = &gfx->base.push_constants;
@@ -509,7 +509,7 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
struct anv_address buffers[4] = {};
if (anv_gfx_has_stage(gfx, stage)) {
const struct anv_shader_bin *shader = gfx->shaders[stage];
const struct anv_shader *shader = gfx->shaders[stage];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
/* We have to gather buffer addresses as a second step because the
@@ -593,7 +593,7 @@ get_mesh_task_push_addr64(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_graphics_state *gfx,
mesa_shader_stage stage)
{
const struct anv_shader_bin *shader = gfx->shaders[stage];
const struct anv_shader *shader = gfx->shaders[stage];
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
if (bind_map->push_ranges[0].length == 0)
return 0;
@@ -645,31 +645,50 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
ALWAYS_INLINE static void
cmd_buffer_maybe_flush_rt_writes(struct anv_cmd_buffer *cmd_buffer,
const struct anv_graphics_pipeline *pipeline)
struct anv_cmd_graphics_state *gfx,
const struct vk_dynamic_graphics_state *dyn)
{
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
if (!anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT))
return;
UNUSED bool need_rt_flush = false;
for (uint32_t rt = 0; rt < pipeline->num_color_outputs; rt++) {
/* No writes going to this render target so it won't affect the RT cache
*/
if (pipeline->color_output_mapping[rt] == ANV_COLOR_OUTPUT_UNUSED)
continue;
/* Count the number of color attachments in the binding table */
const struct anv_pipeline_bind_map *bind_map =
&gfx->shaders[MESA_SHADER_FRAGMENT]->bind_map;
/* No change */
if (cmd_buffer->state.gfx.color_output_mapping[rt] ==
pipeline->color_output_mapping[rt])
continue;
cmd_buffer->state.gfx.color_output_mapping[rt] =
pipeline->color_output_mapping[rt];
need_rt_flush = true;
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
/* Build a map of fragment color output to attachment */
uint8_t rt_to_att[MAX_RTS];
memset(rt_to_att, ANV_COLOR_OUTPUT_DISABLED, MAX_RTS);
for (uint32_t i = 0; i < MAX_RTS; i++) {
if (dyn->cal.color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
rt_to_att[dyn->cal.color_map[i]] = i;
}
/* For each fragment shader output, if not unused, apply the remapping to
* pipeline->color_output_mapping
*/
UNUSED bool need_rt_flush = false;
for (unsigned rt = 0; rt < MIN2(bind_map->surface_count, MAX_RTS); rt++) {
if (bind_map->surface_to_descriptor[rt].set !=
ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
break;
uint32_t index = bind_map->surface_to_descriptor[rt].index;
if (index == ANV_COLOR_OUTPUT_UNUSED)
continue;
if (index == ANV_COLOR_OUTPUT_DISABLED &&
gfx->color_output_mapping[rt] != index) {
gfx->color_output_mapping[rt] = index;
need_rt_flush = true;
} else if (gfx->color_output_mapping[rt] != rt_to_att[rt]) {
gfx->color_output_mapping[rt] = rt_to_att[rt];
need_rt_flush = true;
}
}
#if GFX_VER >= 11
if (need_rt_flush) {
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
#if GFX_VER >= 11
/* The PIPE_CONTROL command description says:
*
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
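Editor's note: the rewritten cmd_buffer_maybe_flush_rt_writes inverts the dynamic attachment map (attachment slot -> fragment output) into a fragment-output -> attachment table before remapping. A runnable sketch of that inversion follows; the two constants are stand-ins for MESA_VK_ATTACHMENT_UNUSED and ANV_COLOR_OUTPUT_DISABLED.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_RTS 8
#define ATTACHMENT_UNUSED 0xff   /* stand-in for MESA_VK_ATTACHMENT_UNUSED */
#define OUTPUT_DISABLED   0xfe   /* stand-in for ANV_COLOR_OUTPUT_DISABLED */

int main(void)
{
   /* color_map[] maps attachment slot -> fragment output; here outputs
    * 0 and 1 are swapped and everything else is unused. */
   uint8_t color_map[MAX_RTS] = {
      [0] = 1, [1] = 0,
      [2] = ATTACHMENT_UNUSED, [3] = ATTACHMENT_UNUSED,
      [4] = ATTACHMENT_UNUSED, [5] = ATTACHMENT_UNUSED,
      [6] = ATTACHMENT_UNUSED, [7] = ATTACHMENT_UNUSED,
   };

   /* Invert it to get fragment output -> attachment. */
   uint8_t rt_to_att[MAX_RTS];
   memset(rt_to_att, OUTPUT_DISABLED, MAX_RTS);
   for (uint32_t i = 0; i < MAX_RTS; i++) {
      if (color_map[i] != ATTACHMENT_UNUSED)
         rt_to_att[color_map[i]] = i;
   }

   for (uint32_t rt = 0; rt < 3; rt++)
      printf("output %u -> attachment 0x%x\n", rt, rt_to_att[rt]);
   return 0; /* output 0 -> 1, output 1 -> 0, output 2 -> disabled */
}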
@@ -689,9 +708,9 @@ cmd_buffer_maybe_flush_rt_writes(struct anv_cmd_buffer *cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"change RT due to shader outputs");
}
#endif
}
}
ALWAYS_INLINE static void
cmd_buffer_flush_vertex_buffers(struct anv_cmd_buffer *cmd_buffer,
@@ -750,8 +769,6 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
@@ -772,16 +789,16 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
*
* Apply task URB workaround when switching from task to primitive.
*/
if (anv_pipeline_is_primitive(pipeline)) {
if (!anv_gfx_has_stage(gfx, MESA_SHADER_MESH)) {
genX(apply_task_urb_workaround)(cmd_buffer);
} else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
} else if (anv_gfx_has_stage(gfx, MESA_SHADER_TASK)) {
cmd_buffer->state.gfx.used_task_shader = true;
}
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP) ||
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PS))
cmd_buffer_maybe_flush_rt_writes(cmd_buffer, pipeline);
cmd_buffer_maybe_flush_rt_writes(cmd_buffer, gfx, dyn);
/* Apply any pending pipeline flushes we may have. We want to apply them
* now because, if any of those flushes are for things like push constants,
@@ -887,17 +904,29 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
/* If the pipeline changed, we may need to re-allocate push constant space
* in the URB.
*/
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PUSH_CONSTANT_SHADERS) {
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PUSH_CONSTANT_SHADERS)
cmd_buffer_alloc_gfx_push_constants(cmd_buffer);
#if GFX_VERx10 < 125
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_VS |
ANV_CMD_DIRTY_HS |
ANV_CMD_DIRTY_DS |
ANV_CMD_DIRTY_GS |
ANV_CMD_DIRTY_PS)) {
for (unsigned s = 0; s <= MESA_SHADER_FRAGMENT; s++) {
if (gfx->shaders[s] == NULL)
continue;
/* Also add the relocations (scratch buffers) */
VkResult result = anv_reloc_list_append(cmd_buffer->batch.relocs,
pipeline->base.base.batch.relocs);
&gfx->shaders[s]->relocs);
if (result != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
}
}
#endif
/* Render targets live in the same binding table as fragment descriptors */
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS)
@@ -916,7 +945,7 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer,
&cmd_buffer->state.gfx.base,
descriptors_dirty,
(const struct anv_shader_bin **)gfx->shaders,
(const struct anv_shader **)gfx->shaders,
ARRAY_SIZE(gfx->shaders));
cmd_buffer->state.descriptors_dirty &= ~dirty;
}
@@ -989,23 +1018,13 @@ anv_use_generated_draws(const struct anv_cmd_buffer *cmd_buffer, uint32_t count)
ALWAYS_INLINE static void
cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
{
UNUSED const struct anv_device *device = cmd_buffer->device;
UNUSED const struct anv_instance *instance =
device->physical->instance;
UNUSED const bool protected = cmd_buffer->vk.pool->flags &
VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
UNUSED struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
UNUSED struct anv_device *device = cmd_buffer->device;
UNUSED struct anv_instance *instance = device->physical->instance;
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely( \
(instance->debug & ANV_DEBUG_SHADER_HASH) && \
anv_pipeline_has_stage(pipeline, stage))) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(pipeline->base.shaders[stage]-> \
prog_data->source_hash)); \
} \
} while (0)
UNUSED struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
UNUSED struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
struct mi_builder b;
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) {
@@ -1013,18 +1032,35 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
}
#define DEBUG_SHADER_HASH(stage) do { \
if (unlikely(instance->debug & ANV_DEBUG_SHADER_HASH)) { \
mi_store(&b, \
mi_mem32(device->workaround_address), \
mi_imm(gfx->shaders[stage]->prog_data->source_hash)); \
} \
} while (0)
#define anv_batch_emit_gfx(batch, cmd, name) ({ \
void *__dst = anv_batch_emit_dwords( \
batch, __anv_cmd_length(cmd)); \
memcpy(__dst, hw_state->packed.name, \
4 * __anv_cmd_length(cmd)); \
VG(VALGRIND_CHECK_MEM_IS_DEFINED( \
__dst, __anv_cmd_length(cmd) * 4)); \
__dst; \
})
#if INTEL_WA_16011107343_GFX_VER
if (intel_needs_workaround(cmd_buffer->device->info, 16011107343) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
anv_gfx_has_stage(gfx, MESA_SHADER_TESS_CTRL)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_CTRL);
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.hs, protected);
anv_batch_emit_gfx(&cmd_buffer->batch, GENX(3DSTATE_HS), hs);
}
#endif
#if INTEL_WA_22018402687_GFX_VER
if (intel_needs_workaround(cmd_buffer->device->info, 22018402687) &&
anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
DEBUG_SHADER_HASH(MESA_SHADER_TESS_EVAL);
/* Wa_22018402687:
* In any 3D enabled context, just before any Tessellation enabled
@@ -1038,13 +1074,13 @@ cmd_buffer_pre_draw_wa(struct anv_cmd_buffer *cmd_buffer)
* said switch, as it matters at the HW level, and can be triggered even
* across processes, so we apply the Wa at all times.
*/
anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
final.ds, protected);
anv_batch_emit_gfx(&cmd_buffer->batch, GENX(3DSTATE_DS), ds);
}
#endif
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
#undef anv_batch_emit_gfx
#undef DEBUG_SHADER_HASH
}
@@ -96,18 +96,10 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
ANV_STATE_NULL;
UNUSED uint32_t wa_insts_offset = 0;
#if INTEL_WA_16011107343_GFX_VER || INTEL_WA_22018402687_GFX_VER
struct anv_graphics_pipeline *pipeline =
anv_pipeline_to_graphics(gfx->base.pipeline);
#endif
#if INTEL_WA_16011107343_GFX_VER
if (wa_16011107343) {
memcpy(wa_insts_state.map + wa_insts_offset,
&pipeline->batch_data[
protected ?
pipeline->final.hs_protected.offset :
pipeline->final.hs.offset],
gfx->dyn_state.packed.hs,
GENX(3DSTATE_HS_length) * 4);
wa_insts_offset += GENX(3DSTATE_HS_length) * 4;
}
@@ -116,10 +108,7 @@
#if INTEL_WA_22018402687_GFX_VER
if (wa_22018402687) {
memcpy(wa_insts_state.map + wa_insts_offset,
&pipeline->batch_data[
protected ?
pipeline->final.ds_protected.offset :
pipeline->final.ds.offset],
gfx->dyn_state.packed.ds,
GENX(3DSTATE_DS_length) * 4);
wa_insts_offset += GENX(3DSTATE_DS_length) * 4;
}
@@ -209,7 +209,7 @@ genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer,
if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
return;
if (gfx->uses_xfb) {
if (gfx->shaders[gfx->streamout_stage]->xfb_info != NULL) {
genX(cmd_buffer_set_preemption)(cmd_buffer, false);
return;
}
@ -417,10 +417,10 @@ want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
|
|||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
|
||||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
|
||||
*/
|
||||
struct anv_shader_bin *fs_bin = gfx->shaders[MESA_SHADER_FRAGMENT];
|
||||
struct anv_shader *fs = gfx->shaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
return kill_pixel(wm_prog_data, dyn) ||
|
||||
has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
|
||||
has_ds_feedback_loop(&fs->bind_map, dyn) ||
|
||||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
|
||||
}

@ -1012,21 +1012,21 @@ update_ps(struct anv_gfx_dynamic_state *hw_state,
      return;
   }

   const struct anv_shader_bin *fs_bin = gfx->shaders[MESA_SHADER_FRAGMENT];
   const struct anv_shader *fs = gfx->shaders[MESA_SHADER_FRAGMENT];
   struct GENX(3DSTATE_PS) ps = {};
   intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
                               MAX2(dyn->ms.rasterization_samples, 1),
                               hw_state->fs_msaa_flags);

   SET(PS, ps.KernelStartPointer0,
       fs_bin->kernel.offset +
       fs->kernel.offset +
       brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
   SET(PS, ps.KernelStartPointer1,
       fs_bin->kernel.offset +
       fs->kernel.offset +
       brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
#if GFX_VER < 20
   SET(PS, ps.KernelStartPointer2,
       fs_bin->kernel.offset +
       fs->kernel.offset +
       brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
#endif

@ -1124,12 +1124,12 @@ update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
                            const struct vk_dynamic_graphics_state *dyn,
                            const struct anv_cmd_graphics_state *gfx)
{
   struct anv_shader_bin *fs_bin = gfx->shaders[MESA_SHADER_FRAGMENT];
   struct anv_shader *fs = gfx->shaders[MESA_SHADER_FRAGMENT];
   const struct brw_wm_prog_data *wm_prog_data = get_gfx_wm_prog_data(gfx);

   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
             wm_prog_data &&
             (has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
             (has_ds_feedback_loop(&fs->bind_map, dyn) ||
              wm_prog_data->uses_kill),
             FRAGMENT);
}

@ -2174,6 +2174,35 @@ update_tbimr_info(struct anv_gfx_dynamic_state *hw_state,
}
#endif

#if GFX_VERx10 == 90
ALWAYS_INLINE static void
update_vs(struct anv_gfx_dynamic_state *hw_state,
          const struct anv_cmd_graphics_state *gfx,
          const struct anv_device *device)
{
   if (device->info->gt < 4)
      return;

   /* On Sky Lake GT4, we have experienced some hangs related to the VS cache
    * and tessellation. It is unknown exactly what is happening but the
    * Haswell docs for the "VS Reference Count Full Force Miss Enable" field
    * of the "Thread Mode" register refer to a HSW bug in which the VUE handle
    * reference count would overflow resulting in internal reference counting
    * bugs. My (Faith's) best guess is that this bug cropped back up on SKL
    * GT4 when we suddenly had more threads in play than any previous gfx9
    * hardware.
    *
    * What we do know for sure is that setting this bit when tessellation
    * shaders are in use fixes a GPU hang in Batman: Arkham City when playing
    * with DXVK (https://bugs.freedesktop.org/107280). Disabling the vertex
    * cache with tessellation shaders should only have a minor performance
    * impact as the tessellation shaders are likely generating and processing
    * far more geometry than the vertex stage.
    */
   SET(VS, vs.VertexCacheDisable, anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL));
}
#endif
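
(This runtime hook replaces the pipeline-creation-time VertexCacheDisable programming that this commit deletes from emit_vs_shader() — see the removed "#if 0 / TODO: move to shader binding" block at the end of the diff. It is wired into the flush path below, gated on the pre-raster shaders changing:

#if GFX_VERx10 == 90
   if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
      update_vs(hw_state, gfx, device);
#endif
)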

#if INTEL_WA_18019110168_GFX_VER
static inline unsigned
compute_mesh_provoking_vertex(const struct brw_mesh_prog_data *mesh_prog_data,

@ -2215,11 +2244,13 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
                                   const struct anv_device *device,
                                   const struct vk_dynamic_graphics_state *dyn,
                                   struct anv_cmd_graphics_state *gfx,
                                   const struct anv_graphics_pipeline *pipeline,
                                   VkCommandBufferLevel cmd_buffer_level)
{
   UNUSED bool fs_msaa_changed = false;

   assert(gfx->shaders[gfx->streamout_stage] != NULL);
   assert(gfx->instance_multiplier != 0);

   /* Do this before update_fs_msaa_flags() for primitive_id_index */
   if (gfx->dirty & ANV_CMD_DIRTY_ALL_SHADERS(device))
      update_sbe(hw_state, gfx, device);

@ -2234,6 +2265,11 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
   if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
      update_urb_config(hw_state, gfx, device);

#if GFX_VERx10 == 90
   if (gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS)
      update_vs(hw_state, gfx, device);
#endif

   if ((gfx->dirty & ANV_CMD_DIRTY_PS) ||
       BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
      update_ps(hw_state, device, dyn, gfx);

@ -2482,8 +2518,7 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
static void
cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
                            struct anv_cmd_buffer *cmd_buffer,
                            const struct anv_cmd_graphics_state *gfx,
                            const struct anv_graphics_pipeline *pipeline)
                            const struct anv_cmd_graphics_state *gfx)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_instance *instance = device->physical->instance;

@ -2502,73 +2537,107 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
} while (0)
#define IS_DIRTY(name) BITSET_TEST(hw_state->pack_dirty, ANV_GFX_STATE_##name)

#define anv_gfx_copy(field, cmd, source) ({                 \
#define anv_gfx_copy(field, cmd, stage, source) ({          \
   if (gfx->shaders[stage] != NULL) {                       \
      assert(sizeof(hw_state->packed.field) >=              \
             4 * __anv_cmd_length(cmd));                    \
      assert((source).len == __anv_cmd_length(cmd));        \
      assert((gfx->shaders[stage]->source).len ==           \
             __anv_cmd_length(cmd));                        \
      memcpy(&hw_state->packed.field,                       \
             &pipeline->batch_data[(source).offset],        \
             &gfx->shaders[stage]->cmd_data[                \
                (gfx->shaders[stage]->source).offset],      \
             4 * __anv_cmd_length(cmd));                    \
   } else {                                                 \
      anv_gfx_pack(field, cmd, __unused_name);              \
   }                                                        \
})
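
(With runtime pipelines, the pre-packed dwords now come from the per-stage shader object — gfx->shaders[stage]->cmd_data — instead of the monolithic pipeline batch; when the stage is not bound, the state is packed from default values instead of copied. A call site from further down, for reference:

   if (IS_DIRTY(VF_SGVS))
      anv_gfx_copy(vf_sgvs, GENX(3DSTATE_VF_SGVS), MESA_SHADER_VERTEX, vs.vf_sgvs);
)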
#define anv_gfx_copy_variable(field, source) ({             \
#define anv_gfx_copy_variable(field, stage, source) ({      \
   if (gfx->shaders[stage] != NULL) {                       \
      assert(sizeof(hw_state->packed.field) >=              \
             4 * (source).len);                             \
             4 * gfx->shaders[stage]->source.len);          \
      memcpy(&hw_state->packed.field,                       \
             &pipeline->batch_data[(source).offset],        \
             4 * (source).len);                             \
      hw_state->packed.field##_len = (source).len;          \
             &gfx->shaders[stage]->cmd_data[                \
                (gfx->shaders[stage]->source).offset],      \
             4 * gfx->shaders[stage]->source.len);          \
      hw_state->packed.field##_len =                        \
         gfx->shaders[stage]->source.len;                   \
   }                                                        \
})
#define anv_gfx_copy_protected(field, cmd, source) ({               \
#define anv_gfx_copy_protected(field, cmd, stage, source) ({        \
   const bool __protected = (cmd_buffer->vk.pool->flags &           \
                             VK_COMMAND_POOL_CREATE_PROTECTED_BIT); \
   assert(sizeof(hw_state->packed.field) >=                         \
          4 * __anv_cmd_length(cmd));                               \
   assert((source).len == __anv_cmd_length(cmd));                   \
   if (gfx->shaders[stage] != NULL) {                               \
      assert((gfx->shaders[stage]->source).len ==                   \
             __anv_cmd_length(cmd));                                \
      memcpy(&hw_state->packed.field,                               \
             &pipeline->batch_data[                                 \
             &gfx->shaders[stage]->cmd_data[                        \
                __protected ?                                       \
                   (source##_protected).offset :                    \
                   (source).offset],                                \
                   gfx->shaders[stage]->source##_protected.offset : \
                   gfx->shaders[stage]->source.offset],             \
             4 * __anv_cmd_length(cmd));                            \
   } else {                                                         \
      memcpy(&hw_state->packed.field,                               \
             device->physical->gfx_default.field,                   \
             4 * __anv_cmd_length(cmd));                            \
   }                                                                \
})
#define anv_gfx_pack_merge(field, cmd, prepacked, name)                 \
   for (struct cmd name = { 0 },                                        \
#define anv_gfx_pack_merge(field, cmd, stage, source, name)             \
   for (struct cmd name = (struct cmd) { 0 },                           \
        *_dst = (struct cmd *)hw_state->packed.field;                   \
        __builtin_expect(_dst != NULL, 1);                              \
        ({ const struct anv_gfx_state_ptr *_cmd_state = &prepacked;     \
        ({                                                              \
           uint32_t _partial[__anv_cmd_length(cmd)];                    \
           assert(_cmd_state->len == __anv_cmd_length(cmd));            \
           assert(sizeof(hw_state->packed.field) >=                     \
                  4 * __anv_cmd_length(cmd));                           \
           __anv_cmd_pack(cmd)(NULL, _partial, &name);                  \
           if (gfx->shaders[stage] != NULL) {                           \
              const struct anv_gfx_state_ptr *_cmd_state =              \
                 &gfx->shaders[stage]->source;                          \
              assert(_cmd_state->len == __anv_cmd_length(cmd));         \
              for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {    \
                 assert((_partial[i] &                                  \
                         (pipeline)->batch_data[                        \
                            (prepacked).offset + i]) == 0);             \
                         gfx->shaders[stage]->cmd_data[                 \
                            _cmd_state->offset + i]) == 0);             \
                 ((uint32_t *)_dst)[i] = _partial[i] |                  \
                    (pipeline)->batch_data[_cmd_state->offset + i];     \
                    gfx->shaders[stage]->cmd_data[_cmd_state->offset + i]; \
              }                                                         \
           } else {                                                     \
              for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {    \
                 assert((_partial[i] &                                  \
                         device->physical->gfx_default.field[i]) == 0); \
                 ((uint32_t *)_dst)[i] = _partial[i] |                  \
                    device->physical->gfx_default.field[i];             \
              }                                                         \
           }                                                            \
           _dst = NULL;                                                 \
        }))
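
(The merge trick above is easiest to see in isolation: dynamic fields are packed into one dword array, the shader's pre-packed dwords live in another, and because the two are asserted to occupy disjoint bits they can be combined with a plain OR. A minimal standalone sketch of that idea — not driver code, names are illustrative:

#include <assert.h>
#include <stdint.h>

static void
merge_packed_dwords(uint32_t *dst, const uint32_t *dynamic_part,
                    const uint32_t *prepacked_part, unsigned len)
{
   for (unsigned i = 0; i < len; i++) {
      /* Each field must be written by exactly one side. */
      assert((dynamic_part[i] & prepacked_part[i]) == 0);
      dst[i] = dynamic_part[i] | prepacked_part[i];
   }
}
)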
#define anv_gfx_pack_merge_protected(field, cmd, prepacked, name)       \
   for (struct cmd name = { 0 },                                        \
#define anv_gfx_pack_merge_protected(field, cmd, stage, source, name)   \
   for (struct cmd name = (struct cmd) { 0 },                           \
        *_dst = (struct cmd *)hw_state->packed.field;                   \
        __builtin_expect(_dst != NULL, 1);                              \
        ({ const struct anv_gfx_state_ptr *_cmd_state =                 \
              (cmd_buffer->vk.pool->flags &                             \
               VK_COMMAND_POOL_CREATE_PROTECTED_BIT) ?                  \
              &prepacked##_protected : &prepacked;                      \
        ({                                                              \
           uint32_t _partial[__anv_cmd_length(cmd)];                    \
           assert(_cmd_state->len == __anv_cmd_length(cmd));            \
           assert(sizeof(hw_state->packed.field) >=                     \
                  4 * __anv_cmd_length(cmd));                           \
           __anv_cmd_pack(cmd)(NULL, _partial, &name);                  \
           const struct anv_gfx_state_ptr *_cmd_state =                 \
              gfx->shaders[stage] != NULL ?                             \
                 ((cmd_buffer->vk.pool->flags &                         \
                   VK_COMMAND_POOL_CREATE_PROTECTED_BIT) ?              \
                  &gfx->shaders[stage]->source##_protected :            \
                  &gfx->shaders[stage]->source) :                       \
                 NULL;                                                  \
           assert(_cmd_state == NULL ||                                 \
                  _cmd_state->len == __anv_cmd_length(cmd));            \
           const uint32_t *_inst_data =                                 \
              gfx->shaders[stage] != NULL ?                             \
                 &gfx->shaders[stage]->cmd_data[_cmd_state->offset] :   \
                 device->physical->gfx_default.field;                   \
           for (uint32_t i = 0; i < __anv_cmd_length(cmd); i++) {       \
              assert((_partial[i] &                                     \
                      (pipeline)->batch_data[                           \
                         (prepacked).offset + i]) == 0);                \
              ((uint32_t *)_dst)[i] = _partial[i] |                     \
                 (pipeline)->batch_data[_cmd_state->offset + i];        \
              assert((_partial[i] & _inst_data[i]) == 0);               \
              ((uint32_t *)_dst)[i] = _partial[i] | _inst_data[i];      \
           }                                                            \
           _dst = NULL;                                                 \
        }))
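
(The only difference from the non-protected variant is which pre-packed copy feeds the merge: command buffers from a pool created with VK_COMMAND_POOL_CREATE_PROTECTED_BIT read the shader's <state>_protected dwords. Condensed, as the macro expands for the hs.hs field used below — illustrative only:

   const bool protected =
      cmd_buffer->vk.pool->flags & VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
   const struct anv_gfx_state_ptr *state = protected ?
      &gfx->shaders[stage]->hs.hs_protected : &gfx->shaders[stage]->hs.hs;
)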

@ -2624,19 +2693,19 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
#endif

   if (IS_DIRTY(VF_SGVS))
      anv_gfx_copy(vf_sgvs, GENX(3DSTATE_VF_SGVS), pipeline->final.vf_sgvs);
      anv_gfx_copy(vf_sgvs, GENX(3DSTATE_VF_SGVS), MESA_SHADER_VERTEX, vs.vf_sgvs);

#if GFX_VER >= 11
   if (IS_DIRTY(VF_SGVS_2))
      anv_gfx_copy(vf_sgvs_2, GENX(3DSTATE_VF_SGVS_2), pipeline->final.vf_sgvs_2);
      anv_gfx_copy(vf_sgvs_2, GENX(3DSTATE_VF_SGVS_2), MESA_SHADER_VERTEX, vs.vf_sgvs_2);
#endif

   if (IS_DIRTY(VF_SGVS_INSTANCING))
      anv_gfx_copy_variable(vf_sgvs_instancing, pipeline->final.vf_sgvs_instancing);
      anv_gfx_copy_variable(vf_sgvs_instancing, MESA_SHADER_VERTEX, vs.vf_sgvs_instancing);

   if (instance->vf_component_packing && IS_DIRTY(VF_COMPONENT_PACKING)) {
      anv_gfx_copy(vf_component_packing, GENX(3DSTATE_VF_COMPONENT_PACKING),
                   pipeline->final.vf_component_packing);
                   MESA_SHADER_VERTEX, vs.vf_component_packing);
   }

   if (IS_DIRTY(INDEX_BUFFER)) {

@ -2655,7 +2724,7 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
   if (IS_DIRTY(STREAMOUT)) {
      anv_gfx_pack_merge(so, GENX(3DSTATE_STREAMOUT),
                         pipeline->partial.so, so) {
                         gfx->streamout_stage, so, so) {
         SET(so, so, RenderingDisable);
         SET(so, so, RenderStreamSelect);
         SET(so, so, ReorderMode);

@ -2664,7 +2733,7 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
   }

   if (IS_DIRTY(SO_DECL_LIST))
      anv_gfx_copy_variable(so_decl_list, pipeline->final.so_decl_list);
      anv_gfx_copy_variable(so_decl_list, gfx->streamout_stage, so_decl_list);

   if (IS_DIRTY(CLIP)) {
      anv_gfx_pack(clip, GENX(3DSTATE_CLIP), clip) {

@ -2886,7 +2955,8 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
   if (IS_DIRTY(TE)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
         anv_gfx_pack_merge(te, GENX(3DSTATE_TE), pipeline->partial.te, te) {
         anv_gfx_pack_merge(te, GENX(3DSTATE_TE),
                            MESA_SHADER_TESS_EVAL, ds.te, te) {
            SET(te, te, OutputTopology);
#if GFX_VERx10 >= 125
            SET(te, te, TessellationDistributionMode);

@ -2986,7 +3056,8 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
   }

   if (IS_DIRTY(WM)) {
      anv_gfx_pack_merge(wm, GENX(3DSTATE_WM), pipeline->partial.wm, wm) {
      anv_gfx_pack_merge(wm, GENX(3DSTATE_WM),
                         MESA_SHADER_FRAGMENT, ps.wm, wm) {
         SET(wm, wm, LineStippleEnable);
         SET(wm, wm, BarycentricInterpolationMode);
      }

@ -3079,12 +3150,12 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
   }

#if GFX_VERx10 >= 125
   if (device->vk.enabled_features.meshShader) {
   if (device->vk.enabled_extensions.EXT_mesh_shader) {
      if (IS_DIRTY(MESH_CONTROL)) {
         if (anv_gfx_has_stage(gfx, MESA_SHADER_MESH)) {
            anv_gfx_copy_protected(mesh_control,
                                   GENX(3DSTATE_MESH_CONTROL),
                                   pipeline->final.mesh_control);
                                   MESA_SHADER_MESH, ms.control);
         } else {
            anv_gfx_pack(mesh_control, GENX(3DSTATE_MESH_CONTROL), mc);
         }

@ -3092,8 +3163,9 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
      if (IS_DIRTY(TASK_CONTROL)) {
         if (anv_gfx_has_stage(gfx, MESA_SHADER_TASK)) {
            anv_gfx_copy_protected(task_control, GENX(3DSTATE_TASK_CONTROL),
                                   pipeline->final.task_control);
            anv_gfx_copy_protected(task_control,
                                   GENX(3DSTATE_TASK_CONTROL),
                                   MESA_SHADER_TASK, ts.control);
         } else {
            anv_gfx_pack(task_control, GENX(3DSTATE_TASK_CONTROL), tc);
         }

@ -3101,70 +3173,58 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
      if (IS_DIRTY(MESH_SHADER)) {
         anv_gfx_copy(mesh_shader, GENX(3DSTATE_MESH_SHADER),
                      pipeline->final.mesh_shader);
                      MESA_SHADER_MESH, ms.shader);
      }

      if (IS_DIRTY(MESH_DISTRIB)) {
         anv_gfx_copy(mesh_distrib, GENX(3DSTATE_MESH_DISTRIB),
                      pipeline->final.mesh_distrib);
                      MESA_SHADER_MESH, ms.distrib);
      }

      if (IS_DIRTY(CLIP_MESH)) {
         anv_gfx_copy(clip_mesh, GENX(3DSTATE_CLIP_MESH),
                      pipeline->final.clip_mesh);
                      MESA_SHADER_MESH, ms.clip);
      }

      if (IS_DIRTY(TASK_SHADER)) {
         anv_gfx_copy(task_shader, GENX(3DSTATE_TASK_SHADER),
                      pipeline->final.task_shader);
                      MESA_SHADER_TASK, ts.shader);
      }

      if (IS_DIRTY(TASK_REDISTRIB)) {
         anv_gfx_copy(task_redistrib, GENX(3DSTATE_TASK_REDISTRIB),
                      pipeline->final.task_redistrib);
                      MESA_SHADER_TASK, ts.redistrib);
      }
   }
#endif /* GFX_VERx10 >= 125 */

   if (IS_DIRTY(VS)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_VERTEX)) {
         anv_gfx_copy_protected(vs, GENX(3DSTATE_VS), pipeline->final.vs);
      } else {
         anv_gfx_pack(vs, GENX(3DSTATE_VS), vs);
#if GFX_VERx10 == 90
      anv_gfx_pack_merge_protected(vs, GENX(3DSTATE_VS),
                                   MESA_SHADER_VERTEX, vs.vs, vs) {
         SET(vs, vs, VertexCacheDisable);
      }
#else
      anv_gfx_copy_protected(vs, GENX(3DSTATE_VS), MESA_SHADER_VERTEX, vs.vs);
#endif
   }

   if (IS_DIRTY(HS)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_CTRL)) {
         anv_gfx_copy_protected(hs, GENX(3DSTATE_HS), pipeline->final.hs);
      } else {
         anv_gfx_pack(hs, GENX(3DSTATE_HS), hs);
      }
   }
   if (IS_DIRTY(HS))
      anv_gfx_copy_protected(hs, GENX(3DSTATE_HS), MESA_SHADER_TESS_CTRL, hs.hs);

   if (IS_DIRTY(DS)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
         anv_gfx_copy_protected(ds, GENX(3DSTATE_DS), pipeline->final.ds);
      } else {
         anv_gfx_pack(ds, GENX(3DSTATE_DS), ds);
      }
   }
   if (IS_DIRTY(DS))
      anv_gfx_copy_protected(ds, GENX(3DSTATE_DS), MESA_SHADER_TESS_EVAL, ds.ds);

   if (IS_DIRTY(GS)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_GEOMETRY)) {
         anv_gfx_pack_merge_protected(gs, GENX(3DSTATE_GS),
                                      pipeline->partial.gs, gs) {
                                      MESA_SHADER_GEOMETRY, gs.gs, gs) {
            SET(gs, gs, ReorderMode);
         }
      } else {
         anv_gfx_pack(gs, GENX(3DSTATE_GS), gs);
      }
   }

   if (IS_DIRTY(PS)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT)) {
         anv_gfx_pack_merge_protected(ps, GENX(3DSTATE_PS),
                                      pipeline->partial.ps, ps) {
                                      MESA_SHADER_FRAGMENT, ps.ps, ps) {
            SET(ps, ps, KernelStartPointer0);
            SET(ps, ps, KernelStartPointer1);
            SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);

@ -3187,15 +3247,12 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
#endif
            SET(ps, ps, PositionXYOffsetSelect);
         }
      } else {
         anv_gfx_pack(ps, GENX(3DSTATE_PS), ps);
      }
   }

   if (IS_DIRTY(PS_EXTRA)) {
      if (anv_gfx_has_stage(gfx, MESA_SHADER_FRAGMENT)) {
         anv_gfx_pack_merge(ps_extra, GENX(3DSTATE_PS_EXTRA),
                            pipeline->partial.ps_extra, pse) {
                            MESA_SHADER_FRAGMENT, ps.ps_extra, pse) {
            SET(pse, ps_extra, PixelShaderHasUAV);
            SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11

@ -3213,7 +3270,7 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
    * change through pre-rasterization shader) or if we notice a change.
    */
   anv_gfx_pack_merge(ps_extra_dep, GENX(3DSTATE_PS_EXTRA),
                      pipeline->partial.ps_extra, pse) {
                      MESA_SHADER_FRAGMENT, ps.ps_extra, pse) {
      SET(pse, ps_extra, PixelShaderHasUAV);
      SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11

@ -3269,15 +3326,13 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
                                      cmd_buffer->device,
                                      &cmd_buffer->vk.dynamic_graphics_state,
                                      &cmd_buffer->state.gfx,
                                      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline),
                                      cmd_buffer->vk.level);

   vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);

   cmd_buffer_repack_gfx_state(&cmd_buffer->state.gfx.dyn_state,
                               cmd_buffer,
                               &cmd_buffer->state.gfx,
                               anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline));
                               &cmd_buffer->state.gfx);
}

static void

@ -3431,8 +3486,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
   struct anv_device *device = cmd_buffer->device;
   struct anv_instance *instance = device->physical->instance;
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(gfx->base.pipeline);
   const struct vk_dynamic_graphics_state *dyn =
      &cmd_buffer->vk.dynamic_graphics_state;
   struct anv_push_constants *push_consts =

@ -3493,7 +3546,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
   const struct brw_mesh_prog_data *mesh_prog_data = get_gfx_mesh_prog_data(gfx);
   if (mesh_prog_data) {
      push_consts->gfx.fs_per_prim_remap_offset =
         pipeline->base.shaders[MESA_SHADER_MESH]->kernel.offset +
         gfx->shaders[MESA_SHADER_MESH]->kernel.offset +
         mesh_prog_data->wa_18019110168_mapping_offset;
   }

@ -3576,7 +3629,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
    * 3. Send 3D State SOL with SOL Enabled
    */
   if (intel_needs_workaround(device->info, 16011773973) &&
       pipeline->uses_xfb)
       gfx->shaders[gfx->streamout_stage]->xfb_info != NULL)
      anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so);

   anv_batch_emit_gfx_variable(batch, so_decl_list);

@ -3597,7 +3650,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
   }

#if GFX_VERx10 >= 125
   if (device->vk.enabled_features.meshShader) {
   if (device->vk.enabled_extensions.EXT_mesh_shader) {
      if (IS_DIRTY(MESH_CONTROL))
         anv_batch_emit_gfx(batch, GENX(3DSTATE_MESH_CONTROL), mesh_control);

@ -3670,8 +3723,8 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
   anv_batch_emit_gfx(batch, GENX(3DSTATE_VF_TOPOLOGY), vft);

   if (IS_DIRTY(VERTEX_INPUT)) {
      genX(batch_emit_pipeline_vertex_input)(batch, device,
                                             pipeline, dyn->vi);
      genX(batch_emit_vertex_input)(batch, device,
                                    gfx->shaders[MESA_SHADER_VERTEX], dyn->vi);
   }

   if (IS_DIRTY(TE))

@ -3823,8 +3876,6 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;

   if (INTEL_DEBUG(DEBUG_REEMIT)) {

@ -3863,7 +3914,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
    * it after.
    */
   if (intel_needs_workaround(device->info, 16011773973) &&
       pipeline->uses_xfb &&
       gfx->shaders[gfx->streamout_stage]->xfb_info != NULL &&
       BITSET_TEST(hw_state->emit_dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
      BITSET_SET(hw_state->emit_dirty, ANV_GFX_STATE_STREAMOUT);
   }

@ -569,31 +569,6 @@ emit_vs_shader(struct anv_batch *batch,
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

#if 0
      /* TODO: move to shader binding */
      if (GFX_VER == 9 && devinfo->gt == 4 &&
          anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
         /* On Sky Lake GT4, we have experienced some hangs related to the VS
          * cache and tessellation. It is unknown exactly what is happening
          * but the Haswell docs for the "VS Reference Count Full Force Miss
          * Enable" field of the "Thread Mode" register refer to a HSW bug in
          * which the VUE handle reference count would overflow resulting in
          * internal reference counting bugs. My (Faith's) best guess is that
          * this bug cropped back up on SKL GT4 when we suddenly had more
          * threads in play than any previous gfx9 hardware.
          *
          * What we do know for sure is that setting this bit when
          * tessellation shaders are in use fixes a GPU hang in Batman: Arkham
          * City when playing with DXVK (https://bugs.freedesktop.org/107280).
          * Disabling the vertex cache with tessellation shaders should only
          * have a minor performance impact as the tessellation shaders are
          * likely generating and processing far more geometry than the vertex
          * stage.
          */
         vs.VertexCacheDisable = true;
      }
#endif

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =