diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index bc33816156e..fc0e10c3a7a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -1600,9 +1600,16 @@ void anv_CmdSetColorWriteEnableEXT( assert(attachmentCount <= MAX_RTS); - uint8_t color_writes = 0; - for (uint32_t i = 0; i < attachmentCount; i++) - color_writes |= pColorWriteEnables[i] ? (1 << i) : 0; + /* Keep the existing values outside the color attachments count of the + * current pipeline. + */ + uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes; + for (uint32_t i = 0; i < attachmentCount; i++) { + if (pColorWriteEnables[i]) + color_writes |= BITFIELD_BIT(i); + else + color_writes &= ~BITFIELD_BIT(i); + } if (cmd_buffer->state.gfx.dynamic.color_writes != color_writes) { cmd_buffer->state.gfx.dynamic.color_writes = color_writes; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 52b6430bb32..e7db9215d11 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -487,12 +487,9 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, key->ignore_sample_mask_out = false; assert(rendering_info->colorAttachmentCount <= MAX_RTS); - for (uint32_t i = 0; i < rendering_info->colorAttachmentCount; i++) { - if (rendering_info->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED) - key->color_outputs_valid |= (1 << i); - } - - key->nr_color_regions = rendering_info->colorAttachmentCount; + /* Consider all inputs as valid until we look at the NIR variables. 
*/ + key->color_outputs_valid = (1u << MAX_RTS) - 1; + key->nr_color_regions = MAX_RTS; /* To reduce possible shader recompilations we would need to know if * there is a SampleMask output variable to compute if we should emit @@ -1119,6 +1116,26 @@ static void anv_pipeline_link_fs(const struct brw_compiler *compiler, struct anv_pipeline_stage *stage) { + /* Initially the valid outputs value is set to all possible render targets + * valid (see populate_wm_prog_key()), before we look at the shader + * variables. Here we look at the output variables of the shader and compute + * a correct number of render target outputs. + */ + stage->key.wm.color_outputs_valid = 0; + nir_foreach_shader_out_variable_safe(var, stage->nir) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + const unsigned rt = var->data.location - FRAG_RESULT_DATA0; + const unsigned array_len = + glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; + assert(rt + array_len <= MAX_RTS); + + stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len); + } + stage->key.wm.nr_color_regions = + util_last_bit(stage->key.wm.color_outputs_valid); + unsigned num_rt_bindings; struct anv_pipeline_binding rt_bindings[MAX_RTS]; if (stage->key.wm.nr_color_regions > 0) { @@ -1152,71 +1169,6 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, typed_memcpy(stage->bind_map.surface_to_descriptor, rt_bindings, num_rt_bindings); stage->bind_map.surface_count += num_rt_bindings; - - /* Now that we've set up the color attachments, we can go through and - * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the - * hopes that dead code can clean them up in this and any earlier shader - * stages. - */ - nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir); - bool deleted_output = false; - nir_foreach_shader_out_variable_safe(var, stage->nir) { - /* TODO: We don't delete depth/stencil writes. 
We probably could if the - * subpass doesn't have a depth/stencil attachment. - */ - if (var->data.location < FRAG_RESULT_DATA0) - continue; - - const unsigned rt = var->data.location - FRAG_RESULT_DATA0; - - /* If this is the RT at location 0 and we have alpha to coverage - * enabled we still need that write because it will affect the coverage - * mask even if it's never written to a color target. - */ - if (rt == 0 && stage->key.wm.alpha_to_coverage) - continue; - - const unsigned array_len = - glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; - assert(rt + array_len <= MAX_RTS); - - if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & - BITFIELD_RANGE(rt, array_len))) { - deleted_output = true; - var->data.mode = nir_var_function_temp; - exec_node_remove(&var->node); - exec_list_push_tail(&impl->locals, &var->node); - } - } - - if (deleted_output) - nir_fixup_deref_modes(stage->nir); - - /* Initially the valid outputs value is based off the renderpass color - * attachments (see populate_wm_prog_key()), now that we've potentially - * deleted variables that map to unused attachments, we need to update the - * valid outputs for the backend compiler based on what output variables - * are actually used. */ - stage->key.wm.color_outputs_valid = 0; - nir_foreach_shader_out_variable_safe(var, stage->nir) { - if (var->data.location < FRAG_RESULT_DATA0) - continue; - - const unsigned rt = var->data.location - FRAG_RESULT_DATA0; - const unsigned array_len = - glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; - assert(rt + array_len <= MAX_RTS); - - stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len); - } - - /* We stored the number of subpass color attachments in nr_color_regions - * when calculating the key for caching. Now that we've computed the bind - * map, we can reduce this to the actual max before we go into the back-end - * compiler. 
- */ - stage->key.wm.nr_color_regions = - util_last_bit(stage->key.wm.color_outputs_valid); } static void @@ -2368,10 +2320,12 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); if (color_write_info) { - dynamic->color_writes = 0; + dynamic->color_writes = (1u << MAX_RTS) - 1; for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) { - dynamic->color_writes |= - color_write_info->pColorWriteEnables[i] ? (1u << i) : 0; + if (color_write_info->pColorWriteEnables[i]) + dynamic->color_writes |= BITFIELD_BIT(i); + else + dynamic->color_writes &= ~BITFIELD_BIT(i); } } } diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index a78bc3007c9..a1bccf9dc00 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1434,10 +1434,18 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline, .SourceAlphaBlendFactor = vk_to_intel_blend[a->srcAlphaBlendFactor], .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dstAlphaBlendFactor], .AlphaBlendFunction = vk_to_intel_blend_op[a->alphaBlendOp], - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + .WriteDisableAlpha = + (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 && + !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = + (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 && + !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = + (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 && + !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = + (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 && + !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), }; if (a->srcColorBlendFactor != 
a->srcAlphaBlendFactor || @@ -2253,7 +2261,8 @@ has_color_buffer_write_enabled(const struct anv_graphics_pipeline *pipeline, if (binding->index == UINT32_MAX) continue; - if (blend && blend->pAttachments[binding->index].colorWriteMask != 0) + if (blend && binding->index < blend->attachmentCount && + blend->pAttachments[binding->index].colorWriteMask != 0) return true; } diff --git a/src/intel/vulkan/gfx7_cmd_buffer.c b/src/intel/vulkan/gfx7_cmd_buffer.c index 5060362878e..285d14066c4 100644 --- a/src/intel/vulkan/gfx7_cmd_buffer.c +++ b/src/intel/vulkan/gfx7_cmd_buffer.c @@ -283,14 +283,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) if (anv_cmd_buffer_needs_dynamic_state(cmd_buffer, ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) { - const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes; - - bool dirty_color_blend = - cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE; - - bool dirty_primitive_topology = - cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; - VkPolygonMode dynamic_raster_mode; VkPrimitiveTopology primitive_topology = cmd_buffer->state.gfx.dynamic.primitive_topology; @@ -298,21 +290,20 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline, primitive_topology); - if (dirty_color_blend || dirty_primitive_topology) { - uint32_t dwords[GENX(3DSTATE_WM_length)]; - struct GENX(3DSTATE_WM) wm = { - GENX(3DSTATE_WM_header), + const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes; + uint32_t dwords[GENX(3DSTATE_WM_length)]; + struct GENX(3DSTATE_WM) wm = { + GENX(3DSTATE_WM_header), - .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch || - color_writes, - .MultisampleRasterizationMode = - genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode), - }; - GENX(3DSTATE_WM_pack)(NULL, dwords, &wm); - - 
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm); - } + .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch || + color_writes, + .MultisampleRasterizationMode = + genX(ms_rasterization_mode)(pipeline, + dynamic_raster_mode), + }; + GENX(3DSTATE_WM_pack)(NULL, dwords, &wm); + anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm); } if (anv_cmd_buffer_needs_dynamic_state(cmd_buffer, @@ -329,14 +320,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) bool dirty_color_blend = cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE; - /* Blend states of each RT */ - uint32_t surface_count = 0; - struct anv_pipeline_bind_map *map; - if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { - map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; - surface_count = map->surface_count; - } - uint32_t blend_dws[GENX(BLEND_STATE_length) + MAX_RTS * GENX(BLEND_STATE_ENTRY_length)]; uint32_t *dws = blend_dws; @@ -348,10 +331,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) bool dirty_logic_op = cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP; - for (uint32_t i = 0; i < surface_count; i++) { - struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; - bool write_disabled = - dirty_color_blend && (color_writes & (1u << binding->index)) == 0; + for (uint32_t i = 0; i < MAX_RTS; i++) { + bool write_disabled = dirty_color_blend && + (color_writes & BITFIELD_BIT(i)) == 0; struct GENX(BLEND_STATE_ENTRY) entry = { .WriteDisableAlpha = write_disabled, .WriteDisableRed = write_disabled, @@ -365,7 +347,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) } uint32_t num_dwords = GENX(BLEND_STATE_length) + - GENX(BLEND_STATE_ENTRY_length) * surface_count; + GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; struct anv_state blend_states = anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws, diff --git 
a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c index 154e192ef47..5e165102011 100644 --- a/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/src/intel/vulkan/gfx8_cmd_buffer.c @@ -642,43 +642,32 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE | ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP)) { const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes; + /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders * threads. */ - bool dirty_color_blend = - cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE; + uint32_t wm_dwords[GENX(3DSTATE_WM_length)]; + struct GENX(3DSTATE_WM) wm = { + GENX(3DSTATE_WM_header), - if (dirty_color_blend) { - uint32_t dwords[MAX2(GENX(3DSTATE_WM_length), - GENX(3DSTATE_PS_BLEND_length))]; - struct GENX(3DSTATE_WM) wm = { - GENX(3DSTATE_WM_header), + .ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch || + !color_writes) ? ForceON : 0, + }; + GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm); - .ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch || - !color_writes) ? ForceON : 0, - }; - GENX(3DSTATE_WM_pack)(NULL, dwords, &wm); + anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm); - anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.wm); - - /* 3DSTATE_PS_BLEND to be consistent with the rest of the - * BLEND_STATE_ENTRY. 
- */ - struct GENX(3DSTATE_PS_BLEND) ps_blend = { - GENX(3DSTATE_PS_BLEND_header), - .HasWriteableRT = color_writes != 0, - }; - GENX(3DSTATE_PS_BLEND_pack)(NULL, dwords, &ps_blend); - anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.ps_blend); - } - - /* Blend states of each RT */ - uint32_t surface_count = 0; - struct anv_pipeline_bind_map *map; - if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { - map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map; - surface_count = map->surface_count; - } + /* 3DSTATE_PS_BLEND to be consistent with the rest of the + * BLEND_STATE_ENTRY. + */ + uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)]; + struct GENX(3DSTATE_PS_BLEND) ps_blend = { + GENX(3DSTATE_PS_BLEND_header), + .HasWriteableRT = color_writes != 0, + }; + GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend); + anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords, + pipeline->gfx8.ps_blend); uint32_t blend_dws[GENX(BLEND_STATE_length) + MAX_RTS * GENX(BLEND_STATE_ENTRY_length)]; @@ -691,10 +680,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) bool dirty_logic_op = cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP; - for (uint32_t i = 0; i < surface_count; i++) { - struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i]; - bool write_disabled = - dirty_color_blend && (color_writes & (1u << binding->index)) == 0; + for (uint32_t i = 0; i < MAX_RTS; i++) { + bool write_disabled = (color_writes & BITFIELD_BIT(i)) == 0; struct GENX(BLEND_STATE_ENTRY) entry = { .WriteDisableAlpha = write_disabled, .WriteDisableRed = write_disabled, @@ -708,7 +695,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) } uint32_t num_dwords = GENX(BLEND_STATE_length) + - GENX(BLEND_STATE_ENTRY_length) * surface_count; + GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; struct anv_state blend_states = anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,