mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
anv: remove unused gfx7 code
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Jason Ekstrand <jason.ekstrand@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
This commit is contained in:
parent
1a77f83c2b
commit
a659819f79
15 changed files with 41 additions and 1325 deletions
|
|
@ -109,19 +109,6 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
|||
type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC))
|
||||
data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
|
||||
|
||||
/* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader.
|
||||
* Do not handle VK_DESCRIPTOR_TYPE_STORAGE_IMAGE and
|
||||
* VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT because they already must
|
||||
* have identity swizzle.
|
||||
*
|
||||
* TODO: We need to handle swizzle on buffer views too for those same
|
||||
* platforms.
|
||||
*/
|
||||
if (device->info.verx10 == 70 &&
|
||||
(type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
|
||||
type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
|
||||
data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
|
@ -175,9 +162,6 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
|
|||
if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
|
||||
size += sizeof(struct anv_address_range_descriptor);
|
||||
|
||||
if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)
|
||||
size += sizeof(struct anv_texture_swizzle_descriptor);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
|
|
@ -1478,26 +1462,6 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
|
|||
|
||||
anv_descriptor_set_write_image_param(desc_map, image_param);
|
||||
}
|
||||
|
||||
if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) {
|
||||
assert(!(data & ANV_DESCRIPTOR_SAMPLED_IMAGE));
|
||||
assert(image_view);
|
||||
struct anv_texture_swizzle_descriptor desc_data[3];
|
||||
memset(desc_data, 0, sizeof(desc_data));
|
||||
|
||||
for (unsigned p = 0; p < image_view->n_planes; p++) {
|
||||
desc_data[p] = (struct anv_texture_swizzle_descriptor) {
|
||||
.swizzle = {
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.r,
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.g,
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.b,
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.a,
|
||||
},
|
||||
};
|
||||
}
|
||||
memcpy(desc_map, desc_data,
|
||||
MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -258,7 +258,7 @@ get_device_extensions(const struct anv_physical_device *device,
|
|||
.EXT_buffer_device_address = device->has_a64_buffer_access,
|
||||
.EXT_calibrated_timestamps = device->has_reg_timestamp,
|
||||
.EXT_color_write_enable = true,
|
||||
.EXT_conditional_rendering = device->info.verx10 >= 75,
|
||||
.EXT_conditional_rendering = true,
|
||||
.EXT_conservative_rasterization = device->info.ver >= 9,
|
||||
.EXT_custom_border_color = device->info.ver >= 8,
|
||||
.EXT_depth_clip_control = true,
|
||||
|
|
@ -805,15 +805,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
|||
device->info = devinfo;
|
||||
|
||||
device->cmd_parser_version = -1;
|
||||
if (device->info.ver == 7) {
|
||||
device->cmd_parser_version =
|
||||
anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
|
||||
if (device->cmd_parser_version == -1) {
|
||||
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
|
||||
"failed to get command parser version");
|
||||
goto fail_base;
|
||||
}
|
||||
}
|
||||
|
||||
if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
|
||||
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
|
||||
|
|
@ -1428,8 +1419,8 @@ void anv_GetPhysicalDeviceFeatures2(
|
|||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
|
||||
VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
|
||||
(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
|
||||
features->conditionalRendering = pdevice->info.verx10 >= 75;
|
||||
features->inheritedConditionalRendering = pdevice->info.verx10 >= 75;
|
||||
features->conditionalRendering = true;
|
||||
features->inheritedConditionalRendering = true;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -1751,8 +1742,7 @@ void anv_GetPhysicalDeviceProperties(
|
|||
const uint32_t max_textures =
|
||||
pdevice->has_bindless_images ? UINT16_MAX : 128;
|
||||
const uint32_t max_samplers =
|
||||
pdevice->has_bindless_samplers ? UINT16_MAX :
|
||||
(devinfo->verx10 >= 75) ? 128 : 16;
|
||||
pdevice->has_bindless_samplers ? UINT16_MAX : 128;
|
||||
const uint32_t max_images =
|
||||
pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;
|
||||
|
||||
|
|
@ -2512,8 +2502,7 @@ void anv_GetPhysicalDeviceProperties2(
|
|||
props->transformFeedbackQueries = true;
|
||||
props->transformFeedbackStreamsLinesTriangles = false;
|
||||
props->transformFeedbackRasterizationStreamSelect = false;
|
||||
/* This requires MI_MATH */
|
||||
props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
|
||||
props->transformFeedbackDraw = true;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -495,14 +495,6 @@ anv_get_format_plane(const struct intel_device_info *devinfo,
|
|||
const struct isl_format_layout *isl_layout =
|
||||
isl_format_get_layout(plane_format.isl_format);
|
||||
|
||||
/* On Ivy Bridge we don't even have enough 24 and 48-bit formats that we
|
||||
* can reliably do texture upload with BLORP so just don't claim support
|
||||
* for any of them.
|
||||
*/
|
||||
if (devinfo->verx10 == 70 &&
|
||||
(isl_layout->bpb == 24 || isl_layout->bpb == 48))
|
||||
return unsupported;
|
||||
|
||||
if (tiling == VK_IMAGE_TILING_OPTIMAL &&
|
||||
!util_is_power_of_two_or_zero(isl_layout->bpb)) {
|
||||
/* Tiled formats *must* be power-of-two because we need to upload
|
||||
|
|
|
|||
|
|
@ -161,10 +161,6 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
|
|||
uint32_t *api_mode,
|
||||
bool *msaa_rasterization_enable);
|
||||
|
||||
uint32_t
|
||||
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPolygonMode raster_mode);
|
||||
|
||||
VkPolygonMode
|
||||
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPrimitiveTopology primitive_topology);
|
||||
|
|
|
|||
|
|
@ -358,14 +358,6 @@ anv_image_plane_needs_shadow_surface(const struct intel_device_info *devinfo,
|
|||
return true;
|
||||
}
|
||||
|
||||
if (devinfo->ver <= 7 &&
|
||||
plane_format.aspect == VK_IMAGE_ASPECT_STENCIL_BIT &&
|
||||
(vk_plane_usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) {
|
||||
/* gfx7 can't sample from W-tiled surfaces. */
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -731,11 +723,6 @@ add_aux_surface_if_supported(struct anv_device *device,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
if (device->info->ver == 7) {
|
||||
anv_perf_warn(VK_LOG_OBJS(&image->vk.base), "Implement gfx7 HiZ");
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
if (image->vk.mip_levels > 1) {
|
||||
anv_perf_warn(VK_LOG_OBJS(&image->vk.base), "Enable multi-LOD HiZ");
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -2377,12 +2364,6 @@ anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
|
|||
if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
|
||||
return ANV_FAST_CLEAR_NONE;
|
||||
|
||||
/* We don't support MSAA fast-clears on Ivybridge or Bay Trail because they
|
||||
* lack the MI ALU which we need to determine the predicates.
|
||||
*/
|
||||
if (devinfo->verx10 == 70 && image->vk.samples > 1)
|
||||
return ANV_FAST_CLEAR_NONE;
|
||||
|
||||
enum isl_aux_state aux_state =
|
||||
anv_layout_to_aux_state(devinfo, image, aspect, layout);
|
||||
|
||||
|
|
@ -2495,23 +2476,9 @@ anv_image_fill_surface_state(struct anv_device *device,
|
|||
surface = &image->planes[plane].shadow_surface;
|
||||
}
|
||||
|
||||
/* For texturing from stencil on gfx7, we have to sample from a shadow
|
||||
* surface because we don't support W-tiling in the sampler.
|
||||
*/
|
||||
if (anv_surface_is_valid(&image->planes[plane].shadow_surface) &&
|
||||
aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
assert(device->info->ver == 7);
|
||||
assert(view_usage & ISL_SURF_USAGE_TEXTURE_BIT);
|
||||
surface = &image->planes[plane].shadow_surface;
|
||||
}
|
||||
|
||||
if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
|
||||
view.swizzle = anv_swizzle_for_render(view.swizzle);
|
||||
|
||||
/* On Ivy Bridge and Bay Trail we do the swizzle in the shader */
|
||||
if (device->info->verx10 == 70)
|
||||
view.swizzle = ISL_SWIZZLE_IDENTITY;
|
||||
|
||||
/* If this is a HiZ buffer we can sample from with a programmable clear
|
||||
* value (SKL+), define the clear value to the optimal constant.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1234,85 +1234,12 @@ build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
|
||||
struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
assert(state->pdevice->info.verx10 == 70);
|
||||
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
|
||||
nir_tex_instr_is_query(tex) ||
|
||||
tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
|
||||
(tex->is_shadow && tex->is_new_style_shadow))
|
||||
return;
|
||||
|
||||
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
||||
assert(deref_src_idx >= 0);
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
|
||||
unsigned set = var->data.descriptor_set;
|
||||
unsigned binding = var->data.binding;
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&state->layout->set[set].layout->binding[binding];
|
||||
|
||||
if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
|
||||
return;
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
const unsigned plane_offset =
|
||||
plane * sizeof(struct anv_texture_swizzle_descriptor);
|
||||
nir_ssa_def *swiz =
|
||||
build_load_var_deref_descriptor_mem(b, deref, plane_offset,
|
||||
1, 32, state);
|
||||
|
||||
b->cursor = nir_after_instr(&tex->instr);
|
||||
|
||||
assert(tex->dest.ssa.bit_size == 32);
|
||||
assert(tex->dest.ssa.num_components == 4);
|
||||
|
||||
/* Initializing to undef is ok; nir_opt_undef will clean it up. */
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
nir_ssa_def *comps[8];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
|
||||
comps[i] = undef;
|
||||
|
||||
comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
|
||||
if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
|
||||
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
|
||||
else
|
||||
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
|
||||
comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
|
||||
comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
|
||||
comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
|
||||
comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
|
||||
|
||||
nir_ssa_def *swiz_comps[4];
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
|
||||
swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
|
||||
}
|
||||
nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
|
||||
|
||||
/* Rewrite uses before we insert so we don't rewrite this use */
|
||||
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
|
||||
swiz_tex_res,
|
||||
swiz_tex_res->parent_instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_tex(nir_builder *b, nir_tex_instr *tex,
|
||||
struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
unsigned plane = tex_instr_get_and_remove_plane_src(tex);
|
||||
|
||||
/* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
|
||||
* before we lower the derefs away so we can still find the descriptor.
|
||||
*/
|
||||
if (state->pdevice->info.verx10 == 70)
|
||||
lower_gfx7_tex_swizzle(b, tex, plane, state);
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
lower_tex_deref(b, tex, nir_tex_src_texture_deref,
|
||||
|
|
|
|||
|
|
@ -84,7 +84,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
|
|||
const bool has_push_intrinsic = push_start <= push_end;
|
||||
|
||||
const bool push_ubo_ranges =
|
||||
pdevice->info.verx10 >= 75 &&
|
||||
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
|
||||
!brw_shader_stage_requires_bindless_resources(nir->info.stage);
|
||||
|
||||
|
|
|
|||
|
|
@ -1751,17 +1751,6 @@ struct anv_sampled_image_descriptor {
|
|||
uint32_t sampler;
|
||||
};
|
||||
|
||||
struct anv_texture_swizzle_descriptor {
|
||||
/** Texture swizzle
|
||||
*
|
||||
* See also nir_intrinsic_channel_select_intel
|
||||
*/
|
||||
uint8_t swizzle[4];
|
||||
|
||||
/** Unused padding to ensure the struct is a multiple of 64 bits */
|
||||
uint32_t _pad;
|
||||
};
|
||||
|
||||
/** Struct representing a storage image descriptor */
|
||||
struct anv_storage_image_descriptor {
|
||||
/** Bindless image handles
|
||||
|
|
@ -1803,8 +1792,6 @@ enum anv_descriptor_data {
|
|||
ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
|
||||
/** Storage image handles */
|
||||
ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
|
||||
/** Texture swizzle */
|
||||
ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
|
||||
};
|
||||
|
||||
struct anv_descriptor_set_binding_layout {
|
||||
|
|
@ -3137,15 +3124,7 @@ struct anv_graphics_pipeline {
|
|||
* with dynamic state.
|
||||
*/
|
||||
struct {
|
||||
uint32_t sf[7];
|
||||
uint32_t clip[4];
|
||||
uint32_t xfb_bo_pitch[4];
|
||||
uint32_t wm[3];
|
||||
uint32_t blend_state[MAX_RTS * 2];
|
||||
uint32_t streamout_state[3];
|
||||
} gfx7;
|
||||
|
||||
struct {
|
||||
uint32_t sf[4];
|
||||
uint32_t raster[5];
|
||||
uint32_t wm[2];
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ blorp_get_surface_address(struct blorp_batch *blorp_batch,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VER >= 7 && GFX_VER < 10
|
||||
#if GFX_VER >= 8 && GFX_VER < 10
|
||||
static struct blorp_address
|
||||
blorp_get_surface_base_address(struct blorp_batch *batch)
|
||||
{
|
||||
|
|
@ -390,19 +390,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
|
|||
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
|
||||
* indirect fast-clear colors can cause GPU hangs if we don't stall first.
|
||||
* See genX(cmd_buffer_mi_memcpy) for more details.
|
||||
*/
|
||||
if (params->src.clear_color_addr.buffer ||
|
||||
params->dst.clear_color_addr.buffer) {
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_CS_STALL_BIT,
|
||||
"before blorp prep fast clear");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
|
||||
blorp_exec_on_compute(batch, params);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -467,10 +467,6 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
|
|||
float depth_clear_value,
|
||||
VkRect2D render_area)
|
||||
{
|
||||
/* We don't do any HiZ or depth fast-clears on gfx7 yet */
|
||||
if (GFX_VER == 7)
|
||||
return false;
|
||||
|
||||
/* If we're just clearing stencil, we can always HiZ clear */
|
||||
if (!(clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
|
||||
return true;
|
||||
|
|
@ -500,13 +496,6 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
|
|||
if (depth_clear_value != ANV_HZ_FC_VAL)
|
||||
return false;
|
||||
|
||||
/* Only gfx9+ supports returning ANV_HZ_FC_VAL when sampling a fast-cleared
|
||||
* portion of a HiZ buffer. Testing has revealed that Gfx8 only supports
|
||||
* returning 0.0f. Gens prior to gfx8 do not support this feature at all.
|
||||
*/
|
||||
if (GFX_VER == 8 && anv_can_sample_with_hiz(device->info, iview->image))
|
||||
return false;
|
||||
|
||||
/* If we got here, then we can fast clear */
|
||||
return true;
|
||||
}
|
||||
|
|
@ -684,16 +673,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
static inline bool
|
||||
vk_image_layout_stencil_write_optimal(VkImageLayout layout)
|
||||
{
|
||||
return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
|
||||
layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
|
||||
layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
|
||||
* the initial layout is undefined, the HiZ buffer and depth buffer will
|
||||
* represent the same data at the end of this operation.
|
||||
|
|
@ -707,35 +686,7 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||
VkImageLayout final_layout,
|
||||
bool will_full_fast_clear)
|
||||
{
|
||||
#if GFX_VER == 7
|
||||
const uint32_t plane =
|
||||
anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
|
||||
/* On gfx7, we have to store a texturable version of the stencil buffer in
|
||||
* a shadow whenever VK_IMAGE_USAGE_SAMPLED_BIT is set and copy back and
|
||||
* forth at strategic points. Stencil writes are only allowed in the following
|
||||
* layouts:
|
||||
*
|
||||
* - VK_IMAGE_LAYOUT_GENERAL
|
||||
* - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL
|
||||
*
|
||||
* For general, we have no nice opportunity to transition so we do the copy
|
||||
* to the shadow unconditionally at the end of the subpass. For transfer
|
||||
* destinations, we can update it as part of the transfer op. For the other
|
||||
* layouts, we delay the copy until a transition into some other layout.
|
||||
*/
|
||||
if (anv_surface_is_valid(&image->planes[plane].shadow_surface) &&
|
||||
vk_image_layout_stencil_write_optimal(initial_layout) &&
|
||||
!vk_image_layout_stencil_write_optimal(final_layout)) {
|
||||
anv_image_copy_to_shadow(cmd_buffer, image,
|
||||
VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
base_level, level_count,
|
||||
base_layer, layer_count);
|
||||
}
|
||||
#elif GFX_VER == 12
|
||||
#if GFX_VER == 12
|
||||
const uint32_t plane =
|
||||
anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
|
||||
|
|
@ -833,7 +784,6 @@ set_image_fast_clear_state(struct anv_cmd_buffer *cmd_buffer,
|
|||
/* This is only really practical on haswell and above because it requires
|
||||
* MI math in order to get it correct.
|
||||
*/
|
||||
#if GFX_VERx10 >= 75
|
||||
static void
|
||||
anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_image *image,
|
||||
|
|
@ -914,50 +864,6 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
|
|||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
}
|
||||
#endif /* GFX_VERx10 >= 75 */
|
||||
|
||||
#if GFX_VER <= 8
|
||||
static void
|
||||
anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_image *image,
|
||||
VkImageAspectFlagBits aspect,
|
||||
uint32_t level, uint32_t array_layer,
|
||||
enum isl_aux_op resolve_op,
|
||||
enum anv_fast_clear_type fast_clear_supported)
|
||||
{
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
||||
struct mi_value fast_clear_type_mem =
|
||||
mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device,
|
||||
image, aspect));
|
||||
|
||||
/* This only works for partial resolves and only when the clear color is
|
||||
* all or nothing. On the upside, this emits less command streamer code
|
||||
* and works on Ivybridge and Bay Trail.
|
||||
*/
|
||||
assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
|
||||
assert(fast_clear_supported != ANV_FAST_CLEAR_ANY);
|
||||
|
||||
/* We don't support fast clears on anything other than the first slice. */
|
||||
if (level > 0 || array_layer > 0)
|
||||
return;
|
||||
|
||||
/* On gfx8, we don't have a concept of default clear colors because we
|
||||
* can't sample from CCS surfaces. It's enough to just load the fast clear
|
||||
* state into the predicate register.
|
||||
*/
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), fast_clear_type_mem);
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
|
||||
mi_store(&b, fast_clear_type_mem, mi_imm(0));
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOADINV;
|
||||
mip.CombineOperation = COMBINE_SET;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
}
|
||||
#endif /* GFX_VER <= 8 */
|
||||
|
||||
static void
|
||||
anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
|
||||
|
|
@ -971,15 +877,9 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
|
|||
{
|
||||
const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
|
||||
|
||||
#if GFX_VER >= 9
|
||||
anv_cmd_compute_resolve_predicate(cmd_buffer, image,
|
||||
aspect, level, array_layer,
|
||||
resolve_op, fast_clear_supported);
|
||||
#else /* GFX_VER <= 8 */
|
||||
anv_cmd_simple_resolve_predicate(cmd_buffer, image,
|
||||
aspect, level, array_layer,
|
||||
resolve_op, fast_clear_supported);
|
||||
#endif
|
||||
|
||||
/* CCS_D only supports full resolves and BLORP will assert on us if we try
|
||||
* to do a partial resolve on a CCS_D surface.
|
||||
|
|
@ -1005,16 +905,12 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
|
|||
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
anv_cmd_compute_resolve_predicate(cmd_buffer, image,
|
||||
aspect, 0, array_layer,
|
||||
resolve_op, fast_clear_supported);
|
||||
|
||||
anv_image_mcs_op(cmd_buffer, image, format, swizzle, aspect,
|
||||
array_layer, 1, resolve_op, NULL, true);
|
||||
#else
|
||||
unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail");
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1074,21 +970,14 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
|
|||
} else {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||
sdi.Address = addr;
|
||||
if (GFX_VERx10 >= 75) {
|
||||
/* Pre-SKL, the dword containing the clear values also contains
|
||||
* other fields, so we need to initialize those fields to match the
|
||||
* values that would be in a color attachment.
|
||||
*/
|
||||
sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
|
||||
ISL_CHANNEL_SELECT_GREEN << 22 |
|
||||
ISL_CHANNEL_SELECT_BLUE << 19 |
|
||||
ISL_CHANNEL_SELECT_ALPHA << 16;
|
||||
} else if (GFX_VER == 7) {
|
||||
/* On IVB, the dword containing the clear values also contains
|
||||
* other fields that must be zero or can be zero.
|
||||
*/
|
||||
sdi.ImmediateData = 0;
|
||||
}
|
||||
/* Pre-SKL, the dword containing the clear values also contains
|
||||
* other fields, so we need to initialize those fields to match the
|
||||
* values that would be in a color attachment.
|
||||
*/
|
||||
sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
|
||||
ISL_CHANNEL_SELECT_GREEN << 22 |
|
||||
ISL_CHANNEL_SELECT_BLUE << 19 |
|
||||
ISL_CHANNEL_SELECT_ALPHA << 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1115,30 +1004,6 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
|
|||
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
|
||||
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* On gfx7, the combination of commands used here (MI_LOAD_REGISTER_MEM
|
||||
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
|
||||
* in-flight when they are issued even if the memory touched is not
|
||||
* currently active for rendering. The weird bit is that it is not the
|
||||
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
|
||||
* rendering hangs such that the next stalling command after the
|
||||
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
|
||||
*
|
||||
* It is unclear exactly why this hang occurs. Both MI commands come with
|
||||
* warnings about the 3D pipeline but that doesn't seem to fully explain
|
||||
* it. My (Jason's) best theory is that it has something to do with the
|
||||
* fact that we're using a GPU state register as our temporary and that
|
||||
* something with reading/writing it is causing problems.
|
||||
*
|
||||
* In order to work around this issue, we emit a PIPE_CONTROL with the
|
||||
* command streamer stall bit set.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_CS_STALL_BIT,
|
||||
"after copy_fast_clear_dwords. Avoid potential hang");
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
||||
|
|
@ -1736,9 +1601,7 @@ genX(BeginCommandBuffer)(
|
|||
if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
|
||||
!(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
|
||||
genX(emit_sample_pattern)(&cmd_buffer->batch, NULL);
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
|
||||
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
|
||||
vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT);
|
||||
|
|
@ -1861,7 +1724,6 @@ genX(CmdExecuteCommands)(
|
|||
assert(secondary->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
|
||||
assert(!anv_batch_has_error(&secondary->batch));
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
if (secondary->state.conditional_render_enabled) {
|
||||
if (!primary->state.conditional_render_enabled) {
|
||||
/* Secondary buffer is constructed as if it will be executed
|
||||
|
|
@ -1874,7 +1736,6 @@ genX(CmdExecuteCommands)(
|
|||
mi_imm(UINT64_MAX));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (secondary->usage_flags &
|
||||
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
|
||||
|
|
@ -2143,23 +2004,6 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
|
|||
#endif
|
||||
|
||||
pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
|
||||
#if GFX_VER == 8
|
||||
/* From Broadwell PRM, volume 2a:
|
||||
* PIPE_CONTROL: Command Streamer Stall Enable:
|
||||
*
|
||||
* "This bit must be always set when PIPE_CONTROL command is
|
||||
* programmed by GPGPU and MEDIA workloads, except for the cases
|
||||
* when only Read Only Cache Invalidation bits are set (State
|
||||
* Cache Invalidation Enable, Instruction cache Invalidation
|
||||
* Enable, Texture Cache Invalidation Enable, Constant Cache
|
||||
* Invalidation Enable). This is to WA FFDOP CG issue, this WA
|
||||
* need not implemented when FF_DOP_CG is disabled."
|
||||
*
|
||||
* Since we do all the invalidation in the following PIPE_CONTROL,
|
||||
* if we got here, we need a stall.
|
||||
*/
|
||||
pipe.CommandStreamerStallEnable |= current_pipeline == GPGPU;
|
||||
#endif
|
||||
|
||||
pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
|
||||
|
||||
|
|
@ -2226,46 +2070,6 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
|
|||
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
|
||||
bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES);
|
||||
|
||||
if (GFX_VERx10 == 75) {
|
||||
/* Haswell needs additional work-arounds:
|
||||
*
|
||||
* From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
|
||||
*
|
||||
* Option 1:
|
||||
* PIPE_CONTROL command with the CS Stall and the required write
|
||||
* caches flushed with Post-SyncOperation as Write Immediate Data
|
||||
* followed by eight dummy MI_STORE_DATA_IMM (write to scratch
|
||||
* space) commands.
|
||||
*
|
||||
* Example:
|
||||
* - Workload-1
|
||||
* - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
|
||||
* Immediate Data, Required Write Cache Flush bits set)
|
||||
* - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
|
||||
* - Workload-2 (Can use the data produce or output by
|
||||
* Workload-1)
|
||||
*
|
||||
* Unfortunately, both the PRMs and the internal docs are a bit
|
||||
* out-of-date in this regard. What the windows driver does (and
|
||||
* this appears to actually work) is to emit a register read from the
|
||||
* memory address written by the pipe control above.
|
||||
*
|
||||
* What register we load into doesn't matter. We choose an indirect
|
||||
* rendering register because we know it always exists and it's one
|
||||
* of the first registers the command parser allows us to write. If
|
||||
* you don't have command parser support in your kernel (pre-4.2),
|
||||
* this will get turned into MI_NOOP and you won't get the
|
||||
* workaround. Unfortunately, there's just not much we can do in
|
||||
* that case. This register is perfectly safe to write since we
|
||||
* always re-load all of the indirect draw registers right before
|
||||
* 3DPRIMITIVE when needed anyway.
|
||||
*/
|
||||
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
|
||||
lrm.RegisterAddress = 0x243C; /* GFX7_3DPRIM_START_INSTANCE */
|
||||
lrm.MemoryAddress = device->workaround_address;
|
||||
}
|
||||
}
|
||||
|
||||
bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
|
||||
}
|
||||
|
|
@ -3180,7 +2984,6 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
|
|||
const struct anv_pipeline_bind_map *bind_map =
|
||||
&pipeline->shaders[stage]->bind_map;
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
/* The Skylake PRM contains the following restriction:
|
||||
*
|
||||
* "The driver must ensure The following case does not occur
|
||||
|
|
@ -3200,33 +3003,10 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
|
|||
/* At this point we only have non-empty ranges */
|
||||
assert(range->length > 0);
|
||||
|
||||
/* For Ivy Bridge, make sure we only set the first range (actual
|
||||
* push constants)
|
||||
*/
|
||||
assert((GFX_VERx10 >= 75) || i == 0);
|
||||
|
||||
c.ConstantBody.ReadLength[i + shift] = range->length;
|
||||
c.ConstantBody.Buffer[i + shift] =
|
||||
anv_address_add(buffers[i], range->start * 32);
|
||||
}
|
||||
#else
|
||||
/* For Ivy Bridge, push constants are relative to dynamic state
|
||||
* base address and we only ever push actual push constants.
|
||||
*/
|
||||
if (bind_map->push_ranges[0].length > 0) {
|
||||
assert(buffer_count == 1);
|
||||
assert(bind_map->push_ranges[0].set ==
|
||||
ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
|
||||
assert(buffers[0].bo ==
|
||||
cmd_buffer->device->dynamic_state_pool.block_pool.bo);
|
||||
c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
|
||||
c.ConstantBody.Buffer[0].bo = NULL;
|
||||
c.ConstantBody.Buffer[0].offset = buffers[0].offset;
|
||||
}
|
||||
assert(bind_map->push_ranges[1].length == 0);
|
||||
assert(bind_map->push_ranges[2].length == 0);
|
||||
assert(bind_map->push_ranges[3].length == 0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -3471,10 +3251,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) &&
|
||||
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) &&
|
||||
#if GFX_VER <= 7
|
||||
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) &&
|
||||
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) &&
|
||||
#endif
|
||||
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT))
|
||||
return;
|
||||
|
||||
|
|
@ -3488,10 +3264,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
struct GENX(3DSTATE_CLIP) clip = {
|
||||
GENX(3DSTATE_CLIP_header),
|
||||
#if GFX_VER <= 7
|
||||
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
|
||||
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
|
||||
#endif
|
||||
.ViewportXYClipTestEnable = xy_clip_test_enable,
|
||||
};
|
||||
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
|
||||
|
|
@ -3515,7 +3287,7 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords,
|
||||
pipeline->gfx7.clip);
|
||||
pipeline->gfx8.clip);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -3551,12 +3323,10 @@ cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
|
|||
.XMaxClipGuardband = 1.0f,
|
||||
.YMinClipGuardband = -1.0f,
|
||||
.YMaxClipGuardband = 1.0f,
|
||||
#if GFX_VER >= 8
|
||||
.XMinViewPort = vp->x,
|
||||
.XMaxViewPort = vp->x + vp->width - 1,
|
||||
.YMinViewPort = MIN2(vp->y, vp->y + vp->height),
|
||||
.YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
|
||||
#endif
|
||||
};
|
||||
|
||||
const uint32_t fb_size_max = 1 << 14;
|
||||
|
|
@ -3753,12 +3523,6 @@ cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
|
|||
&cmd_buffer->vk.dynamic_graphics_state;
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
|
||||
#if GFX_VER == 7
|
||||
# define streamout_state_dw pipeline->gfx7.streamout_state
|
||||
#else
|
||||
# define streamout_state_dw pipeline->gfx8.streamout_state
|
||||
#endif
|
||||
|
||||
uint32_t dwords[GENX(3DSTATE_STREAMOUT_length)];
|
||||
|
||||
struct GENX(3DSTATE_STREAMOUT) so = {
|
||||
|
|
@ -3766,7 +3530,7 @@ cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
|
|||
.RenderingDisable = dyn->rs.rasterizer_discard_enable,
|
||||
};
|
||||
GENX(3DSTATE_STREAMOUT_pack)(NULL, dwords, &so);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords, streamout_state_dw);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.streamout_state);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -3811,21 +3575,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
uint32_t stride = dyn->vi_binding_strides[vb];
|
||||
UNUSED uint32_t size = cmd_buffer->state.vertex_bindings[vb].size;
|
||||
|
||||
#if GFX_VER <= 7
|
||||
bool per_instance = pipeline->vb[vb].instanced;
|
||||
uint32_t divisor = pipeline->vb[vb].instance_divisor *
|
||||
pipeline->instance_multiplier;
|
||||
#endif
|
||||
|
||||
state = (struct GENX(VERTEX_BUFFER_STATE)) {
|
||||
.VertexBufferIndex = vb,
|
||||
|
||||
.MOCS = anv_mocs(cmd_buffer->device, buffer->address.bo,
|
||||
ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
|
||||
#if GFX_VER <= 7
|
||||
.BufferAccessType = per_instance ? INSTANCEDATA : VERTEXDATA,
|
||||
.InstanceDataStepRate = per_instance ? divisor : 1,
|
||||
#endif
|
||||
.AddressModifyEnable = true,
|
||||
.BufferPitch = stride,
|
||||
.BufferStartingAddress = anv_address_add(buffer->address, offset),
|
||||
|
|
@ -3834,16 +3588,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
.L3BypassDisable = true,
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
.BufferSize = size,
|
||||
#else
|
||||
/* XXX: to handle dynamic offset for older gens we might want
|
||||
* to modify Endaddress, but there are issues when doing so:
|
||||
*
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7439
|
||||
*/
|
||||
.EndAddress = anv_address_add(buffer->address, buffer->vk.size - 1),
|
||||
#endif
|
||||
};
|
||||
} else {
|
||||
state = (struct GENX(VERTEX_BUFFER_STATE)) {
|
||||
|
|
@ -3874,9 +3619,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
!cmd_buffer->state.push_constants_dirty)
|
||||
return;
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) ||
|
||||
(GFX_VER == 7 && (cmd_buffer->state.gfx.dirty &
|
||||
ANV_CMD_DIRTY_PIPELINE))) {
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
|
||||
/* Wa_16011411144:
|
||||
*
|
||||
* SW must insert a PIPE_CONTROL cmd before and after the
|
||||
|
|
@ -3907,20 +3650,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
sob.MOCS = anv_mocs(cmd_buffer->device, xfb->buffer->address.bo, 0);
|
||||
sob.SurfaceBaseAddress = anv_address_add(xfb->buffer->address,
|
||||
xfb->offset);
|
||||
#if GFX_VER >= 8
|
||||
sob.SOBufferEnable = true;
|
||||
sob.StreamOffsetWriteEnable = false;
|
||||
/* Size is in DWords - 1 */
|
||||
sob.SurfaceSize = DIV_ROUND_UP(xfb->size, 4) - 1;
|
||||
#else
|
||||
/* We don't have SOBufferEnable in 3DSTATE_SO_BUFFER on Gfx7 so
|
||||
* we trust in SurfaceEndAddress = SurfaceBaseAddress = 0 (the
|
||||
* default for an empty SO_BUFFER packet) to disable them.
|
||||
*/
|
||||
sob.SurfacePitch = pipeline->gfx7.xfb_bo_pitch[idx];
|
||||
sob.SurfaceEndAddress = anv_address_add(xfb->buffer->address,
|
||||
xfb->offset + xfb->size);
|
||||
#endif
|
||||
} else {
|
||||
sob.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
|
||||
}
|
||||
|
|
@ -3950,28 +3683,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
cmd_buffer_alloc_push_constants(cmd_buffer);
|
||||
}
|
||||
|
||||
#if GFX_VER <= 7
|
||||
if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT ||
|
||||
cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) {
|
||||
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
|
||||
*
|
||||
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
|
||||
* stall needs to be sent just prior to any 3DSTATE_VS,
|
||||
* 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
|
||||
* 3DSTATE_BINDING_TABLE_POINTER_VS,
|
||||
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one
|
||||
* PIPE_CONTROL needs to be sent before any combination of VS
|
||||
* associated 3DSTATE."
|
||||
*/
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.DepthStallEnable = true;
|
||||
pc.PostSyncOperation = WriteImmediateData;
|
||||
pc.Address = cmd_buffer->device->workaround_address;
|
||||
anv_debug_dump_pc(pc);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Render targets live in the same binding table as fragment descriptors */
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS)
|
||||
descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
|
|
@ -4442,7 +4153,6 @@ void genX(CmdDrawIndirectByteCountEXT)(
|
|||
uint32_t counterOffset,
|
||||
uint32_t vertexStride)
|
||||
{
|
||||
#if GFX_VERx10 >= 75
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer);
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
|
|
@ -4503,7 +4213,6 @@ void genX(CmdDrawIndirectByteCountEXT)(
|
|||
|
||||
trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace,
|
||||
instanceCount * pipeline->instance_multiplier);
|
||||
#endif /* GFX_VERx10 >= 75 */
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -4521,13 +4230,8 @@ load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
struct mi_value instance_count = mi_mem32(anv_address_add(addr, 4));
|
||||
if (pipeline->instance_multiplier > 1) {
|
||||
#if GFX_VERx10 >= 75
|
||||
instance_count = mi_imul_imm(&b, instance_count,
|
||||
pipeline->instance_multiplier);
|
||||
#else
|
||||
anv_finishme("Multiview + indirect draw requires MI_MATH; "
|
||||
"MI_MATH is not supported on Ivy Bridge");
|
||||
#endif
|
||||
}
|
||||
mi_store(&b, mi_reg32(GFX7_3DPRIM_INSTANCE_COUNT), instance_count);
|
||||
|
||||
|
|
@ -4673,10 +4377,8 @@ prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
|
|||
struct mi_value ret = mi_imm(0);
|
||||
|
||||
if (cmd_buffer->state.conditional_render_enabled) {
|
||||
#if GFX_VERx10 >= 75
|
||||
ret = mi_new_gpr(b);
|
||||
mi_store(b, mi_value_ref(b, ret), mi_mem32(count_address));
|
||||
#endif
|
||||
} else {
|
||||
/* Upload the current draw count from the draw parameters buffer to
|
||||
* MI_PREDICATE_SRC0.
|
||||
|
|
@ -4718,7 +4420,6 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
static void
|
||||
emit_draw_count_predicate_with_conditional_render(
|
||||
struct anv_cmd_buffer *cmd_buffer,
|
||||
|
|
@ -4729,24 +4430,8 @@ emit_draw_count_predicate_with_conditional_render(
|
|||
struct mi_value pred = mi_ult(b, mi_imm(draw_index), max);
|
||||
pred = mi_iand(b, pred, mi_reg64(ANV_PREDICATE_RESULT_REG));
|
||||
|
||||
#if GFX_VER >= 8
|
||||
mi_store(b, mi_reg32(MI_PREDICATE_RESULT), pred);
|
||||
#else
|
||||
/* MI_PREDICATE_RESULT is not whitelisted in i915 command parser
|
||||
* so we emit MI_PREDICATE to set it.
|
||||
*/
|
||||
|
||||
mi_store(b, mi_reg64(MI_PREDICATE_SRC0), pred);
|
||||
mi_store(b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOADINV;
|
||||
mip.CombineOperation = COMBINE_SET;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
emit_draw_count_predicate_cond(struct anv_cmd_buffer *cmd_buffer,
|
||||
|
|
@ -4754,16 +4439,12 @@ emit_draw_count_predicate_cond(struct anv_cmd_buffer *cmd_buffer,
|
|||
uint32_t draw_index,
|
||||
struct mi_value max)
|
||||
{
|
||||
#if GFX_VERx10 >= 75
|
||||
if (cmd_buffer->state.conditional_render_enabled) {
|
||||
emit_draw_count_predicate_with_conditional_render(
|
||||
cmd_buffer, b, draw_index, mi_value_ref(b, max));
|
||||
} else {
|
||||
emit_draw_count_predicate(cmd_buffer, b, draw_index);
|
||||
}
|
||||
#else
|
||||
emit_draw_count_predicate(cmd_buffer, b, draw_index);
|
||||
#endif
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndirectCount)(
|
||||
|
|
@ -5257,24 +4938,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
|
||||
static VkResult
|
||||
verify_cmd_parser(const struct anv_device *device,
|
||||
int required_version,
|
||||
const char *function)
|
||||
{
|
||||
if (device->physical->cmd_parser_version < required_version) {
|
||||
return vk_errorf(device->physical, VK_ERROR_FEATURE_NOT_PRESENT,
|
||||
"cmd parser version %d is required for %s",
|
||||
required_version, function);
|
||||
} else {
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer,
|
||||
uint32_t baseGroupX,
|
||||
|
|
@ -5363,8 +5026,7 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||
uint32_t groupCountX, uint32_t groupCountY,
|
||||
uint32_t groupCountZ)
|
||||
{
|
||||
bool predicate = (GFX_VER <= 7 && indirect) ||
|
||||
cmd_buffer->state.conditional_render_enabled;
|
||||
bool predicate = cmd_buffer->state.conditional_render_enabled;
|
||||
|
||||
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
||||
const struct brw_cs_dispatch_info dispatch =
|
||||
|
|
@ -5479,15 +5141,6 @@ void genX(CmdDispatchIndirect)(
|
|||
|
||||
anv_cmd_buffer_push_base_group_id(cmd_buffer, 0, 0, 0);
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* Linux 4.4 added command parser version 5 which allows the GPGPU
|
||||
* indirect dispatch registers to be written.
|
||||
*/
|
||||
if (verify_cmd_parser(cmd_buffer->device, 5,
|
||||
"vkCmdDispatchIndirect") != VK_SUCCESS)
|
||||
return;
|
||||
#endif
|
||||
|
||||
anv_measure_snapshot(cmd_buffer,
|
||||
INTEL_SNAPSHOT_COMPUTE,
|
||||
"compute indirect",
|
||||
|
|
@ -5514,56 +5167,8 @@ void genX(CmdDispatchIndirect)(
|
|||
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMY), size_y);
|
||||
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), size_z);
|
||||
|
||||
#if GFX_VER <= 7
|
||||
/* predicate = (compute_dispatch_indirect_x_size == 0); */
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), size_x);
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
|
||||
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOAD;
|
||||
mip.CombineOperation = COMBINE_SET;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
|
||||
/* predicate |= (compute_dispatch_indirect_y_size == 0); */
|
||||
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0), size_y);
|
||||
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOAD;
|
||||
mip.CombineOperation = COMBINE_OR;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
|
||||
/* predicate |= (compute_dispatch_indirect_z_size == 0); */
|
||||
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0), size_z);
|
||||
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOAD;
|
||||
mip.CombineOperation = COMBINE_OR;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
|
||||
/* predicate = !predicate; */
|
||||
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOADINV;
|
||||
mip.CombineOperation = COMBINE_OR;
|
||||
mip.CompareOperation = COMPARE_FALSE;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
if (cmd_buffer->state.conditional_render_enabled) {
|
||||
/* predicate &= !(conditional_rendering_predicate == 0); */
|
||||
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0),
|
||||
mi_reg32(ANV_PREDICATE_RESULT_REG));
|
||||
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
|
||||
mip.LoadOperation = LOAD_LOADINV;
|
||||
mip.CombineOperation = COMBINE_AND;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* GFX_VER > 7 */
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0);
|
||||
|
||||
|
|
@ -6900,16 +6505,6 @@ void genX(CmdBeginRendering)(
|
|||
|
||||
gfx->dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
|
||||
|
||||
/* Our implementation of VK_KHR_multiview uses instancing to draw the
|
||||
* different views. If the client asks for instancing, we need to use the
|
||||
* Instance Data Step Rate to ensure that we repeat the client's
|
||||
* per-instance data once for each view. Since this bit is in
|
||||
* VERTEX_BUFFER_STATE on gfx7, we need to dirty vertex buffers at the top
|
||||
* of each subpass.
|
||||
*/
|
||||
if (GFX_VER == 7)
|
||||
gfx->vb_dirty |= ~0;
|
||||
|
||||
/* It is possible to start a render pass with an old pipeline. Because the
|
||||
* render pass and subpass index are both baked into the pipeline, this is
|
||||
* highly unlikely. In order to do so, it requires that you have a render
|
||||
|
|
@ -7156,49 +6751,12 @@ void genX(CmdEndRendering)(
|
|||
VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* On gfx7, we have to store a texturable version of the stencil buffer in
|
||||
* a shadow whenever VK_IMAGE_USAGE_SAMPLED_BIT is set and copy back and
|
||||
* forth at strategic points. Stencil writes are only allowed in following
|
||||
* layouts:
|
||||
*
|
||||
* - VK_IMAGE_LAYOUT_GENERAL
|
||||
* - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL
|
||||
* - VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT
|
||||
*
|
||||
* For general, we have no nice opportunity to transition so we do the copy
|
||||
* to the shadow unconditionally at the end of the subpass. For transfer
|
||||
* destinations, we can update it as part of the transfer op. For the other
|
||||
* layouts, we delay the copy until a transition into some other layout.
|
||||
*/
|
||||
if (gfx->stencil_att.iview != NULL) {
|
||||
const struct anv_image_view *iview = gfx->stencil_att.iview;
|
||||
const struct anv_image *image = iview->image;
|
||||
const uint32_t plane =
|
||||
anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
|
||||
if (anv_surface_is_valid(&image->planes[plane].shadow_surface) &&
|
||||
(gfx->stencil_att.layout == VK_IMAGE_LAYOUT_GENERAL ||
|
||||
gfx->stencil_att.layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT)) {
|
||||
anv_image_copy_to_shadow(cmd_buffer, image,
|
||||
VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
iview->planes[plane].isl.base_level, 1,
|
||||
iview->planes[plane].isl.base_array_layer,
|
||||
layers);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_cmd_buffer_reset_rendering(cmd_buffer);
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
#if GFX_VERx10 >= 75
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
||||
|
|
@ -7211,10 +6769,8 @@ genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
|
|||
mip.CombineOperation = COMBINE_SET;
|
||||
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
void genX(CmdBeginConditionalRenderingEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
|
||||
|
|
@ -7265,7 +6821,6 @@ void genX(CmdEndConditionalRenderingEXT)(
|
|||
|
||||
cmd_state->conditional_render_enabled = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set of stage bits for which are pipelined, i.e. they get queued
|
||||
* by the command streamer for later execution.
|
||||
|
|
@ -7349,7 +6904,6 @@ void genX(CmdWaitEvents2)(
|
|||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
#if GFX_VER >= 8
|
||||
for (uint32_t i = 0; i < eventCount; i++) {
|
||||
ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
|
||||
|
||||
|
|
@ -7363,9 +6917,6 @@ void genX(CmdWaitEvents2)(
|
|||
};
|
||||
}
|
||||
}
|
||||
#else
|
||||
anv_finishme("Implement events on gfx7");
|
||||
#endif
|
||||
|
||||
cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,13 +55,11 @@ static void
|
|||
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
||||
const struct intel_l3_config *l3_config)
|
||||
{
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||
vfi.InstancingEnable = false;
|
||||
vfi.VertexElementIndex = 0;
|
||||
}
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
|
||||
#endif
|
||||
|
||||
/* Disable all shader stages */
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
|
||||
|
|
@ -75,10 +73,8 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
sbe.VertexURBEntryReadOffset = 1;
|
||||
sbe.NumberofSFOutputAttributes = 1;
|
||||
sbe.VertexURBEntryReadLength = 1;
|
||||
#if GFX_VER >= 8
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 9
|
||||
for (unsigned i = 0; i < 32; i++)
|
||||
|
|
@ -100,11 +96,9 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
|
||||
topo.PrimitiveTopologyType = _3DPRIM_POINTLIST;
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
|
||||
vf.StatisticsEnable = false;
|
||||
|
|
@ -141,11 +135,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
#if GFX_VER >= 12
|
||||
.L3BypassDisable = true,
|
||||
#endif
|
||||
#if (GFX_VER >= 8)
|
||||
.BufferSize = size,
|
||||
#else
|
||||
.EndAddress = anv_address_add(src, size - 1),
|
||||
#endif
|
||||
});
|
||||
|
||||
dw = anv_batch_emitn(batch, 3, GENX(3DSTATE_VERTEX_ELEMENTS));
|
||||
|
|
@ -172,15 +162,9 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
sob.MOCS = anv_mocs(device, dst.bo, 0),
|
||||
sob.SurfaceBaseAddress = dst;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
sob.SOBufferEnable = true;
|
||||
sob.SurfaceSize = size / 4 - 1;
|
||||
#else
|
||||
sob.SurfacePitch = bs;
|
||||
sob.SurfaceEndAddress = anv_address_add(dst, size);
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* As SOL writes out data, it updates the SO_WRITE_OFFSET registers with
|
||||
* the end position of the stream. We need to reset this value to 0 at
|
||||
* the beginning of the run or else SOL will start at the offset from
|
||||
|
|
@ -188,17 +172,8 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
*/
|
||||
sob.StreamOffsetWriteEnable = true;
|
||||
sob.StreamOffset = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER <= 7
|
||||
/* The hardware can do this for us on BDW+ (see above) */
|
||||
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), load) {
|
||||
load.RegisterOffset = GENX(SO_WRITE_OFFSET0_num);
|
||||
load.DataDWord = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
|
||||
.StreamtoBufferSelects0 = (1 << 0),
|
||||
.NumEntries0 = 1);
|
||||
|
|
@ -216,11 +191,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
|
|||
so.RenderingDisable = true;
|
||||
so.Stream0VertexReadOffset = 0;
|
||||
so.Stream0VertexReadLength = DIV_ROUND_UP(32, 64);
|
||||
#if GFX_VER >= 8
|
||||
so.Buffer0SurfacePitch = bs;
|
||||
#else
|
||||
so.SOBufferEnable0 = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {
|
||||
|
|
|
|||
|
|
@ -177,7 +177,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
|
|||
};
|
||||
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* On Broadwell and later, we have a separate VF_INSTANCING packet
|
||||
* that controls instancing. On Haswell and prior, that's part of
|
||||
* VERTEX_BUFFER_STATE which we emit later.
|
||||
|
|
@ -191,7 +190,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
|
|||
vfi.VertexElementIndex = slot;
|
||||
vfi.InstanceDataStepRate = per_instance ? divisor : 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
const uint32_t id_slot = elem_count;
|
||||
|
|
@ -215,24 +213,16 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
|
|||
.SourceElementFormat = ISL_FORMAT_R32G32_UINT,
|
||||
.Component0Control = base_ctrl,
|
||||
.Component1Control = base_ctrl,
|
||||
#if GFX_VER >= 8
|
||||
.Component2Control = VFCOMP_STORE_0,
|
||||
.Component3Control = VFCOMP_STORE_0,
|
||||
#else
|
||||
.Component2Control = VFCOMP_STORE_VID,
|
||||
.Component3Control = VFCOMP_STORE_IID,
|
||||
#endif
|
||||
};
|
||||
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
|
||||
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||
vfi.VertexElementIndex = id_slot;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
|
||||
sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
|
||||
sgvs.VertexIDComponentNumber = 2;
|
||||
|
|
@ -241,7 +231,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
|
|||
sgvs.InstanceIDComponentNumber = 3;
|
||||
sgvs.InstanceIDElementOffset = id_slot;
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint32_t drawid_slot = elem_count + needs_svgs_elem;
|
||||
if (vs_prog_data->uses_drawid) {
|
||||
|
|
@ -258,11 +247,9 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
|
|||
&p[1 + drawid_slot * 2],
|
||||
&element);
|
||||
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
||||
vfi.VertexElementIndex = drawid_slot;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -285,22 +272,6 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
|
|||
entry_size, entries, start, deref_block_size,
|
||||
&constrained);
|
||||
|
||||
#if GFX_VERx10 == 70
|
||||
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
|
||||
*
|
||||
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
|
||||
* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
|
||||
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
|
||||
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
|
||||
* needs to be sent before any combination of VS associated 3DSTATE."
|
||||
*/
|
||||
anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
|
||||
pc.DepthStallEnable = true;
|
||||
pc.PostSyncOperation = WriteImmediateData;
|
||||
pc.Address = device->workaround_address;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
|
||||
urb._3DCommandSubOpcode += i;
|
||||
|
|
@ -396,9 +367,7 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
|
||||
#if GFX_VER >= 8
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
|
||||
#endif
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline))
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
|
||||
|
|
@ -422,14 +391,10 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* On Broadwell, they broke 3DSTATE_SBE into two packets */
|
||||
struct GENX(3DSTATE_SBE_SWIZ) swiz = {
|
||||
GENX(3DSTATE_SBE_SWIZ_header),
|
||||
};
|
||||
#else
|
||||
# define swiz sbe
|
||||
#endif
|
||||
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
const struct brw_vue_map *fs_input_map =
|
||||
|
|
@ -497,10 +462,8 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
|
||||
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
|
||||
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
|
||||
#if GFX_VER >= 8
|
||||
sbe.ForceVertexURBEntryReadOffset = true;
|
||||
sbe.ForceVertexURBEntryReadLength = true;
|
||||
#endif
|
||||
} else {
|
||||
assert(anv_pipeline_is_mesh(pipeline));
|
||||
#if GFX_VERx10 >= 125
|
||||
|
|
@ -554,12 +517,10 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
|||
return;
|
||||
GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);
|
||||
|
||||
#if GFX_VER >= 8
|
||||
dw = anv_batch_emit_dwords(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ_length));
|
||||
if (!dw)
|
||||
return;
|
||||
GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Returns the final polygon mode for rasterization
|
||||
|
|
@ -639,32 +600,6 @@ genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
|
||||
VkPolygonMode raster_mode)
|
||||
{
|
||||
#if GFX_VER <= 7
|
||||
if (raster_mode == VK_POLYGON_MODE_LINE) {
|
||||
switch (pipeline->line_mode) {
|
||||
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
|
||||
return MSRASTMODE_ON_PATTERN;
|
||||
|
||||
case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
|
||||
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
|
||||
return MSRASTMODE_OFF_PIXEL;
|
||||
|
||||
default:
|
||||
unreachable("Unsupported line rasterization mode");
|
||||
}
|
||||
} else {
|
||||
return pipeline->rasterization_samples > 1 ?
|
||||
MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
|
||||
}
|
||||
#else
|
||||
unreachable("Only on gen7");
|
||||
#endif
|
||||
}
|
||||
|
||||
const uint32_t genX(vk_to_intel_cullmode)[] = {
|
||||
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
|
||||
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
|
||||
|
|
@ -690,7 +625,6 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
|
|||
uint32_t *api_mode,
|
||||
bool *msaa_rasterization_enable)
|
||||
{
|
||||
#if GFX_VER >= 8
|
||||
if (raster_mode == VK_POLYGON_MODE_LINE) {
|
||||
/* Unfortunately, configuring our line rasterization hardware on gfx8
|
||||
* and later is rather painful. Instead of giving us bits to tell the
|
||||
|
|
@ -731,9 +665,6 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
|
|||
*api_mode = DX100;
|
||||
*msaa_rasterization_enable = true;
|
||||
}
|
||||
#else
|
||||
unreachable("Invalid call");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -770,10 +701,6 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
|||
unreachable("Invalid provoking vertex mode");
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
sf.LineStippleEnable = rs->line.stipple.enable;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
sf.DerefBlockSize = urb_deref_block_size;
|
||||
#endif
|
||||
|
|
@ -796,25 +723,19 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
|||
sf.PointWidth = 1.0;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
struct GENX(3DSTATE_RASTER) raster = {
|
||||
GENX(3DSTATE_RASTER_header),
|
||||
};
|
||||
#else
|
||||
# define raster sf
|
||||
#endif
|
||||
|
||||
/* For details on 3DSTATE_RASTER multisample state, see the BSpec table
|
||||
* "Multisample Modes State".
|
||||
*/
|
||||
#if GFX_VER >= 8
|
||||
/* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
|
||||
* computations. If we ever set this bit to a different value, they will
|
||||
* need to be updated accordingly.
|
||||
*/
|
||||
raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
|
||||
raster.ForceMultisampling = false;
|
||||
#endif
|
||||
|
||||
raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
|
||||
raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
|
||||
|
|
@ -824,7 +745,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
|||
/* GFX9+ splits ViewportZClipTestEnable into near and far enable bits */
|
||||
raster.ViewportZFarClipTestEnable = pipeline->depth_clip_enable;
|
||||
raster.ViewportZNearClipTestEnable = pipeline->depth_clip_enable;
|
||||
#elif GFX_VER >= 8
|
||||
#elif GFX_VER == 8
|
||||
raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
|
||||
#endif
|
||||
|
||||
|
|
@ -833,42 +754,18 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
|||
rs->conservative_mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
|
||||
#endif
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
|
||||
* can get the depth offsets correct.
|
||||
*/
|
||||
if (rp != NULL &&
|
||||
rp->depth_attachment_format != VK_FORMAT_UNDEFINED) {
|
||||
assert(vk_format_has_depth(rp->depth_attachment_format));
|
||||
enum isl_format isl_format =
|
||||
anv_get_isl_format(pipeline->base.device->info,
|
||||
rp->depth_attachment_format,
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
VK_IMAGE_TILING_OPTIMAL);
|
||||
sf.DepthBufferSurfaceFormat =
|
||||
isl_format_get_depth_format(isl_format, false);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
|
||||
GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
|
||||
#else
|
||||
# undef raster
|
||||
GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
emit_ms_state(struct anv_graphics_pipeline *pipeline,
|
||||
const struct vk_multisample_state *ms)
|
||||
{
|
||||
#if GFX_VER >= 8
|
||||
/* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
|
||||
genX(emit_multisample)(&pipeline->base.batch,
|
||||
pipeline->rasterization_samples,
|
||||
NULL);
|
||||
#endif
|
||||
|
||||
/* From the Vulkan 1.0 spec:
|
||||
* If pSampleMask is NULL, it is treated as if the mask has all bits
|
||||
|
|
@ -876,11 +773,7 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
|
|||
*
|
||||
* 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
|
||||
*/
|
||||
#if GFX_VER >= 8
|
||||
uint32_t sample_mask = 0xffff;
|
||||
#else
|
||||
uint32_t sample_mask = 0xff;
|
||||
#endif
|
||||
|
||||
if (ms != NULL)
|
||||
sample_mask &= ms->sample_mask;
|
||||
|
|
@ -1005,10 +898,8 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
|
||||
struct GENX(BLEND_STATE) blend_state = {
|
||||
#if GFX_VER >= 8
|
||||
.AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
|
||||
.AlphaToOneEnable = ms && ms->alpha_to_one_enable,
|
||||
#endif
|
||||
};
|
||||
|
||||
uint32_t surface_count = 0;
|
||||
|
|
@ -1018,15 +909,11 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||
surface_count = map->surface_count;
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
||||
uint32_t *blend_state_start = devinfo->ver >= 8 ?
|
||||
pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
|
||||
uint32_t *blend_state_start = pipeline->gfx8.blend_state;
|
||||
uint32_t *state_pos = blend_state_start;
|
||||
|
||||
state_pos += GENX(BLEND_STATE_length);
|
||||
#if GFX_VER >= 8
|
||||
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
|
||||
#endif
|
||||
for (unsigned i = 0; i < surface_count; i++) {
|
||||
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
|
||||
|
||||
|
|
@ -1046,10 +933,6 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||
&cb->attachments[binding->index];
|
||||
|
||||
struct GENX(BLEND_STATE_ENTRY) entry = {
|
||||
#if GFX_VER < 8
|
||||
.AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
|
||||
.AlphaToOneEnable = ms && ms->alpha_to_one_enable,
|
||||
#endif
|
||||
.LogicOpEnable = cb->logic_op_enable,
|
||||
|
||||
/* Vulkan specification 1.2.168, VkLogicOp:
|
||||
|
|
@ -1082,11 +965,7 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||
if (a->src_color_blend_factor != a->src_alpha_blend_factor ||
|
||||
a->dst_color_blend_factor != a->dst_alpha_blend_factor ||
|
||||
a->color_blend_op != a->alpha_blend_op) {
|
||||
#if GFX_VER >= 8
|
||||
blend_state.IndependentAlphaBlendEnable = true;
|
||||
#else
|
||||
entry.IndependentAlphaBlendEnable = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The Dual Source Blending documentation says:
|
||||
|
|
@ -1129,13 +1008,10 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
|
||||
state_pos += GENX(BLEND_STATE_ENTRY_length);
|
||||
#if GFX_VER >= 8
|
||||
if (i == 0)
|
||||
bs0 = entry;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
struct GENX(3DSTATE_PS_BLEND) blend = {
|
||||
GENX(3DSTATE_PS_BLEND_header),
|
||||
};
|
||||
|
|
@ -1149,7 +1025,6 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||
blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;
|
||||
|
||||
GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
|
||||
#endif
|
||||
|
||||
GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
|
||||
}
|
||||
|
|
@ -1173,9 +1048,7 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
|||
clip.APIMode = pipeline->negative_one_to_one ? APIMODE_OGL : APIMODE_D3D;
|
||||
clip.GuardbandClipTestEnable = true;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
clip.VertexSubPixelPrecisionSelect = _8Bit;
|
||||
#endif
|
||||
clip.ClipMode = CLIPMODE_NORMAL;
|
||||
|
||||
switch (rs->provoking_vertex) {
|
||||
|
|
@ -1225,10 +1098,6 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
|||
clip.ForceZeroRTAIndexEnable =
|
||||
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
|
||||
|
||||
#if GFX_VER == 7
|
||||
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
|
||||
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
|
||||
#endif
|
||||
} else if (anv_pipeline_is_mesh(pipeline)) {
|
||||
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
||||
if (vp && vp->viewport_count > 0 &&
|
||||
|
|
@ -1237,16 +1106,10 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
|
||||
clip.CullMode = genX(vk_to_intel_cullmode)[rs->cull_mode];
|
||||
clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
|
||||
#else
|
||||
clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
|
||||
wm_prog_data->uses_nonperspective_interp_modes : 0;
|
||||
#endif
|
||||
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx8.clip, &clip);
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
if (anv_pipeline_is_mesh(pipeline)) {
|
||||
|
|
@ -1388,12 +1251,6 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VER == 7
|
||||
# define streamout_state_dw pipeline->gfx7.streamout_state
|
||||
#else
|
||||
# define streamout_state_dw pipeline->gfx8.streamout_state
|
||||
#endif
|
||||
|
||||
struct GENX(3DSTATE_STREAMOUT) so = {
|
||||
GENX(3DSTATE_STREAMOUT_header),
|
||||
};
|
||||
|
|
@ -1417,28 +1274,10 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
|
|||
|
||||
so.RenderStreamSelect = rs->rasterization_stream;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
|
||||
so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
|
||||
so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
|
||||
so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
|
||||
#else
|
||||
pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
|
||||
pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
|
||||
pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
|
||||
pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
|
||||
|
||||
/* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
|
||||
* is a bit inconvenient because we don't know what buffers will
|
||||
* actually be enabled until draw time. We do our best here by
|
||||
* setting them based on buffers_written and we disable them
|
||||
* as-needed at draw time by setting EndAddress = BaseAddress.
|
||||
*/
|
||||
so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
|
||||
so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
|
||||
so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
|
||||
so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
|
||||
#endif
|
||||
|
||||
int urb_entry_read_offset = 0;
|
||||
int urb_entry_read_length =
|
||||
|
|
@ -1458,7 +1297,7 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
|
|||
so.Stream3VertexReadLength = urb_entry_read_length - 1;
|
||||
}
|
||||
|
||||
GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
|
||||
GENX(3DSTATE_STREAMOUT_pack)(NULL, pipeline->gfx8.streamout_state, &so);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -1525,10 +1364,8 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
|
|||
vs.Enable = true;
|
||||
vs.StatisticsEnable = true;
|
||||
vs.KernelStartPointer = vs_bin->kernel.offset;
|
||||
#if GFX_VER >= 8
|
||||
vs.SIMD8DispatchEnable =
|
||||
vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
|
||||
#endif
|
||||
|
||||
assert(!vs_prog_data->base.base.use_alt_mode);
|
||||
#if GFX_VER < 11
|
||||
|
|
@ -1574,12 +1411,10 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
|
|||
vs.DispatchGRFStartRegisterForURBData =
|
||||
vs_prog_data->base.base.dispatch_grf_start_reg;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
vs.UserClipDistanceClipTestEnableBitmask =
|
||||
vs_prog_data->base.clip_distance_mask;
|
||||
vs.UserClipDistanceCullTestEnableBitmask =
|
||||
vs_prog_data->base.cull_distance_mask;
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
vs.ScratchSpaceBuffer =
|
||||
|
|
@ -1714,7 +1549,6 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
|
|||
ds.DispatchGRFStartRegisterForURBData =
|
||||
tes_prog_data->base.base.dispatch_grf_start_reg;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
#if GFX_VER < 11
|
||||
ds.DispatchMode =
|
||||
tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
|
||||
|
|
@ -1729,7 +1563,6 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
|
|||
tes_prog_data->base.clip_distance_mask;
|
||||
ds.UserClipDistanceCullTestEnableBitmask =
|
||||
tes_prog_data->base.cull_distance_mask;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
ds.PrimitiveIDNotRequired = !tes_prog_data->include_primitive_id;
|
||||
|
|
@ -1787,24 +1620,20 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
|
|||
gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
|
||||
gs.ReorderMode = TRAILING;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
|
||||
gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
|
||||
gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
|
||||
gs_prog_data->static_vertex_count : 0;
|
||||
#endif
|
||||
|
||||
gs.VertexURBEntryReadOffset = 0;
|
||||
gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
|
||||
gs.DispatchGRFStartRegisterForURBData =
|
||||
gs_prog_data->base.base.dispatch_grf_start_reg;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
gs.UserClipDistanceClipTestEnableBitmask =
|
||||
gs_prog_data->base.clip_distance_mask;
|
||||
gs.UserClipDistanceCullTestEnableBitmask =
|
||||
gs_prog_data->base.cull_distance_mask;
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
gs.ScratchSpaceBuffer =
|
||||
|
|
@ -1844,7 +1673,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
|
|||
wm.EarlyDepthStencilControl = EDSC_NORMAL;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
|
||||
* doesn't take into account KillPixels when no depth or stencil
|
||||
* writes are enabled. In order for occlusion queries to work
|
||||
|
|
@ -1864,50 +1692,14 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
|
|||
pipeline->force_fragment_thread_dispatch =
|
||||
wm_prog_data->has_side_effects ||
|
||||
wm_prog_data->uses_kill;
|
||||
#endif
|
||||
|
||||
wm.BarycentricInterpolationMode =
|
||||
wm_prog_data->barycentric_interp_modes;
|
||||
|
||||
#if GFX_VER < 8
|
||||
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
||||
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
||||
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
||||
wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
|
||||
|
||||
/* If the subpass has a depth or stencil self-dependency, then we
|
||||
* need to force the hardware to do the depth/stencil write *after*
|
||||
* fragment shader execution. Otherwise, the writes may hit memory
|
||||
* before we get around to fetching from the input attachment and we
|
||||
* may get the depth or stencil value from the current draw rather
|
||||
* than the previous one.
|
||||
*/
|
||||
wm.PixelShaderKillsPixel = rp->depth_self_dependency ||
|
||||
rp->stencil_self_dependency ||
|
||||
wm_prog_data->uses_kill;
|
||||
|
||||
pipeline->force_fragment_thread_dispatch =
|
||||
wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
|
||||
wm_prog_data->has_side_effects ||
|
||||
wm.PixelShaderKillsPixel;
|
||||
|
||||
if (ms != NULL && ms->rasterization_samples > 1) {
|
||||
if (wm_prog_data->persample_dispatch) {
|
||||
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
||||
} else {
|
||||
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
||||
}
|
||||
} else {
|
||||
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
||||
}
|
||||
#endif
|
||||
|
||||
wm.LineStippleEnable = rs->line.stipple.enable;
|
||||
}
|
||||
|
||||
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
||||
uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
|
||||
GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
|
||||
GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1922,40 +1714,12 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
|
||||
#if GFX_VER == 7
|
||||
/* Even if no fragments are ever dispatched, gfx7 hardware hangs if
|
||||
* we don't at least set the maximum number of threads.
|
||||
*/
|
||||
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
|
||||
#endif
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
|
||||
#if GFX_VER < 8
|
||||
/* The hardware wedges if you have this bit set but don't turn on any dual
|
||||
* source blend factors.
|
||||
*/
|
||||
bool dual_src_blend = false;
|
||||
if (wm_prog_data->dual_src_blend && cb) {
|
||||
for (uint32_t i = 0; i < cb->attachment_count; i++) {
|
||||
const struct vk_color_blend_attachment_state *a =
|
||||
&cb->attachments[i];
|
||||
|
||||
if (a->blend_enable &&
|
||||
(is_dual_src_blend_factor(a->src_color_blend_factor) ||
|
||||
is_dual_src_blend_factor(a->dst_color_blend_factor) ||
|
||||
is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
|
||||
is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
|
||||
dual_src_blend = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
|
||||
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
|
||||
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
|
||||
|
|
@ -1983,8 +1747,7 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
|
||||
|
||||
ps.SingleProgramFlow = false;
|
||||
ps.VectorMaskEnable = GFX_VER >= 8 &&
|
||||
wm_prog_data->uses_vmask;
|
||||
ps.VectorMaskEnable = wm_prog_data->uses_vmask;
|
||||
/* Wa_1606682166 */
|
||||
ps.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(fs_bin);
|
||||
ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
|
||||
|
|
@ -1992,25 +1755,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||
wm_prog_data->base.ubo_ranges[0].length;
|
||||
ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
|
||||
POSOFFSET_SAMPLE: POSOFFSET_NONE;
|
||||
#if GFX_VER < 8
|
||||
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
|
||||
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
|
||||
ps.DualSourceBlendEnable = dual_src_blend;
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
/* Haswell requires the sample mask to be set in this packet as well
|
||||
* as in 3DSTATE_SAMPLE_MASK; the values should match.
|
||||
*/
|
||||
ps.SampleMask = 0xff;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
ps.MaximumNumberofThreadsPerPSD =
|
||||
devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
|
||||
#else
|
||||
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
|
||||
#endif
|
||||
|
||||
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
||||
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
||||
|
|
@ -2030,7 +1777,6 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
static void
|
||||
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
|
||||
const struct vk_rasterization_state *rs,
|
||||
|
|
@ -2093,7 +1839,6 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
|
|||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
|
||||
|
|
@ -2323,25 +2068,6 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
|
|||
emit_3dstate_primitive_replication(pipeline, state->rp);
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
/* From gfx7_vs_state.c */
|
||||
|
||||
/**
|
||||
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
|
||||
* Geometry > Geometry Shader > State:
|
||||
*
|
||||
* "Note: Because of corruption in IVB:GT2, software needs to flush the
|
||||
* whole fixed function pipeline when the GS enable changes value in
|
||||
* the 3DSTATE_GS."
|
||||
*
|
||||
* The hardware architects have clarified that in this context "flush the
|
||||
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
|
||||
* Stall" bit set.
|
||||
*/
|
||||
if (device->info->platform == INTEL_PLATFORM_IVB)
|
||||
gfx7_emit_vs_workaround_flush(brw);
|
||||
#endif
|
||||
|
||||
if (anv_pipeline_is_primitive(pipeline)) {
|
||||
emit_vertex_input(pipeline, state->vi);
|
||||
|
||||
|
|
@ -2379,9 +2105,7 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
|
|||
emit_3dstate_wm(pipeline, state->ia, state->rs,
|
||||
state->ms, state->cb, state->rp);
|
||||
emit_3dstate_ps(pipeline, state->ms, state->cb);
|
||||
#if GFX_VER >= 8
|
||||
emit_3dstate_ps_extra(pipeline, state->rs, state->rp);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
|
|
@ -2424,43 +2148,25 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
|||
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
||||
|
||||
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||
#if GFX_VER > 7
|
||||
vfe.StackSize = 0;
|
||||
#else
|
||||
vfe.GPGPUMode = true;
|
||||
#endif
|
||||
vfe.MaximumNumberofThreads =
|
||||
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
||||
vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
|
||||
vfe.NumberofURBEntries = 2;
|
||||
#if GFX_VER < 11
|
||||
vfe.ResetGatewayTimer = true;
|
||||
#endif
|
||||
#if GFX_VER <= 8
|
||||
#if GFX_VER == 8
|
||||
vfe.BypassGatewayControl = true;
|
||||
#endif
|
||||
vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2;
|
||||
vfe.URBEntryAllocationSize = 2;
|
||||
vfe.CURBEAllocationSize = vfe_curbe_allocation;
|
||||
|
||||
if (cs_bin->prog_data->total_scratch) {
|
||||
if (GFX_VER >= 8) {
|
||||
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
|
||||
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
ffs(cs_bin->prog_data->total_scratch) - 11;
|
||||
} else if (GFX_VERx10 == 75) {
|
||||
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
|
||||
* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
ffs(cs_bin->prog_data->total_scratch) - 12;
|
||||
} else {
|
||||
/* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
|
||||
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
cs_bin->prog_data->total_scratch / 1024 - 1;
|
||||
}
|
||||
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
|
||||
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
ffs(cs_bin->prog_data->total_scratch) - 11;
|
||||
vfe.ScratchSpaceBasePointer =
|
||||
get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
|
||||
}
|
||||
|
|
@ -2481,14 +2187,10 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
|||
.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||
|
||||
#if GFX_VERx10 != 75
|
||||
.ConstantURBEntryReadOffset = 0,
|
||||
#endif
|
||||
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
||||
#if GFX_VERx10 >= 75
|
||||
.CrossThreadConstantDataReadLength =
|
||||
cs_prog_data->push.cross_thread.regs,
|
||||
#endif
|
||||
#if GFX_VER >= 12
|
||||
/* TODO: Check if we are missing workarounds and enable mid-thread
|
||||
* preemption.
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@
|
|||
* - GPR 15 for conditional rendering
|
||||
*/
|
||||
#define MI_BUILDER_NUM_ALLOC_GPRS 14
|
||||
#define MI_BUILDER_CAN_WRITE_BATCH GFX_VER >= 8
|
||||
#define MI_BUILDER_CAN_WRITE_BATCH true
|
||||
#define __gen_get_batch_dwords anv_batch_emit_dwords
|
||||
#define __gen_address_offset anv_address_add
|
||||
#define __gen_get_batch_address(b, a) anv_batch_address(b, a)
|
||||
|
|
@ -67,12 +67,10 @@ VkResult genX(CreateQueryPool)(
|
|||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
const struct anv_physical_device *pdevice = device->physical;
|
||||
#if GFX_VER >= 8
|
||||
const VkQueryPoolPerformanceCreateInfoKHR *perf_query_info = NULL;
|
||||
struct intel_perf_counter_pass *counter_pass;
|
||||
struct intel_perf_query_info **pass_query;
|
||||
uint32_t n_passes = 0;
|
||||
#endif
|
||||
uint32_t data_offset = 0;
|
||||
VK_MULTIALLOC(ma);
|
||||
VkResult result;
|
||||
|
|
@ -132,7 +130,6 @@ VkResult genX(CreateQueryPool)(
|
|||
uint64s_per_slot += 2 * DIV_ROUND_UP(layout->size, sizeof(uint64_t));
|
||||
break;
|
||||
}
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
const struct intel_perf_query_field_layout *layout =
|
||||
&pdevice->perf->query_layout;
|
||||
|
|
@ -158,7 +155,6 @@ VkResult genX(CreateQueryPool)(
|
|||
uint64s_per_slot *= n_passes;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
/* Query has two values: begin and end. */
|
||||
uint64s_per_slot = 1 + 2;
|
||||
|
|
@ -180,7 +176,6 @@ VkResult genX(CreateQueryPool)(
|
|||
pool->data_offset = data_offset;
|
||||
pool->snapshot_size = (pool->stride - data_offset) / 2;
|
||||
}
|
||||
#if GFX_VER >= 8
|
||||
else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
pool->pass_size = pool->stride / n_passes;
|
||||
pool->data_offset = data_offset;
|
||||
|
|
@ -198,7 +193,6 @@ VkResult genX(CreateQueryPool)(
|
|||
perf_query_info->counterIndexCount,
|
||||
pool->pass_query);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint64_t size = pool->slots * (uint64_t)pool->stride;
|
||||
result = anv_device_alloc_bo(device, "query-pool", size,
|
||||
|
|
@ -209,7 +203,6 @@ VkResult genX(CreateQueryPool)(
|
|||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
for (uint32_t p = 0; p < pool->n_passes; p++) {
|
||||
struct mi_builder b;
|
||||
|
|
@ -225,7 +218,6 @@ VkResult genX(CreateQueryPool)(
|
|||
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*pQueryPool = anv_query_pool_to_handle(pool);
|
||||
|
||||
|
|
@ -252,7 +244,6 @@ void genX(DestroyQueryPool)(
|
|||
vk_object_free(&device->vk, pAllocator, pool);
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
/**
|
||||
* VK_KHR_performance_query layout :
|
||||
*
|
||||
|
|
@ -350,7 +341,6 @@ khr_perf_query_ensure_relocs(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* VK_INTEL_performance_query layout :
|
||||
|
|
@ -402,7 +392,6 @@ query_slot(struct anv_query_pool *pool, uint32_t query)
|
|||
static bool
|
||||
query_is_available(struct anv_query_pool *pool, uint32_t query)
|
||||
{
|
||||
#if GFX_VER >= 8
|
||||
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
for (uint32_t p = 0; p < pool->n_passes; p++) {
|
||||
volatile uint64_t *slot =
|
||||
|
|
@ -412,7 +401,6 @@ query_is_available(struct anv_query_pool *pool, uint32_t query)
|
|||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
return *(volatile uint64_t *)query_slot(pool, query);
|
||||
}
|
||||
|
|
@ -521,8 +509,8 @@ VkResult genX(GetQueryPoolResults)(
|
|||
if (write_results) {
|
||||
uint64_t result = slot[idx * 2 + 2] - slot[idx * 2 + 1];
|
||||
|
||||
/* WaDividePSInvocationCountBy4:HSW,BDW */
|
||||
if ((device->info->ver == 8 || device->info->verx10 == 75) &&
|
||||
/* WaDividePSInvocationCountBy4:BDW */
|
||||
if (device->info->ver == 8 &&
|
||||
(1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT)
|
||||
result >>= 2;
|
||||
|
||||
|
|
@ -553,7 +541,6 @@ VkResult genX(GetQueryPoolResults)(
|
|||
break;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
const struct anv_physical_device *pdevice = device->physical;
|
||||
assert((flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
|
||||
|
|
@ -570,7 +557,6 @@ VkResult genX(GetQueryPoolResults)(
|
|||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||
if (!write_results)
|
||||
|
|
@ -692,7 +678,6 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
break;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
for (uint32_t p = 0; p < pool->n_passes; p++) {
|
||||
|
|
@ -705,7 +690,6 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL:
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
|
|
@ -767,7 +751,6 @@ void genX(CmdResetQueryPool)(
|
|||
break;
|
||||
}
|
||||
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
|
||||
|
|
@ -782,7 +765,6 @@ void genX(CmdResetQueryPool)(
|
|||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||
struct mi_builder b;
|
||||
|
|
@ -808,13 +790,11 @@ void genX(ResetQueryPool)(
|
|||
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
#if GFX_VER >= 8
|
||||
for (uint32_t p = 0; p < pool->n_passes; p++) {
|
||||
uint64_t *pass_slot = pool->bo->map +
|
||||
khr_perf_query_availability_offset(pool, firstQuery + i, p);
|
||||
*pass_slot = 0;
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
uint64_t *slot = query_slot(pool, firstQuery + i);
|
||||
*slot = 0;
|
||||
|
|
@ -966,7 +946,6 @@ void genX(CmdBeginQueryIndexedEXT)(
|
|||
emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
|
||||
break;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
if (!khr_perf_query_ensure_relocs(cmd_buffer))
|
||||
return;
|
||||
|
|
@ -1077,7 +1056,6 @@ void genX(CmdBeginQueryIndexedEXT)(
|
|||
}
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
|
|
@ -1163,7 +1141,6 @@ void genX(CmdEndQueryIndexedEXT)(
|
|||
emit_query_mi_availability(&b, query_addr, true);
|
||||
break;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
|
|
@ -1241,7 +1218,6 @@ void genX(CmdEndQueryIndexedEXT)(
|
|||
assert(cmd_buffer->perf_reloc_idx == pdevice->n_perf_query_commands);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
|
|
@ -1330,8 +1306,6 @@ void genX(CmdWriteTimestamp2)(
|
|||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 75
|
||||
|
||||
#define MI_PREDICATE_SRC0 0x2400
|
||||
#define MI_PREDICATE_SRC1 0x2408
|
||||
#define MI_PREDICATE_RESULT 0x2418
|
||||
|
|
@ -1470,9 +1444,8 @@ void genX(CmdCopyQueryPoolResults)(
|
|||
result = compute_query_result(&b, anv_address_add(query_addr,
|
||||
idx * 16 + 8));
|
||||
|
||||
/* WaDividePSInvocationCountBy4:HSW,BDW */
|
||||
if ((cmd_buffer->device->info->ver == 8 ||
|
||||
cmd_buffer->device->info->verx10 == 75) &&
|
||||
/* WaDividePSInvocationCountBy4:BDW */
|
||||
if (cmd_buffer->device->info->ver == 8 &&
|
||||
(1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT) {
|
||||
result = mi_ushr32_imm(&b, result, 2);
|
||||
}
|
||||
|
|
@ -1495,11 +1468,9 @@ void genX(CmdCopyQueryPoolResults)(
|
|||
gpu_write_query_result(&b, dest_addr, flags, idx++, result);
|
||||
break;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
||||
unreachable("Copy KHR performance query results not implemented");
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
unreachable("unhandled query type");
|
||||
|
|
@ -1513,18 +1484,3 @@ void genX(CmdCopyQueryPoolResults)(
|
|||
dest_addr = anv_address_add(dest_addr, destStride);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void genX(CmdCopyQueryPoolResults)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
VkQueryPool queryPool,
|
||||
uint32_t firstQuery,
|
||||
uint32_t queryCount,
|
||||
VkBuffer destBuffer,
|
||||
VkDeviceSize destOffset,
|
||||
VkDeviceSize destStride,
|
||||
VkQueryResultFlags flags)
|
||||
{
|
||||
anv_finishme("Queries not yet supported on Ivy Bridge");
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -583,8 +583,6 @@ genX(emit_l3_config)(struct anv_batch *batch,
|
|||
{
|
||||
UNUSED const struct intel_device_info *devinfo = device->info;
|
||||
|
||||
#if GFX_VER >= 8
|
||||
|
||||
#if GFX_VER >= 12
|
||||
#define L3_ALLOCATION_REG GENX(L3ALLOC)
|
||||
#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num)
|
||||
|
|
@ -621,81 +619,6 @@ genX(emit_l3_config)(struct anv_batch *batch,
|
|||
l3cr.AllAllocation = cfg->n[INTEL_L3P_ALL];
|
||||
}
|
||||
}
|
||||
|
||||
#else /* GFX_VER < 8 */
|
||||
|
||||
const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];
|
||||
const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||
|
||||
cfg->n[INTEL_L3P_ALL];
|
||||
const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||
|
||||
cfg->n[INTEL_L3P_ALL];
|
||||
const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||
|
||||
cfg->n[INTEL_L3P_ALL];
|
||||
|
||||
assert(!cfg->n[INTEL_L3P_ALL]);
|
||||
|
||||
/* When enabled SLM only uses a portion of the L3 on half of the banks,
|
||||
* the matching space on the remaining banks has to be allocated to a
|
||||
* client (URB for all validated configurations) set to the
|
||||
* lower-bandwidth 2-bank address hashing mode.
|
||||
*/
|
||||
const bool urb_low_bw = cfg->n[INTEL_L3P_SLM] && devinfo->platform != INTEL_PLATFORM_BYT;
|
||||
assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);
|
||||
|
||||
/* Minimum number of ways that can be allocated to the URB. */
|
||||
const unsigned n0_urb = devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0;
|
||||
assert(cfg->n[INTEL_L3P_URB] >= n0_urb);
|
||||
|
||||
anv_batch_write_reg(batch, GENX(L3SQCREG1), l3sqc) {
|
||||
l3sqc.ConvertDC_UC = !has_dc;
|
||||
l3sqc.ConvertIS_UC = !has_is;
|
||||
l3sqc.ConvertC_UC = !has_c;
|
||||
l3sqc.ConvertT_UC = !has_t;
|
||||
#if GFX_VERx10 == 75
|
||||
l3sqc.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT;
|
||||
#else
|
||||
l3sqc.L3SQGeneralPriorityCreditInitialization =
|
||||
devinfo->platform == INTEL_PLATFORM_BYT ? BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT;
|
||||
#endif
|
||||
l3sqc.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT;
|
||||
}
|
||||
|
||||
anv_batch_write_reg(batch, GENX(L3CNTLREG2), l3cr2) {
|
||||
l3cr2.SLMEnable = cfg->n[INTEL_L3P_SLM];
|
||||
l3cr2.URBLowBandwidth = urb_low_bw;
|
||||
l3cr2.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb;
|
||||
#if !GFX_VERx10 == 75
|
||||
l3cr2.ALLAllocation = cfg->n[INTEL_L3P_ALL];
|
||||
#endif
|
||||
l3cr2.ROAllocation = cfg->n[INTEL_L3P_RO];
|
||||
l3cr2.DCAllocation = cfg->n[INTEL_L3P_DC];
|
||||
}
|
||||
|
||||
anv_batch_write_reg(batch, GENX(L3CNTLREG3), l3cr3) {
|
||||
l3cr3.ISAllocation = cfg->n[INTEL_L3P_IS];
|
||||
l3cr3.ISLowBandwidth = 0;
|
||||
l3cr3.CAllocation = cfg->n[INTEL_L3P_C];
|
||||
l3cr3.CLowBandwidth = 0;
|
||||
l3cr3.TAllocation = cfg->n[INTEL_L3P_T];
|
||||
l3cr3.TLowBandwidth = 0;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 75
|
||||
if (device->physical->cmd_parser_version >= 4) {
|
||||
/* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
|
||||
* them disabled to avoid crashing the system hard.
|
||||
*/
|
||||
anv_batch_write_reg(batch, GENX(SCRATCH1), s1) {
|
||||
s1.L3AtomicDisable = !has_dc;
|
||||
}
|
||||
anv_batch_write_reg(batch, GENX(CHICKEN3), c3) {
|
||||
c3.L3AtomicDisableMask = true;
|
||||
c3.L3AtomicDisable = !has_dc;
|
||||
}
|
||||
}
|
||||
#endif /* GFX_VERx10 == 75 */
|
||||
|
||||
#endif /* GFX_VER < 8 */
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -944,7 +867,7 @@ VkResult genX(CreateSampler)(
|
|||
|
||||
sampler->n_planes = 1;
|
||||
|
||||
uint32_t border_color_stride = GFX_VERx10 == 75 ? 512 : 64;
|
||||
uint32_t border_color_stride = 64;
|
||||
uint32_t border_color_offset;
|
||||
ASSERTED bool has_custom_color = false;
|
||||
if (pCreateInfo->borderColor <= VK_BORDER_COLOR_INT_OPAQUE_WHITE) {
|
||||
|
|
@ -952,7 +875,6 @@ VkResult genX(CreateSampler)(
|
|||
pCreateInfo->borderColor *
|
||||
border_color_stride;
|
||||
} else {
|
||||
assert(GFX_VER >= 8);
|
||||
sampler->custom_border_color =
|
||||
anv_state_reserved_pool_alloc(&device->custom_border_colors);
|
||||
border_color_offset = sampler->custom_border_color.offset;
|
||||
|
|
@ -1077,11 +999,7 @@ VkResult genX(CreateSampler)(
|
|||
.CPSLODCompensationEnable = true,
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 8
|
||||
.LODPreClampMode = CLAMP_MODE_OGL,
|
||||
#else
|
||||
.LODPreClampEnable = CLAMP_ENABLE_OGL,
|
||||
#endif
|
||||
|
||||
#if GFX_VER == 8
|
||||
.BaseMipLevel = 0.0,
|
||||
|
|
@ -1104,9 +1022,7 @@ VkResult genX(CreateSampler)(
|
|||
|
||||
.BorderColorPointer = border_color_offset,
|
||||
|
||||
#if GFX_VER >= 8
|
||||
.LODClampMagnificationMode = MIPNONE,
|
||||
#endif
|
||||
|
||||
.MaximumAnisotropy = vk_to_intel_max_anisotropy(pCreateInfo->maxAnisotropy),
|
||||
.RAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,
|
||||
|
|
|
|||
|
|
@ -74,20 +74,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
|
|||
lri.DataDWord = cache_mode;
|
||||
}
|
||||
|
||||
#elif GFX_VER == 8
|
||||
|
||||
uint32_t cache_mode;
|
||||
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
|
||||
.NPPMAFixEnable = enable,
|
||||
.NPEarlyZFailsDisable = enable,
|
||||
.NPPMAFixEnableMask = true,
|
||||
.NPEarlyZFailsDisableMask = true);
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
|
||||
lri.RegisterOffset = GENX(CACHE_MODE_1_num);
|
||||
lri.DataDWord = cache_mode;
|
||||
}
|
||||
|
||||
#endif /* GFX_VER == 8 */
|
||||
#endif /* GFX_VER == 9 */
|
||||
|
||||
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
|
||||
* Flush bits is often necessary. We do it regardless because it's easier.
|
||||
|
|
@ -106,96 +93,6 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
|
|||
}
|
||||
}
|
||||
|
||||
UNUSED static bool
|
||||
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct vk_depth_stencil_state *ds)
|
||||
{
|
||||
assert(GFX_VER == 8);
|
||||
|
||||
/* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
|
||||
*
|
||||
* SW must set this bit in order to enable this fix when following
|
||||
* expression is TRUE.
|
||||
*
|
||||
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
|
||||
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
|
||||
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
|
||||
* (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
|
||||
* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
|
||||
* (3DSTATE_PS_EXTRA::PixelShaderValid) &&
|
||||
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
|
||||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
|
||||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
|
||||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
|
||||
* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
|
||||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
|
||||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
|
||||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
|
||||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
|
||||
* 3DSTATE_WM::ForceKillPix != ForceOff &&
|
||||
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
|
||||
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
|
||||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
|
||||
*/
|
||||
|
||||
/* These are always true:
|
||||
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
|
||||
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
|
||||
*/
|
||||
|
||||
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
|
||||
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
|
||||
* and there is no harm.
|
||||
*
|
||||
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
|
||||
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
|
||||
*/
|
||||
if (!cmd_buffer->state.hiz_enabled)
|
||||
return false;
|
||||
|
||||
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
|
||||
return false;
|
||||
|
||||
/* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
|
||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
if (wm_prog_data->early_fragment_tests)
|
||||
return false;
|
||||
|
||||
/* We never use anv_pipeline for HiZ ops so this is trivially true:
|
||||
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
|
||||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
|
||||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
|
||||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
|
||||
*/
|
||||
|
||||
/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
|
||||
if (!ds->depth.test_enable)
|
||||
return false;
|
||||
|
||||
/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
|
||||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
|
||||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
|
||||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
|
||||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
|
||||
* 3DSTATE_WM::ForceKillPix != ForceOff &&
|
||||
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
|
||||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
|
||||
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
|
||||
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
|
||||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
|
||||
*/
|
||||
return (pipeline->kill_pixel && (ds->depth.write_enable ||
|
||||
ds->stencil.write_enable)) ||
|
||||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
|
||||
}
|
||||
|
||||
UNUSED static bool
|
||||
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct vk_depth_stencil_state *ds)
|
||||
|
|
@ -331,15 +228,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
struct GENX(3DSTATE_SF) sf = {
|
||||
GENX(3DSTATE_SF_header),
|
||||
};
|
||||
#if GFX_VER == 8
|
||||
if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) {
|
||||
sf.CHVLineWidth = dyn->rs.line.width;
|
||||
} else {
|
||||
sf.LineWidth = dyn->rs.line.width;
|
||||
}
|
||||
#else
|
||||
sf.LineWidth = dyn->rs.line.width,
|
||||
#endif
|
||||
|
||||
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
|
||||
}
|
||||
|
|
@ -394,75 +284,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
* across different state packets for gfx8 and gfx9. We handle that by
|
||||
* using a big old #if switch here.
|
||||
*/
|
||||
#if GFX_VER == 8
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
|
||||
struct anv_state cc_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
GENX(COLOR_CALC_STATE_length) * 4,
|
||||
64);
|
||||
struct GENX(COLOR_CALC_STATE) cc = {
|
||||
.BlendConstantColorRed = dyn->cb.blend_constants[0],
|
||||
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
|
||||
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
|
||||
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
|
||||
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
|
||||
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
|
||||
};
|
||||
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
|
||||
ccp.ColorCalcStatePointer = cc_state.offset;
|
||||
ccp.ColorCalcStatePointerValid = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
|
||||
VkImageAspectFlags ds_aspects = 0;
|
||||
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
|
||||
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
struct vk_depth_stencil_state opt_ds = dyn->ds;
|
||||
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
|
||||
ds.DoubleSidedStencilEnable = true;
|
||||
|
||||
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
|
||||
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
|
||||
|
||||
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
|
||||
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
|
||||
|
||||
ds.DepthTestEnable = opt_ds.depth.test_enable;
|
||||
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
|
||||
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
|
||||
ds.StencilTestEnable = opt_ds.stencil.test_enable;
|
||||
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
|
||||
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
|
||||
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
|
||||
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
|
||||
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
|
||||
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
|
||||
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
|
||||
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
|
||||
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
|
||||
}
|
||||
|
||||
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
|
||||
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
|
||||
}
|
||||
#else
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
|
||||
struct anv_state cc_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
|
|
@ -531,7 +352,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
|||
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
|
||||
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 12
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue