anv: remove unused gfx7 code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18208>
Lionel Landwerlin 2022-08-03 12:38:39 +03:00 committed by Marge Bot
parent 1a77f83c2b
commit a659819f79
15 changed files with 41 additions and 1325 deletions

View file

@ -109,19 +109,6 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC))
data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
/* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader.
* Do not handle VK_DESCRIPTOR_TYPE_STORAGE_IMAGE and
* VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT because they already must
* have identity swizzle.
*
* TODO: We need to handle swizzle on buffer views too for those same
* platforms.
*/
if (device->info.verx10 == 70 &&
(type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE;
return data;
}
@ -175,9 +162,6 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
size += sizeof(struct anv_address_range_descriptor);
if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)
size += sizeof(struct anv_texture_swizzle_descriptor);
return size;
}
@ -1478,26 +1462,6 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
anv_descriptor_set_write_image_param(desc_map, image_param);
}
if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) {
assert(!(data & ANV_DESCRIPTOR_SAMPLED_IMAGE));
assert(image_view);
struct anv_texture_swizzle_descriptor desc_data[3];
memset(desc_data, 0, sizeof(desc_data));
for (unsigned p = 0; p < image_view->n_planes; p++) {
desc_data[p] = (struct anv_texture_swizzle_descriptor) {
.swizzle = {
(uint8_t)image_view->planes[p].isl.swizzle.r,
(uint8_t)image_view->planes[p].isl.swizzle.g,
(uint8_t)image_view->planes[p].isl.swizzle.b,
(uint8_t)image_view->planes[p].isl.swizzle.a,
},
};
}
memcpy(desc_map, desc_data,
MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
}
}
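
As an aside on what the deleted write path serialized: a minimal standalone sketch, with the descriptor struct re-declared locally and the isl channel-select values (RED=4, GREEN=5, BLUE=6, ALPHA=7) assumed from isl.h; the BGRA view is a hypothetical example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local stand-in for the deleted anv_texture_swizzle_descriptor. */
struct texture_swizzle_descriptor {
   uint8_t swizzle[4];   /* one isl channel select per component */
   uint32_t _pad;
};

int main(void)
{
   /* Assumed isl channel-select encodings: ZERO=0, ONE=1, RED=4,
    * GREEN=5, BLUE=6, ALPHA=7. A BGRA view reads B, G, R, A. */
   const struct texture_swizzle_descriptor bgra = {
      .swizzle = { 6, 5, 4, 7 },
   };

   struct texture_swizzle_descriptor desc_data[3];
   memset(desc_data, 0, sizeof(desc_data));
   desc_data[0] = bgra;                    /* plane 0 of a 1-plane view */

   /* The driver copies MAX2(1, max_plane_count) records into the map. */
   uint8_t desc_map[sizeof(desc_data)];
   memcpy(desc_map, desc_data, 1 * sizeof(desc_data[0]));

   for (unsigned i = 0; i < 8; i++)
      printf("%02x ", desc_map[i]);        /* 06 05 04 07 00 00 00 00 */
   printf("\n");
   return 0;
}
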
void

View file

@ -258,7 +258,7 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_buffer_device_address = device->has_a64_buffer_access,
.EXT_calibrated_timestamps = device->has_reg_timestamp,
.EXT_color_write_enable = true,
.EXT_conditional_rendering = device->info.verx10 >= 75,
.EXT_conditional_rendering = true,
.EXT_conservative_rasterization = device->info.ver >= 9,
.EXT_custom_border_color = device->info.ver >= 8,
.EXT_depth_clip_control = true,
@ -805,15 +805,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
device->info = devinfo;
device->cmd_parser_version = -1;
if (device->info.ver == 7) {
device->cmd_parser_version =
anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
if (device->cmd_parser_version == -1) {
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
"failed to get command parser version");
goto fail_base;
}
}
if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
@ -1428,8 +1419,8 @@ void anv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
features->conditionalRendering = pdevice->info.verx10 >= 75;
features->inheritedConditionalRendering = pdevice->info.verx10 >= 75;
features->conditionalRendering = true;
features->inheritedConditionalRendering = true;
break;
}
@ -1751,8 +1742,7 @@ void anv_GetPhysicalDeviceProperties(
const uint32_t max_textures =
pdevice->has_bindless_images ? UINT16_MAX : 128;
const uint32_t max_samplers =
pdevice->has_bindless_samplers ? UINT16_MAX :
(devinfo->verx10 >= 75) ? 128 : 16;
pdevice->has_bindless_samplers ? UINT16_MAX : 128;
const uint32_t max_images =
pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;
@ -2512,8 +2502,7 @@ void anv_GetPhysicalDeviceProperties2(
props->transformFeedbackQueries = true;
props->transformFeedbackStreamsLinesTriangles = false;
props->transformFeedbackRasterizationStreamSelect = false;
/* This requires MI_MATH */
props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
props->transformFeedbackDraw = true;
break;
}

View file

@ -495,14 +495,6 @@ anv_get_format_plane(const struct intel_device_info *devinfo,
const struct isl_format_layout *isl_layout =
isl_format_get_layout(plane_format.isl_format);
/* On Ivy Bridge we don't have enough 24 and 48-bit formats to reliably
* do texture upload with BLORP, so just don't claim support for any of
* them.
*/
if (devinfo->verx10 == 70 &&
(isl_layout->bpb == 24 || isl_layout->bpb == 48))
return unsupported;
if (tiling == VK_IMAGE_TILING_OPTIMAL &&
!util_is_power_of_two_or_zero(isl_layout->bpb)) {
/* Tiled formats *must* be power-of-two because we need to upload

View file

@ -161,10 +161,6 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
uint32_t *api_mode,
bool *msaa_rasterization_enable);
uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
VkPolygonMode raster_mode);
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
VkPrimitiveTopology primitive_topology);

View file

@ -358,14 +358,6 @@ anv_image_plane_needs_shadow_surface(const struct intel_device_info *devinfo,
return true;
}
if (devinfo->ver <= 7 &&
plane_format.aspect == VK_IMAGE_ASPECT_STENCIL_BIT &&
(vk_plane_usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) {
/* gfx7 can't sample from W-tiled surfaces. */
return true;
}
return false;
}
@ -731,11 +723,6 @@ add_aux_surface_if_supported(struct anv_device *device,
return VK_SUCCESS;
}
if (device->info->ver == 7) {
anv_perf_warn(VK_LOG_OBJS(&image->vk.base), "Implement gfx7 HiZ");
return VK_SUCCESS;
}
if (image->vk.mip_levels > 1) {
anv_perf_warn(VK_LOG_OBJS(&image->vk.base), "Enable multi-LOD HiZ");
return VK_SUCCESS;
@ -2377,12 +2364,6 @@ anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
return ANV_FAST_CLEAR_NONE;
/* We don't support MSAA fast-clears on Ivybridge or Bay Trail because they
* lack the MI ALU which we need to determine the predicates.
*/
if (devinfo->verx10 == 70 && image->vk.samples > 1)
return ANV_FAST_CLEAR_NONE;
enum isl_aux_state aux_state =
anv_layout_to_aux_state(devinfo, image, aspect, layout);
@ -2495,23 +2476,9 @@ anv_image_fill_surface_state(struct anv_device *device,
surface = &image->planes[plane].shadow_surface;
}
/* For texturing from stencil on gfx7, we have to sample from a shadow
* surface because we don't support W-tiling in the sampler.
*/
if (anv_surface_is_valid(&image->planes[plane].shadow_surface) &&
aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
assert(device->info->ver == 7);
assert(view_usage & ISL_SURF_USAGE_TEXTURE_BIT);
surface = &image->planes[plane].shadow_surface;
}
if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
view.swizzle = anv_swizzle_for_render(view.swizzle);
/* On Ivy Bridge and Bay Trail we do the swizzle in the shader */
if (device->info->verx10 == 70)
view.swizzle = ISL_SWIZZLE_IDENTITY;
/* If this is a HiZ buffer we can sample from with a programmable clear
* value (SKL+), define the clear value to the optimal constant.
*/

View file

@ -1234,85 +1234,12 @@ build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
}
}
static void
lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
struct apply_pipeline_layout_state *state)
{
assert(state->pdevice->info.verx10 == 70);
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
nir_tex_instr_is_query(tex) ||
tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
(tex->is_shadow && tex->is_new_style_shadow))
return;
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
assert(deref_src_idx >= 0);
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
nir_variable *var = nir_deref_instr_get_variable(deref);
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
const struct anv_descriptor_set_binding_layout *bind_layout =
&state->layout->set[set].layout->binding[binding];
if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
return;
b->cursor = nir_before_instr(&tex->instr);
const unsigned plane_offset =
plane * sizeof(struct anv_texture_swizzle_descriptor);
nir_ssa_def *swiz =
build_load_var_deref_descriptor_mem(b, deref, plane_offset,
1, 32, state);
b->cursor = nir_after_instr(&tex->instr);
assert(tex->dest.ssa.bit_size == 32);
assert(tex->dest.ssa.num_components == 4);
/* Initializing to undef is ok; nir_opt_undef will clean it up. */
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
nir_ssa_def *comps[8];
for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
comps[i] = undef;
comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
else
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
nir_ssa_def *swiz_comps[4];
for (unsigned i = 0; i < 4; i++) {
nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
}
nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
/* Rewrite uses before we insert so we don't rewrite this use */
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
swiz_tex_res,
swiz_tex_res->parent_instr);
}
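
The deleted pass can be mirrored on the CPU to see the remapping it emitted in NIR. A minimal sketch, assuming the isl channel-select encodings ZERO=0, ONE=1, RED=4, GREEN=5, BLUE=6, ALPHA=7; table slots 2 and 3 stay unused, matching the undef entries above.

#include <stdio.h>
#include <stdint.h>

/* Remap a sampled RGBA texel through a 4-byte channel-select swizzle,
 * the CPU equivalent of the deleted lower_gfx7_tex_swizzle() pass. */
static void swizzle_texel(const float in[4], const uint8_t swiz[4],
                          float out[4])
{
   float comps[8] = { 0 };     /* indexed by isl channel select */
   comps[0] = 0.0f;            /* ISL_CHANNEL_SELECT_ZERO */
   comps[1] = 1.0f;            /* ISL_CHANNEL_SELECT_ONE (float dest) */
   comps[4] = in[0];           /* RED */
   comps[5] = in[1];           /* GREEN */
   comps[6] = in[2];           /* BLUE */
   comps[7] = in[3];           /* ALPHA */

   for (unsigned i = 0; i < 4; i++)
      out[i] = comps[swiz[i]];
}

int main(void)
{
   const float texel[4] = { 0.1f, 0.2f, 0.3f, 0.4f };
   const uint8_t bgra[4] = { 6, 5, 4, 7 }; /* B, G, R, A */
   float out[4];
   swizzle_texel(texel, bgra, out);
   printf("%.1f %.1f %.1f %.1f\n", out[0], out[1], out[2], out[3]);
   return 0;
}
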
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
struct apply_pipeline_layout_state *state)
{
unsigned plane = tex_instr_get_and_remove_plane_src(tex);
/* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
* before we lower the derefs away so we can still find the descriptor.
*/
if (state->pdevice->info.verx10 == 70)
lower_gfx7_tex_swizzle(b, tex, plane, state);
b->cursor = nir_before_instr(&tex->instr);
lower_tex_deref(b, tex, nir_tex_src_texture_deref,

View file

@ -84,7 +84,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
const bool has_push_intrinsic = push_start <= push_end;
const bool push_ubo_ranges =
pdevice->info.verx10 >= 75 &&
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
!brw_shader_stage_requires_bindless_resources(nir->info.stage);

View file

@ -1751,17 +1751,6 @@ struct anv_sampled_image_descriptor {
uint32_t sampler;
};
struct anv_texture_swizzle_descriptor {
/** Texture swizzle
*
* See also nir_intrinsic_channel_select_intel
*/
uint8_t swizzle[4];
/** Unused padding to ensure the struct is a multiple of 64 bits */
uint32_t _pad;
};
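
The padding comment above is easy to check mechanically; a tiny compile-time sketch with a local copy of the struct:

#include <stdint.h>

/* Local copy of the struct; the real one lived in anv_private.h. */
struct texture_swizzle_descriptor {
   uint8_t swizzle[4];
   uint32_t _pad;
};

/* 4 swizzle bytes + 4 pad bytes = 8 bytes, a multiple of 64 bits. */
_Static_assert(sizeof(struct texture_swizzle_descriptor) == 8,
               "must stay a multiple of 64 bits");

int main(void) { return 0; }
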
/** Struct representing a storage image descriptor */
struct anv_storage_image_descriptor {
/** Bindless image handles
@ -1803,8 +1792,6 @@ enum anv_descriptor_data {
ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
/** Storage image handles */
ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
/** Texture swizzle descriptors (shader-side swizzling) */
ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
};
struct anv_descriptor_set_binding_layout {
@ -3137,15 +3124,7 @@ struct anv_graphics_pipeline {
* with dynamic state.
*/
struct {
uint32_t sf[7];
uint32_t clip[4];
uint32_t xfb_bo_pitch[4];
uint32_t wm[3];
uint32_t blend_state[MAX_RTS * 2];
uint32_t streamout_state[3];
} gfx7;
struct {
uint32_t sf[4];
uint32_t raster[5];
uint32_t wm[2];

View file

@ -123,7 +123,7 @@ blorp_get_surface_address(struct blorp_batch *blorp_batch,
}
}
#if GFX_VER >= 7 && GFX_VER < 10
#if GFX_VER >= 8 && GFX_VER < 10
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *batch)
{
@ -390,19 +390,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
genX(cmd_buffer_config_l3)(cmd_buffer, cfg);
}
#if GFX_VER == 7
/* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
* indirect fast-clear colors can cause GPU hangs if we don't stall first.
* See genX(cmd_buffer_mi_memcpy) for more details.
*/
if (params->src.clear_color_addr.buffer ||
params->dst.clear_color_addr.buffer) {
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_CS_STALL_BIT,
"before blorp prep fast clear");
}
#endif
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
blorp_exec_on_compute(batch, params);
else

View file

@ -467,10 +467,6 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
float depth_clear_value,
VkRect2D render_area)
{
/* We don't do any HiZ or depth fast-clears on gfx7 yet */
if (GFX_VER == 7)
return false;
/* If we're just clearing stencil, we can always HiZ clear */
if (!(clear_aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
return true;
@ -500,13 +496,6 @@ anv_can_hiz_clear_ds_view(struct anv_device *device,
if (depth_clear_value != ANV_HZ_FC_VAL)
return false;
/* Only gfx9+ supports returning ANV_HZ_FC_VAL when sampling a fast-cleared
* portion of a HiZ buffer. Testing has revealed that Gfx8 only supports
* returning 0.0f. Gens prior to gfx8 do not support this feature at all.
*/
if (GFX_VER == 8 && anv_can_sample_with_hiz(device->info, iview->image))
return false;
/* If we got here, then we can fast clear */
return true;
}
@ -684,16 +673,6 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
}
}
#if GFX_VER == 7
static inline bool
vk_image_layout_stencil_write_optimal(VkImageLayout layout)
{
return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL;
}
#endif
/* Transitions a HiZ-enabled depth buffer from one layout to another. Unless
* the initial layout is undefined, the HiZ buffer and depth buffer will
* represent the same data at the end of this operation.
@ -707,35 +686,7 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
VkImageLayout final_layout,
bool will_full_fast_clear)
{
#if GFX_VER == 7
const uint32_t plane =
anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
/* On gfx7, we have to store a texturable version of the stencil buffer in
* a shadow whenever VK_IMAGE_USAGE_SAMPLED_BIT is set and copy back and
* forth at strategic points. Stencil writes are only allowed in following
* layouts:
*
* - VK_IMAGE_LAYOUT_GENERAL
* - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
* - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL
*
* For general, we have no nice opportunity to transition so we do the copy
* to the shadow unconditionally at the end of the subpass. For transfer
* destinations, we can update it as part of the transfer op. For the other
* layouts, we delay the copy until a transition into some other layout.
*/
if (anv_surface_is_valid(&image->planes[plane].shadow_surface) &&
vk_image_layout_stencil_write_optimal(initial_layout) &&
!vk_image_layout_stencil_write_optimal(final_layout)) {
anv_image_copy_to_shadow(cmd_buffer, image,
VK_IMAGE_ASPECT_STENCIL_BIT,
base_level, level_count,
base_layer, layer_count);
}
#elif GFX_VER == 12
#if GFX_VER == 12
const uint32_t plane =
anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
@ -833,7 +784,6 @@ set_image_fast_clear_state(struct anv_cmd_buffer *cmd_buffer,
/* This is only really practical on haswell and above because it requires
* MI math in order to get it correct.
*/
#if GFX_VERx10 >= 75
static void
anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
@ -914,50 +864,6 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
}
#endif /* GFX_VERx10 >= 75 */
#if GFX_VER <= 8
static void
anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
VkImageAspectFlagBits aspect,
uint32_t level, uint32_t array_layer,
enum isl_aux_op resolve_op,
enum anv_fast_clear_type fast_clear_supported)
{
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
struct mi_value fast_clear_type_mem =
mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device,
image, aspect));
/* This only works for partial resolves and only when the clear color is
* all or nothing. On the upside, this emits less command streamer code
* and works on Ivybridge and Bay Trail.
*/
assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
assert(fast_clear_supported != ANV_FAST_CLEAR_ANY);
/* We don't support fast clears on anything other than the first slice. */
if (level > 0 || array_layer > 0)
return;
/* On gfx8, we don't have a concept of default clear colors because we
* can't sample from CCS surfaces. It's enough to just load the fast clear
* state into the predicate register.
*/
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), fast_clear_type_mem);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
mi_store(&b, fast_clear_type_mem, mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
}
#endif /* GFX_VER <= 8 */
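
Read as plain control flow, the deleted MI sequence is a compare-and-clear: resolve only when the fast-clear state word is non-zero, clearing the word as a side effect. A CPU model (local names, not driver API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* CPU model of anv_cmd_simple_resolve_predicate(): SRC0 is loaded with
 * the fast-clear state, SRC1 with 0, and LOADINV + SRCS_EQUAL means the
 * predicate is set exactly when the two differ, i.e. state != 0. */
static bool partial_resolve_needed(uint64_t *fast_clear_type_mem)
{
   uint64_t src0 = *fast_clear_type_mem;
   uint64_t src1 = 0;
   *fast_clear_type_mem = 0;     /* mi_store(..., mi_imm(0)) */
   return !(src0 == src1);       /* LOADINV of COMPARE_SRCS_EQUAL */
}

int main(void)
{
   uint64_t state = 1; /* slice was fast-cleared */
   printf("resolve: %d\n", partial_resolve_needed(&state));       /* 1 */
   printf("resolve again: %d\n", partial_resolve_needed(&state)); /* 0 */
   return 0;
}
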
static void
anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
@ -971,15 +877,9 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
{
const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
#if GFX_VER >= 9
anv_cmd_compute_resolve_predicate(cmd_buffer, image,
aspect, level, array_layer,
resolve_op, fast_clear_supported);
#else /* GFX_VER <= 8 */
anv_cmd_simple_resolve_predicate(cmd_buffer, image,
aspect, level, array_layer,
resolve_op, fast_clear_supported);
#endif
/* CCS_D only supports full resolves and BLORP will assert on us if we try
* to do a partial resolve on a CCS_D surface.
@ -1005,16 +905,12 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
assert(resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
#if GFX_VERx10 >= 75
anv_cmd_compute_resolve_predicate(cmd_buffer, image,
aspect, 0, array_layer,
resolve_op, fast_clear_supported);
anv_image_mcs_op(cmd_buffer, image, format, swizzle, aspect,
array_layer, 1, resolve_op, NULL, true);
#else
unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail");
#endif
}
void
@ -1074,21 +970,14 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer,
} else {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
sdi.Address = addr;
if (GFX_VERx10 >= 75) {
/* Pre-SKL, the dword containing the clear values also contains
* other fields, so we need to initialize those fields to match the
* values that would be in a color attachment.
*/
sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
ISL_CHANNEL_SELECT_GREEN << 22 |
ISL_CHANNEL_SELECT_BLUE << 19 |
ISL_CHANNEL_SELECT_ALPHA << 16;
} else if (GFX_VER == 7) {
/* On IVB, the dword containing the clear values also contains
* other fields that must be zero or can be zero.
*/
sdi.ImmediateData = 0;
}
/* Pre-SKL, the dword containing the clear values also contains
* other fields, so we need to initialize those fields to match the
* values that would be in a color attachment.
*/
sdi.ImmediateData = ISL_CHANNEL_SELECT_RED << 25 |
ISL_CHANNEL_SELECT_GREEN << 22 |
ISL_CHANNEL_SELECT_BLUE << 19 |
ISL_CHANNEL_SELECT_ALPHA << 16;
}
}
}
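
As a worked example of the kept branch: assuming the isl channel-select encodings RED=4, GREEN=5, BLUE=6, ALPHA=7, the identity-swizzle seed dword comes out to 0x09770000.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* isl channel-select encodings, assumed from isl.h */
   enum { SCS_RED = 4, SCS_GREEN = 5, SCS_BLUE = 6, SCS_ALPHA = 7 };

   /* The dword holding the clear value also carries the shader channel
    * selects, so it is seeded with an identity swizzle. */
   uint32_t dword = SCS_RED   << 25 |
                    SCS_GREEN << 22 |
                    SCS_BLUE  << 19 |
                    SCS_ALPHA << 16;

   printf("0x%08x\n", dword); /* 0x09770000 */
   return 0;
}
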
@ -1115,30 +1004,6 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
#if GFX_VER == 7
/* On gfx7, the combination of commands used here (MI_LOAD_REGISTER_MEM
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
* in-flight when they are issued even if the memory touched is not
* currently active for rendering. The weird bit is that it is not the
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
* rendering hangs such that the next stalling command after the
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
*
* It is unclear exactly why this hang occurs. Both MI commands come with
* warnings about the 3D pipeline but that doesn't seem to fully explain
* it. My (Jason's) best theory is that it has something to do with the
* fact that we're using a GPU state register as our temporary and that
* something with reading/writing it is causing problems.
*
* In order to work around this issue, we emit a PIPE_CONTROL with the
* command streamer stall bit set.
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_CS_STALL_BIT,
"after copy_fast_clear_dwords. Avoid potential hang");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
@ -1736,9 +1601,7 @@ genX(BeginCommandBuffer)(
if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
!(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT))
genX(emit_sample_pattern)(&cmd_buffer->batch, NULL);
#endif
#if GFX_VERx10 >= 75
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT);
@ -1861,7 +1724,6 @@ genX(CmdExecuteCommands)(
assert(secondary->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
assert(!anv_batch_has_error(&secondary->batch));
#if GFX_VERx10 >= 75
if (secondary->state.conditional_render_enabled) {
if (!primary->state.conditional_render_enabled) {
/* Secondary buffer is constructed as if it will be executed
@ -1874,7 +1736,6 @@ genX(CmdExecuteCommands)(
mi_imm(UINT64_MAX));
}
}
#endif
if (secondary->usage_flags &
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
@ -2143,23 +2004,6 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
#endif
pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
#if GFX_VER == 8
/* From Broadwell PRM, volume 2a:
* PIPE_CONTROL: Command Streamer Stall Enable:
*
* "This bit must be always set when PIPE_CONTROL command is
* programmed by GPGPU and MEDIA workloads, except for the cases
* when only Read Only Cache Invalidation bits are set (State
* Cache Invalidation Enable, Instruction cache Invalidation
* Enable, Texture Cache Invalidation Enable, Constant Cache
* Invalidation Enable). This is to WA FFDOP CG issue, this WA
* need not implemented when FF_DOP_CG is disabled."
*
* Since we do all the invalidation in the following PIPE_CONTROL,
* if we got here, we need a stall.
*/
pipe.CommandStreamerStallEnable |= current_pipeline == GPGPU;
#endif
pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
@ -2226,46 +2070,6 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES);
if (GFX_VERx10 == 75) {
/* Haswell needs addition work-arounds:
*
* From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
*
* Option 1:
* PIPE_CONTROL command with the CS Stall and the required write
* caches flushed with Post-SyncOperation as Write Immediate Data
* followed by eight dummy MI_STORE_DATA_IMM (write to scratch
* space) commands.
*
* Example:
* - Workload-1
* - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
* Immediate Data, Required Write Cache Flush bits set)
* - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
* - Workload-2 (Can use the data produce or output by
* Workload-1)
*
* Unfortunately, both the PRMs and the internal docs are a bit
* out-of-date in this regard. What the windows driver does (and
* this appears to actually work) is to emit a register read from the
* memory address written by the pipe control above.
*
* What register we load into doesn't matter. We choose an indirect
* rendering register because we know it always exists and it's one
* of the first registers the command parser allows us to write. If
* you don't have command parser support in your kernel (pre-4.2),
* this will get turned into MI_NOOP and you won't get the
* workaround. Unfortunately, there's just not much we can do in
* that case. This register is perfectly safe to write since we
* always re-load all of the indirect draw registers right before
* 3DPRIMITIVE when needed anyway.
*/
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
lrm.RegisterAddress = 0x243C; /* GFX7_3DPRIM_START_INSTANCE */
lrm.MemoryAddress = device->workaround_address;
}
}
bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS |
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
}
@ -3180,7 +2984,6 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
#if GFX_VERx10 >= 75
/* The Skylake PRM contains the following restriction:
*
* "The driver must ensure The following case does not occur
@ -3200,33 +3003,10 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
/* At this point we only have non-empty ranges */
assert(range->length > 0);
/* For Ivy Bridge, make sure we only set the first range (actual
* push constants)
*/
assert((GFX_VERx10 >= 75) || i == 0);
c.ConstantBody.ReadLength[i + shift] = range->length;
c.ConstantBody.Buffer[i + shift] =
anv_address_add(buffers[i], range->start * 32);
}
#else
/* For Ivy Bridge, push constants are relative to dynamic state
* base address and we only ever push actual push constants.
*/
if (bind_map->push_ranges[0].length > 0) {
assert(buffer_count == 1);
assert(bind_map->push_ranges[0].set ==
ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
assert(buffers[0].bo ==
cmd_buffer->device->dynamic_state_pool.block_pool.bo);
c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
c.ConstantBody.Buffer[0].bo = NULL;
c.ConstantBody.Buffer[0].offset = buffers[0].offset;
}
assert(bind_map->push_ranges[1].length == 0);
assert(bind_map->push_ranges[2].length == 0);
assert(bind_map->push_ranges[3].length == 0);
#endif
}
}
}
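
The surviving loop packs one (buffer, read length) pair per non-empty push range, counted in 32-byte registers. A toy standalone model of that packing; the ranges and base addresses below are hypothetical:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of filling 3DSTATE_CONSTANT_XS: each non-empty push range
 * becomes an (address, read length) pair in 32-byte register units. */
struct push_range { uint32_t start, length; };

int main(void)
{
   const struct push_range ranges[4] = { {0, 4}, {2, 8}, {0, 0}, {0, 0} };
   const uint64_t buffers[4] = { 0x100000, 0x200000, 0, 0 };

   for (unsigned i = 0; i < 4; i++) {
      if (ranges[i].length == 0)
         continue;   /* the driver only sees non-empty ranges here */
      uint64_t addr = buffers[i] + (uint64_t)ranges[i].start * 32;
      printf("slot %u: %" PRIu32 " regs at 0x%" PRIx64 "\n",
             i, ranges[i].length, addr);
   }
   return 0;
}
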
@ -3471,10 +3251,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
if (!(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) &&
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) &&
#if GFX_VER <= 7
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) &&
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) &&
#endif
!BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT))
return;
@ -3488,10 +3264,6 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
struct GENX(3DSTATE_CLIP) clip = {
GENX(3DSTATE_CLIP_header),
#if GFX_VER <= 7
.FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
.CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
#endif
.ViewportXYClipTestEnable = xy_clip_test_enable,
};
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
@ -3515,7 +3287,7 @@ cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
anv_batch_emit_merge(&cmd_buffer->batch, dwords,
pipeline->gfx7.clip);
pipeline->gfx8.clip);
}
static void
@ -3551,12 +3323,10 @@ cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
.XMaxClipGuardband = 1.0f,
.YMinClipGuardband = -1.0f,
.YMaxClipGuardband = 1.0f,
#if GFX_VER >= 8
.XMinViewPort = vp->x,
.XMaxViewPort = vp->x + vp->width - 1,
.YMinViewPort = MIN2(vp->y, vp->y + vp->height),
.YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
#endif
};
const uint32_t fb_size_max = 1 << 14;
@ -3753,12 +3523,6 @@ cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->vk.dynamic_graphics_state;
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
#if GFX_VER == 7
# define streamout_state_dw pipeline->gfx7.streamout_state
#else
# define streamout_state_dw pipeline->gfx8.streamout_state
#endif
uint32_t dwords[GENX(3DSTATE_STREAMOUT_length)];
struct GENX(3DSTATE_STREAMOUT) so = {
@ -3766,7 +3530,7 @@ cmd_buffer_emit_streamout(struct anv_cmd_buffer *cmd_buffer)
.RenderingDisable = dyn->rs.rasterizer_discard_enable,
};
GENX(3DSTATE_STREAMOUT_pack)(NULL, dwords, &so);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, streamout_state_dw);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.streamout_state);
}
void
@ -3811,21 +3575,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
uint32_t stride = dyn->vi_binding_strides[vb];
UNUSED uint32_t size = cmd_buffer->state.vertex_bindings[vb].size;
#if GFX_VER <= 7
bool per_instance = pipeline->vb[vb].instanced;
uint32_t divisor = pipeline->vb[vb].instance_divisor *
pipeline->instance_multiplier;
#endif
state = (struct GENX(VERTEX_BUFFER_STATE)) {
.VertexBufferIndex = vb,
.MOCS = anv_mocs(cmd_buffer->device, buffer->address.bo,
ISL_SURF_USAGE_VERTEX_BUFFER_BIT),
#if GFX_VER <= 7
.BufferAccessType = per_instance ? INSTANCEDATA : VERTEXDATA,
.InstanceDataStepRate = per_instance ? divisor : 1,
#endif
.AddressModifyEnable = true,
.BufferPitch = stride,
.BufferStartingAddress = anv_address_add(buffer->address, offset),
@ -3834,16 +3588,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
.L3BypassDisable = true,
#endif
#if GFX_VER >= 8
.BufferSize = size,
#else
/* XXX: to handle dynamic offset for older gens we might want
* to modify EndAddress, but there are issues when doing so:
*
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7439
*/
.EndAddress = anv_address_add(buffer->address, buffer->vk.size - 1),
#endif
};
} else {
state = (struct GENX(VERTEX_BUFFER_STATE)) {
@ -3874,9 +3619,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
!cmd_buffer->state.push_constants_dirty)
return;
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) ||
(GFX_VER == 7 && (cmd_buffer->state.gfx.dirty &
ANV_CMD_DIRTY_PIPELINE))) {
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) {
/* Wa_16011411144:
*
* SW must insert a PIPE_CONTROL cmd before and after the
@ -3907,20 +3650,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
sob.MOCS = anv_mocs(cmd_buffer->device, xfb->buffer->address.bo, 0);
sob.SurfaceBaseAddress = anv_address_add(xfb->buffer->address,
xfb->offset);
#if GFX_VER >= 8
sob.SOBufferEnable = true;
sob.StreamOffsetWriteEnable = false;
/* Size is in DWords - 1 */
sob.SurfaceSize = DIV_ROUND_UP(xfb->size, 4) - 1;
#else
/* We don't have SOBufferEnable in 3DSTATE_SO_BUFFER on Gfx7 so
* we trust in SurfaceEndAddress = SurfaceBaseAddress = 0 (the
* default for an empty SO_BUFFER packet) to disable them.
*/
sob.SurfacePitch = pipeline->gfx7.xfb_bo_pitch[idx];
sob.SurfaceEndAddress = anv_address_add(xfb->buffer->address,
xfb->offset + xfb->size);
#endif
} else {
sob.MOCS = anv_mocs(cmd_buffer->device, NULL, 0);
}
@ -3950,28 +3683,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer_alloc_push_constants(cmd_buffer);
}
#if GFX_VER <= 7
if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT ||
cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) {
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
*
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
* stall needs to be sent just prior to any 3DSTATE_VS,
* 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
* 3DSTATE_BINDING_TABLE_POINTER_VS,
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one
* PIPE_CONTROL needs to be sent before any combination of VS
* associated 3DSTATE."
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DepthStallEnable = true;
pc.PostSyncOperation = WriteImmediateData;
pc.Address = cmd_buffer->device->workaround_address;
anv_debug_dump_pc(pc);
}
}
#endif
/* Render targets live in the same binding table as fragment descriptors */
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS)
descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
@ -4442,7 +4153,6 @@ void genX(CmdDrawIndirectByteCountEXT)(
uint32_t counterOffset,
uint32_t vertexStride)
{
#if GFX_VERx10 >= 75
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer);
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
@ -4503,7 +4213,6 @@ void genX(CmdDrawIndirectByteCountEXT)(
trace_intel_end_draw_indirect_byte_count(&cmd_buffer->trace,
instanceCount * pipeline->instance_multiplier);
#endif /* GFX_VERx10 >= 75 */
}
static void
@ -4521,13 +4230,8 @@ load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
struct mi_value instance_count = mi_mem32(anv_address_add(addr, 4));
if (pipeline->instance_multiplier > 1) {
#if GFX_VERx10 >= 75
instance_count = mi_imul_imm(&b, instance_count,
pipeline->instance_multiplier);
#else
anv_finishme("Multiview + indirect draw requires MI_MATH; "
"MI_MATH is not supported on Ivy Bridge");
#endif
}
mi_store(&b, mi_reg32(GFX7_3DPRIM_INSTANCE_COUNT), instance_count);
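
The removed anv_finishme() branch existed because Ivy Bridge lacks MI_MATH, so the instance count could not be scaled on the GPU. A host-side sketch of the equivalent fix-up, with VkDrawIndirectCommand mirrored as a local struct:

#include <stdint.h>
#include <stdio.h>

/* Mirrors VkDrawIndirectCommand (four consecutive uint32_t). */
struct draw_indirect {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

int main(void)
{
   /* With multiview, each view is drawn as an extra instance, so the
    * indirect parameters must be scaled before 3DPRIMITIVE consumes
    * them; HSW+ does this on the GPU with mi_imul_imm(). */
   struct draw_indirect cmd = { 36, 2, 0, 0 };
   uint32_t instance_multiplier = 4;  /* e.g. 4 views */

   cmd.instance_count *= instance_multiplier;
   printf("instances to draw: %u\n", cmd.instance_count); /* 8 */
   return 0;
}
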
@ -4673,10 +4377,8 @@ prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
struct mi_value ret = mi_imm(0);
if (cmd_buffer->state.conditional_render_enabled) {
#if GFX_VERx10 >= 75
ret = mi_new_gpr(b);
mi_store(b, mi_value_ref(b, ret), mi_mem32(count_address));
#endif
} else {
/* Upload the current draw count from the draw parameters buffer to
* MI_PREDICATE_SRC0.
@ -4718,7 +4420,6 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
}
}
#if GFX_VERx10 >= 75
static void
emit_draw_count_predicate_with_conditional_render(
struct anv_cmd_buffer *cmd_buffer,
@ -4729,24 +4430,8 @@ emit_draw_count_predicate_with_conditional_render(
struct mi_value pred = mi_ult(b, mi_imm(draw_index), max);
pred = mi_iand(b, pred, mi_reg64(ANV_PREDICATE_RESULT_REG));
#if GFX_VER >= 8
mi_store(b, mi_reg32(MI_PREDICATE_RESULT), pred);
#else
/* MI_PREDICATE_RESULT is not whitelisted in i915 command parser
* so we emit MI_PREDICATE to set it.
*/
mi_store(b, mi_reg64(MI_PREDICATE_SRC0), pred);
mi_store(b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
#endif
}
#endif
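
Boiled down, the helper above enables draw i exactly when i < maxDrawCount and the conditional-rendering predicate passed. A minimal CPU model:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* CPU model of emit_draw_count_predicate_with_conditional_render():
 * pred = (draw_index < max_draw_count) & conditional_result. */
static bool draw_enabled(uint32_t draw_index, uint32_t max_draw_count,
                         bool conditional_result)
{
   bool pred = draw_index < max_draw_count; /* mi_ult() */
   return pred && conditional_result;       /* mi_iand() */
}

int main(void)
{
   for (uint32_t i = 0; i < 4; i++)
      printf("draw %u: %d\n", i, draw_enabled(i, 3, true));
   return 0;
}
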
static void
emit_draw_count_predicate_cond(struct anv_cmd_buffer *cmd_buffer,
@ -4754,16 +4439,12 @@ emit_draw_count_predicate_cond(struct anv_cmd_buffer *cmd_buffer,
uint32_t draw_index,
struct mi_value max)
{
#if GFX_VERx10 >= 75
if (cmd_buffer->state.conditional_render_enabled) {
emit_draw_count_predicate_with_conditional_render(
cmd_buffer, b, draw_index, mi_value_ref(b, max));
} else {
emit_draw_count_predicate(cmd_buffer, b, draw_index);
}
#else
emit_draw_count_predicate(cmd_buffer, b, draw_index);
#endif
}
void genX(CmdDrawIndirectCount)(
@ -5257,24 +4938,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
#if GFX_VER == 7
static VkResult
verify_cmd_parser(const struct anv_device *device,
int required_version,
const char *function)
{
if (device->physical->cmd_parser_version < required_version) {
return vk_errorf(device->physical, VK_ERROR_FEATURE_NOT_PRESENT,
"cmd parser version %d is required for %s",
required_version, function);
} else {
return VK_SUCCESS;
}
}
#endif
static void
anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer,
uint32_t baseGroupX,
@ -5363,8 +5026,7 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
uint32_t groupCountX, uint32_t groupCountY,
uint32_t groupCountZ)
{
bool predicate = (GFX_VER <= 7 && indirect) ||
cmd_buffer->state.conditional_render_enabled;
bool predicate = cmd_buffer->state.conditional_render_enabled;
const struct intel_device_info *devinfo = pipeline->base.device->info;
const struct brw_cs_dispatch_info dispatch =
@ -5479,15 +5141,6 @@ void genX(CmdDispatchIndirect)(
anv_cmd_buffer_push_base_group_id(cmd_buffer, 0, 0, 0);
#if GFX_VER == 7
/* Linux 4.4 added command parser version 5 which allows the GPGPU
* indirect dispatch registers to be written.
*/
if (verify_cmd_parser(cmd_buffer->device, 5,
"vkCmdDispatchIndirect") != VK_SUCCESS)
return;
#endif
anv_measure_snapshot(cmd_buffer,
INTEL_SNAPSHOT_COMPUTE,
"compute indirect",
@ -5514,56 +5167,8 @@ void genX(CmdDispatchIndirect)(
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMY), size_y);
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), size_z);
#if GFX_VER <= 7
/* predicate = (compute_dispatch_indirect_x_size == 0); */
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), size_x);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
/* predicate |= (compute_dispatch_indirect_y_size == 0); */
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0), size_y);
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_OR;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
/* predicate |= (compute_dispatch_indirect_z_size == 0); */
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0), size_z);
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_OR;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
/* predicate = !predicate; */
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
mip.CombineOperation = COMBINE_OR;
mip.CompareOperation = COMPARE_FALSE;
}
#if GFX_VERx10 == 75
if (cmd_buffer->state.conditional_render_enabled) {
/* predicate &= !(conditional_rendering_predicate == 0); */
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0),
mi_reg32(ANV_PREDICATE_RESULT_REG));
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
mip.CombineOperation = COMBINE_AND;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
}
#endif
#else /* GFX_VER > 7 */
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
#endif
emit_cs_walker(cmd_buffer, pipeline, true, prog_data, 0, 0, 0);
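
The deleted chain predicates away empty indirect dispatches. Following the driver's own inline comments, it reduces to the boolean sketch below; the MI_PREDICATE load/combine fields are modeled, not emitted:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* CPU model of the deleted gfx7 predicate chain for
 * vkCmdDispatchIndirect(): the walker runs only when all three group
 * counts are non-zero (and, on HSW, conditional rendering passes). */
static bool walker_enabled(uint32_t x, uint32_t y, uint32_t z,
                           bool conditional_result)
{
   bool pred = (x == 0);        /* LOAD + SRCS_EQUAL against 0 */
   pred |= (y == 0);            /* COMBINE_OR */
   pred |= (z == 0);            /* COMBINE_OR */
   pred = !pred;                /* LOADINV, per the inline comment */
   return pred && conditional_result; /* HSW-only AND */
}

int main(void)
{
   printf("%d\n", walker_enabled(8, 8, 1, true)); /* 1: runs */
   printf("%d\n", walker_enabled(8, 0, 1, true)); /* 0: skipped */
   return 0;
}
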
@ -6900,16 +6505,6 @@ void genX(CmdBeginRendering)(
gfx->dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
/* Our implementation of VK_KHR_multiview uses instancing to draw the
* different views. If the client asks for instancing, we need to use the
* Instance Data Step Rate to ensure that we repeat the client's
* per-instance data once for each view. Since this bit is in
* VERTEX_BUFFER_STATE on gfx7, we need to dirty vertex buffers at the top
* of each subpass.
*/
if (GFX_VER == 7)
gfx->vb_dirty |= ~0;
/* It is possible to start a render pass with an old pipeline. Because the
* render pass and subpass index are both baked into the pipeline, this is
* highly unlikely. In order to do so, it requires that you have a render
@ -7156,49 +6751,12 @@ void genX(CmdEndRendering)(
VK_IMAGE_ASPECT_STENCIL_BIT);
}
#if GFX_VER == 7
/* On gfx7, we have to store a texturable version of the stencil buffer in
* a shadow whenever VK_IMAGE_USAGE_SAMPLED_BIT is set and copy back and
* forth at strategic points. Stencil writes are only allowed in following
* layouts:
*
* - VK_IMAGE_LAYOUT_GENERAL
* - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
* - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT
*
* For general, we have no nice opportunity to transition so we do the copy
* to the shadow unconditionally at the end of the subpass. For transfer
* destinations, we can update it as part of the transfer op. For the other
* layouts, we delay the copy until a transition into some other layout.
*/
if (gfx->stencil_att.iview != NULL) {
const struct anv_image_view *iview = gfx->stencil_att.iview;
const struct anv_image *image = iview->image;
const uint32_t plane =
anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
if (anv_surface_is_valid(&image->planes[plane].shadow_surface) &&
(gfx->stencil_att.layout == VK_IMAGE_LAYOUT_GENERAL ||
gfx->stencil_att.layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT)) {
anv_image_copy_to_shadow(cmd_buffer, image,
VK_IMAGE_ASPECT_STENCIL_BIT,
iview->planes[plane].isl.base_level, 1,
iview->planes[plane].isl.base_array_layer,
layers);
}
}
#endif
anv_cmd_buffer_reset_rendering(cmd_buffer);
}
void
genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VERx10 >= 75
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
@ -7211,10 +6769,8 @@ genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_SRCS_EQUAL;
}
#endif
}
#if GFX_VERx10 >= 75
void genX(CmdBeginConditionalRenderingEXT)(
VkCommandBuffer commandBuffer,
const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
@ -7265,7 +6821,6 @@ void genX(CmdEndConditionalRenderingEXT)(
cmd_state->conditional_render_enabled = false;
}
#endif
/* Set of stage bits which are pipelined, i.e. they get queued
* by the command streamer for later execution.
@ -7349,7 +6904,6 @@ void genX(CmdWaitEvents2)(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
#if GFX_VER >= 8
for (uint32_t i = 0; i < eventCount; i++) {
ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
@ -7363,9 +6917,6 @@ void genX(CmdWaitEvents2)(
};
}
}
#else
anv_finishme("Implement events on gfx7");
#endif
cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
}

View file

@ -55,13 +55,11 @@ static void
emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
const struct intel_l3_config *l3_config)
{
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.InstancingEnable = false;
vfi.VertexElementIndex = 0;
}
anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
#endif
/* Disable all shader stages */
anv_batch_emit(batch, GENX(3DSTATE_VS), vs);
@ -75,10 +73,8 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
sbe.VertexURBEntryReadOffset = 1;
sbe.NumberofSFOutputAttributes = 1;
sbe.VertexURBEntryReadLength = 1;
#if GFX_VER >= 8
sbe.ForceVertexURBEntryReadLength = true;
sbe.ForceVertexURBEntryReadOffset = true;
#endif
#if GFX_VER >= 9
for (unsigned i = 0; i < 32; i++)
@ -100,11 +96,9 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
anv_batch_emit(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif
#if GFX_VER >= 8
anv_batch_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
topo.PrimitiveTopologyType = _3DPRIM_POINTLIST;
}
#endif
anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vf) {
vf.StatisticsEnable = false;
@ -141,11 +135,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
#if GFX_VER >= 12
.L3BypassDisable = true,
#endif
#if (GFX_VER >= 8)
.BufferSize = size,
#else
.EndAddress = anv_address_add(src, size - 1),
#endif
});
dw = anv_batch_emitn(batch, 3, GENX(3DSTATE_VERTEX_ELEMENTS));
@ -172,15 +162,9 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
sob.MOCS = anv_mocs(device, dst.bo, 0),
sob.SurfaceBaseAddress = dst;
#if GFX_VER >= 8
sob.SOBufferEnable = true;
sob.SurfaceSize = size / 4 - 1;
#else
sob.SurfacePitch = bs;
sob.SurfaceEndAddress = anv_address_add(dst, size);
#endif
#if GFX_VER >= 8
/* As SOL writes out data, it updates the SO_WRITE_OFFSET registers with
* the end position of the stream. We need to reset this value to 0 at
* the beginning of the run or else SOL will start at the offset from
@ -188,17 +172,8 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
*/
sob.StreamOffsetWriteEnable = true;
sob.StreamOffset = 0;
#endif
}
#if GFX_VER <= 7
/* The hardware can do this for us on BDW+ (see above) */
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), load) {
load.RegisterOffset = GENX(SO_WRITE_OFFSET0_num);
load.DataDWord = 0;
}
#endif
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
.StreamtoBufferSelects0 = (1 << 0),
.NumEntries0 = 1);
@ -216,11 +191,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
so.RenderingDisable = true;
so.Stream0VertexReadOffset = 0;
so.Stream0VertexReadLength = DIV_ROUND_UP(32, 64);
#if GFX_VER >= 8
so.Buffer0SurfacePitch = bs;
#else
so.SOBufferEnable0 = true;
#endif
}
anv_batch_emit(batch, GENX(3DPRIMITIVE), prim) {

View file

@ -177,7 +177,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
#if GFX_VER >= 8
/* On Broadwell and later, we have a separate VF_INSTANCING packet
* that controls instancing. On Haswell and prior, that's part of
* VERTEX_BUFFER_STATE which we emit later.
@ -191,7 +190,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
vfi.VertexElementIndex = slot;
vfi.InstanceDataStepRate = per_instance ? divisor : 1;
}
#endif
}
const uint32_t id_slot = elem_count;
@ -215,24 +213,16 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
.SourceElementFormat = ISL_FORMAT_R32G32_UINT,
.Component0Control = base_ctrl,
.Component1Control = base_ctrl,
#if GFX_VER >= 8
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
#else
.Component2Control = VFCOMP_STORE_VID,
.Component3Control = VFCOMP_STORE_IID,
#endif
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
#if GFX_VER >= 8
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.VertexElementIndex = id_slot;
}
#endif
}
#if GFX_VER >= 8
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
sgvs.VertexIDComponentNumber = 2;
@ -241,7 +231,6 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
sgvs.InstanceIDComponentNumber = 3;
sgvs.InstanceIDElementOffset = id_slot;
}
#endif
const uint32_t drawid_slot = elem_count + needs_svgs_elem;
if (vs_prog_data->uses_drawid) {
@ -258,11 +247,9 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline,
&p[1 + drawid_slot * 2],
&element);
#if GFX_VER >= 8
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.VertexElementIndex = drawid_slot;
}
#endif
}
}
@ -285,22 +272,6 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
entry_size, entries, start, deref_block_size,
&constrained);
#if GFX_VERx10 == 70
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
*
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
* needs to be sent before any combination of VS associated 3DSTATE."
*/
anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
pc.DepthStallEnable = true;
pc.PostSyncOperation = WriteImmediateData;
pc.Address = device->workaround_address;
}
#endif
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
@ -396,9 +367,7 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline))
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
@ -422,14 +391,10 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
#if GFX_VER >= 8
/* On Broadwell, they broke 3DSTATE_SBE into two packets */
struct GENX(3DSTATE_SBE_SWIZ) swiz = {
GENX(3DSTATE_SBE_SWIZ_header),
};
#else
# define swiz sbe
#endif
if (anv_pipeline_is_primitive(pipeline)) {
const struct brw_vue_map *fs_input_map =
@ -497,10 +462,8 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
sbe.ForceVertexURBEntryReadOffset = true;
sbe.ForceVertexURBEntryReadLength = true;
#endif
} else {
assert(anv_pipeline_is_mesh(pipeline));
#if GFX_VERx10 >= 125
@ -554,12 +517,10 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
return;
GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);
#if GFX_VER >= 8
dw = anv_batch_emit_dwords(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ_length));
if (!dw)
return;
GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
#endif
}
/** Returns the final polygon mode for rasterization
@ -639,32 +600,6 @@ genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
}
}
uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
VkPolygonMode raster_mode)
{
#if GFX_VER <= 7
if (raster_mode == VK_POLYGON_MODE_LINE) {
switch (pipeline->line_mode) {
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
return MSRASTMODE_ON_PATTERN;
case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
return MSRASTMODE_OFF_PIXEL;
default:
unreachable("Unsupported line rasterization mode");
}
} else {
return pipeline->rasterization_samples > 1 ?
MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
}
#else
unreachable("Only on gen7");
#endif
}
const uint32_t genX(vk_to_intel_cullmode)[] = {
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
@ -690,7 +625,6 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
uint32_t *api_mode,
bool *msaa_rasterization_enable)
{
#if GFX_VER >= 8
if (raster_mode == VK_POLYGON_MODE_LINE) {
/* Unfortunately, configuring our line rasterization hardware on gfx8
* and later is rather painful. Instead of giving us bits to tell the
@ -731,9 +665,6 @@ genX(rasterization_mode)(VkPolygonMode raster_mode,
*api_mode = DX100;
*msaa_rasterization_enable = true;
}
#else
unreachable("Invalid call");
#endif
}
static void
@ -770,10 +701,6 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
unreachable("Invalid provoking vertex mode");
}
#if GFX_VERx10 == 75
sf.LineStippleEnable = rs->line.stipple.enable;
#endif
#if GFX_VER >= 12
sf.DerefBlockSize = urb_deref_block_size;
#endif
@ -796,25 +723,19 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
sf.PointWidth = 1.0;
}
#if GFX_VER >= 8
struct GENX(3DSTATE_RASTER) raster = {
GENX(3DSTATE_RASTER_header),
};
#else
# define raster sf
#endif
/* For details on 3DSTATE_RASTER multisample state, see the BSpec table
* "Multisample Modes State".
*/
#if GFX_VER >= 8
/* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
* computations. If we ever set this bit to a different value, they will
* need to be updated accordingly.
*/
raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
raster.ForceMultisampling = false;
#endif
raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
@ -824,7 +745,7 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
/* GFX9+ splits ViewportZClipTestEnable into near and far enable bits */
raster.ViewportZFarClipTestEnable = pipeline->depth_clip_enable;
raster.ViewportZNearClipTestEnable = pipeline->depth_clip_enable;
#elif GFX_VER >= 8
#elif GFX_VER == 8
raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif
@ -833,42 +754,18 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
rs->conservative_mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
#endif
#if GFX_VER == 7
/* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
* can get the depth offsets correct.
*/
if (rp != NULL &&
rp->depth_attachment_format != VK_FORMAT_UNDEFINED) {
assert(vk_format_has_depth(rp->depth_attachment_format));
enum isl_format isl_format =
anv_get_isl_format(pipeline->base.device->info,
rp->depth_attachment_format,
VK_IMAGE_ASPECT_DEPTH_BIT,
VK_IMAGE_TILING_OPTIMAL);
sf.DepthBufferSurfaceFormat =
isl_format_get_depth_format(isl_format, false);
}
#endif
#if GFX_VER >= 8
GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
#else
# undef raster
GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
#endif
}
static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
const struct vk_multisample_state *ms)
{
#if GFX_VER >= 8
/* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
genX(emit_multisample)(&pipeline->base.batch,
pipeline->rasterization_samples,
NULL);
#endif
/* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits
@ -876,11 +773,7 @@ emit_ms_state(struct anv_graphics_pipeline *pipeline,
*
* 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
*/
#if GFX_VER >= 8
uint32_t sample_mask = 0xffff;
#else
uint32_t sample_mask = 0xff;
#endif
if (ms != NULL)
sample_mask &= ms->sample_mask;
@ -1005,10 +898,8 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
struct GENX(BLEND_STATE) blend_state = {
#if GFX_VER >= 8
.AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
.AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
};
uint32_t surface_count = 0;
@ -1018,15 +909,11 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
surface_count = map->surface_count;
}
const struct intel_device_info *devinfo = pipeline->base.device->info;
uint32_t *blend_state_start = devinfo->ver >= 8 ?
pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
uint32_t *blend_state_start = pipeline->gfx8.blend_state;
uint32_t *state_pos = blend_state_start;
state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
for (unsigned i = 0; i < surface_count; i++) {
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
@ -1046,10 +933,6 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
&cb->attachments[binding->index];
struct GENX(BLEND_STATE_ENTRY) entry = {
#if GFX_VER < 8
.AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
.AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
.LogicOpEnable = cb->logic_op_enable,
/* Vulkan specification 1.2.168, VkLogicOp:
@ -1082,11 +965,7 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
if (a->src_color_blend_factor != a->src_alpha_blend_factor ||
a->dst_color_blend_factor != a->dst_alpha_blend_factor ||
a->color_blend_op != a->alpha_blend_op) {
#if GFX_VER >= 8
blend_state.IndependentAlphaBlendEnable = true;
#else
entry.IndependentAlphaBlendEnable = true;
#endif
}
/* The Dual Source Blending documentation says:
@ -1129,13 +1008,10 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
}
GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GFX_VER >= 8
if (i == 0)
bs0 = entry;
#endif
}
#if GFX_VER >= 8
struct GENX(3DSTATE_PS_BLEND) blend = {
GENX(3DSTATE_PS_BLEND_header),
};
@ -1149,7 +1025,6 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;
GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
#endif
GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
}
@ -1173,9 +1048,7 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
clip.APIMode = pipeline->negative_one_to_one ? APIMODE_OGL : APIMODE_D3D;
clip.GuardbandClipTestEnable = true;
#if GFX_VER >= 8
clip.VertexSubPixelPrecisionSelect = _8Bit;
#endif
clip.ClipMode = CLIPMODE_NORMAL;
switch (rs->provoking_vertex) {
@ -1225,10 +1098,6 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
clip.ForceZeroRTAIndexEnable =
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
#if GFX_VER == 7
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
#endif
} else if (anv_pipeline_is_mesh(pipeline)) {
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
if (vp && vp->viewport_count > 0 &&
@ -1237,16 +1106,10 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
}
}
#if GFX_VER == 7
clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
clip.CullMode = genX(vk_to_intel_cullmode)[rs->cull_mode];
clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#else
clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
wm_prog_data->uses_nonperspective_interp_modes : 0;
#endif
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx8.clip, &clip);
#if GFX_VERx10 >= 125
if (anv_pipeline_is_mesh(pipeline)) {
@ -1388,12 +1251,6 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
}
}
#if GFX_VER == 7
# define streamout_state_dw pipeline->gfx7.streamout_state
#else
# define streamout_state_dw pipeline->gfx8.streamout_state
#endif
struct GENX(3DSTATE_STREAMOUT) so = {
GENX(3DSTATE_STREAMOUT_header),
};
@ -1417,28 +1274,10 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
so.RenderStreamSelect = rs->rasterization_stream;
#if GFX_VER >= 8
so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#else
pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
/* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
* is a bit inconvenient because we don't know what buffers will
* actually be enabled until draw time. We do our best here by
* setting them based on buffers_written and we disable them
* as-needed at draw time by setting EndAddress = BaseAddress.
*/
so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
#endif
int urb_entry_read_offset = 0;
int urb_entry_read_length =
@ -1458,7 +1297,7 @@ emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
so.Stream3VertexReadLength = urb_entry_read_length - 1;
}
GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
GENX(3DSTATE_STREAMOUT_pack)(NULL, pipeline->gfx8.streamout_state, &so);
}
static uint32_t
@ -1525,10 +1364,8 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
vs.Enable = true;
vs.StatisticsEnable = true;
vs.KernelStartPointer = vs_bin->kernel.offset;
#if GFX_VER >= 8
vs.SIMD8DispatchEnable =
vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif
assert(!vs_prog_data->base.base.use_alt_mode);
#if GFX_VER < 11
@ -1574,12 +1411,10 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
vs.DispatchGRFStartRegisterForURBData =
vs_prog_data->base.base.dispatch_grf_start_reg;
#if GFX_VER >= 8
vs.UserClipDistanceClipTestEnableBitmask =
vs_prog_data->base.clip_distance_mask;
vs.UserClipDistanceCullTestEnableBitmask =
vs_prog_data->base.cull_distance_mask;
#endif
#if GFX_VERx10 >= 125
vs.ScratchSpaceBuffer =
@ -1714,7 +1549,6 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
ds.DispatchGRFStartRegisterForURBData =
tes_prog_data->base.base.dispatch_grf_start_reg;
#if GFX_VER >= 8
#if GFX_VER < 11
ds.DispatchMode =
tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
@ -1729,7 +1563,6 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
tes_prog_data->base.clip_distance_mask;
ds.UserClipDistanceCullTestEnableBitmask =
tes_prog_data->base.cull_distance_mask;
#endif
#if GFX_VER >= 12
ds.PrimitiveIDNotRequired = !tes_prog_data->include_primitive_id;
@ -1787,24 +1620,20 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
gs.ReorderMode = TRAILING;
#if GFX_VER >= 8
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
gs_prog_data->static_vertex_count : 0;
#endif
gs.VertexURBEntryReadOffset = 0;
gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
gs.DispatchGRFStartRegisterForURBData =
gs_prog_data->base.base.dispatch_grf_start_reg;
#if GFX_VER >= 8
gs.UserClipDistanceClipTestEnableBitmask =
gs_prog_data->base.clip_distance_mask;
gs.UserClipDistanceCullTestEnableBitmask =
gs_prog_data->base.cull_distance_mask;
#endif
#if GFX_VERx10 >= 125
gs.ScratchSpaceBuffer =
@ -1844,7 +1673,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
wm.EarlyDepthStencilControl = EDSC_NORMAL;
}
#if GFX_VER >= 8
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
* doesn't take into account KillPixels when no depth or stencil
* writes are enabled. In order for occlusion queries to work
@ -1864,50 +1692,14 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
pipeline->force_fragment_thread_dispatch =
wm_prog_data->has_side_effects ||
wm_prog_data->uses_kill;
#endif
wm.BarycentricInterpolationMode =
wm_prog_data->barycentric_interp_modes;
#if GFX_VER < 8
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
/* If the subpass has a depth or stencil self-dependency, then we
* need to force the hardware to do the depth/stencil write *after*
* fragment shader execution. Otherwise, the writes may hit memory
* before we get around to fetching from the input attachment and we
* may get the depth or stencil value from the current draw rather
* than the previous one.
*/
wm.PixelShaderKillsPixel = rp->depth_self_dependency ||
rp->stencil_self_dependency ||
wm_prog_data->uses_kill;
pipeline->force_fragment_thread_dispatch =
wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
wm_prog_data->has_side_effects ||
wm.PixelShaderKillsPixel;
if (ms != NULL && ms->rasterization_samples > 1) {
if (wm_prog_data->persample_dispatch) {
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
} else {
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
}
} else {
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
}
#endif
wm.LineStippleEnable = rs->line.stipple.enable;
}
const struct intel_device_info *devinfo = pipeline->base.device->info;
uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
}
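The gen8+ rule that survives here is compact enough to state as a predicate. A sketch (hypothetical helper; the condition and rationale are the ones in the comment and code above):

/* Gfx8+ hardware computes ThreadDispatchEnable itself but ignores kills
 * when no depth/stencil writes are enabled, which would break occlusion
 * queries, so dispatch is forced for shaders that discard or have side
 * effects. */
static bool
must_force_fragment_dispatch(const struct brw_wm_prog_data *wm_prog_data)
{
   return wm_prog_data->has_side_effects || wm_prog_data->uses_kill;
}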
static void
@ -1922,40 +1714,12 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER == 7
/* Even if no fragments are ever dispatched, gfx7 hardware hangs if
* we don't at least set the maximum number of threads.
*/
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
}
return;
}
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
#if GFX_VER < 8
/* The hardware wedges if you have this bit set but don't turn on any dual
* source blend factors.
*/
bool dual_src_blend = false;
if (wm_prog_data->dual_src_blend && cb) {
for (uint32_t i = 0; i < cb->attachment_count; i++) {
const struct vk_color_blend_attachment_state *a =
&cb->attachments[i];
if (a->blend_enable &&
(is_dual_src_blend_factor(a->src_color_blend_factor) ||
is_dual_src_blend_factor(a->dst_color_blend_factor) ||
is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
dual_src_blend = true;
break;
}
}
}
#endif
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
@ -1983,8 +1747,7 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
ps.SingleProgramFlow = false;
ps.VectorMaskEnable = GFX_VER >= 8 &&
wm_prog_data->uses_vmask;
ps.VectorMaskEnable = wm_prog_data->uses_vmask;
/* Wa_1606682166 */
ps.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(fs_bin);
ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
@ -1992,25 +1755,9 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
wm_prog_data->base.ubo_ranges[0].length;
ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
POSOFFSET_SAMPLE: POSOFFSET_NONE;
#if GFX_VER < 8
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
ps.DualSourceBlendEnable = dual_src_blend;
#endif
#if GFX_VERx10 == 75
/* Haswell requires the sample mask to be set in this packet as well
* as in 3DSTATE_SAMPLE_MASK; the values should match.
*/
ps.SampleMask = 0xff;
#endif
#if GFX_VER >= 8
ps.MaximumNumberofThreadsPerPSD =
devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
#else
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
@ -2030,7 +1777,6 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
}
}
#if GFX_VER >= 8
static void
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
const struct vk_rasterization_state *rs,
@ -2093,7 +1839,6 @@ emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
#endif
}
}
#endif
static void
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
@ -2323,25 +2068,6 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
emit_3dstate_primitive_replication(pipeline, state->rp);
#endif
#if 0
/* From gfx7_vs_state.c */
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
*
* "Note: Because of corruption in IVB:GT2, software needs to flush the
* whole fixed function pipeline when the GS enable changes value in
* the 3DSTATE_GS."
*
* The hardware architects have clarified that in this context "flush the
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
if (device->info->platform == INTEL_PLATFORM_IVB)
gfx7_emit_vs_workaround_flush(brw);
#endif
if (anv_pipeline_is_primitive(pipeline)) {
emit_vertex_input(pipeline, state->vi);
@ -2379,9 +2105,7 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
emit_3dstate_wm(pipeline, state->ia, state->rs,
state->ms, state->cb, state->rp);
emit_3dstate_ps(pipeline, state->ms, state->cb);
#if GFX_VER >= 8
emit_3dstate_ps_extra(pipeline, state->rs, state->rp);
#endif
}
#if GFX_VERx10 >= 125
@ -2424,43 +2148,25 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
const struct anv_shader_bin *cs_bin = pipeline->cs;
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GFX_VER > 7
vfe.StackSize = 0;
#else
vfe.GPGPUMode = true;
#endif
vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total - 1;
vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
vfe.NumberofURBEntries = 2;
#if GFX_VER < 11
vfe.ResetGatewayTimer = true;
#endif
#if GFX_VER <= 8
#if GFX_VER == 8
vfe.BypassGatewayControl = true;
#endif
vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2;
vfe.URBEntryAllocationSize = 2;
vfe.CURBEAllocationSize = vfe_curbe_allocation;
if (cs_bin->prog_data->total_scratch) {
if (GFX_VER >= 8) {
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
vfe.PerThreadScratchSpace =
ffs(cs_bin->prog_data->total_scratch) - 11;
} else if (GFX_VERx10 == 75) {
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
*/
vfe.PerThreadScratchSpace =
ffs(cs_bin->prog_data->total_scratch) - 12;
} else {
/* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
*/
vfe.PerThreadScratchSpace =
cs_bin->prog_data->total_scratch / 1024 - 1;
}
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
vfe.PerThreadScratchSpace =
ffs(cs_bin->prog_data->total_scratch) - 11;
vfe.ScratchSpaceBasePointer =
get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
}
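The surviving Broadwell encoding is easy to sanity-check: for the power-of-two sizes the scratch allocator hands out, ffs() yields log2 + 1, so the field value is log2(total_scratch) - 10. A sketch under that power-of-two assumption (hypothetical helper mirroring the code above):

/* 1 KiB -> ffs(1 << 10) - 11 = 0, 2 KiB -> 1, ..., 2 MiB -> ffs(1 << 21) - 11 = 11 */
static uint32_t
encode_per_thread_scratch_bdw(uint32_t total_scratch)
{
   assert(total_scratch >= 1024 &&
          (total_scratch & (total_scratch - 1)) == 0); /* power of two */
   return ffs(total_scratch) - 11;
}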
@ -2481,14 +2187,10 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
.SharedLocalMemorySize =
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
#if GFX_VERx10 != 75
.ConstantURBEntryReadOffset = 0,
#endif
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GFX_VERx10 >= 75
.CrossThreadConstantDataReadLength =
cs_prog_data->push.cross_thread.regs,
#endif
#if GFX_VER >= 12
/* TODO: Check if we are missing workarounds and enable mid-thread
* preemption.


@ -39,7 +39,7 @@
* - GPR 15 for conditional rendering
*/
#define MI_BUILDER_NUM_ALLOC_GPRS 14
#define MI_BUILDER_CAN_WRITE_BATCH GFX_VER >= 8
#define MI_BUILDER_CAN_WRITE_BATCH true
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
#define __gen_get_batch_address(b, a) anv_batch_address(b, a)
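A quick check of the GPR budget behind MI_BUILDER_NUM_ALLOC_GPRS (assumption: the command streamer exposes sixteen GPRs, CS_GPR0..CS_GPR15, and the reservations in the truncated comment above cover the top two, GPR 15 being the conditional-rendering one shown):

/* Hypothetical sanity check, not part of the patch. */
#define ANV_HW_MI_GPRS  16 /* CS_GPR0..CS_GPR15 (assumption) */
#define ANV_MI_RESERVED  2 /* e.g. GPR 15 for conditional rendering */
STATIC_ASSERT(ANV_HW_MI_GPRS - ANV_MI_RESERVED == MI_BUILDER_NUM_ALLOC_GPRS);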
@ -67,12 +67,10 @@ VkResult genX(CreateQueryPool)(
{
ANV_FROM_HANDLE(anv_device, device, _device);
const struct anv_physical_device *pdevice = device->physical;
#if GFX_VER >= 8
const VkQueryPoolPerformanceCreateInfoKHR *perf_query_info = NULL;
struct intel_perf_counter_pass *counter_pass;
struct intel_perf_query_info **pass_query;
uint32_t n_passes = 0;
#endif
uint32_t data_offset = 0;
VK_MULTIALLOC(ma);
VkResult result;
@ -132,7 +130,6 @@ VkResult genX(CreateQueryPool)(
uint64s_per_slot += 2 * DIV_ROUND_UP(layout->size, sizeof(uint64_t));
break;
}
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
const struct intel_perf_query_field_layout *layout =
&pdevice->perf->query_layout;
@ -158,7 +155,6 @@ VkResult genX(CreateQueryPool)(
uint64s_per_slot *= n_passes;
break;
}
#endif
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
/* Query has two values: begin and end. */
uint64s_per_slot = 1 + 2;
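The arithmetic gives the slot layout directly: one availability qword plus a begin/end snapshot pair, i.e. 3 * sizeof(uint64_t) = 24 bytes before any stride rounding. An illustrative sketch (field names are hypothetical; the 1 + 2 split is the one above):

/* Slot layout implied by uint64s_per_slot = 1 + 2. */
struct primitives_generated_slot {
   uint64_t available; /* set when the query result is ready */
   uint64_t begin;     /* counter snapshot at vkCmdBeginQuery */
   uint64_t end;       /* counter snapshot at vkCmdEndQuery */
};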
@ -180,7 +176,6 @@ VkResult genX(CreateQueryPool)(
pool->data_offset = data_offset;
pool->snapshot_size = (pool->stride - data_offset) / 2;
}
#if GFX_VER >= 8
else if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
pool->pass_size = pool->stride / n_passes;
pool->data_offset = data_offset;
@ -198,7 +193,6 @@ VkResult genX(CreateQueryPool)(
perf_query_info->counterIndexCount,
pool->pass_query);
}
#endif
uint64_t size = pool->slots * (uint64_t)pool->stride;
result = anv_device_alloc_bo(device, "query-pool", size,
@ -209,7 +203,6 @@ VkResult genX(CreateQueryPool)(
if (result != VK_SUCCESS)
goto fail;
#if GFX_VER >= 8
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
for (uint32_t p = 0; p < pool->n_passes; p++) {
struct mi_builder b;
@ -225,7 +218,6 @@ VkResult genX(CreateQueryPool)(
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
}
}
#endif
*pQueryPool = anv_query_pool_to_handle(pool);
@ -252,7 +244,6 @@ void genX(DestroyQueryPool)(
vk_object_free(&device->vk, pAllocator, pool);
}
#if GFX_VER >= 8
/**
* VK_KHR_performance_query layout :
*
@ -350,7 +341,6 @@ khr_perf_query_ensure_relocs(struct anv_cmd_buffer *cmd_buffer)
return true;
}
#endif
/**
* VK_INTEL_performance_query layout :
@ -402,7 +392,6 @@ query_slot(struct anv_query_pool *pool, uint32_t query)
static bool
query_is_available(struct anv_query_pool *pool, uint32_t query)
{
#if GFX_VER >= 8
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
for (uint32_t p = 0; p < pool->n_passes; p++) {
volatile uint64_t *slot =
@ -412,7 +401,6 @@ query_is_available(struct anv_query_pool *pool, uint32_t query)
}
return true;
}
#endif
return *(volatile uint64_t *)query_slot(pool, query);
}
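That volatile read is the host side of the contract: the GPU writes the availability qword last, so a nonzero value means the rest of the slot is stable. A hypothetical polling sketch under that assumption (the real driver bounds such loops with timeouts and device-loss checks):

/* Illustrative only: spin until the GPU flips the availability qword. */
static void
wait_for_query(struct anv_query_pool *pool, uint32_t query)
{
   while (!query_is_available(pool, query))
      ;
}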
@ -521,8 +509,8 @@ VkResult genX(GetQueryPoolResults)(
if (write_results) {
uint64_t result = slot[idx * 2 + 2] - slot[idx * 2 + 1];
/* WaDividePSInvocationCountBy4:HSW,BDW */
if ((device->info->ver == 8 || device->info->verx10 == 75) &&
/* WaDividePSInvocationCountBy4:BDW */
if (device->info->ver == 8 &&
(1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT)
result >>= 2;
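The workaround is a fixed-point divide: Broadwell's PS invocation counter increments in units of four, so the raw begin/end delta is shifted right by two before reaching the application. Worked example (illustrative value): a raw delta of 4096 is reported as 4096 >> 2 = 1024 fragment-shader invocations.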
@ -553,7 +541,6 @@ VkResult genX(GetQueryPoolResults)(
break;
}
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
const struct anv_physical_device *pdevice = device->physical;
assert((flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
@ -570,7 +557,6 @@ VkResult genX(GetQueryPoolResults)(
}
break;
}
#endif
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
if (!write_results)
@ -692,7 +678,6 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
}
break;
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
for (uint32_t i = 0; i < num_queries; i++) {
for (uint32_t p = 0; p < pool->n_passes; p++) {
@ -705,7 +690,6 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
}
break;
}
#endif
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL:
for (uint32_t i = 0; i < num_queries; i++) {
@ -767,7 +751,6 @@ void genX(CmdResetQueryPool)(
break;
}
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
@ -782,7 +765,6 @@ void genX(CmdResetQueryPool)(
}
break;
}
#endif
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
struct mi_builder b;
@ -808,13 +790,11 @@ void genX(ResetQueryPool)(
for (uint32_t i = 0; i < queryCount; i++) {
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
#if GFX_VER >= 8
for (uint32_t p = 0; p < pool->n_passes; p++) {
uint64_t *pass_slot = pool->bo->map +
khr_perf_query_availability_offset(pool, firstQuery + i, p);
*pass_slot = 0;
}
#endif
} else {
uint64_t *slot = query_slot(pool, firstQuery + i);
*slot = 0;
@ -966,7 +946,6 @@ void genX(CmdBeginQueryIndexedEXT)(
emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
break;
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
if (!khr_perf_query_ensure_relocs(cmd_buffer))
return;
@ -1077,7 +1056,6 @@ void genX(CmdBeginQueryIndexedEXT)(
}
break;
}
#endif
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
@ -1163,7 +1141,6 @@ void genX(CmdEndQueryIndexedEXT)(
emit_query_mi_availability(&b, query_addr, true);
break;
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;
@ -1241,7 +1218,6 @@ void genX(CmdEndQueryIndexedEXT)(
assert(cmd_buffer->perf_reloc_idx == pdevice->n_perf_query_commands);
break;
}
#endif
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
@ -1330,8 +1306,6 @@ void genX(CmdWriteTimestamp2)(
}
}
#if GFX_VERx10 >= 75
#define MI_PREDICATE_SRC0 0x2400
#define MI_PREDICATE_SRC1 0x2408
#define MI_PREDICATE_RESULT 0x2418
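These are the MMIO offsets the conditional-rendering path loads before predicated draws. A minimal sketch of the classic sequence once SRC0/SRC1 hold the operands (assuming the standard genxml MI_PREDICATE fields and enums):

/* Latch the inverse of (SRC0 == SRC1) into MI_PREDICATE_RESULT;
 * subsequent predicated commands are skipped while the result is clear. */
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
   mip.LoadOperation    = LOAD_LOADINV;
   mip.CombineOperation = COMBINE_SET;
   mip.CompareOperation = COMPARE_SRCS_EQUAL;
}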
@ -1470,9 +1444,8 @@ void genX(CmdCopyQueryPoolResults)(
result = compute_query_result(&b, anv_address_add(query_addr,
idx * 16 + 8));
/* WaDividePSInvocationCountBy4:HSW,BDW */
if ((cmd_buffer->device->info->ver == 8 ||
cmd_buffer->device->info->verx10 == 75) &&
/* WaDividePSInvocationCountBy4:BDW */
if (cmd_buffer->device->info->ver == 8 &&
(1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT) {
result = mi_ushr32_imm(&b, result, 2);
}
@ -1495,11 +1468,9 @@ void genX(CmdCopyQueryPoolResults)(
gpu_write_query_result(&b, dest_addr, flags, idx++, result);
break;
#if GFX_VER >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
unreachable("Copy KHR performance query results not implemented");
break;
#endif
default:
unreachable("unhandled query type");
@ -1513,18 +1484,3 @@ void genX(CmdCopyQueryPoolResults)(
dest_addr = anv_address_add(dest_addr, destStride);
}
}
#else
void genX(CmdCopyQueryPoolResults)(
VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount,
VkBuffer destBuffer,
VkDeviceSize destOffset,
VkDeviceSize destStride,
VkQueryResultFlags flags)
{
anv_finishme("Queries not yet supported on Ivy Bridge");
}
#endif


@ -583,8 +583,6 @@ genX(emit_l3_config)(struct anv_batch *batch,
{
UNUSED const struct intel_device_info *devinfo = device->info;
#if GFX_VER >= 8
#if GFX_VER >= 12
#define L3_ALLOCATION_REG GENX(L3ALLOC)
#define L3_ALLOCATION_REG_num GENX(L3ALLOC_num)
@ -621,81 +619,6 @@ genX(emit_l3_config)(struct anv_batch *batch,
l3cr.AllAllocation = cfg->n[INTEL_L3P_ALL];
}
}
#else /* GFX_VER < 8 */
const bool has_dc = cfg->n[INTEL_L3P_DC] || cfg->n[INTEL_L3P_ALL];
const bool has_is = cfg->n[INTEL_L3P_IS] || cfg->n[INTEL_L3P_RO] ||
cfg->n[INTEL_L3P_ALL];
const bool has_c = cfg->n[INTEL_L3P_C] || cfg->n[INTEL_L3P_RO] ||
cfg->n[INTEL_L3P_ALL];
const bool has_t = cfg->n[INTEL_L3P_T] || cfg->n[INTEL_L3P_RO] ||
cfg->n[INTEL_L3P_ALL];
assert(!cfg->n[INTEL_L3P_ALL]);
/* When enabled SLM only uses a portion of the L3 on half of the banks,
* the matching space on the remaining banks has to be allocated to a
* client (URB for all validated configurations) set to the
* lower-bandwidth 2-bank address hashing mode.
*/
const bool urb_low_bw = cfg->n[INTEL_L3P_SLM] && devinfo->platform != INTEL_PLATFORM_BYT;
assert(!urb_low_bw || cfg->n[INTEL_L3P_URB] == cfg->n[INTEL_L3P_SLM]);
/* Minimum number of ways that can be allocated to the URB. */
const unsigned n0_urb = devinfo->platform == INTEL_PLATFORM_BYT ? 32 : 0;
assert(cfg->n[INTEL_L3P_URB] >= n0_urb);
anv_batch_write_reg(batch, GENX(L3SQCREG1), l3sqc) {
l3sqc.ConvertDC_UC = !has_dc;
l3sqc.ConvertIS_UC = !has_is;
l3sqc.ConvertC_UC = !has_c;
l3sqc.ConvertT_UC = !has_t;
#if GFX_VERx10 == 75
l3sqc.L3SQGeneralPriorityCreditInitialization = SQGPCI_DEFAULT;
#else
l3sqc.L3SQGeneralPriorityCreditInitialization =
devinfo->platform == INTEL_PLATFORM_BYT ? BYT_SQGPCI_DEFAULT : SQGPCI_DEFAULT;
#endif
l3sqc.L3SQHighPriorityCreditInitialization = SQHPCI_DEFAULT;
}
anv_batch_write_reg(batch, GENX(L3CNTLREG2), l3cr2) {
l3cr2.SLMEnable = cfg->n[INTEL_L3P_SLM];
l3cr2.URBLowBandwidth = urb_low_bw;
l3cr2.URBAllocation = cfg->n[INTEL_L3P_URB] - n0_urb;
#if !GFX_VERx10 == 75
l3cr2.ALLAllocation = cfg->n[INTEL_L3P_ALL];
#endif
l3cr2.ROAllocation = cfg->n[INTEL_L3P_RO];
l3cr2.DCAllocation = cfg->n[INTEL_L3P_DC];
}
anv_batch_write_reg(batch, GENX(L3CNTLREG3), l3cr3) {
l3cr3.ISAllocation = cfg->n[INTEL_L3P_IS];
l3cr3.ISLowBandwidth = 0;
l3cr3.CAllocation = cfg->n[INTEL_L3P_C];
l3cr3.CLowBandwidth = 0;
l3cr3.TAllocation = cfg->n[INTEL_L3P_T];
l3cr3.TLowBandwidth = 0;
}
#if GFX_VERx10 == 75
if (device->physical->cmd_parser_version >= 4) {
/* Enable L3 atomics on HSW if we have a DC partition, otherwise keep
* them disabled to avoid crashing the system hard.
*/
anv_batch_write_reg(batch, GENX(SCRATCH1), s1) {
s1.L3AtomicDisable = !has_dc;
}
anv_batch_write_reg(batch, GENX(CHICKEN3), c3) {
c3.L3AtomicDisableMask = true;
c3.L3AtomicDisable = !has_dc;
}
}
#endif /* GFX_VERx10 == 75 */
#endif /* GFX_VER < 8 */
}
void
@ -944,7 +867,7 @@ VkResult genX(CreateSampler)(
sampler->n_planes = 1;
uint32_t border_color_stride = GFX_VERx10 == 75 ? 512 : 64;
uint32_t border_color_stride = 64;
uint32_t border_color_offset;
ASSERTED bool has_custom_color = false;
if (pCreateInfo->borderColor <= VK_BORDER_COLOR_INT_OPAQUE_WHITE) {
@ -952,7 +875,6 @@ VkResult genX(CreateSampler)(
pCreateInfo->borderColor *
border_color_stride;
} else {
assert(GFX_VER >= 8);
sampler->custom_border_color =
anv_state_reserved_pool_alloc(&device->custom_border_colors);
border_color_offset = sampler->custom_border_color.offset;
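With the Haswell special case gone, the built-in border colors are a plain array lookup: each VkBorderColor value indexes a 64-byte record. For example, VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE has enum value 4 in the Vulkan headers, so it lands 4 * 64 = 256 bytes into the pool.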
@ -1077,11 +999,7 @@ VkResult genX(CreateSampler)(
.CPSLODCompensationEnable = true,
#endif
#if GFX_VER >= 8
.LODPreClampMode = CLAMP_MODE_OGL,
#else
.LODPreClampEnable = CLAMP_ENABLE_OGL,
#endif
#if GFX_VER == 8
.BaseMipLevel = 0.0,
@ -1104,9 +1022,7 @@ VkResult genX(CreateSampler)(
.BorderColorPointer = border_color_offset,
#if GFX_VER >= 8
.LODClampMagnificationMode = MIPNONE,
#endif
.MaximumAnisotropy = vk_to_intel_max_anisotropy(pCreateInfo->maxAnisotropy),
.RAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding,


@ -74,20 +74,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
lri.DataDWord = cache_mode;
}
#elif GFX_VER == 8
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
.NPPMAFixEnable = enable,
.NPEarlyZFailsDisable = enable,
.NPPMAFixEnableMask = true,
.NPEarlyZFailsDisableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_1_num);
lri.DataDWord = cache_mode;
}
#endif /* GFX_VER == 8 */
#endif /* GFX_VER == 9 */
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
* Flush bits is often necessary. We do it regardless because it's easier.
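A minimal sketch of the flush that comment describes, emitted unconditionally as the "easier" option (field names are the standard genxml ones; this mirrors rather than quotes the elided code):

anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
   pc.DepthStallEnable      = true;
   pc.DepthCacheFlushEnable = true;
}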
@ -106,96 +93,6 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
}
}
UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
assert(GFX_VER == 8);
/* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
*
* SW must set this bit in order to enable this fix when following
* expression is TRUE.
*
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
* (3DSTATE_PS_EXTRA::PixelShaderValid) &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!cmd_buffer->state.hiz_enabled)
return false;
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
if (!ds->depth.test_enable)
return false;
/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
* 3DSTATE_WM::ForceKillPix != ForceOff &&
* ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
* 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
return (pipeline->kill_pixel && (ds->depth.write_enable ||
ds->stencil.write_enable)) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
@ -331,15 +228,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
};
#if GFX_VER == 8
if (cmd_buffer->device->info->platform == INTEL_PLATFORM_CHV) {
sf.CHVLineWidth = dyn->rs.line.width;
} else {
sf.LineWidth = dyn->rs.line.width;
}
#else
sf.LineWidth = dyn->rs.line.width;
#endif
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
}
@ -394,75 +284,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
* across different state packets for gfx8 and gfx9. We handle that by
* using a big old #if switch here.
*/
#if GFX_VER == 8
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = dyn->cb.blend_constants[0],
.BlendConstantColorGreen = dyn->cb.blend_constants[1],
.BlendConstantColorBlue = dyn->cb.blend_constants[2],
.BlendConstantColorAlpha = dyn->cb.blend_constants[3],
.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#else
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
@ -531,7 +352,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#endif
#if GFX_VER >= 12
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||