blorp: Write to depth/stencil images as depth/stencil when possible

On Gen4, G45, and earlier, we have to handle weird offsetting to write
to depth and stencil due to a lack of proper depth mipmapping support in
hardware.  On Gen6, we have to deal with strange HiZ and stencil
layouts.  Prior to Gen9, we also have to do crazy things for stencil
writes because the hardware doesn't support
GL_ARB_shader_stencil_export and friends.  However, starting with Gen7
for depth and Gen9 for stencil, we can easily write the destination as
actual depth or stencil using the "right" hardware path.  This allows
us to leave HiZ and other compression enabled for blorp_blit() and
blorp_copy() operations.

Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3717>
This commit is contained in:
Jason Ekstrand 2020-02-05 03:59:01 -06:00 committed by Marge Bot
parent 4531f0ffce
commit 6cec618e82
3 changed files with 77 additions and 18 deletions

View file

@ -59,9 +59,6 @@ struct brw_blorp_blit_vars {
nir_variable *v_src_offset; nir_variable *v_src_offset;
nir_variable *v_dst_offset; nir_variable *v_dst_offset;
nir_variable *v_src_inv_size; nir_variable *v_src_inv_size;
/* gl_FragColor */
nir_variable *color_out;
}; };
static void static void
@ -80,10 +77,6 @@ brw_blorp_blit_vars_init(nir_builder *b, struct brw_blorp_blit_vars *v,
LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2)) LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2))
#undef LOAD_INPUT #undef LOAD_INPUT
v->color_out = nir_variable_create(b->shader, nir_var_shader_out,
glsl_vec4_type(), "gl_FragColor");
v->color_out->data.location = FRAG_RESULT_COLOR;
} }
static nir_ssa_def * static nir_ssa_def *
@ -1472,7 +1465,27 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
color = nir_vec4(&b, color_component, u, u, u); color = nir_vec4(&b, color_component, u, u, u);
} }
nir_store_var(&b, v.color_out, color, 0xf); if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
nir_variable *color_out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_vec4_type(), "gl_FragColor");
color_out->data.location = FRAG_RESULT_COLOR;
nir_store_var(&b, color_out, color, 0xf);
} else if (key->dst_usage == ISL_SURF_USAGE_DEPTH_BIT) {
nir_variable *depth_out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_float_type(), "gl_FragDepth");
depth_out->data.location = FRAG_RESULT_DEPTH;
nir_store_var(&b, depth_out, nir_channel(&b, color, 0), 0x1);
} else if (key->dst_usage == ISL_SURF_USAGE_STENCIL_BIT) {
nir_variable *stencil_out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_int_type(), "gl_FragStencilRef");
stencil_out->data.location = FRAG_RESULT_STENCIL;
nir_store_var(&b, stencil_out, nir_channel(&b, color, 0), 0x1);
} else {
unreachable("Invalid destination usage");
}
return b.shader; return b.shader;
} }
@ -1808,6 +1821,30 @@ try_blorp_blit(struct blorp_batch *batch,
{ {
const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
if (params->dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) {
if (devinfo->gen >= 7) {
/* We can render as depth on Gen5 but there's no real advantage since
* it doesn't support MSAA or HiZ. On Gen4, we can't always render
* to depth due to issues with depth buffers and mip-mapping. On
* Gen6, we can do everything but we have weird offsetting for HiZ
* and stencil. It's easier to just render using the color pipe
* on those platforms.
*/
wm_prog_key->dst_usage = ISL_SURF_USAGE_DEPTH_BIT;
} else {
wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
}
} else if (params->dst.surf.usage & ISL_SURF_USAGE_STENCIL_BIT) {
assert(params->dst.surf.format == ISL_FORMAT_R8_UINT);
if (devinfo->gen >= 9) {
wm_prog_key->dst_usage = ISL_SURF_USAGE_STENCIL_BIT;
} else {
wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
}
} else {
wm_prog_key->dst_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
}
if (isl_format_has_sint_channel(params->src.view.format)) { if (isl_format_has_sint_channel(params->src.view.format)) {
wm_prog_key->texture_data_type = nir_type_int; wm_prog_key->texture_data_type = nir_type_int;
} else if (isl_format_has_uint_channel(params->src.view.format)) { } else if (isl_format_has_uint_channel(params->src.view.format)) {
@ -1862,6 +1899,7 @@ try_blorp_blit(struct blorp_batch *batch,
} }
if (devinfo->gen > 6 && if (devinfo->gen > 6 &&
!isl_surf_usage_is_depth_or_stencil(wm_prog_key->dst_usage) &&
params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
assert(params->dst.surf.samples > 1); assert(params->dst.surf.samples > 1);
@ -1890,7 +1928,8 @@ try_blorp_blit(struct blorp_batch *batch,
wm_prog_key->need_dst_offset = true; wm_prog_key->need_dst_offset = true;
} }
if (params->dst.surf.tiling == ISL_TILING_W) { if (params->dst.surf.tiling == ISL_TILING_W &&
wm_prog_key->dst_usage != ISL_SURF_USAGE_STENCIL_BIT) {
/* We must modify the rectangle we send through the rendering pipeline /* We must modify the rectangle we send through the rendering pipeline
* (and the size and x/y offset of the destination surface), to account * (and the size and x/y offset of the destination surface), to account
* for the fact that we are mapping it as Y-tiled when it is in fact * for the fact that we are mapping it as Y-tiled when it is in fact
@ -2034,7 +2073,8 @@ try_blorp_blit(struct blorp_batch *batch,
/* We can handle RGBX formats easily enough by treating them as RGBA */ /* We can handle RGBX formats easily enough by treating them as RGBA */
params->dst.view.format = params->dst.view.format =
isl_format_rgbx_to_rgba(params->dst.view.format); isl_format_rgbx_to_rgba(params->dst.view.format);
} else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS &&
wm_prog_key->dst_usage != ISL_SURF_USAGE_DEPTH_BIT) {
wm_prog_key->dst_format = params->dst.view.format; wm_prog_key->dst_format = params->dst.view.format;
params->dst.view.format = ISL_FORMAT_R32_UINT; params->dst.view.format = ISL_FORMAT_R32_UINT;
} else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) { } else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) {
@ -2106,6 +2146,15 @@ try_blorp_blit(struct blorp_batch *batch,
result |= BLIT_DST_HEIGHT_SHRINK; result |= BLIT_DST_HEIGHT_SHRINK;
if (result == 0) { if (result == 0) {
if (wm_prog_key->dst_usage == ISL_SURF_USAGE_DEPTH_BIT) {
params->depth = params->dst;
memset(&params->dst, 0, sizeof(params->dst));
} else if (wm_prog_key->dst_usage == ISL_SURF_USAGE_STENCIL_BIT) {
params->stencil = params->dst;
params->stencil_mask = 0xff;
memset(&params->dst, 0, sizeof(params->dst));
}
batch->blorp->exec(batch, params); batch->blorp->exec(batch, params);
} }
@ -2628,18 +2677,20 @@ blorp_copy(struct blorp_batch *batch,
params.src.aux_usage == ISL_AUX_USAGE_MCS || params.src.aux_usage == ISL_AUX_USAGE_MCS ||
params.src.aux_usage == ISL_AUX_USAGE_MCS_CCS || params.src.aux_usage == ISL_AUX_USAGE_MCS_CCS ||
params.src.aux_usage == ISL_AUX_USAGE_CCS_E); params.src.aux_usage == ISL_AUX_USAGE_CCS_E);
assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE ||
params.dst.aux_usage == ISL_AUX_USAGE_MCS ||
params.dst.aux_usage == ISL_AUX_USAGE_MCS_CCS ||
params.dst.aux_usage == ISL_AUX_USAGE_CCS_E);
if (params.src.aux_usage == ISL_AUX_USAGE_HIZ) { if (params.src.aux_usage == ISL_AUX_USAGE_HIZ) {
/* Depth <-> Color copies are not allowed and HiZ isn't allowed in /* In order to use HiZ, we have to use the real format for the source.
* destinations because we draw as color. * Depth <-> Color copies are not allowed.
*/ */
assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE);
params.src.view.format = params.src.surf.format; params.src.view.format = params.src.surf.format;
params.dst.view.format = params.src.surf.format; params.dst.view.format = params.src.surf.format;
} else if ((params.dst.surf.usage & ISL_SURF_USAGE_DEPTH_BIT) &&
isl_dev->info->gen >= 7) {
/* On Gen7 and higher, we use actual depth writes for blits into depth
* buffers so we need the real format.
*/
params.src.view.format = params.dst.surf.format;
params.dst.view.format = params.dst.surf.format;
} else if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) { } else if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) {
params.dst.view.format = get_ccs_compatible_copy_format(dst_fmtl); params.dst.view.format = get_ccs_compatible_copy_format(dst_fmtl);
if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) { if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) {

View file

@ -902,6 +902,10 @@ blorp_emit_ps_config(struct blorp_batch *batch,
psx.PixelShaderValid = true; psx.PixelShaderValid = true;
psx.AttributeEnable = prog_data->num_varying_inputs > 0; psx.AttributeEnable = prog_data->num_varying_inputs > 0;
psx.PixelShaderIsPerSample = prog_data->persample_dispatch; psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
psx.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
#if GEN_GEN >= 9
psx.PixelShaderComputesStencil = prog_data->computed_stencil;
#endif
} }
if (params->src.enabled) if (params->src.enabled)
@ -927,8 +931,10 @@ blorp_emit_ps_config(struct blorp_batch *batch,
unreachable("not reached"); unreachable("not reached");
} }
if (prog_data) if (prog_data) {
wm.ThreadDispatchEnable = true; wm.ThreadDispatchEnable = true;
wm.PixelShaderComputedDepthMode = prog_data->computed_depth_mode;
}
if (params->src.enabled) if (params->src.enabled)
wm.PixelShaderKillsPixel = true; wm.PixelShaderKillsPixel = true;

View file

@ -319,6 +319,8 @@ struct brw_blorp_blit_prog_key
*/ */
bool dst_rgb; bool dst_rgb;
isl_surf_usage_flags_t dst_usage;
enum blorp_filter filter; enum blorp_filter filter;
/* True if the rectangle being sent through the rendering pipeline might be /* True if the rectangle being sent through the rendering pipeline might be