pvr: Setup ZLS depth and stencil load/store separately

Previously the code assumed that you could only have depth-stencil
attachments so no stencil only or depth only, for ZLS load/stores.
This isn't true as we can have stencil only attachments so the
ZLS depth and stencil store/load enable have to be set separately.

Other ZLSCTL setup has also been adjusted for separate depth-stencil.
E.g. the z{load,store}format, and {load,store}twiddled.

Co-Authored-By: Soroush Kashani <soroush.kashani@imgtec.com>
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Signed-off-by: Soroush Kashani <soroush.kashani@imgtec.com>
Reviewed-by: Frank Binns <frank.binns@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23830>
This commit is contained in:
Karmjit Mahil 2023-06-13 11:20:08 +01:00 committed by Marge Bot
parent 59518b6dc6
commit 4096bd8d85
5 changed files with 127 additions and 63 deletions

View file

@ -142,17 +142,14 @@ SOFTWARE.
<value name="GAMMA_BOTH_CHANNELS" value="1"/>
</enum>
<enum name="ZLOADFORMAT_TYPE">
<value name="F32Z" value="0"/>
<value name="24BITINT" value="1"/>
<value name="16BITINT" value="2"/>
<value name="F64Z" value="3"/>
</enum>
<enum name="ZSTOREFORMAT_TYPE">
<enum name="ZLS_FORMAT_TYPE">
<!-- Separate depth and stencil -->
<value name="F32Z" value="0"/>
<!-- Packed depth and stencil -->
<value name="24BITINT" value="1"/>
<!-- Separate depth and stencil -->
<value name="16BITINT" value="2"/>
<!-- Packed depth and stencil -->
<value name="F64Z" value="3"/>
</enum>
@ -554,8 +551,8 @@ SOFTWARE.
<field name="zlsextent_x_s" start="38" end="47" type="uint"/>
<field name="stencil_extent_enable" start="37" end="37" type="bool"/>
<field name="zlsextent_y_z" start="27" end="36" type="uint"/>
<field name="zstoreformat" start="25" end="26" type="ZSTOREFORMAT_TYPE"/>
<field name="zloadformat" start="23" end="24" type="ZLOADFORMAT_TYPE"/>
<field name="zstoreformat" start="25" end="26" type="ZLS_FORMAT_TYPE"/>
<field name="zloadformat" start="23" end="24" type="ZLS_FORMAT_TYPE"/>
<field name="fb_storeen" start="22" end="22" type="bool"/>
<field name="fb_loaden" start="21" end="21" type="bool"/>
<field name="mstoreen" start="20" end="20" type="bool"/>

View file

@ -1080,6 +1080,17 @@ pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
return util_next_power_of_two(width);
}
static inline bool
pvr_ds_attachment_requires_zls(const struct pvr_ds_attachment *attachment)
{
bool zls_used;
zls_used = attachment->load.d || attachment->load.s;
zls_used |= attachment->store.d || attachment->store.s;
return zls_used;
}
/**
* \brief If depth and/or stencil attachment dimensions are not tile-aligned,
* then we may need to insert some additional transfer subcommands.
@ -1119,7 +1130,7 @@ static bool pvr_sub_cmd_gfx_requires_ds_subtile_alignment(
/* No ZLS functions enabled; nothing to do. */
if ((!job->has_depth_attachment && !job->has_stencil_attachment) ||
(!job->ds.load && !job->ds.store)) {
!pvr_ds_attachment_requires_zls(&job->ds)) {
return false;
}
@ -1151,7 +1162,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer,
assert(list_last_entry(&cmd_buffer->sub_cmds, struct pvr_sub_cmd, link) ==
prev_sub_cmd);
if (!ds->load && !ds->store)
if (!pvr_ds_attachment_requires_zls(ds))
return VK_SUCCESS;
rogue_get_zls_tile_size_xy(&cmd_buffer->device->pdevice->dev_info,
@ -1203,7 +1214,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer,
},
};
if (ds->load) {
if (ds->load.d || ds->load.s) {
cmd_buffer->state.current_sub_cmd = NULL;
result =
@ -1235,7 +1246,7 @@ pvr_sub_cmd_gfx_align_ds_subtiles(struct pvr_cmd_buffer *const cmd_buffer,
cmd_buffer->state.current_sub_cmd = prev_sub_cmd;
}
if (ds->store) {
if (ds->store.d || ds->store.s) {
cmd_buffer->state.current_sub_cmd = NULL;
result =
@ -1501,7 +1512,24 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
job->ds_clear_value.stencil = clear_values->stencil;
}
job->ds.vk_format = ds_iview->vk.format;
switch (ds_iview->vk.format) {
case VK_FORMAT_D16_UNORM:
job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_16BITINT);
break;
case VK_FORMAT_S8_UINT:
case VK_FORMAT_D32_SFLOAT:
job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_F32Z);
break;
case VK_FORMAT_D24_UNORM_S8_UINT:
job->ds.zls_format = PVRX(CR_ZLS_FORMAT_TYPE_24BITINT);
break;
default:
unreachable("Unsupported depth stencil format");
}
job->ds.memlayout = ds_image->memlayout;
if (job->has_depth_attachment) {
@ -1557,14 +1585,34 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
}
}
if (job->has_depth_attachment && job->has_stencil_attachment)
assert(d_store == s_store && d_load == s_load);
job->ds.load.d = d_load;
job->ds.load.s = s_load;
job->ds.store.d = d_store;
job->ds.store.s = s_store;
job->ds.store = d_store || s_store;
job->ds.load = d_load || s_load;
/* ZLS can't do masked writes for packed depth stencil formats so if
* we store anything, we have to store everything.
*/
if ((job->ds.store.d || job->ds.store.s) &&
pvr_zls_format_type_is_packed(job->ds.zls_format)) {
job->ds.store.d = true;
job->ds.store.s = true;
if (job->ds.load || job->ds.store || store_was_optimised_out)
/* In case we are only operating on one aspect of the attachment we
* need to load the unused one in order to preserve its contents due
* to the forced store which might otherwise corrupt it.
*/
if (hw_render->depth_init != VK_ATTACHMENT_LOAD_OP_CLEAR)
job->ds.load.d = true;
if (hw_render->stencil_init != VK_ATTACHMENT_LOAD_OP_CLEAR)
job->ds.load.s = true;
}
if (pvr_ds_attachment_requires_zls(&job->ds) ||
store_was_optimised_out) {
job->process_empty_tiles = true;
}
if (pvr_sub_cmd_gfx_requires_ds_subtile_alignment(dev_info, job)) {
result = pvr_sub_cmd_gfx_align_ds_subtiles(cmd_buffer, sub_cmd);

View file

@ -70,6 +70,44 @@ pvr_cr_isp_aa_mode_type(uint32_t samples)
}
}
/* clang-format off */
static inline bool
pvr_zls_format_type_is_packed(enum PVRX(CR_ZLS_FORMAT_TYPE) type)
/* clang-format on */
{
switch (type) {
case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT):
case PVRX(CR_ZLS_FORMAT_TYPE_F64Z):
return true;
case PVRX(CR_ZLS_FORMAT_TYPE_F32Z):
case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT):
return false;
default:
unreachable("Invalid ZLS format type");
}
}
/* clang-format off */
static inline bool
pvr_zls_format_type_is_int(enum PVRX(CR_ZLS_FORMAT_TYPE) type)
/* clang-format on */
{
switch (type) {
case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT):
case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT):
return true;
case PVRX(CR_ZLS_FORMAT_TYPE_F32Z):
case PVRX(CR_ZLS_FORMAT_TYPE_F64Z):
return false;
default:
unreachable("Invalid ZLS format type");
}
}
/******************************************************************************
PDS
******************************************************************************/

View file

@ -1064,8 +1064,8 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
const enum PVRX(CR_ISP_AA_MODE_TYPE)
isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
enum PVRX(CR_ZLS_FORMAT_TYPE) zload_format = PVRX(CR_ZLS_FORMAT_TYPE_F32Z);
uint32_t *stream_ptr = (uint32_t *)state->fw_stream;
enum PVRX(CR_ZLOADFORMAT_TYPE) zload_format;
uint32_t pixel_ctl;
uint32_t isp_ctl;
@ -1101,7 +1101,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
stream_ptr += pvr_cmd_length(CR_ISP_OCLQRY_BASE);
pvr_csb_pack ((uint64_t *)stream_ptr, CR_ISP_ZLSCTL, value) {
if (job->has_depth_attachment) {
if (job->has_depth_attachment || job->has_stencil_attachment) {
uint32_t alignment_x;
uint32_t alignment_y;
@ -1129,44 +1129,24 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
value.storetwiddled = true;
}
switch (job->ds.vk_format) {
case VK_FORMAT_D16_UNORM:
value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_16BITINT);
value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_16BITINT);
break;
case VK_FORMAT_D32_SFLOAT:
value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
break;
case VK_FORMAT_D24_UNORM_S8_UINT:
value.zloadformat = PVRX(CR_ZLOADFORMAT_TYPE_24BITINT);
value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_24BITINT);
break;
default:
unreachable("Unsupported depth format");
}
value.zloaden = job->ds.load;
value.forcezload = value.zloaden;
value.zstoreen = job->ds.store;
value.forcezstore = value.zstoreen;
value.zloadformat = job->ds.zls_format;
value.zstoreformat = job->ds.zls_format;
zload_format = value.zloadformat;
} else {
zload_format = PVRX(CR_ZLOADFORMAT_TYPE_F32Z);
}
if (job->has_depth_attachment) {
value.zloaden = job->ds.load.d;
value.zstoreen = job->ds.store.d;
}
if (job->has_stencil_attachment) {
value.sstoreen = job->ds.store;
value.forcezstore = value.sstoreen;
value.sloaden = job->ds.load;
value.forcezload = value.sloaden;
value.sloaden = job->ds.load.s;
value.sstoreen = job->ds.store.s;
}
value.forcezload = value.zloaden || value.sloaden;
value.forcezstore = value.zstoreen || value.sstoreen;
}
stream_ptr += pvr_cmd_length(CR_ISP_ZLSCTL);
@ -1184,7 +1164,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
* in CR_ISP_STENCIL_LOAD_BASE does not contain a depth component.
*/
assert(job->has_depth_attachment ||
job->ds.vk_format == VK_FORMAT_S8_UINT);
!pvr_zls_format_type_is_packed(job->ds.zls_format));
value.enable = !job->has_depth_attachment;
}
}
@ -1236,15 +1216,15 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
* - job->depth_clear_value is set to a sensible default in that case.
*/
switch (zload_format) {
case PVRX(CR_ZLOADFORMAT_TYPE_F32Z):
case PVRX(CR_ZLS_FORMAT_TYPE_F32Z):
value.value = fui(depth_clear);
break;
case PVRX(CR_ZLOADFORMAT_TYPE_16BITINT):
case PVRX(CR_ZLS_FORMAT_TYPE_16BITINT):
value.value = _mesa_float_to_unorm(depth_clear, 16);
break;
case PVRX(CR_ZLOADFORMAT_TYPE_24BITINT):
case PVRX(CR_ZLS_FORMAT_TYPE_24BITINT):
value.value = _mesa_float_to_unorm(depth_clear, 24);
break;
@ -1277,8 +1257,7 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
* bias factor of 1.0 equates to 1 ULP of increase to the depth value.
*/
value.dbias_is_int = PVR_HAS_ERN(dev_info, 42307) &&
(job->ds.vk_format == VK_FORMAT_D16_UNORM ||
job->ds.vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
pvr_zls_format_type_is_int(job->ds.zls_format);
}
/* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
* possible to fully pack CR_ISP_CTL above rather than having to OR in part

View file

@ -97,15 +97,17 @@ struct pvr_render_job {
* has_stencil_attachment are false, the contents are undefined.
*/
struct pvr_ds_attachment {
bool load;
bool store;
struct {
bool d : 1;
bool s : 1;
} load, store;
pvr_dev_addr_t addr;
uint32_t stride;
uint32_t height;
VkExtent2D physical_extent;
uint32_t layer_size;
VkFormat vk_format;
enum PVRX(CR_ZLS_FORMAT_TYPE) zls_format;
/* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
* in pvr_private.h, which causes a circular include dependency. For now,
* treat it as a uint32_t. A couple of ways to possibly fix this: