From bcad1274bbb7a5a4fe07c94eb0413e766965bb72 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 1 Mar 2024 17:52:31 -0600 Subject: [PATCH] nvk: Disable the Out Of Range Address exception Part-of: (cherry picked from commit cc74a819e41c0275e5e4cbf93931d7554b05f665) --- .pick_status.json | 2 +- src/nouveau/vulkan/nvk_cmd_draw.c | 54 +++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index 39ba33497bd..e9148201f8d 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -324,7 +324,7 @@ "description": "nvk: Disable the Out Of Range Address exception", "nominated": false, "nomination_type": 3, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 166eb5799f2..2d0f2f671bb 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -141,6 +141,60 @@ nvk_queue_init_context_draw_state(struct nvk_queue *queue) P_INLINE_DATA(p, reg); } + /* Disable Out Of Range Address exceptions + * + * From the SPH documentation: + * + * "The SPH fields StoreReqStart and StoreReqEnd set a range of + * attributes whose corresponding Odmap values of ST or ST_LAST are + * treated as ST_REQ. Normally, for an attribute whose Omap bit is TRUE + * and Odmap value is ST, when the shader writes data to this output, it + * can not count on being able to read it back, since the next + * downstream shader might have its Imap bit FALSE, thereby causing the + * Bmap bit to be FALSE. By including a ST type of attribute in the + * range of StoreReqStart and StoreReqEnd, the attribute’s Odmap value + * is treated as ST_REQ, so an Omap bit being TRUE causes the Bmap bit + * to be TRUE. This guarantees the shader program can output the value + * and then read it back later. This will save register space." 
+ * + * It's unclear exactly what's going on but this seems to imply that the + * hardware actually ANDs the output mask of one shader stage together with + * the input mask of the subsequent shader stage to determine which values + * are actually used. + * + * In the case when we have an empty fragment shader, it seems the hardware + * doesn't allocate any output memory for the final geometry stage at all and + * so any writes to outputs from the final shader stage generate an Out Of + * Range Address exception. We could fix this by eliminating unused + * outputs via cross-stage linking but that won't work in the case of + * VK_EXT_shader_object and VK_EXT_graphics_pipeline_library fast-link. + * Instead, the easiest solution is to just disable the exception. + * + * NOTE (Faith): + * + * The above analysis is 100% conjecture on my part based on a creative + * reading of the SPH docs and what I saw when trying to run certain + * OpenGL CTS tests on NVK + Zink. Without access to NVIDIA HW + * engineers, I have no way of verifying this analysis. + * + * The CTS test in question is: + * + * KHR-GL46.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_tessLevel + * + * This should also prevent any issues with array overruns on I/O arrays. + * Before, they would get an exception and kill the context whereas now + * they should gently get ignored. + * + * This clears bit 14 of gr_gpcs_tpcs_sms_hww_warp_esr_report_mask + */ + if (dev->pdev->info.cls_eng3d >= MAXWELL_B) { + unsigned reg = pdev->info.cls_eng3d >= VOLTA_A ? 0x419ea8 : 0x419e44; + P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SET_PRIV_REG)); + P_INLINE_DATA(p, 0); + P_INLINE_DATA(p, BITFIELD_BIT(14)); + P_INLINE_DATA(p, reg); + } + P_IMMD(p, NV9097, SET_RENDER_ENABLE_C, MODE_TRUE); P_IMMD(p, NV9097, SET_Z_COMPRESSION, ENABLE_TRUE);