hk: optimize !largePoints

should slightly help proton

requires reordering the uvs lowering to be after tes lowering since that can
insert psiz writes.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35658>
This commit is contained in:
Alyssa Rosenzweig 2025-05-30 16:25:13 -04:00 committed by Marge Bot
parent 9d7301b2d1
commit 691aa581c3
3 changed files with 65 additions and 41 deletions

View file

@ -1126,4 +1126,10 @@
<field name="Y" start="15" size="15" type="uint" modifier="minus(1)"/>
</struct>
<struct name="CR PPP Control" size="4">
<field name="OpenGL" start="0" size="1" type="bool"/>
<field name="Enable W Clamp" start="1" size="1" type="bool"/>
<field name="Default point size" start="8" size="1" type="bool"/>
<field name="Fixed point format" start="9" size="1" type="uint"/>
</struct>
</genxml>

View file

@ -107,7 +107,20 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
memset(c, 0, sizeof(*c));
c->vdm_ctrl_stream_base = cs->addr;
c->ppp_ctrl = 0x202;
agx_pack(&c->ppp_ctrl, CR_PPP_CONTROL, cfg) {
/* If largePoints is not enabled, we optimize out point size writes so
* need to force points to have size 1.0 with this bit.
*
* If largePoints is enabled, we can't set this bit since our point size
* writes will get ignored.
*
* Yes, the hardware engineers messed this up. Dates back to IMG days.
*/
cfg.default_point_size = !dev->vk.enabled_features.largePoints;
cfg.enable_w_clamp = true;
cfg.fixed_point_format = 1;
}
c->width_px = cs->cr.width;
c->height_px = cs->cr.height;

View file

@ -225,6 +225,7 @@ hk_populate_fs_key(struct hk_fs_key *key,
enum hk_feature_key {
HK_FEAT_MIN_LOD = BITFIELD_BIT(0),
HK_FEAT_CUSTOM_BORDER = BITFIELD_BIT(1),
HK_FEAT_LARGE_POINTS = BITFIELD_BIT(2),
};
static enum hk_feature_key
@ -234,7 +235,8 @@ hk_make_feature_key(const struct vk_features *features)
return ~0U;
return (features->minLod ? HK_FEAT_MIN_LOD : 0) |
(features->customBorderColors ? HK_FEAT_CUSTOM_BORDER : 0);
(features->customBorderColors ? HK_FEAT_CUSTOM_BORDER : 0) |
(features->largePoints ? HK_FEAT_LARGE_POINTS : 0);
}
static void
@ -885,12 +887,35 @@ lower_uniforms(nir_builder *b, nir_intrinsic_instr *intr, void *data)
return true;
}
static void
hk_lower_hw_vs(nir_shader *nir, struct hk_shader *shader,
enum hk_feature_key features)
{
if (features & HK_FEAT_LARGE_POINTS) {
/* Point size must be clamped, excessively large points don't render
* properly on G13.
*
* Must be synced with pointSizeRange.
*/
NIR_PASS(_, nir, nir_lower_point_size, 1.0f, 511.95f);
/* TODO: Optimize out for monolithic? */
NIR_PASS(_, nir, nir_lower_default_point_size);
}
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
NIR_PASS(_, nir, agx_nir_lower_cull_distance_vs);
NIR_PASS(_, nir, agx_nir_lower_uvs, &shader->info.uvs);
}
static VkResult
hk_compile_nir(struct hk_device *dev, const VkAllocationCallbacks *pAllocator,
nir_shader *nir, VkShaderCreateFlagsEXT shader_flags,
const struct vk_pipeline_robustness_state *rs,
const struct hk_fs_key *fs_key, struct hk_shader *shader,
gl_shader_stage sw_stage, bool hw, nir_xfb_info *xfb_info)
const struct hk_fs_key *fs_key, enum hk_feature_key features,
struct hk_shader *shader, gl_shader_stage sw_stage, bool hw,
nir_xfb_info *xfb_info)
{
unsigned nr_vbos = 0;
@ -946,11 +971,15 @@ hk_compile_nir(struct hk_device *dev, const VkAllocationCallbacks *pAllocator,
}
uint64_t outputs = nir->info.outputs_written;
if (!hw &&
(sw_stage == MESA_SHADER_VERTEX || sw_stage == MESA_SHADER_TESS_EVAL)) {
nir->info.stage = MESA_SHADER_COMPUTE;
memset(&nir->info.cs, 0, sizeof(nir->info.cs));
nir->xfb_info = NULL;
if (sw_stage == MESA_SHADER_VERTEX || sw_stage == MESA_SHADER_TESS_EVAL) {
if (hw) {
hk_lower_hw_vs(nir, shader, features);
} else {
NIR_PASS(_, nir, agx_nir_lower_vs_before_gs);
nir->info.stage = MESA_SHADER_COMPUTE;
memset(&nir->info.cs, 0, sizeof(nir->info.cs));
nir->xfb_info = NULL;
}
}
struct fixed_uniforms f = {.root = 0, .image_heap = 4};
@ -1097,25 +1126,6 @@ hk_api_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader,
vk_shader_free(&dev->vk, pAllocator, &obj->vk);
}
static void
hk_lower_hw_vs(nir_shader *nir, struct hk_shader *shader)
{
/* Point size must be clamped, excessively large points don't render
* properly on G13.
*
* Must be synced with pointSizeRange.
*/
NIR_PASS(_, nir, nir_lower_point_size, 1.0f, 511.95f);
/* TODO: Optimize out for monolithic? */
NIR_PASS(_, nir, nir_lower_default_point_size);
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
NIR_PASS(_, nir, agx_nir_lower_cull_distance_vs);
NIR_PASS(_, nir, agx_nir_lower_uvs, &shader->info.uvs);
}
VkResult
hk_compile_shader(struct hk_device *dev, struct vk_shader_compile_info *info,
const struct vk_graphics_pipeline_state *state,
@ -1188,7 +1198,7 @@ hk_compile_shader(struct hk_device *dev, struct vk_shader_compile_info *info,
if (!rast_disc) {
struct hk_shader *shader = &obj->variants[HK_GS_VARIANT_RAST];
hk_lower_hw_vs(rast, shader);
hk_lower_hw_vs(rast, shader, features);
shader->info.gs = count_variant->info.gs;
}
@ -1206,9 +1216,10 @@ hk_compile_shader(struct hk_device *dev, struct vk_shader_compile_info *info,
for (unsigned v = 0; v < ARRAY_SIZE(variants); ++v) {
if (variants[v].in) {
result = hk_compile_nir(dev, pAllocator, variants[v].in,
info->flags, info->robustness, NULL,
variants[v].out, sw_stage, true, NULL);
result =
hk_compile_nir(dev, pAllocator, variants[v].in, info->flags,
info->robustness, NULL, features,
variants[v].out, sw_stage, true, NULL);
if (result != VK_SUCCESS) {
hk_api_shader_destroy(&dev->vk, &obj->vk, pAllocator);
if (clone != nir) {
@ -1285,16 +1296,10 @@ hk_compile_shader(struct hk_device *dev, struct vk_shader_compile_info *info,
nir->info.inputs_read >> VERT_ATTRIB_GENERIC0;
}
if (hw) {
hk_lower_hw_vs(clone, shader);
} else {
NIR_PASS(_, clone, agx_nir_lower_vs_before_gs);
}
/* hk_compile_nir takes ownership of the clone */
result = hk_compile_nir(dev, pAllocator, clone, info->flags,
info->robustness, fs_key, shader, sw_stage, hw,
nir->xfb_info);
info->robustness, fs_key, features, shader,
sw_stage, hw, nir->xfb_info);
if (result != VK_SUCCESS) {
hk_api_shader_destroy(&dev->vk, &obj->vk, pAllocator);
ralloc_free(nir);
@ -1307,7 +1312,7 @@ hk_compile_shader(struct hk_device *dev, struct vk_shader_compile_info *info,
/* hk_compile_nir takes ownership of nir */
result =
hk_compile_nir(dev, pAllocator, nir, info->flags, info->robustness,
fs_key, shader, sw_stage, true, NULL);
fs_key, features, shader, sw_stage, true, NULL);
if (result != VK_SUCCESS) {
hk_api_shader_destroy(&dev->vk, &obj->vk, pAllocator);
return result;