mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-20 02:38:07 +02:00
kk: Support shaderCullDistance
Uses an approach based on HoneyKrisp. In the vertex shader, for each cull distance an extra output is written: 1 if the cull distance is >= 0, otherwise 0. In the fragment shader, if any of these extra outputs interpolates to zero, the corresponding cull distance is < 0 at every vertex of the primitive, so the primitive is culled by discarding its fragments. Reviewed-by: Arcady Goldmints-Orlov <arcady@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41463>
This commit is contained in:
parent
e81a292165
commit
2f63d09270
9 changed files with 150 additions and 41 deletions
|
|
@ -163,7 +163,6 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size)
|
|||
NIR_PASS(_, nir, nir_opt_idiv_const, 16);
|
||||
|
||||
msl_lower_textures(nir);
|
||||
msl_lower_nir_late(nir);
|
||||
|
||||
optimize(nir);
|
||||
|
||||
|
|
|
|||
|
|
@ -68,6 +68,12 @@ static const struct {
|
|||
[VARYING_SLOT_VIEWPORT] = {"viewport_array_index"},
|
||||
[VARYING_SLOT_CLIP_DIST0] = {"clip_0", .user = true, .scalarized = true},
|
||||
[VARYING_SLOT_CLIP_DIST1] = {"clip_1", .user = true, .scalarized = true},
|
||||
[VARYING_SLOT_CULL_DIST0] = {"cull_0", .user = true, .scalarized = true},
|
||||
[VARYING_SLOT_CULL_DIST1] = {"cull_1", .user = true, .scalarized = true},
|
||||
[VARYING_SLOT_CULL_PRIMITIVE] = {"cull_primitive_0", .user = true, .scalarized = true},
|
||||
/* The cull primitive slots are used to emulate cull distances in the fragment
|
||||
* shader. This may extend into one extra varying slot, which is otherwise unused. */
|
||||
[VARYING_SLOT_CULL_PRIMITIVE + 1] = {"cull_primitive_1", .user = true, .scalarized = true},
|
||||
[VARYING_SLOT_VAR0] = {"vary_00", .user = true},
|
||||
[VARYING_SLOT_VAR1] = {"vary_01", .user = true},
|
||||
[VARYING_SLOT_VAR2] = {"vary_02", .user = true},
|
||||
|
|
@ -183,7 +189,7 @@ vs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
|
|||
}
|
||||
|
||||
if (shader->info.clip_distance_array_size)
|
||||
P_IND(ctx, "float gl_ClipDistance [[clip_distance]] [%d];",
|
||||
P_IND(ctx, "float gl_ClipDistance [[clip_distance]] [%d];\n",
|
||||
shader->info.clip_distance_array_size);
|
||||
ctx->indentlevel--;
|
||||
P(ctx, "};\n");
|
||||
|
|
|
|||
|
|
@ -413,28 +413,141 @@ msl_nir_lower_sample_shading(nir_shader *nir)
|
|||
}
|
||||
|
||||
static bool
|
||||
lower_clip_distance(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
lower_clip_cull_distance_write(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
UNUSED void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics io = nir_intrinsic_io_semantics(intr);
|
||||
unsigned component = nir_intrinsic_component(intr);
|
||||
if (io.location != VARYING_SLOT_CLIP_DIST0 &&
|
||||
io.location != VARYING_SLOT_CLIP_DIST1)
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (sem.location != VARYING_SLOT_CLIP_DIST0 &&
|
||||
sem.location != VARYING_SLOT_CLIP_DIST1 &&
|
||||
sem.location != VARYING_SLOT_CULL_DIST0 &&
|
||||
sem.location != VARYING_SLOT_CULL_DIST1)
|
||||
return false;
|
||||
|
||||
unsigned base = (io.location - VARYING_SLOT_CLIP_DIST0) * 4 + component;
|
||||
if (intr->intrinsic == nir_intrinsic_store_output) {
|
||||
assert(nir_src_num_components(intr->src[0]) == 1 && "must be scalarized");
|
||||
|
||||
signed location = sem.location + nir_src_as_uint(intr->src[1]);
|
||||
|
||||
if (sem.location == VARYING_SLOT_CLIP_DIST0 ||
|
||||
sem.location == VARYING_SLOT_CLIP_DIST1) {
|
||||
/* Clip distance, add write to MSL clip_distance output */
|
||||
unsigned component =
|
||||
(location - VARYING_SLOT_CLIP_DIST0) * 4 +
|
||||
nir_intrinsic_component(intr);
|
||||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
nir_store_clip_distance_kk(b, intr->src[0].ssa, .base = base);
|
||||
nir_store_clip_distance_kk(b, intr->src[0].ssa, .base = component);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (sem.location == VARYING_SLOT_CULL_DIST0 ||
|
||||
sem.location == VARYING_SLOT_CULL_DIST1) {
|
||||
/* Cull distance, add write to cull primitive output */
|
||||
unsigned component =
|
||||
(location - VARYING_SLOT_CULL_DIST0) * 4 +
|
||||
nir_intrinsic_component(intr);
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_def *offs = nir_imm_int(b, component / 4);
|
||||
nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0));
|
||||
|
||||
nir_store_output(b, v, offs, .component = component % 4,
|
||||
.src_type = nir_type_float32,
|
||||
.io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE,
|
||||
.io_semantics.num_slots = 2);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
msl_nir_lower_clip_cull_distance_vs(nir_shader *s)
|
||||
{
|
||||
if (s->info.clip_distance_array_size == 0 &&
|
||||
s->info.cull_distance_array_size == 0)
|
||||
return false;
|
||||
|
||||
nir_shader_intrinsics_pass(s, lower_clip_cull_distance_write,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
if (s->info.cull_distance_array_size > 0)
|
||||
s->info.outputs_written |=
|
||||
BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
|
||||
DIV_ROUND_UP(s->info.cull_distance_array_size, 4));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
msl_nir_lower_clip_distance(nir_shader *nir)
|
||||
static bool
|
||||
msl_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(nir, lower_clip_distance, nir_metadata_all,
|
||||
NULL);
|
||||
assert(s->info.stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
if (nr_distances == 0)
|
||||
return false;
|
||||
|
||||
nir_builder b_ =
|
||||
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
|
||||
nir_builder *b = &b_;
|
||||
|
||||
/* Test each half-space */
|
||||
nir_def *culled = nir_imm_false(b);
|
||||
|
||||
for (unsigned i = 0; i < nr_distances; ++i) {
|
||||
/* Load the cull primitive input for this cull distance */
|
||||
nir_def *baryc = nir_load_barycentric_pixel(
|
||||
b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
|
||||
nir_def *cull = nir_load_interpolated_input(
|
||||
b, 1, 32, baryc, nir_imm_int(b, 0),
|
||||
.component = i & 3,
|
||||
.io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4),
|
||||
.io_semantics.num_slots = nr_distances / 4);
|
||||
|
||||
/* When the cull distance is negative in the vertex shader, the resulting
|
||||
* cull primitive output is zero, otherwise it is one. Thus, the
|
||||
* interpolated value will be zero only if all of its vertices had
|
||||
* negative cull distances, indicating the primitive should be culled.
|
||||
* Note that, since the value is interpolated at the pixel center, we
|
||||
* don't have to worry about corner values. */
|
||||
culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cull, 0)));
|
||||
|
||||
}
|
||||
|
||||
/* Emulate primitive culling by discarding fragments */
|
||||
nir_demote_if(b, culled);
|
||||
|
||||
s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
|
||||
DIV_ROUND_UP(nr_distances, 4));
|
||||
|
||||
s->info.fs.uses_discard = true;
|
||||
return nir_progress(true, b->impl, nir_metadata_control_flow);
|
||||
}
|
||||
|
||||
/* Scalarize stores to CLIP_DIST* and CULL_DIST* varyings */
|
||||
static bool
|
||||
scalarize_clip_cull_distance_filter(const nir_intrinsic_instr *intrin,
|
||||
UNUSED const void *_data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
|
||||
return semantics.location == VARYING_SLOT_CLIP_DIST0 ||
|
||||
semantics.location == VARYING_SLOT_CLIP_DIST1 ||
|
||||
semantics.location == VARYING_SLOT_CULL_DIST0 ||
|
||||
semantics.location == VARYING_SLOT_CULL_DIST1;
|
||||
}
|
||||
|
||||
void
|
||||
msl_nir_lower_clip_cull_distance(nir_shader *nir, unsigned num_cull_distances)
|
||||
{
|
||||
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out,
|
||||
scalarize_clip_cull_distance_filter, NULL);
|
||||
NIR_PASS(_, nir, nir_separate_merged_clip_cull_io);
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS(_, nir, msl_nir_lower_cull_distance_fs, num_cull_distances);
|
||||
else
|
||||
NIR_PASS(_, nir, msl_nir_lower_clip_cull_distance_vs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,5 +86,3 @@ bool msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def);
|
|||
void msl_nir_lower_subgroups(nir_shader *nir);
|
||||
|
||||
bool msl_nir_lower_algebraic_late(nir_shader *shader);
|
||||
|
||||
bool msl_nir_lower_clip_distance(nir_shader *nir);
|
||||
|
|
|
|||
|
|
@ -2125,26 +2125,6 @@ void msl_preprocess_nir_workarounds(struct nir_shader *nir,
|
|||
}
|
||||
}
|
||||
|
||||
/* Scalarize stores to CLIP_DIST* varyings */
|
||||
static bool
|
||||
scalarize_clip_distance_filter(const nir_intrinsic_instr *intrin,
|
||||
UNUSED const void *_data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
|
||||
return semantics.location == VARYING_SLOT_CLIP_DIST0 ||
|
||||
semantics.location == VARYING_SLOT_CLIP_DIST1;
|
||||
}
|
||||
|
||||
void
|
||||
msl_lower_nir_late(nir_shader *nir)
|
||||
{
|
||||
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out,
|
||||
scalarize_clip_distance_filter, NULL);
|
||||
NIR_PASS(_, nir, msl_nir_lower_clip_distance);
|
||||
}
|
||||
|
||||
static void
|
||||
msl_gather_info(struct nir_to_msl_ctx *ctx, struct nir_to_msl_options *options)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -76,7 +76,8 @@ bool msl_nir_fs_io_types(nir_shader *nir);
|
|||
bool msl_nir_vs_io_types(nir_shader *nir);
|
||||
bool msl_nir_fake_guard_for_discards(struct nir_shader *nir);
|
||||
bool msl_nir_lower_sample_shading(nir_shader *nir);
|
||||
void msl_lower_nir_late(nir_shader *nir);
|
||||
void msl_nir_lower_clip_cull_distance(nir_shader *nir,
|
||||
unsigned num_cull_distances);
|
||||
|
||||
bool msl_gather_uses_per_draw_data(nir_shader *nir);
|
||||
|
||||
|
|
|
|||
|
|
@ -193,6 +193,7 @@ kk_get_device_features(
|
|||
.samplerAnisotropy = true,
|
||||
.sampleRateShading = true,
|
||||
.shaderClipDistance = true,
|
||||
.shaderCullDistance = true,
|
||||
.shaderImageGatherExtended = true,
|
||||
.shaderInt16 = true,
|
||||
.shaderInt64 = true,
|
||||
|
|
|
|||
|
|
@ -643,6 +643,7 @@ gather_shader_info(struct kk_shader *shader, nir_shader *nir,
|
|||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
nir_shader_intrinsics_pass(nir, gather_vs_inputs, nir_metadata_all,
|
||||
&shader->info.vs.attribs_read);
|
||||
shader->info.vs.num_cull_distances = nir->info.cull_distance_array_size;
|
||||
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* Some meta shaders like vk-meta-resolve will have depth_layout as NONE
|
||||
* which is not a valid Metal layout */
|
||||
|
|
@ -687,6 +688,7 @@ modify_nir_info(nir_shader *nir)
|
|||
|
||||
static VkResult
|
||||
kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info,
|
||||
struct kk_shader *prev_stage,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
struct vk_shader **shader_out)
|
||||
|
|
@ -715,7 +717,9 @@ kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info,
|
|||
if (info->stage == MESA_SHADER_VERTEX) {
|
||||
kk_lower_vs_vbo(nir, state, info->robustness);
|
||||
}
|
||||
msl_lower_nir_late(nir);
|
||||
unsigned num_cull_distances =
|
||||
prev_stage ? prev_stage->info.vs.num_cull_distances : 0;
|
||||
msl_nir_lower_clip_cull_distance(nir, num_cull_distances);
|
||||
msl_optimize_nir(nir);
|
||||
modify_nir_info(nir);
|
||||
|
||||
|
|
@ -780,7 +784,7 @@ kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir,
|
|||
|
||||
struct vk_shader *shader = NULL;
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
VkResult result = kk_compile_shader(dev, &info, NULL, alloc, &shader);
|
||||
VkResult result = kk_compile_shader(dev, &info, NULL, NULL, alloc, &shader);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
|
|
@ -1162,8 +1166,11 @@ kk_compile_shaders(struct vk_device *device, uint32_t shader_count,
|
|||
nir_opts, NULL);
|
||||
|
||||
for (uint32_t i = 0; i < shader_count; i++) {
|
||||
struct kk_shader *prev_stage = i > 0 ?
|
||||
container_of(shaders_out[i - 1], struct kk_shader, vk) : NULL;
|
||||
result =
|
||||
kk_compile_shader(dev, &infos[i], state, pAllocator, &shaders_out[i]);
|
||||
kk_compile_shader(dev, &infos[i], prev_stage, state, pAllocator,
|
||||
&shaders_out[i]);
|
||||
if (result != VK_SUCCESS) {
|
||||
/* Clean up all the shaders before this point */
|
||||
for (uint32_t j = 0; j < i; j++)
|
||||
|
|
@ -1203,7 +1210,8 @@ kk_compile_shaders(struct vk_device *device, uint32_t shader_count,
|
|||
};
|
||||
struct vk_shader *frag_shader;
|
||||
result =
|
||||
kk_compile_shader(dev, &info, state, &dev->vk.alloc, &frag_shader);
|
||||
kk_compile_shader(dev, &info, fs, state, &dev->vk.alloc,
|
||||
&frag_shader);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
for (uint32_t i = 0; i < shader_count; i++)
|
||||
|
|
|
|||
|
|
@ -33,6 +33,9 @@ struct kk_shader_info {
|
|||
uint32_t attribs_read;
|
||||
uint32_t sample_count;
|
||||
|
||||
/* Required for fragment shader cull distance discards. */
|
||||
uint8_t num_cull_distances;
|
||||
|
||||
/* Data needed for serialization. */
|
||||
enum mtl_primitive_topology_class topology;
|
||||
enum mtl_pixel_format rt_formats[MAX_DRAW_BUFFERS];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue