tu, ir3: Handle FDM shader builtins

Also, add the necessary transform to fixup gl_FragCoord.xy.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20304>
Connor Abbott 2022-11-25 14:11:12 +01:00 committed by Marge Bot
parent b474ed1f3a
commit 6f2be52487
10 changed files with 309 additions and 13 deletions
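As background for the gl_FragCoord.xy fixup mentioned in the commit message: the NIR lowering added in tu_shader.c below rewrites load_frag_coord in terms of the per-view fragment area and the per-bin offset. A minimal sketch of the math it applies (helper name and scalar form are mine, not part of the commit):

/* Illustrative only: the lowering computes
 *   gl_FragCoord.xy = (unscaled.xy - per_bin_offset) * per_view_frag_size
 * where unscaled.xy is the raw hardware coordinate, frag_size is the
 * per-view fragment area in pixels, and z/w pass through unchanged. */
static inline float
fdm_scale_coord(float unscaled, float offset, unsigned frag_size)
{
   return (unscaled - offset) * (float)frag_size;
}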

@@ -1205,6 +1205,12 @@ intrinsic("cond_end_ir3", src_comp=[1])
# signals the TE that the patch is complete and can be tessellated.
intrinsic("end_patch_ir3")
# Per-view gl_FragSizeEXT and gl_FragCoord offset.
intrinsic("load_frag_size_ir3", src_comp=[1], dest_comp=2, indices=[RANGE],
flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
intrinsic("load_frag_offset_ir3", src_comp=[1], dest_comp=2, indices=[RANGE],
flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
# IR3-specific load/store intrinsics. These access a buffer used to pass data
# between geometry stages - perhaps it's explicit access to the vertex cache.

@@ -112,6 +112,17 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
return create_uniform(ctx->block, r);
}
static struct ir3_instruction *
create_driver_param_indirect(struct ir3_context *ctx, enum ir3_driver_param dp,
struct ir3_instruction *address)
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
struct ir3_const_state *const_state = ir3_const_state(ctx->so);
unsigned n = const_state->offsets.driver_param;
return create_uniform_indirect(ctx->block, n * 4 + dp, TYPE_U32, address);
}
/*
* Adreno's comparisons produce a 1 for true and 0 for false, in either 16 or
* 32-bit registers. We use NIR's 1-bit integers to represent bools, and
@@ -2367,6 +2378,32 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_INNER_LEVEL_X + i);
}
break;
case nir_intrinsic_load_frag_invocation_count:
dst[0] = create_driver_param(ctx, IR3_DP_FS_FRAG_INVOCATION_COUNT);
break;
case nir_intrinsic_load_frag_size_ir3:
case nir_intrinsic_load_frag_offset_ir3: {
enum ir3_driver_param param =
intr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
if (nir_src_is_const(intr->src[0])) {
uint32_t view = nir_src_as_uint(intr->src[0]);
for (int i = 0; i < dest_components; i++) {
dst[i] = create_driver_param(ctx, param + 4 * view + i);
}
} else {
struct ir3_instruction *view = ir3_get_src(ctx, &intr->src[0])[0];
for (int i = 0; i < dest_components; i++) {
dst[i] = create_driver_param_indirect(ctx, param + i,
ir3_get_addr0(ctx, view, 4));
}
ctx->so->constlen =
MAX2(ctx->so->constlen,
const_state->offsets.driver_param + param / 4 +
nir_intrinsic_range(intr));
}
break;
}
case nir_intrinsic_discard_if:
case nir_intrinsic_discard:
case nir_intrinsic_demote:
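A rough illustration of where the indirect case above lands in the const file (my sketch derived from create_driver_param_indirect(), not code from the commit): the dynamically-indexed view contributes 4 * view dwords through the a0 address register, on top of the static driver-param offset.

/* Illustrative only: scalar const dword read for a per-view FDM param.
 * driver_param_base is const_state->offsets.driver_param (in vec4 units);
 * the '4 * view' term is what ir3_get_addr0(ctx, view, 4) loads into a0. */
static unsigned
fdm_param_slot(unsigned driver_param_base, unsigned param, unsigned view, unsigned comp)
{
   return driver_param_base * 4 + param + 4 * view + comp;
}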

@@ -924,6 +924,20 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
layout->num_driver_params = MAX2(layout->num_driver_params,
IR3_DP_HS_DEFAULT_INNER_LEVEL_Y + 1);
break;
case nir_intrinsic_load_frag_size_ir3:
layout->num_driver_params = MAX2(layout->num_driver_params,
IR3_DP_FS_FRAG_SIZE + 2 +
(nir_intrinsic_range(intr) - 1) * 4);
break;
case nir_intrinsic_load_frag_offset_ir3:
layout->num_driver_params = MAX2(layout->num_driver_params,
IR3_DP_FS_FRAG_OFFSET + 2 +
(nir_intrinsic_range(intr) - 1) * 4);
break;
case nir_intrinsic_load_frag_invocation_count:
layout->num_driver_params = MAX2(layout->num_driver_params,
IR3_DP_FS_FRAG_INVOCATION_COUNT + 1);
break;
default:
break;
}
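A worked example of the sizing above, using the enum values added in ir3_shader.h below (my arithmetic, not from the commit): with IR3_DP_FS_DYNAMIC = 4 and a RANGE of 2 views, load_frag_offset_ir3 needs IR3_DP_FS_FRAG_OFFSET + 2 + (2 - 1) * 4 = 10 + 2 + 4 = 16 scalar driver params, i.e. four vec4s of const space; the frag-size case yields 14 and the MAX2 keeps the larger value.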

@@ -86,6 +86,11 @@ enum ir3_driver_param {
/* fragment shader driver params: */
IR3_DP_FS_SUBGROUP_SIZE = 0,
/* Dynamic params (that aren't known when compiling the shader) */
IR3_DP_FS_DYNAMIC = 4,
IR3_DP_FS_FRAG_INVOCATION_COUNT = IR3_DP_FS_DYNAMIC,
IR3_DP_FS_FRAG_SIZE = IR3_DP_FS_DYNAMIC + 4,
IR3_DP_FS_FRAG_OFFSET = IR3_DP_FS_DYNAMIC + 6,
};
#define IR3_MAX_SHADER_BUFFERS 32
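Putting the new values together (my annotation, not part of the commit), the dynamic FS driver params that tu6_emit_fs_params() uploads later in this commit occupy the following dwords, counting from IR3_DP_FS_DYNAMIC = 4:

/* Illustrative layout of the dynamic FS driver params:
 *   dp 4       IR3_DP_FS_FRAG_INVOCATION_COUNT (samples if per-sample shading, else 1)
 *   dp 5-7     padding up to the next vec4
 *   dp 8-9     IR3_DP_FS_FRAG_SIZE.xy for view 0
 *   dp 10-11   IR3_DP_FS_FRAG_OFFSET.xy for view 0
 *   dp 12-15   frag size/offset for view 1, and so on, one vec4 per view */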

@@ -648,6 +648,7 @@ tu6_update_msaa_samples(struct tu_cmd_buffer *cmd, VkSampleCountFlagBits samples
{
if (cmd->state.samples != samples) {
cmd->state.samples = samples;
cmd->state.dirty |= TU_CMD_DIRTY_FS_PARAMS;
tu6_update_msaa(cmd);
}
}
@@ -1489,7 +1490,8 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
*/
if (cmd->state.pass->has_fdm) {
cmd->state.dirty |=
TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
TU_CMD_DIRTY_FS_PARAMS;
}
}
@@ -1736,7 +1738,8 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
*/
if (cmd->state.pass->has_fdm) {
cmd->state.dirty |=
TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
TU_CMD_DIRTY_FS_PARAMS;
}
/* tu6_render_tile has cloned these tracepoints for each tile */
@@ -2847,7 +2850,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
cmd->state.pipeline = pipeline;
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS | TU_CMD_DIRTY_SHADER_CONSTS |
TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS |
TU_CMD_DIRTY_FS_PARAMS;
if (pipeline->output.feedback_loop_may_involve_textures &&
!cmd->state.rp.disable_gmem) {
@@ -5269,6 +5273,121 @@ fdm_apply_scissors(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
tu6_emit_scissor(cs, scissors, state->num_scissors);
}
static uint32_t
fs_params_offset(struct tu_cmd_buffer *cmd)
{
const struct tu_program_descriptor_linkage *link =
&cmd->state.pipeline->program.link[MESA_SHADER_FRAGMENT];
const struct ir3_const_state *const_state = &link->const_state;
if (const_state->num_driver_params <= IR3_DP_FS_DYNAMIC)
return 0;
if (const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4 >= link->constlen)
return 0;
return const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4;
}
static uint32_t
fs_params_size(struct tu_cmd_buffer *cmd)
{
const struct tu_program_descriptor_linkage *link =
&cmd->state.pipeline->program.link[MESA_SHADER_FRAGMENT];
const struct ir3_const_state *const_state = &link->const_state;
return DIV_ROUND_UP(const_state->num_driver_params - IR3_DP_FS_DYNAMIC, 4);
}
struct apply_fs_params_state {
unsigned num_consts;
};
static void
fdm_apply_fs_params(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
VkExtent2D *frag_areas)
{
const struct apply_fs_params_state *state =
(const struct apply_fs_params_state *)data;
unsigned num_consts = state->num_consts;
for (unsigned i = 0; i < num_consts; i++) {
assert(i < views);
VkExtent2D area = frag_areas[i];
VkOffset2D offset = fdm_per_bin_offset(area, bin);
tu_cs_emit(cs, area.width);
tu_cs_emit(cs, area.height);
tu_cs_emit(cs, fui(offset.x));
tu_cs_emit(cs, fui(offset.y));
}
}
static void
tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
{
uint32_t offset = fs_params_offset(cmd);
if (offset == 0) {
cmd->state.fs_params = (struct tu_draw_state) {};
return;
}
struct tu_pipeline *pipeline = cmd->state.pipeline;
unsigned num_units = fs_params_size(cmd);
if (pipeline->fs.fragment_density_map)
tu_cs_set_writeable(&cmd->sub_cs, true);
struct tu_cs cs;
VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 4 + 4 * num_units, &cs);
if (result != VK_SUCCESS) {
tu_cs_set_writeable(&cmd->sub_cs, false);
vk_command_buffer_set_error(&cmd->vk, result);
return;
}
tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_units);
tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
CP_LOAD_STATE6_0_NUM_UNIT(num_units));
tu_cs_emit(&cs, 0);
tu_cs_emit(&cs, 0);
STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC);
tu_cs_emit(&cs, pipeline->program.per_samp ? cmd->state.samples : 1);
tu_cs_emit(&cs, 0);
tu_cs_emit(&cs, 0);
tu_cs_emit(&cs, 0);
STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4);
STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6);
if (num_units > 1) {
if (pipeline->fs.fragment_density_map) {
struct apply_fs_params_state state = {
.num_consts = num_units - 1,
};
tu_create_fdm_bin_patchpoint(cmd, &cs, 4 * (num_units - 1),
fdm_apply_fs_params, state);
} else {
for (unsigned i = 1; i < num_units; i++) {
tu_cs_emit(&cs, 1);
tu_cs_emit(&cs, 1);
tu_cs_emit(&cs, fui(0.0f));
tu_cs_emit(&cs, fui(0.0f));
}
}
}
cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs);
if (pipeline->fs.fragment_density_map)
tu_cs_set_writeable(&cmd->sub_cs, false);
}
static VkResult
tu6_draw_common(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@@ -5464,6 +5583,9 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
if (dirty & TU_CMD_DIRTY_DESC_SETS)
tu6_emit_descriptor_sets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
if (dirty & TU_CMD_DIRTY_FS_PARAMS)
tu6_emit_fs_params(cmd);
/* for the first draw in a renderpass, re-emit all the draw states
*
* and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
@@ -5487,6 +5609,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state);
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_MSAA, cmd->state.msaa);
@@ -5507,6 +5630,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
((dirty & TU_CMD_DIRTY_DESC_SETS) ? 1 : 0) +
((dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
((dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
((dirty & TU_CMD_DIRTY_FS_PARAMS) ? 1 : 0) +
(dirty_lrz ? 1 : 0);
if ((dirty & TU_CMD_DIRTY_VB_STRIDE) &&
@@ -5553,6 +5677,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
}
if (dirty & TU_CMD_DIRTY_VS_PARAMS)
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
if (dirty & TU_CMD_DIRTY_FS_PARAMS)
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params);
if (dirty_lrz) {
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state);
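To make the sizing above concrete (my numbers, following fs_params_size() and ir3_nir_scan_driver_consts()): a fragment shader using both FDM builtins across 2 views ends up with num_driver_params = 16, so fs_params_size() returns DIV_ROUND_UP(16 - 4, 4) = 3 vec4s: one for the invocation count and one {frag_size.xy, frag_offset.xy} vec4 per view. For FDM pipelines those per-view vec4s are left as a writeable patchpoint and filled per bin by fdm_apply_fs_params(); otherwise they are emitted as a 1x1 area with a zero offset.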

@@ -29,6 +29,7 @@ enum tu_draw_state_group_id
TU_DRAW_STATE_DESC_SETS,
TU_DRAW_STATE_DESC_SETS_LOAD,
TU_DRAW_STATE_VS_PARAMS,
TU_DRAW_STATE_FS_PARAMS,
TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
@@ -62,13 +63,14 @@ enum tu_cmd_dirty_bits
TU_CMD_DIRTY_SHADER_CONSTS = BIT(6),
TU_CMD_DIRTY_LRZ = BIT(7),
TU_CMD_DIRTY_VS_PARAMS = BIT(8),
TU_CMD_DIRTY_FS_PARAMS = BIT(9),
TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(10),
TU_CMD_DIRTY_VIEWPORTS = BIT(11),
TU_CMD_DIRTY_SCISSORS = BIT(12),
TU_CMD_DIRTY_BLEND = BIT(13),
TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(14),
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = BIT(15)
};
/* There are only three cache domains we have to care about: the CCU, or
@@ -462,6 +464,7 @@ struct tu_cmd_state
struct tu_draw_state msaa;
struct tu_draw_state vs_params;
struct tu_draw_state fs_params;
/* Index buffer */
uint64_t index_va;

@@ -274,6 +274,7 @@ struct tu_pipeline_builder
bool subpass_feedback_loop_ds;
bool feedback_loop_may_involve_textures;
bool fragment_density_map;
uint8_t unscaled_input_fragcoord;
/* Each library defines at least one piece of state in
* VkGraphicsPipelineLibraryFlagsEXT, and libraries cannot overlap, so
@@ -620,14 +621,14 @@ tu6_emit_xs(struct tu_cs *cs,
}
}
/* emit statically-known FS driver param */
if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
uint32_t base = const_state->offsets.driver_param;
int32_t size = DIV_ROUND_UP(MAX2(const_state->num_driver_params, 4), 4);
size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
if (size > 0) {
tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + 4);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
@@ -636,7 +637,6 @@ tu6_emit_xs(struct tu_cs *cs,
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
@@ -1941,6 +1941,7 @@ tu6_emit_program(struct tu_cs *cs,
if (fs) {
tu6_emit_fs_inputs(cs, fs);
tu6_emit_fs_outputs(cs, fs, pipeline);
pipeline->program.per_samp = fs->per_samp || fs->key.sample_shading;
} else {
/* TODO: check if these can be skipped if fs is disabled */
struct ir3_shader_variant dummy_variant = {};
@@ -3202,6 +3203,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask;
keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading;
keys[MESA_SHADER_FRAGMENT].fragment_density_map =
builder->fragment_density_map;
keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
builder->unscaled_input_fragcoord;
pipeline->fs.fragment_density_map = builder->fragment_density_map;
}
@@ -5111,6 +5116,7 @@ tu_pipeline_builder_init_graphics(
builder->subpass_raster_order_attachment_access = false;
builder->subpass_feedback_loop_ds = false;
builder->subpass_feedback_loop_color = false;
builder->unscaled_input_fragcoord = 0;
rendering_flags = vk_get_pipeline_rendering_flags(builder->create_info);
@@ -5147,6 +5153,15 @@ tu_pipeline_builder_init_graphics(
rendering_flags |=
VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
builder->unscaled_input_fragcoord = 0;
for (unsigned i = 0; i < subpass->input_count; i++) {
/* Input attachments stored in GMEM must be loaded with unscaled
* FragCoord.
*/
if (subpass->input_attachments[i].patch_input_gmem)
builder->unscaled_input_fragcoord |= 1u << i;
}
if (!builder->rasterizer_discard) {
const uint32_t a = subpass->depth_stencil_attachment.attachment;
builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
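A short note on the new mask (my illustration; it is consumed through the shader key and nir_lower_input_attachments in tu_shader.c below): bit i of unscaled_input_fragcoord marks input attachment i as read from GMEM, so its fetch coordinate must stay at the raw, unscaled gl_FragCoord instead of the FDM-adjusted one.

/* Illustrative only: input attachments 0 and 2 kept in GMEM would give
 * builder->unscaled_input_fragcoord = (1u << 0) | (1u << 2), which the key
 * forwards to nir_lower_input_attachments as unscaled_input_attachment_ir3. */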

@@ -235,6 +235,7 @@ struct tu_pipeline
uint32_t cs_instrlen;
bool writes_viewport;
bool per_samp;
} program;
struct

@@ -41,6 +41,7 @@ tu_spirv_to_nir(struct tu_device *dev,
.draw_parameters = true,
.float_controls = true,
.float16 = true,
.fragment_density = true,
.geometry_streams = true,
.image_read_without_format = true,
.image_write_without_format = true,
@@ -846,6 +847,81 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
return progress;
}
struct lower_fdm_options {
unsigned num_views;
bool adjust_fragcoord;
bool multiview;
};
static bool
lower_fdm_filter(const nir_instr *instr, const void *data)
{
const struct lower_fdm_options *options =
(const struct lower_fdm_options *)data;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
return intrin->intrinsic == nir_intrinsic_load_frag_size ||
(intrin->intrinsic == nir_intrinsic_load_frag_coord &&
options->adjust_fragcoord);
}
static nir_ssa_def *
lower_fdm_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
const struct lower_fdm_options *options =
(const struct lower_fdm_options *)data;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_ssa_def *view;
if (options->multiview) {
nir_variable *view_var =
nir_find_variable_with_location(b->shader, nir_var_shader_in,
VARYING_SLOT_VIEW_INDEX);
if (view_var == NULL) {
view_var = nir_variable_create(b->shader, nir_var_shader_in,
glsl_int_type(), NULL);
view_var->data.location = VARYING_SLOT_VIEW_INDEX;
view_var->data.interpolation = INTERP_MODE_FLAT;
view_var->data.driver_location = b->shader->num_inputs++;
}
view = nir_load_var(b, view_var);
} else {
view = nir_imm_int(b, 0);
}
nir_ssa_def *frag_size =
nir_load_frag_size_ir3(b, view, .range = options->num_views);
if (intrin->intrinsic == nir_intrinsic_load_frag_coord) {
nir_ssa_def *frag_offset =
nir_load_frag_offset_ir3(b, view, .range = options->num_views);
nir_ssa_def *unscaled_coord = nir_load_frag_coord_unscaled_ir3(b);
nir_ssa_def *xy = nir_channels(b, unscaled_coord, 0x3);
xy = nir_fmul(b, nir_fsub(b, xy, frag_offset), nir_i2f32(b, frag_size));
return nir_vec4(b,
nir_channel(b, xy, 0),
nir_channel(b, xy, 1),
nir_channel(b, unscaled_coord, 2),
nir_channel(b, unscaled_coord, 3));
}
assert(intrin->intrinsic == nir_intrinsic_load_frag_size);
return frag_size;
}
static bool
tu_nir_lower_fdm(nir_shader *shader, const struct lower_fdm_options *options)
{
return nir_shader_lower_instructions(shader, lower_fdm_filter,
lower_fdm_instr, (void *)options);
}
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
@@ -931,10 +1007,21 @@ tu_shader_create(struct tu_device *dev,
* multiview is enabled.
*/
.use_view_id_for_layer = key->multiview_mask != 0,
.unscaled_input_attachment_ir3 = key->unscaled_input_fragcoord,
};
NIR_PASS_V(nir, nir_lower_input_attachments, &att_options);
}
/* This has to happen before lower_input_attachments, because we have to
* lower input attachment coordinates except if unscaled.
*/
const struct lower_fdm_options fdm_options = {
.num_views = MAX2(util_last_bit(key->multiview_mask), 1),
.adjust_fragcoord = key->fragment_density_map,
};
NIR_PASS_V(nir, tu_nir_lower_fdm, &fdm_options);
/* This needs to happen before multiview lowering which rewrites store
* instructions of the position variable, so that we can just rewrite one
* store at the end instead of having to rewrite every store specified by

@@ -54,6 +54,8 @@ struct tu_shader
struct tu_shader_key {
unsigned multiview_mask;
bool force_sample_interp;
bool fragment_density_map;
uint8_t unscaled_input_fragcoord;
enum ir3_wavesize_option api_wavesize, real_wavesize;
};