From 6f2be52487bf2f84f1b4d12b27275847ff6facf0 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 25 Nov 2022 14:11:12 +0100
Subject: [PATCH] tu, ir3: Handle FDM shader builtins

Also, add the necessary transform to fix up gl_FragCoord.xy.

Part-of:
---
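Notes:

The dynamic FS driver params are packed one vec4 per view after the
statically-known ones, and ir3_nir_scan_driver_consts(),
create_driver_param_indirect() and tu6_emit_fs_params() all rely on the
same packing. As a reference while reviewing, here is a sketch of that
layout; the helper below is illustrative only and is not added by this
patch:

    /* dword layout of the FS driver params (relative to
     * const_state->offsets.driver_param * 4):
     *
     *   dp[0]           IR3_DP_FS_SUBGROUP_SIZE           (64 or 128)
     *   dp[4]           IR3_DP_FS_FRAG_INVOCATION_COUNT   (samples or 1)
     *   dp[8 + 4 * v]   gl_FragSizeEXT.xy for view v      (2 dwords)
     *   dp[10 + 4 * v]  gl_FragCoord offset.xy for view v (2 dwords)
     */
    static inline unsigned
    fs_dynamic_param_dword(enum ir3_driver_param dp, unsigned view)
    {
       /* Each view advances by one vec4, matching the stride of 4 passed
        * to ir3_get_addr0() in the indirect case. */
       return dp + 4 * view;
    }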
 src/compiler/nir/nir_intrinsics.py   |   6 ++
 src/freedreno/ir3/ir3_compiler_nir.c |  37 ++++++++
 src/freedreno/ir3/ir3_nir.c          |  14 +++
 src/freedreno/ir3/ir3_shader.h       |   5 +
 src/freedreno/vulkan/tu_cmd_buffer.cc | 132 +++++++++++++++++++++++++-
 src/freedreno/vulkan/tu_cmd_buffer.h  |  15 +--
 src/freedreno/vulkan/tu_pipeline.cc   |  23 ++++-
 src/freedreno/vulkan/tu_pipeline.h    |   1 +
 src/freedreno/vulkan/tu_shader.cc     |  88 +++++++++++++++++
 src/freedreno/vulkan/tu_shader.h      |   2 +
 10 files changed, 310 insertions(+), 13 deletions(-)
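The gl_FragCoord.xy fixup lowers to scaled = (raw - offset) * frag_size,
where the per-bin offset is the value fdm_apply_fs_params() uploads. A
self-contained sketch of that arithmetic (the function names here are
illustrative; only the math mirrors the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors fdm_per_bin_offset(): each bin is rendered in place at a
     * reduced size, so the origin shifts by bin_x - bin_x / frag_width. */
    static int32_t per_bin_offset_x(int32_t bin_x, uint32_t frag_width)
    {
       return bin_x - bin_x / (int32_t)frag_width;
    }

    /* Mirrors the shader-side fixup emitted by tu_nir_lower_fdm(). */
    static float fixup_frag_coord_x(float raw_x, int32_t bin_x,
                                    uint32_t frag_width)
    {
       return (raw_x - (float)per_bin_offset_x(bin_x, frag_width)) *
              (float)frag_width;
    }

    int main(void)
    {
       /* A bin at x = 64 with 2-wide fragments: the first pixel center
        * rendered (raw x = 64.5) maps to framebuffer x = 65, the center
        * of the 2-pixel-wide fragment. */
       assert(fixup_frag_coord_x(64.5f, 64, 2) == 65.0f);
       return 0;
    }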
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 18763e1c2e3..f09af197451 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1205,6 +1205,12 @@ intrinsic("cond_end_ir3", src_comp=[1])
 # signals the TE that the patch is complete and can be tessellated.
 intrinsic("end_patch_ir3")
 
+# Per-view gl_FragSizeEXT and gl_FragCoord offset.
+intrinsic("load_frag_size_ir3", src_comp=[1], dest_comp=2, indices=[RANGE],
+          flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
+intrinsic("load_frag_offset_ir3", src_comp=[1], dest_comp=2, indices=[RANGE],
+          flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
+
 # IR3-specific load/store intrinsics. These access a buffer used to pass data
 # between geometry stages - perhaps it's explicit access to the vertex cache.
 
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 9d573a47f5e..19f9cf6fd49 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -112,6 +112,17 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
    return create_uniform(ctx->block, r);
 }
 
+static struct ir3_instruction *
+create_driver_param_indirect(struct ir3_context *ctx, enum ir3_driver_param dp,
+                             struct ir3_instruction *address)
+{
+   /* first four vec4 sysval's reserved for UBOs: */
+   /* NOTE: dp is in scalar, but there can be >4 dp components: */
+   struct ir3_const_state *const_state = ir3_const_state(ctx->so);
+   unsigned n = const_state->offsets.driver_param;
+   return create_uniform_indirect(ctx->block, n * 4 + dp, TYPE_U32, address);
+}
+
 /*
  * Adreno's comparisons produce a 1 for true and 0 for false, in either 16 or
  * 32-bit registers. We use NIR's 1-bit integers to represent bools, and
@@ -2367,6 +2378,32 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_INNER_LEVEL_X + i);
       }
       break;
+   case nir_intrinsic_load_frag_invocation_count:
+      dst[0] = create_driver_param(ctx, IR3_DP_FS_FRAG_INVOCATION_COUNT);
+      break;
+   case nir_intrinsic_load_frag_size_ir3:
+   case nir_intrinsic_load_frag_offset_ir3: {
+      enum ir3_driver_param param =
+         intr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
+         IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
+      if (nir_src_is_const(intr->src[0])) {
+         uint32_t view = nir_src_as_uint(intr->src[0]);
+         for (int i = 0; i < dest_components; i++) {
+            dst[i] = create_driver_param(ctx, param + 4 * view + i);
+         }
+      } else {
+         struct ir3_instruction *view = ir3_get_src(ctx, &intr->src[0])[0];
+         for (int i = 0; i < dest_components; i++) {
+            dst[i] = create_driver_param_indirect(ctx, param + i,
+                                                  ir3_get_addr0(ctx, view, 4));
+         }
+         ctx->so->constlen =
+            MAX2(ctx->so->constlen,
+                 const_state->offsets.driver_param + param / 4 +
+                 nir_intrinsic_range(intr));
+      }
+      break;
+   }
    case nir_intrinsic_discard_if:
    case nir_intrinsic_discard:
    case nir_intrinsic_demote:
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 00d9733d977..8bf3d720038 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -924,6 +924,20 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
          layout->num_driver_params =
            MAX2(layout->num_driver_params, IR3_DP_HS_DEFAULT_INNER_LEVEL_Y + 1);
          break;
+      case nir_intrinsic_load_frag_size_ir3:
+         layout->num_driver_params =
+            MAX2(layout->num_driver_params, IR3_DP_FS_FRAG_SIZE + 2 +
+                 (nir_intrinsic_range(intr) - 1) * 4);
+         break;
+      case nir_intrinsic_load_frag_offset_ir3:
+         layout->num_driver_params =
+            MAX2(layout->num_driver_params, IR3_DP_FS_FRAG_OFFSET + 2 +
+                 (nir_intrinsic_range(intr) - 1) * 4);
+         break;
+      case nir_intrinsic_load_frag_invocation_count:
+         layout->num_driver_params =
+            MAX2(layout->num_driver_params, IR3_DP_FS_FRAG_INVOCATION_COUNT + 1);
+         break;
       default:
          break;
       }
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index e124dbf943f..25125332e7c 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -86,6 +86,11 @@ enum ir3_driver_param {
 
    /* fragment shader driver params: */
    IR3_DP_FS_SUBGROUP_SIZE = 0,
+   /* Dynamic params (that aren't known when compiling the shader) */
+   IR3_DP_FS_DYNAMIC = 4,
+   IR3_DP_FS_FRAG_INVOCATION_COUNT = IR3_DP_FS_DYNAMIC,
+   IR3_DP_FS_FRAG_SIZE = IR3_DP_FS_DYNAMIC + 4,
+   IR3_DP_FS_FRAG_OFFSET = IR3_DP_FS_DYNAMIC + 6,
 };
 
 #define IR3_MAX_SHADER_BUFFERS 32
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 967904782fe..55d43dc4a61 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -648,6 +648,7 @@ tu6_update_msaa_samples(struct tu_cmd_buffer *cmd, VkSampleCountFlagBits samples
 {
    if (cmd->state.samples != samples) {
       cmd->state.samples = samples;
+      cmd->state.dirty |= TU_CMD_DIRTY_FS_PARAMS;
       tu6_update_msaa(cmd);
    }
 }
@@ -1489,7 +1490,8 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
     */
    if (cmd->state.pass->has_fdm) {
       cmd->state.dirty |=
-         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS;
+         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
+         TU_CMD_DIRTY_FS_PARAMS;
    }
 }
 
@@ -1736,7 +1738,8 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
     */
    if (cmd->state.pass->has_fdm) {
       cmd->state.dirty |=
-         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS;
+         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
+         TU_CMD_DIRTY_FS_PARAMS;
    }
 
    /* tu6_render_tile has cloned these tracepoints for each tile */
@@ -2847,7 +2850,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    cmd->state.pipeline = pipeline;
    cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS | TU_CMD_DIRTY_SHADER_CONSTS |
-                       TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS;
+                       TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS |
+                       TU_CMD_DIRTY_FS_PARAMS;
 
    if (pipeline->output.feedback_loop_may_involve_textures &&
       !cmd->state.rp.disable_gmem) {
@@ -5269,6 +5273,121 @@ fdm_apply_scissors(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
    tu6_emit_scissor(cs, scissors, state->num_scissors);
 }
 
+static uint32_t
+fs_params_offset(struct tu_cmd_buffer *cmd)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &cmd->state.pipeline->program.link[MESA_SHADER_FRAGMENT];
+   const struct ir3_const_state *const_state = &link->const_state;
+
+   if (const_state->num_driver_params <= IR3_DP_FS_DYNAMIC)
+      return 0;
+
+   if (const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4 >= link->constlen)
+      return 0;
+
+   return const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4;
+}
+
+static uint32_t
+fs_params_size(struct tu_cmd_buffer *cmd)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &cmd->state.pipeline->program.link[MESA_SHADER_FRAGMENT];
+   const struct ir3_const_state *const_state = &link->const_state;
+
+   return DIV_ROUND_UP(const_state->num_driver_params - IR3_DP_FS_DYNAMIC, 4);
+}
+
+struct apply_fs_params_state {
+   unsigned num_consts;
+};
+
+static void
+fdm_apply_fs_params(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
+                    VkExtent2D *frag_areas)
+{
+   const struct apply_fs_params_state *state =
+      (const struct apply_fs_params_state *)data;
+   unsigned num_consts = state->num_consts;
+
+   for (unsigned i = 0; i < num_consts; i++) {
+      assert(i < views);
+      VkExtent2D area = frag_areas[i];
+      VkOffset2D offset = fdm_per_bin_offset(area, bin);
+
+      tu_cs_emit(cs, area.width);
+      tu_cs_emit(cs, area.height);
+      tu_cs_emit(cs, fui(offset.x));
+      tu_cs_emit(cs, fui(offset.y));
+   }
+}
+
+static void
+tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
+{
+   uint32_t offset = fs_params_offset(cmd);
+
+   if (offset == 0) {
+      cmd->state.fs_params = (struct tu_draw_state) {};
+      return;
+   }
+
+   struct tu_pipeline *pipeline = cmd->state.pipeline;
+   unsigned num_units = fs_params_size(cmd);
+
+   if (pipeline->fs.fragment_density_map)
+      tu_cs_set_writeable(&cmd->sub_cs, true);
+
+   struct tu_cs cs;
+   VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 4 + 4 * num_units, &cs);
+   if (result != VK_SUCCESS) {
+      tu_cs_set_writeable(&cmd->sub_cs, false);
+      vk_command_buffer_set_error(&cmd->vk, result);
+      return;
+   }
+
+   tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_units);
+   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
+              CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+              CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+              CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
+              CP_LOAD_STATE6_0_NUM_UNIT(num_units));
+   tu_cs_emit(&cs, 0);
+   tu_cs_emit(&cs, 0);
+
+   STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC);
+   tu_cs_emit(&cs, pipeline->program.per_samp ?
+              cmd->state.samples : 1);
+   tu_cs_emit(&cs, 0);
+   tu_cs_emit(&cs, 0);
+   tu_cs_emit(&cs, 0);
+
+   STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4);
+   STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6);
+   if (num_units > 1) {
+      if (pipeline->fs.fragment_density_map) {
+         struct apply_fs_params_state state = {
+            .num_consts = num_units - 1,
+         };
+         tu_create_fdm_bin_patchpoint(cmd, &cs, 4 * (num_units - 1),
+                                      fdm_apply_fs_params, state);
+      } else {
+         for (unsigned i = 1; i < num_units; i++) {
+            tu_cs_emit(&cs, 1);
+            tu_cs_emit(&cs, 1);
+            tu_cs_emit(&cs, fui(0.0f));
+            tu_cs_emit(&cs, fui(0.0f));
+         }
+      }
+   }
+
+   cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs);
+
+   if (pipeline->fs.fragment_density_map)
+      tu_cs_set_writeable(&cmd->sub_cs, false);
+}
+
 static VkResult
 tu6_draw_common(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
@@ -5464,6 +5583,9 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
    if (dirty & TU_CMD_DIRTY_DESC_SETS)
       tu6_emit_descriptor_sets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
 
+   if (dirty & TU_CMD_DIRTY_FS_PARAMS)
+      tu6_emit_fs_params(cmd);
+
    /* for the first draw in a renderpass, re-emit all the draw states
     *
     * and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
@@ -5487,6 +5609,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
+      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
                             cmd->state.lrz_and_depth_plane_state);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_MSAA, cmd->state.msaa);
 
@@ -5507,6 +5630,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          ((dirty & TU_CMD_DIRTY_DESC_SETS) ? 1 : 0) +
          ((dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
          ((dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
+         ((dirty & TU_CMD_DIRTY_FS_PARAMS) ? 1 : 0) +
          (dirty_lrz ? 1 : 0);
 
       if ((dirty & TU_CMD_DIRTY_VB_STRIDE) &&
@@ -5553,6 +5677,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
    }
    if (dirty & TU_CMD_DIRTY_VS_PARAMS)
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
+   if (dirty & TU_CMD_DIRTY_FS_PARAMS)
+      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params);
    if (dirty_lrz) {
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
                             cmd->state.lrz_and_depth_plane_state);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index 29db0ba9740..1925c085837 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -29,6 +29,7 @@ enum tu_draw_state_group_id
    TU_DRAW_STATE_DESC_SETS,
    TU_DRAW_STATE_DESC_SETS_LOAD,
    TU_DRAW_STATE_VS_PARAMS,
+   TU_DRAW_STATE_FS_PARAMS,
    TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
    TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
    TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
@@ -62,13 +63,14 @@ enum tu_cmd_dirty_bits
    TU_CMD_DIRTY_SHADER_CONSTS = BIT(6),
    TU_CMD_DIRTY_LRZ = BIT(7),
    TU_CMD_DIRTY_VS_PARAMS = BIT(8),
-   TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(9),
-   TU_CMD_DIRTY_VIEWPORTS = BIT(10),
-   TU_CMD_DIRTY_SCISSORS = BIT(11),
-   TU_CMD_DIRTY_BLEND = BIT(12),
-   TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(13),
+   TU_CMD_DIRTY_FS_PARAMS = BIT(9),
+   TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(10),
+   TU_CMD_DIRTY_VIEWPORTS = BIT(11),
+   TU_CMD_DIRTY_SCISSORS = BIT(12),
+   TU_CMD_DIRTY_BLEND = BIT(13),
+   TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(14),
    /* all draw states were disabled and need to be re-enabled: */
-   TU_CMD_DIRTY_DRAW_STATE = BIT(14)
+   TU_CMD_DIRTY_DRAW_STATE = BIT(15)
 };
 
 /* There are only three cache domains we have to care about: the CCU, or
@@ -462,6 +464,7 @@ struct tu_cmd_state
    struct tu_draw_state msaa;
 
    struct tu_draw_state vs_params;
+   struct tu_draw_state fs_params;
 
    /* Index buffer */
    uint64_t index_va;
diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc
index bccdff4e8a9..51efd3efad0 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -274,6 +274,7 @@ struct tu_pipeline_builder
    bool subpass_feedback_loop_ds;
    bool feedback_loop_may_involve_textures;
    bool fragment_density_map;
+   uint8_t unscaled_input_fragcoord;
 
    /* Each library defines at least one piece of state in
     * VkGraphicsPipelineLibraryFlagsEXT, and libraries cannot overlap, so
@@ -620,14 +621,14 @@ tu6_emit_xs(struct tu_cs *cs,
       }
    }
 
-   /* emit FS driver param */
+   /* emit statically-known FS driver param */
    if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
       uint32_t base = const_state->offsets.driver_param;
-      int32_t size = DIV_ROUND_UP(const_state->num_driver_params, 4);
+      int32_t size = DIV_ROUND_UP(MIN2(const_state->num_driver_params, IR3_DP_FS_DYNAMIC), 4);
       size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
 
       if (size > 0) {
-         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4);
+         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + 4);
          tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
@@ -636,7 +637,6 @@ tu6_emit_xs(struct tu_cs *cs,
          tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
          tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
 
-         assert(size == 1);
          tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
          tu_cs_emit(cs, 0);
          tu_cs_emit(cs, 0);
@@ -1941,6 +1941,7 @@ tu6_emit_program(struct tu_cs *cs,
    if (fs) {
       tu6_emit_fs_inputs(cs, fs);
       tu6_emit_fs_outputs(cs, fs, pipeline);
+      pipeline->program.per_samp = fs->per_samp || fs->key.sample_shading;
    } else {
       /* TODO: check if these can be skipped if fs is disabled */
      struct ir3_shader_variant dummy_variant = {};
@@ -3202,6 +3203,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask;
       keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading;
+      keys[MESA_SHADER_FRAGMENT].fragment_density_map =
+         builder->fragment_density_map;
+      keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
+         builder->unscaled_input_fragcoord;
       pipeline->fs.fragment_density_map = builder->fragment_density_map;
    }
 
@@ -5111,6 +5116,7 @@ tu_pipeline_builder_init_graphics(
    builder->subpass_raster_order_attachment_access = false;
    builder->subpass_feedback_loop_ds = false;
    builder->subpass_feedback_loop_color = false;
+   builder->unscaled_input_fragcoord = 0;
 
    rendering_flags = vk_get_pipeline_rendering_flags(builder->create_info);
 
@@ -5147,6 +5153,15 @@ tu_pipeline_builder_init_graphics(
       rendering_flags |=
          VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
 
+   builder->unscaled_input_fragcoord = 0;
+   for (unsigned i = 0; i < subpass->input_count; i++) {
+      /* Input attachments stored in GMEM must be loaded with unscaled
+       * FragCoord.
+       */
+      if (subpass->input_attachments[i].patch_input_gmem)
+         builder->unscaled_input_fragcoord |= 1u << i;
+   }
+
    if (!builder->rasterizer_discard) {
       const uint32_t a = subpass->depth_stencil_attachment.attachment;
       builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h
index b95ae6589ec..21c2b9dccd8 100644
--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@@ -235,6 +235,7 @@ struct tu_pipeline
       uint32_t cs_instrlen;
 
       bool writes_viewport;
+      bool per_samp;
    } program;
 
    struct
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 18e0a6ff115..8ff812599c7 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -41,6 +41,7 @@ tu_spirv_to_nir(struct tu_device *dev,
          .draw_parameters = true,
          .float_controls = true,
          .float16 = true,
+         .fragment_density = true,
          .geometry_streams = true,
          .image_read_without_format = true,
          .image_write_without_format = true,
@@ -846,6 +847,81 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
    return progress;
 }
 
+struct lower_fdm_options {
+   unsigned num_views;
+   bool adjust_fragcoord;
+   bool multiview;
+};
+
+static bool
+lower_fdm_filter(const nir_instr *instr, const void *data)
+{
+   const struct lower_fdm_options *options =
+      (const struct lower_fdm_options *)data;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   return intrin->intrinsic == nir_intrinsic_load_frag_size ||
+      (intrin->intrinsic == nir_intrinsic_load_frag_coord &&
+       options->adjust_fragcoord);
+}
+
+static nir_ssa_def *
+lower_fdm_instr(struct nir_builder *b, nir_instr *instr, void *data)
+{
+   const struct lower_fdm_options *options =
+      (const struct lower_fdm_options *)data;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+   nir_ssa_def *view;
+   if (options->multiview) {
+      nir_variable *view_var =
+         nir_find_variable_with_location(b->shader, nir_var_shader_in,
+                                         VARYING_SLOT_VIEW_INDEX);
+
+      if (view_var == NULL) {
+         view_var = nir_variable_create(b->shader, nir_var_shader_in,
+                                        glsl_int_type(), NULL);
+         view_var->data.location = VARYING_SLOT_VIEW_INDEX;
+         view_var->data.interpolation = INTERP_MODE_FLAT;
+         view_var->data.driver_location = b->shader->num_inputs++;
+      }
+
+      view = nir_load_var(b, view_var);
+   } else {
+      view = nir_imm_int(b, 0);
+   }
+
+   nir_ssa_def *frag_size =
+      nir_load_frag_size_ir3(b, view, .range = options->num_views);
+
+   if (intrin->intrinsic == nir_intrinsic_load_frag_coord) {
+      nir_ssa_def *frag_offset =
+         nir_load_frag_offset_ir3(b, view, .range = options->num_views);
+      nir_ssa_def *unscaled_coord = nir_load_frag_coord_unscaled_ir3(b);
+      nir_ssa_def *xy = nir_channels(b, unscaled_coord, 0x3);
+      xy = nir_fmul(b, nir_fsub(b, xy, frag_offset), nir_i2f32(b, frag_size));
+      return nir_vec4(b,
+                      nir_channel(b, xy, 0),
+                      nir_channel(b, xy, 1),
+                      nir_channel(b, unscaled_coord, 2),
+                      nir_channel(b, unscaled_coord, 3));
+   }
+
+   assert(intrin->intrinsic == nir_intrinsic_load_frag_size);
+   return frag_size;
+}
+
+static bool
+tu_nir_lower_fdm(nir_shader *shader, const struct lower_fdm_options *options)
+{
+   return nir_shader_lower_instructions(shader, lower_fdm_filter,
+                                        lower_fdm_instr, (void *)options);
+}
+
 static void
 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
@@ -931,10 +1007,22 @@ tu_shader_create(struct tu_device *dev,
          * multiview is enabled.
          */
         .use_view_id_for_layer = key->multiview_mask != 0,
+        .unscaled_input_attachment_ir3 = key->unscaled_input_fragcoord,
      };
      NIR_PASS_V(nir, nir_lower_input_attachments, &att_options);
   }
+
+   /* This has to happen after lower_input_attachments, because we have to
+    * lower input attachment coordinates except if unscaled.
+    */
+   const struct lower_fdm_options fdm_options = {
+      .num_views = MAX2(util_last_bit(key->multiview_mask), 1),
+      .adjust_fragcoord = key->fragment_density_map,
+      .multiview = key->multiview_mask != 0,
+   };
+   NIR_PASS_V(nir, tu_nir_lower_fdm, &fdm_options);
+
    /* This needs to happen before multiview lowering which rewrites store
    * instructions of the position variable, so that we can just rewrite one
    * store at the end instead of having to rewrite every store specified by
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 51f87f7f376..166816f6858 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -54,6 +54,8 @@ struct tu_shader
 struct tu_shader_key {
    unsigned multiview_mask;
    bool force_sample_interp;
+   bool fragment_density_map;
+   uint8_t unscaled_input_fragcoord;
    enum ir3_wavesize_option api_wavesize, real_wavesize;
 };