mirror of https://gitlab.freedesktop.org/mesa/mesa.git
tu, ir3: Handle FDM shader builtins
Also add the necessary transform to fix up gl_FragCoord.xy.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20304>
parent b474ed1f3a
commit 6f2be52487
10 changed files with 309 additions and 13 deletions
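Before the diffs, the heart of the change in one line of arithmetic: with a fragment density map (FDM), each bin is rasterized at a reduced rate, so the hardware's raw gl_FragCoord.xy is in scaled space, and the new lowering pass maps it back to logical pixels as (raw.xy - frag_offset) * frag_size. A standalone sketch of that transform (the function name and the sample values are illustrative, not Mesa API):

#include <stdio.h>

/* Hypothetical standalone model of the gl_FragCoord.xy fix-up performed by
 * the tu_nir_lower_fdm pass added below:
 *   logical.xy = (raw.xy - frag_offset) * frag_size
 * where frag_size is the per-view fragment area (e.g. 2x2) and frag_offset
 * is a per-bin offset, both supplied as driver params. */
static void
adjust_frag_coord(float raw_x, float raw_y,
                  int frag_width, int frag_height,
                  float off_x, float off_y,
                  float *out_x, float *out_y)
{
   *out_x = (raw_x - off_x) * (float)frag_width;
   *out_y = (raw_y - off_y) * (float)frag_height;
}

int main(void)
{
   float x, y;
   /* A fragment at raw (10.5, 4.5) in a bin with a 2x2 fragment area and
    * zero offset maps to logical (21.0, 9.0). */
   adjust_frag_coord(10.5f, 4.5f, 2, 2, 0.0f, 0.0f, &x, &y);
   printf("logical coord: (%g, %g)\n", x, y);
   return 0;
}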
@@ -1205,6 +1205,12 @@ intrinsic("cond_end_ir3", src_comp=[1])
 # signals the TE that the patch is complete and can be tessellated.
 intrinsic("end_patch_ir3")
 
+# Per-view gl_FragSizeEXT and gl_FragCoord offset.
+intrinsic("load_frag_size_ir3", src_comp=[1], dest_comp=2, indices=[RANGE],
+          flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
+intrinsic("load_frag_offset_ir3", src_comp=[1], dest_comp=2, indices=[RANGE],
+          flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
+
 # IR3-specific load/store intrinsics. These access a buffer used to pass data
 # between geometry stages - perhaps it's explicit access to the vertex cache.
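NIR generates typed builder helpers from these definitions (nir_load_frag_size_ir3() and friends), which the lowering pass near the end of this diff calls directly; the RANGE index carries the number of views so the driver-param upload can be sized. From that hunk:

   nir_ssa_def *frag_size =
      nir_load_frag_size_ir3(b, view, .range = options->num_views);

Per the definitions above, each intrinsic takes the view index as its single source and yields a 32-bit vec2.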
@@ -112,6 +112,17 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
    return create_uniform(ctx->block, r);
 }
 
+static struct ir3_instruction *
+create_driver_param_indirect(struct ir3_context *ctx, enum ir3_driver_param dp,
+                             struct ir3_instruction *address)
+{
+   /* first four vec4 sysval's reserved for UBOs: */
+   /* NOTE: dp is in scalar, but there can be >4 dp components: */
+   struct ir3_const_state *const_state = ir3_const_state(ctx->so);
+   unsigned n = const_state->offsets.driver_param;
+   return create_uniform_indirect(ctx->block, n * 4 + dp, TYPE_U32, address);
+}
+
 /*
  * Adreno's comparisons produce a 1 for true and 0 for false, in either 16 or
  * 32-bit registers. We use NIR's 1-bit integers to represent bools, and
@@ -2367,6 +2378,32 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          dst[i] = create_driver_param(ctx, IR3_DP_HS_DEFAULT_INNER_LEVEL_X + i);
       }
       break;
+   case nir_intrinsic_load_frag_invocation_count:
+      dst[0] = create_driver_param(ctx, IR3_DP_FS_FRAG_INVOCATION_COUNT);
+      break;
+   case nir_intrinsic_load_frag_size_ir3:
+   case nir_intrinsic_load_frag_offset_ir3: {
+      enum ir3_driver_param param =
+         intr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
+         IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
+      if (nir_src_is_const(intr->src[0])) {
+         uint32_t view = nir_src_as_uint(intr->src[0]);
+         for (int i = 0; i < dest_components; i++) {
+            dst[i] = create_driver_param(ctx, param + 4 * view + i);
+         }
+      } else {
+         struct ir3_instruction *view = ir3_get_src(ctx, &intr->src[0])[0];
+         for (int i = 0; i < dest_components; i++) {
+            dst[i] = create_driver_param_indirect(ctx, param + i,
+                                                  ir3_get_addr0(ctx, view, 4));
+         }
+         ctx->so->constlen =
+            MAX2(ctx->so->constlen,
+                 const_state->offsets.driver_param + param / 4 +
+                 nir_intrinsic_range(intr));
+      }
+      break;
+   }
    case nir_intrinsic_discard_if:
    case nir_intrinsic_discard:
    case nir_intrinsic_demote:
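Driver params are addressed in scalar const-file slots: the base offsets.driver_param is in vec4 units, so the indirect helper above computes n * 4 + dp, and the per-view direct case adds 4 * view. A self-contained model of that indexing, using the enum values added a couple of hunks below (the helper name and base value are made up for illustration):

#include <assert.h>

/* Mirror of the FS driver-param enum values added in this commit. */
enum { DP_FS_DYNAMIC = 4, DP_FS_FRAG_SIZE = DP_FS_DYNAMIC + 4 };

/* Scalar const-file offset of driver param `dp`, component `comp`, for view
 * `view`, given the driver-param base `n` in vec4 units (an illustrative
 * model of create_driver_param(ctx, param + 4 * view + i)). */
static unsigned
scalar_offset(unsigned n, unsigned dp, unsigned view, unsigned comp)
{
   return n * 4 + dp + 4 * view + comp;
}

int main(void)
{
   /* With the driver params based at vec4 6 (scalar 24), view 1's
    * frag-size.y lands at scalar 24 + 8 + 4 + 1 = 37, i.e. c9.y. */
   unsigned s = scalar_offset(6, DP_FS_FRAG_SIZE, 1, 1);
   assert(s == 37 && s / 4 == 9 && s % 4 == 1);
   return 0;
}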
@@ -924,6 +924,20 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
          layout->num_driver_params = MAX2(layout->num_driver_params,
                                           IR3_DP_HS_DEFAULT_INNER_LEVEL_Y + 1);
          break;
+      case nir_intrinsic_load_frag_size_ir3:
+         layout->num_driver_params = MAX2(layout->num_driver_params,
+                                          IR3_DP_FS_FRAG_SIZE + 2 +
+                                          (nir_intrinsic_range(intr) - 1) * 4);
+         break;
+      case nir_intrinsic_load_frag_offset_ir3:
+         layout->num_driver_params = MAX2(layout->num_driver_params,
+                                          IR3_DP_FS_FRAG_OFFSET + 2 +
+                                          (nir_intrinsic_range(intr) - 1) * 4);
+         break;
+      case nir_intrinsic_load_frag_invocation_count:
+         layout->num_driver_params = MAX2(layout->num_driver_params,
+                                          IR3_DP_FS_FRAG_INVOCATION_COUNT + 1);
+         break;
       default:
          break;
       }
@@ -86,6 +86,11 @@ enum ir3_driver_param {
 
    /* fragment shader driver params: */
    IR3_DP_FS_SUBGROUP_SIZE = 0,
+   /* Dynamic params (that aren't known when compiling the shader) */
+   IR3_DP_FS_DYNAMIC = 4,
+   IR3_DP_FS_FRAG_INVOCATION_COUNT = IR3_DP_FS_DYNAMIC,
+   IR3_DP_FS_FRAG_SIZE = IR3_DP_FS_DYNAMIC + 4,
+   IR3_DP_FS_FRAG_OFFSET = IR3_DP_FS_DYNAMIC + 6,
 };
 
 #define IR3_MAX_SHADER_BUFFERS 32
@@ -648,6 +648,7 @@ tu6_update_msaa_samples(struct tu_cmd_buffer *cmd, VkSampleCountFlagBits samples
 {
    if (cmd->state.samples != samples) {
       cmd->state.samples = samples;
+      cmd->state.dirty |= TU_CMD_DIRTY_FS_PARAMS;
       tu6_update_msaa(cmd);
    }
 }
@@ -1489,7 +1490,8 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
     */
    if (cmd->state.pass->has_fdm) {
       cmd->state.dirty |=
-         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS;
+         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
+         TU_CMD_DIRTY_FS_PARAMS;
    }
 }
@@ -1736,7 +1738,8 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
     */
    if (cmd->state.pass->has_fdm) {
       cmd->state.dirty |=
-         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS;
+         TU_CMD_DIRTY_VIEWPORTS | TU_CMD_DIRTY_SCISSORS |
+         TU_CMD_DIRTY_FS_PARAMS;
    }
 
    /* tu6_render_tile has cloned these tracepoints for each tile */
@@ -2847,7 +2850,8 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
 
    cmd->state.pipeline = pipeline;
    cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS | TU_CMD_DIRTY_SHADER_CONSTS |
-                       TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS;
+                       TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_VS_PARAMS |
+                       TU_CMD_DIRTY_FS_PARAMS;
 
    if (pipeline->output.feedback_loop_may_involve_textures &&
        !cmd->state.rp.disable_gmem) {
@@ -5269,6 +5273,121 @@ fdm_apply_scissors(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
    tu6_emit_scissor(cs, scissors, state->num_scissors);
 }
 
+static uint32_t
+fs_params_offset(struct tu_cmd_buffer *cmd)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &cmd->state.pipeline->program.link[MESA_SHADER_FRAGMENT];
+   const struct ir3_const_state *const_state = &link->const_state;
+
+   if (const_state->num_driver_params <= IR3_DP_FS_DYNAMIC)
+      return 0;
+
+   if (const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4 >= link->constlen)
+      return 0;
+
+   return const_state->offsets.driver_param + IR3_DP_FS_DYNAMIC / 4;
+}
+
+static uint32_t
+fs_params_size(struct tu_cmd_buffer *cmd)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &cmd->state.pipeline->program.link[MESA_SHADER_FRAGMENT];
+   const struct ir3_const_state *const_state = &link->const_state;
+
+   return DIV_ROUND_UP(const_state->num_driver_params - IR3_DP_FS_DYNAMIC, 4);
+}
+
+struct apply_fs_params_state {
+   unsigned num_consts;
+};
+
+static void
+fdm_apply_fs_params(struct tu_cs *cs, void *data, VkRect2D bin, unsigned views,
+                    VkExtent2D *frag_areas)
+{
+   const struct apply_fs_params_state *state =
+      (const struct apply_fs_params_state *)data;
+   unsigned num_consts = state->num_consts;
+
+   for (unsigned i = 0; i < num_consts; i++) {
+      assert(i < views);
+      VkExtent2D area = frag_areas[i];
+      VkOffset2D offset = fdm_per_bin_offset(area, bin);
+
+      tu_cs_emit(cs, area.width);
+      tu_cs_emit(cs, area.height);
+      tu_cs_emit(cs, fui(offset.x));
+      tu_cs_emit(cs, fui(offset.y));
+   }
+}
+
+static void
+tu6_emit_fs_params(struct tu_cmd_buffer *cmd)
+{
+   uint32_t offset = fs_params_offset(cmd);
+
+   if (offset == 0) {
+      cmd->state.fs_params = (struct tu_draw_state) {};
+      return;
+   }
+
+   struct tu_pipeline *pipeline = cmd->state.pipeline;
+
+   unsigned num_units = fs_params_size(cmd);
+
+   if (pipeline->fs.fragment_density_map)
+      tu_cs_set_writeable(&cmd->sub_cs, true);
+
+   struct tu_cs cs;
+   VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 4 + 4 * num_units, &cs);
+   if (result != VK_SUCCESS) {
+      tu_cs_set_writeable(&cmd->sub_cs, false);
+      vk_command_buffer_set_error(&cmd->vk, result);
+      return;
+   }
+
+   tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_units);
+   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(offset) |
+                   CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+                   CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+                   CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
+                   CP_LOAD_STATE6_0_NUM_UNIT(num_units));
+   tu_cs_emit(&cs, 0);
+   tu_cs_emit(&cs, 0);
+
+   STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC);
+   tu_cs_emit(&cs, pipeline->program.per_samp ? cmd->state.samples : 1);
+   tu_cs_emit(&cs, 0);
+   tu_cs_emit(&cs, 0);
+   tu_cs_emit(&cs, 0);
+
+   STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4);
+   STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6);
+   if (num_units > 1) {
+      if (pipeline->fs.fragment_density_map) {
+         struct apply_fs_params_state state = {
+            .num_consts = num_units - 1,
+         };
+         tu_create_fdm_bin_patchpoint(cmd, &cs, 4 * (num_units - 1),
+                                      fdm_apply_fs_params, state);
+      } else {
+         for (unsigned i = 1; i < num_units; i++) {
+            tu_cs_emit(&cs, 1);
+            tu_cs_emit(&cs, 1);
+            tu_cs_emit(&cs, fui(0.0f));
+            tu_cs_emit(&cs, fui(0.0f));
+         }
+      }
+   }
+
+   cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs);
+
+   if (pipeline->fs.fragment_density_map)
+      tu_cs_set_writeable(&cmd->sub_cs, false);
+}
+
 static VkResult
 tu6_draw_common(struct tu_cmd_buffer *cmd,
                 struct tu_cs *cs,
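Worked numbers for the two helpers above: the dynamic params begin IR3_DP_FS_DYNAMIC (= 4) scalars, i.e. one vec4, past the driver-param base, and fs_params_size() counts the remaining params in vec4 units. A standalone check of that arithmetic, with an assumed base and two views (one invocation-count vec4 plus one frag-size/offset vec4 per view):

#include <assert.h>

#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

enum { DP_FS_DYNAMIC = 4, DP_FS_FRAG_OFFSET = DP_FS_DYNAMIC + 6 };

int main(void)
{
   /* Assumed values: driver params based at vec4 6; two views, so the
    * scan pass sizes num_driver_params = DP_FS_FRAG_OFFSET + 2 + 4 = 16. */
   unsigned base = 6;
   unsigned num_driver_params = DP_FS_FRAG_OFFSET + 2 + (2 - 1) * 4;
   assert(num_driver_params == 16);

   /* fs_params_offset(): dynamic params start one vec4 past the base. */
   unsigned offset = base + DP_FS_DYNAMIC / 4;
   assert(offset == 7);

   /* fs_params_size(): remaining dynamic scalars, in vec4 units
    * (one invocation-count vec4 + one per-view frag size/offset vec4). */
   unsigned num_units = DIV_ROUND_UP(num_driver_params - DP_FS_DYNAMIC, 4);
   assert(num_units == 3);

   /* tu6_emit_fs_params() reserves 4 + 4 * num_units dwords and emits a
    * CP_LOAD_STATE6_FRAG packet with 3 + 4 * num_units payload dwords. */
   assert(4 + 4 * num_units == 16 && 3 + 4 * num_units == 15);
   return 0;
}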
@@ -5464,6 +5583,9 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
    if (dirty & TU_CMD_DIRTY_DESC_SETS)
       tu6_emit_descriptor_sets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
 
+   if (dirty & TU_CMD_DIRTY_FS_PARAMS)
+      tu6_emit_fs_params(cmd);
+
    /* for the first draw in a renderpass, re-emit all the draw states
     *
     * and if a draw-state disabling path (CmdClearAttachments 3D fallback) was
@@ -5487,6 +5609,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
+   tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state);
    tu_cs_emit_draw_state(cs, TU_DRAW_STATE_MSAA, cmd->state.msaa);
@@ -5507,6 +5630,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       ((dirty & TU_CMD_DIRTY_DESC_SETS) ? 1 : 0) +
       ((dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
       ((dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
+      ((dirty & TU_CMD_DIRTY_FS_PARAMS) ? 1 : 0) +
       (dirty_lrz ? 1 : 0);
 
    if ((dirty & TU_CMD_DIRTY_VB_STRIDE) &&
@@ -5553,6 +5677,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
    }
    if (dirty & TU_CMD_DIRTY_VS_PARAMS)
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
+   if (dirty & TU_CMD_DIRTY_FS_PARAMS)
+      tu_cs_emit_draw_state(cs, TU_DRAW_STATE_FS_PARAMS, cmd->state.fs_params);
 
    if (dirty_lrz) {
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state);
@@ -29,6 +29,7 @@ enum tu_draw_state_group_id
    TU_DRAW_STATE_DESC_SETS,
    TU_DRAW_STATE_DESC_SETS_LOAD,
    TU_DRAW_STATE_VS_PARAMS,
+   TU_DRAW_STATE_FS_PARAMS,
    TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
    TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
    TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
@@ -62,13 +63,14 @@ enum tu_cmd_dirty_bits
    TU_CMD_DIRTY_SHADER_CONSTS = BIT(6),
    TU_CMD_DIRTY_LRZ = BIT(7),
    TU_CMD_DIRTY_VS_PARAMS = BIT(8),
-   TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(9),
-   TU_CMD_DIRTY_VIEWPORTS = BIT(10),
-   TU_CMD_DIRTY_SCISSORS = BIT(11),
-   TU_CMD_DIRTY_BLEND = BIT(12),
-   TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(13),
+   TU_CMD_DIRTY_FS_PARAMS = BIT(9),
+   TU_CMD_DIRTY_PC_RASTER_CNTL = BIT(10),
+   TU_CMD_DIRTY_VIEWPORTS = BIT(11),
+   TU_CMD_DIRTY_SCISSORS = BIT(12),
+   TU_CMD_DIRTY_BLEND = BIT(13),
+   TU_CMD_DIRTY_PATCH_CONTROL_POINTS = BIT(14),
    /* all draw states were disabled and need to be re-enabled: */
-   TU_CMD_DIRTY_DRAW_STATE = BIT(14)
+   TU_CMD_DIRTY_DRAW_STATE = BIT(15)
 };
 
 /* There are only three cache domains we have to care about: the CCU, or
@@ -462,6 +464,7 @@ struct tu_cmd_state
    struct tu_draw_state msaa;
 
    struct tu_draw_state vs_params;
+   struct tu_draw_state fs_params;
 
    /* Index buffer */
    uint64_t index_va;
@@ -274,6 +274,7 @@ struct tu_pipeline_builder
    bool subpass_feedback_loop_ds;
    bool feedback_loop_may_involve_textures;
+   bool fragment_density_map;
    uint8_t unscaled_input_fragcoord;
 
    /* Each library defines at least one piece of state in
    * VkGraphicsPipelineLibraryFlagsEXT, and libraries cannot overlap, so
@@ -620,14 +621,14 @@ tu6_emit_xs(struct tu_cs *cs,
       }
    }
 
-   /* emit FS driver param */
+   /* emit statically-known FS driver param */
    if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
       uint32_t base = const_state->offsets.driver_param;
-      int32_t size = DIV_ROUND_UP(const_state->num_driver_params, 4);
+      int32_t size = DIV_ROUND_UP(MIN2(const_state->num_driver_params, 4), 4);
       size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
 
       if (size > 0) {
-         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4);
+         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + 4);
         tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                        CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
@@ -636,7 +637,6 @@ tu6_emit_xs(struct tu_cs *cs,
         tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
         tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
 
+         assert(size == 1);
         tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
         tu_cs_emit(cs, 0);
         tu_cs_emit(cs, 0);
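With the dynamic params now streamed per draw by tu6_emit_fs_params(), tu6_emit_xs() only uploads the statically-known first vec4 (the subgroup size), so the size computation clamps to at most one unit, which is what the new assert(size == 1) checks. A standalone model of that clamp, assuming the MIN2 form shown above and illustrative values:

#include <assert.h>

#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   /* Assumed values: 16 driver params (dynamic FDM params present),
    * based at vec4 6, with enough constlen for all of them. */
   int num_driver_params = 16, base = 6, constlen = 16;

   /* Only the first IR3_DP_FS_DYNAMIC (= 4) scalars are static here;
    * the rest come from tu6_emit_fs_params() at draw time. */
   int size = DIV_ROUND_UP(MIN2(num_driver_params, 4), 4);
   size = MAX2(MIN2(size + base, constlen) - base, 0);
   assert(size == 1); /* matches the assert added above */
   return 0;
}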
@@ -1941,6 +1941,7 @@ tu6_emit_program(struct tu_cs *cs,
    if (fs) {
       tu6_emit_fs_inputs(cs, fs);
       tu6_emit_fs_outputs(cs, fs, pipeline);
+      pipeline->program.per_samp = fs->per_samp || fs->key.sample_shading;
    } else {
       /* TODO: check if these can be skipped if fs is disabled */
       struct ir3_shader_variant dummy_variant = {};
@@ -3202,6 +3203,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
    if (builder->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
       keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask;
       keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading;
+      keys[MESA_SHADER_FRAGMENT].fragment_density_map =
+         builder->fragment_density_map;
+      keys[MESA_SHADER_FRAGMENT].unscaled_input_fragcoord =
+         builder->unscaled_input_fragcoord;
       pipeline->fs.fragment_density_map = builder->fragment_density_map;
    }
 
@@ -5111,6 +5116,7 @@ tu_pipeline_builder_init_graphics(
    builder->subpass_raster_order_attachment_access = false;
    builder->subpass_feedback_loop_ds = false;
    builder->subpass_feedback_loop_color = false;
+   builder->unscaled_input_fragcoord = 0;
 
    rendering_flags = vk_get_pipeline_rendering_flags(builder->create_info);
 
@@ -5147,6 +5153,15 @@ tu_pipeline_builder_init_graphics(
       rendering_flags |=
          VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
 
+   builder->unscaled_input_fragcoord = 0;
+   for (unsigned i = 0; i < subpass->input_count; i++) {
+      /* Input attachments stored in GMEM must be loaded with unscaled
+       * FragCoord.
+       */
+      if (subpass->input_attachments[i].patch_input_gmem)
+         builder->unscaled_input_fragcoord |= 1u << i;
+   }
+
    if (!builder->rasterizer_discard) {
       const uint32_t a = subpass->depth_stencil_attachment.attachment;
       builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
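The loop above packs one bit per input attachment into the uint8_t mask that later lands in tu_shader_key::unscaled_input_fragcoord. A minimal standalone version of that packing (the attachment flags are invented for the example):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

int main(void)
{
   /* Illustrative: attachments 0 and 2 live in GMEM, so their FragCoord
    * must stay unscaled; the resulting mask feeds the shader key. */
   bool patch_input_gmem[3] = { true, false, true };
   uint8_t unscaled = 0;
   for (unsigned i = 0; i < 3; i++) {
      if (patch_input_gmem[i])
         unscaled |= 1u << i;
   }
   assert(unscaled == 0x5);
   return 0;
}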
@@ -235,6 +235,7 @@ struct tu_pipeline
       uint32_t cs_instrlen;
 
       bool writes_viewport;
+      bool per_samp;
    } program;
 
    struct
@@ -41,6 +41,7 @@ tu_spirv_to_nir(struct tu_device *dev,
       .draw_parameters = true,
       .float_controls = true,
       .float16 = true,
+      .fragment_density = true,
       .geometry_streams = true,
       .image_read_without_format = true,
       .image_write_without_format = true,
@@ -846,6 +847,81 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
    return progress;
 }
 
+struct lower_fdm_options {
+   unsigned num_views;
+   bool adjust_fragcoord;
+   bool multiview;
+};
+
+static bool
+lower_fdm_filter(const nir_instr *instr, const void *data)
+{
+   const struct lower_fdm_options *options =
+      (const struct lower_fdm_options *)data;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   return intrin->intrinsic == nir_intrinsic_load_frag_size ||
+      (intrin->intrinsic == nir_intrinsic_load_frag_coord &&
+       options->adjust_fragcoord);
+}
+
+static nir_ssa_def *
+lower_fdm_instr(struct nir_builder *b, nir_instr *instr, void *data)
+{
+   const struct lower_fdm_options *options =
+      (const struct lower_fdm_options *)data;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+   nir_ssa_def *view;
+   if (options->multiview) {
+      nir_variable *view_var =
+         nir_find_variable_with_location(b->shader, nir_var_shader_in,
+                                         VARYING_SLOT_VIEW_INDEX);
+
+      if (view_var == NULL) {
+         view_var = nir_variable_create(b->shader, nir_var_shader_in,
+                                        glsl_int_type(), NULL);
+         view_var->data.location = VARYING_SLOT_VIEW_INDEX;
+         view_var->data.interpolation = INTERP_MODE_FLAT;
+         view_var->data.driver_location = b->shader->num_inputs++;
+      }
+
+      view = nir_load_var(b, view_var);
+   } else {
+      view = nir_imm_int(b, 0);
+   }
+
+   nir_ssa_def *frag_size =
+      nir_load_frag_size_ir3(b, view, .range = options->num_views);
+
+   if (intrin->intrinsic == nir_intrinsic_load_frag_coord) {
+      nir_ssa_def *frag_offset =
+         nir_load_frag_offset_ir3(b, view, .range = options->num_views);
+      nir_ssa_def *unscaled_coord = nir_load_frag_coord_unscaled_ir3(b);
+      nir_ssa_def *xy = nir_channels(b, unscaled_coord, 0x3);
+      xy = nir_fmul(b, nir_fsub(b, xy, frag_offset), nir_i2f32(b, frag_size));
+      return nir_vec4(b,
+                      nir_channel(b, xy, 0),
+                      nir_channel(b, xy, 1),
+                      nir_channel(b, unscaled_coord, 2),
+                      nir_channel(b, unscaled_coord, 3));
+   }
+
+   assert(intrin->intrinsic == nir_intrinsic_load_frag_size);
+   return frag_size;
+}
+
+static bool
+tu_nir_lower_fdm(nir_shader *shader, const struct lower_fdm_options *options)
+{
+   return nir_shader_lower_instructions(shader, lower_fdm_filter,
+                                        lower_fdm_instr, (void *)options);
+}
+
 static void
 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 {
@@ -931,10 +1007,21 @@ tu_shader_create(struct tu_device *dev,
           * multiview is enabled.
           */
          .use_view_id_for_layer = key->multiview_mask != 0,
+         .unscaled_input_attachment_ir3 = key->unscaled_input_fragcoord,
       };
       NIR_PASS_V(nir, nir_lower_input_attachments, &att_options);
    }
 
+   /* This has to happen after lower_input_attachments, because we have to
+    * lower input attachment coordinates except if unscaled.
+    */
+   const struct lower_fdm_options fdm_options = {
+      .num_views = MAX2(util_last_bit(key->multiview_mask), 1),
+      .adjust_fragcoord = key->fragment_density_map,
+   };
+   NIR_PASS_V(nir, tu_nir_lower_fdm, &fdm_options);
+
    /* This needs to happen before multiview lowering which rewrites store
    * instructions of the position variable, so that we can just rewrite one
    * store at the end instead of having to rewrite every store specified by
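The .num_views computation relies on Mesa's util_last_bit(), which returns one plus the index of the highest set bit (0 for an empty mask), so a multiview mask of 0b11 provisions two views' worth of frag size/offset params while a zero mask still gets one. A standalone re-implementation for illustration:

#include <assert.h>

/* Minimal stand-in for Mesa's util_last_bit(): position of the highest set
 * bit, counting from 1; 0 when no bits are set. */
static unsigned
last_bit(unsigned mask)
{
   unsigned n = 0;
   while (mask) {
      n++;
      mask >>= 1;
   }
   return n;
}

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   /* .num_views = MAX2(util_last_bit(multiview_mask), 1) */
   assert(MAX2(last_bit(0x3), 1) == 2); /* views {0,1} -> 2 views */
   assert(MAX2(last_bit(0x0), 1) == 1); /* no multiview -> 1 view */
   return 0;
}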
@@ -54,6 +54,8 @@ struct tu_shader
 struct tu_shader_key {
    unsigned multiview_mask;
    bool force_sample_interp;
+   bool fragment_density_map;
+   uint8_t unscaled_input_fragcoord;
    enum ir3_wavesize_option api_wavesize, real_wavesize;
 };