diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 072776d1909..e134853ec0f 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4682,7 +4682,7 @@ iris_compute_first_urb_slot_required(uint64_t inputs_read, const struct intel_vue_map *prev_stage_vue_map) { #if GFX_VER >= 9 - return brw_compute_first_urb_slot_required(inputs_read, prev_stage_vue_map); + return brw_compute_first_fs_urb_slot_required(inputs_read, prev_stage_vue_map); #else return elk_compute_first_urb_slot_required(inputs_read, prev_stage_vue_map); #endif diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index b20ec1bf972..979d067a45f 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -791,18 +791,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo, } else { assert(!nir->info.per_primitive_inputs); - uint64_t vue_header_bits = - VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT; + const uint64_t vue_header_bits = BRW_VUE_HEADER_VARYING_MASK; - uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK; - - /* VUE header fields all live in the same URB slot, so we pass them - * as a single FS input attribute. We want to only count them once. - */ - if (inputs_read & vue_header_bits) { - unique_fs_attrs &= ~vue_header_bits; - unique_fs_attrs |= VARYING_BIT_PSIZ; - } + uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits; if (util_bitcount64(unique_fs_attrs) <= 16) { /* The SF/SBE pipeline stage can do arbitrary rearrangement of the @@ -813,20 +804,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo, * fragment shader won't take up valuable register space, and (b) we * won't have to recompile the fragment shader if it gets paired with * a different vertex (or geometry) shader. - * - * VUE header fields share the same FS input attribute. */ - if (inputs_read & vue_header_bits) { - if (inputs_read & VARYING_BIT_PSIZ) - prog_data->urb_setup[VARYING_SLOT_PSIZ] = urb_next; - if (inputs_read & VARYING_BIT_LAYER) - prog_data->urb_setup[VARYING_SLOT_LAYER] = urb_next; - if (inputs_read & VARYING_BIT_VIEWPORT) - prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = urb_next; - - urb_next++; - } - for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { if (inputs_read & BRW_FS_VARYING_INPUT_MASK & ~vue_header_bits & BITFIELD64_BIT(i)) { @@ -850,8 +828,8 @@ calculate_urb_setup(const struct intel_device_info *devinfo, nir->info.separate_shader, 1); int first_slot = - brw_compute_first_urb_slot_required(inputs_read, - &prev_stage_vue_map); + brw_compute_first_fs_urb_slot_required(unique_fs_attrs, + &prev_stage_vue_map); assert(prev_stage_vue_map.num_slots <= first_slot + 32); for (int slot = first_slot; slot < prev_stage_vue_map.num_slots; @@ -1908,3 +1886,29 @@ brw_print_fs_urb_setup(FILE *fp, const struct brw_wm_prog_data *prog_data) MESA_SHADER_FRAGMENT)); } } + +extern "C" int +brw_compute_first_fs_urb_slot_required(uint64_t inputs_read, + const struct intel_vue_map *prev_stage_vue_map) +{ + /* The header slots are irrelevant for the URB varying slots. They are + * delivered somewhere else in the thread payload. + * + * For example on DG2: + * - PRIMITIVE_SHADING_RATE : R1.0, ActualCoarsePixelShadingSize.(X|Y) + * - LAYER : R1.1, Render Target Array Index + * - VIEWPORT : R1.1, Viewport Index + * - PSIZ : not available in fragment shaders + */ + inputs_read &= ~BRW_VUE_HEADER_VARYING_MASK; + + for (int i = 0; i < prev_stage_vue_map->num_slots; i++) { + int varying = prev_stage_vue_map->slot_to_varying[i]; + if (varying != BRW_VARYING_SLOT_PAD && varying > 0 && + (inputs_read & BITFIELD64_BIT(varying)) != 0) { + return ROUND_DOWN_TO(i, 2); + } + } + + return 0; +} diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 7b75b4b0dda..ff2bb206c7c 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1021,6 +1021,13 @@ typedef enum BRW_VARYING_SLOT_COUNT } brw_varying_slot; + +#define BRW_VUE_HEADER_VARYING_MASK \ + (VARYING_BIT_VIEWPORT | \ + VARYING_BIT_LAYER | \ + VARYING_BIT_PRIMITIVE_SHADING_RATE | \ + VARYING_BIT_PSIZ) + /** * Bitmask indicating which fragment shader inputs represent varyings (and * hence have to be delivered to the fragment shader by the SF/SBE stage). @@ -1605,31 +1612,13 @@ brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo, * that is used in the next stage. We do this by testing the varying slots in * the previous stage's vue map against the inputs read in the next stage. * - * Note that: - * - * - Each URB offset contains two varying slots and we can only skip a - * full offset if both slots are unused, so the value we return here is always - * rounded down to the closest multiple of two. - * - * - gl_Layer and gl_ViewportIndex don't have their own varying slots, they are - * part of the vue header, so if these are read we can't skip anything. + * Note that each URB offset contains two varying slots and we can only skip a + * full offset if both slots are unused, so the value we return here is always + * rounded down to the closest multiple of two. */ -static inline int -brw_compute_first_urb_slot_required(uint64_t inputs_read, - const struct intel_vue_map *prev_stage_vue_map) -{ - if ((inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PRIMITIVE_SHADING_RATE)) == 0) { - for (int i = 0; i < prev_stage_vue_map->num_slots; i++) { - int varying = prev_stage_vue_map->slot_to_varying[i]; - if (varying != BRW_VARYING_SLOT_PAD && varying > 0 && - (inputs_read & BITFIELD64_BIT(varying)) != 0) { - return ROUND_DOWN_TO(i, 2); - } - } - } - - return 0; -} +int +brw_compute_first_fs_urb_slot_required(uint64_t inputs_read, + const struct intel_vue_map *prev_stage_vue_map); /* From InlineData in 3DSTATE_TASK_SHADER_DATA and 3DSTATE_MESH_SHADER_DATA. */ #define BRW_TASK_MESH_INLINE_DATA_SIZE_DW 8 diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index d289c14e07c..db0506c24ed 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -685,8 +685,8 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline) &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map; int first_slot = - brw_compute_first_urb_slot_required(wm_prog_data->inputs, - fs_input_map); + brw_compute_first_fs_urb_slot_required(wm_prog_data->inputs, + fs_input_map); assert(first_slot % 2 == 0); unsigned urb_entry_read_offset = first_slot / 2; int max_source_attr = 0; @@ -696,15 +696,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline) assert(0 <= input_index); - /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the - * VUE header - */ - if (attr == VARYING_SLOT_VIEWPORT || - attr == VARYING_SLOT_LAYER || - attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) { - continue; - } - if (attr == VARYING_SLOT_PNTC) { sbe.PointSpriteTextureCoordinateEnable = 1 << input_index; continue;