mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
brw: Move GS URB Read Length limiting to brw_nir_lower_gs_inputs()
We're going to be deciding on push vs. pull in the NIR lowering pass soon, so move the code to limit our register usage from brw's thread payload code to brw_nir_lower_gs_inputs().

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38990>
This commit is contained in:
parent
8889802271
commit
eae3bd19d4
4 changed files with 30 additions and 24 deletions
|
|
@ -176,7 +176,8 @@ brw_compile_gs(const struct brw_compiler *compiler,
|
||||||
pos_slots);
|
pos_slots);
|
||||||
|
|
||||||
brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
|
brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
|
||||||
brw_nir_lower_gs_inputs(nir, &input_vue_map);
|
brw_nir_lower_gs_inputs(nir, compiler->devinfo, &input_vue_map,
|
||||||
|
&prog_data->base.urb_read_length);
|
||||||
brw_nir_lower_vue_outputs(nir);
|
brw_nir_lower_vue_outputs(nir);
|
||||||
brw_postprocess_nir(nir, compiler, dispatch_width,
|
brw_postprocess_nir(nir, compiler, dispatch_width,
|
||||||
params->base.archiver, debug_enabled,
|
params->base.archiver, debug_enabled,
|
||||||
|
|
@ -338,11 +339,6 @@ brw_compile_gs(const struct brw_compiler *compiler,
|
||||||
|
|
||||||
prog_data->vertices_in = nir->info.gs.vertices_in;
|
prog_data->vertices_in = nir->info.gs.vertices_in;
|
||||||
|
|
||||||
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
|
|
||||||
* need to program a URB read length of ceiling(num_slots / 2).
|
|
||||||
*/
|
|
||||||
prog_data->base.urb_read_length = (input_vue_map.num_slots + 1) / 2;
|
|
||||||
|
|
||||||
/* Now that prog_data setup is done, we are ready to actually compile the
|
/* Now that prog_data setup is done, we are ready to actually compile the
|
||||||
* program.
|
* program.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -905,7 +905,9 @@ brw_nir_lower_vs_inputs(nir_shader *nir)
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_nir_lower_gs_inputs(nir_shader *nir,
|
brw_nir_lower_gs_inputs(nir_shader *nir,
|
||||||
const struct intel_vue_map *vue_map)
|
const struct intel_device_info *devinfo,
|
||||||
|
const struct intel_vue_map *vue_map,
|
||||||
|
unsigned *out_urb_read_length)
|
||||||
{
|
{
|
||||||
nir_foreach_shader_in_variable(var, nir)
|
nir_foreach_shader_in_variable(var, nir)
|
||||||
var->data.driver_location = var->data.location;
|
var->data.driver_location = var->data.location;
|
||||||
|
|
@ -950,6 +952,28 @@ brw_nir_lower_gs_inputs(nir_shader *nir,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned urb_read_length = 0;
|
||||||
|
|
||||||
|
if (nir->info.gs.invocations == 1) {
|
||||||
|
/* URB read length is in 256-bit units, which is two vec4s. */
|
||||||
|
urb_read_length = DIV_ROUND_UP(vue_map->num_slots, 2);
|
||||||
|
|
||||||
|
/* Because we're operating in scalar mode, the two vec4s take
|
||||||
|
* up 8 registers. Additionally, the GS reads URB Read Length
|
||||||
|
* for each vertex being processed, each unit of read length
|
||||||
|
* takes up 8 * VerticesIn registers.
|
||||||
|
*/
|
||||||
|
const unsigned regs_per_read = 8 * nir->info.gs.vertices_in;
|
||||||
|
|
||||||
|
/* Limit to 24 registers worth of pushed inputs */
|
||||||
|
const unsigned max_push_regs = 24;
|
||||||
|
|
||||||
|
if (urb_read_length * regs_per_read > max_push_regs)
|
||||||
|
urb_read_length = max_push_regs / regs_per_read;
|
||||||
|
}
|
||||||
|
|
||||||
|
*out_urb_read_length = urb_read_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -235,7 +235,9 @@ bool brw_nir_lower_outputs_to_urb_intrinsics(nir_shader *, const struct brw_lowe
|
||||||
|
|
||||||
void brw_nir_lower_vs_inputs(nir_shader *nir);
|
void brw_nir_lower_vs_inputs(nir_shader *nir);
|
||||||
void brw_nir_lower_gs_inputs(nir_shader *nir,
|
void brw_nir_lower_gs_inputs(nir_shader *nir,
|
||||||
const struct intel_vue_map *vue_map);
|
const struct intel_device_info *devinfo,
|
||||||
|
const struct intel_vue_map *vue_map,
|
||||||
|
unsigned *out_urb_read_length);
|
||||||
void brw_nir_lower_tes_inputs(nir_shader *nir,
|
void brw_nir_lower_tes_inputs(nir_shader *nir,
|
||||||
const struct intel_device_info *devinfo,
|
const struct intel_device_info *devinfo,
|
||||||
const struct intel_vue_map *vue);
|
const struct intel_vue_map *vue);
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,6 @@ brw_tes_thread_payload::brw_tes_thread_payload(const brw_shader &v)
|
||||||
|
|
||||||
brw_gs_thread_payload::brw_gs_thread_payload(brw_shader &v)
|
brw_gs_thread_payload::brw_gs_thread_payload(brw_shader &v)
|
||||||
{
|
{
|
||||||
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
|
|
||||||
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
|
struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
|
||||||
const brw_builder bld = brw_builder(&v);
|
const brw_builder bld = brw_builder(&v);
|
||||||
|
|
||||||
|
|
@ -136,21 +135,6 @@ brw_gs_thread_payload::brw_gs_thread_payload(brw_shader &v)
|
||||||
r += v.nir->info.gs.vertices_in * reg_unit(v.devinfo);
|
r += v.nir->info.gs.vertices_in * reg_unit(v.devinfo);
|
||||||
|
|
||||||
num_regs = r;
|
num_regs = r;
|
||||||
|
|
||||||
/* Use a maximum of 24 registers for push-model inputs. */
|
|
||||||
const unsigned max_push_components = 24;
|
|
||||||
|
|
||||||
/* If pushing our inputs would take too many registers, reduce the URB read
|
|
||||||
* length (which is in HWords, or 8 registers), and resort to pulling.
|
|
||||||
*
|
|
||||||
* Note that the GS reads <URB Read Length> HWords for every vertex - so we
|
|
||||||
* have to multiply by VerticesIn to obtain the total storage requirement.
|
|
||||||
*/
|
|
||||||
if (8 * vue_prog_data->urb_read_length * v.nir->info.gs.vertices_in >
|
|
||||||
max_push_components) {
|
|
||||||
vue_prog_data->urb_read_length =
|
|
||||||
ROUND_DOWN_TO(max_push_components / v.nir->info.gs.vertices_in, 8) / 8;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue