mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-20 05:10:11 +01:00)

Merge branch 'review/intel-push-data-rework' into 'main'

Draft: brw: rework push data handling

See merge request mesa/mesa!38975

Commit d361e353b8
34 changed files with 561 additions and 528 deletions
@@ -363,6 +363,12 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
      is_divergent = false;
      break;

+   case nir_intrinsic_load_push_data_intel:
+      is_divergent =
+         (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM) &&
+         src_divergent(instr->src[0], state);
+      break;
+
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_load_shared_uniform_block_intel:
@@ -2559,9 +2559,15 @@ system_value("urb_output_handle_intel", 1)
 load("urb_input_handle_indexed_intel", [1], [], [CAN_ELIMINATE, CAN_REORDER])

 # Inline register delivery (available on Gfx12.5+ for CS/Mesh/Task stages)
-intrinsic("load_inline_data_intel", [], dest_comp=0,
-          indices=[BASE],
-          flags=[CAN_ELIMINATE, CAN_REORDER])
+load("inline_data_intel", [], [BASE], [CAN_ELIMINATE, CAN_REORDER])
+
+# Load push data
+# src[] = { offset }
+#
+# We use the ACCESS index mostly for ACCESS_NON_UNIFORM, this allows us to
+# preserve the semantic of load_push_constant which is always uniform
+# regardless of the offset source.
+load("push_data_intel", [1], [BASE, RANGE, ACCESS], [CAN_ELIMINATE, CAN_REORDER])

 # Dynamic tesselation parameters (see intel_tess_config).
 system_value("tess_config_intel", 1)
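For orientation, here is a minimal sketch (not part of this MR) of how a pass could emit the new intrinsic through the NIR builder form used later in this diff; the offset def `off` is a hypothetical per-invocation value, and passing `.access` assumes the builder accepts the ACCESS index declared above:

   static nir_def *
   load_push_vec4_at(nir_builder *b, nir_def *off)
   {
      /* Read 4 dwords of push data at a dynamically indexed, possibly
       * non-uniform offset. Without ACCESS_NON_UNIFORM the result keeps
       * the uniform semantic of load_push_constant, which is what the
       * divergence-analysis hunk above checks for. */
      return nir_load_push_data_intel(b, 4, 32, off,
                                      .base = 0,
                                      .range = 64,
                                      .access = ACCESS_NON_UNIFORM);
   }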
@@ -47,6 +47,7 @@ struct iris_bo;
 struct iris_context;
 struct blorp_batch;
 struct blorp_params;
+struct brw_ubo_range;

 #define IRIS_MAX_DRAW_BUFFERS 8
 #define IRIS_MAX_SOL_BINDINGS 64
@@ -696,10 +697,15 @@ struct iris_compiled_shader {
    mesa_shader_stage stage;

    /**
-    * Data derived from prog_data.
+    * Data derived from ELK prog_data.
     */
    struct iris_ubo_range ubo_ranges[4];

+   /**
+    * Data derived from BRW prog_data.
+    */
+   uint16_t push_sizes[4];
+
    unsigned nr_params;
    unsigned total_scratch;
    unsigned total_shared;
@@ -1350,7 +1356,8 @@ uint32_t iris_bti_to_group_index(const struct iris_binding_table *bt,
                                  enum iris_surface_group group,
                                  uint32_t bti);
 void iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
-                              struct brw_stage_prog_data *prog_data);
+                              struct brw_stage_prog_data *prog_data,
+                              struct brw_ubo_range *ubo_ranges);
 void iris_apply_elk_prog_data(struct iris_compiled_shader *shader,
                               struct elk_stage_prog_data *prog_data);
 struct intel_cs_dispatch_info
@@ -128,7 +128,6 @@ iris_disk_cache_store(struct disk_cache *cache,
       union brw_any_prog_data serializable;
       assert(prog_data_s <= sizeof(serializable));
       memcpy(&serializable, shader->brw_prog_data, prog_data_s);
-      serializable.base.param = NULL;
       serializable.base.relocs = NULL;
       blob_write_bytes(&blob, &serializable, prog_data_s);
    } else {
@@ -152,8 +151,7 @@ iris_disk_cache_store(struct disk_cache *cache,
    if (brw) {
       blob_write_bytes(&blob, brw->relocs,
                        brw->num_relocs * sizeof(struct intel_shader_reloc));
-      blob_write_bytes(&blob, brw->param,
-                       brw->nr_params * sizeof(uint32_t));
+      blob_write_bytes(&blob, shader->ubo_ranges, sizeof(shader->ubo_ranges));
    } else {
 #ifdef INTEL_USE_ELK
       blob_write_bytes(&blob, elk->relocs,
@@ -265,12 +263,7 @@ iris_disk_cache_retrieve(struct iris_screen *screen,
                          brw->num_relocs * sizeof(struct intel_shader_reloc));
          brw->relocs = relocs;
       }
-      brw->param = NULL;
-      if (brw->nr_params) {
-         brw->param = ralloc_array(NULL, uint32_t, brw->nr_params);
-         blob_copy_bytes(&blob, brw->param, brw->nr_params * sizeof(uint32_t));
-      }
+      blob_copy_bytes(&blob, shader->ubo_ranges, sizeof(shader->ubo_ranges));
    } else {
 #ifdef INTEL_USE_ELK
       elk->relocs = NULL;
@@ -320,7 +313,7 @@ iris_disk_cache_retrieve(struct iris_screen *screen,
       num_cbufs++;

    if (brw)
-      iris_apply_brw_prog_data(shader, brw);
+      iris_apply_brw_prog_data(shader, brw, NULL);
    else
 #ifdef INTEL_USE_ELK
       iris_apply_elk_prog_data(shader, elk);
@@ -291,8 +291,7 @@ emit_indirect_generate_draw(struct iris_batch *batch,

       ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
 #if GFX_VER < 20
-      ps.PushConstantEnable = shader->nr_params > 0 ||
-                              shader->ubo_ranges[0].length;
+      ps.PushConstantEnable = shader->push_sizes[0] > 0;
 #endif

 #if GFX_VER >= 9
@@ -165,9 +165,8 @@ iris_apply_brw_cs_prog_data(struct iris_compiled_shader *shader,
    iris->uses_sampler = brw->uses_sampler;
    iris->prog_mask = brw->prog_mask;

-   iris->first_param_is_builtin_subgroup_id =
-      brw->base.nr_params > 0 &&
-      brw->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID;
+   /* The pushed constants only contain the subgroup_id */
+   iris->first_param_is_builtin_subgroup_id = brw->base.push_sizes[0] > 0;
 }

 static void
@@ -249,16 +248,20 @@ iris_apply_brw_gs_prog_data(struct iris_compiled_shader *shader,

 void
 iris_apply_brw_prog_data(struct iris_compiled_shader *shader,
-                         struct brw_stage_prog_data *brw)
+                         struct brw_stage_prog_data *brw,
+                         struct brw_ubo_range *ubo_ranges)
 {
-   STATIC_ASSERT(ARRAY_SIZE(brw->ubo_ranges) == ARRAY_SIZE(shader->ubo_ranges));
-   for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
-      shader->ubo_ranges[i].block = brw->ubo_ranges[i].block;
-      shader->ubo_ranges[i].start = brw->ubo_ranges[i].start;
-      shader->ubo_ranges[i].length = brw->ubo_ranges[i].length;
+   if (ubo_ranges != NULL) {
+      for (int i = 0; i < ARRAY_SIZE(shader->ubo_ranges); i++) {
+         shader->ubo_ranges[i].block = ubo_ranges[i].block;
+         shader->ubo_ranges[i].start = ubo_ranges[i].start;
+         shader->ubo_ranges[i].length = ubo_ranges[i].length;
+      }
    }

-   shader->nr_params = brw->nr_params;
+   for (int i = 0; i < ARRAY_SIZE(shader->push_sizes); i++)
+      shader->push_sizes[i] = brw->push_sizes[i];
+
    shader->total_scratch = brw->total_scratch;
    shader->total_shared = brw->total_shared;
    shader->program_size = brw->program_size;
@@ -294,7 +297,6 @@ iris_apply_brw_prog_data(struct iris_compiled_shader *shader,

    ralloc_steal(shader, shader->brw_prog_data);
    ralloc_steal(shader->brw_prog_data, (void *)brw->relocs);
-   ralloc_steal(shader->brw_prog_data, brw->param);
 }

 #ifdef INTEL_USE_ELK
@@ -1213,13 +1215,6 @@ iris_setup_uniforms(ASSERTED const struct intel_device_info *devinfo,
    assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
    nir_validate_shader(nir, "after remap");

-   /* We don't use params[] but gallium leaves num_uniforms set. We use this
-    * to detect when cbuf0 exists but we don't need it anymore when we get
-    * here. Instead, zero it out so that the back-end doesn't get confused
-    * when nr_params * 4 != num_uniforms != nr_params * 4.
-    */
-   nir->num_uniforms = 0;
-
    *out_system_values = system_values;
    *out_num_system_values = num_system_values;
    *out_num_cbufs = num_cbufs;
@@ -1932,7 +1927,9 @@ iris_compile_vs(struct iris_screen *screen,

       brw_prog_data->base.base.use_alt_mode = nir->info.use_legacy_math_rules;

-      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
+      struct brw_ubo_range ubo_ranges[4] = {};
+      brw_nir_analyze_ubo_ranges(screen->brw, nir, ubo_ranges);
+      NIR_PASS(_, nir, brw_nir_lower_ubo_ranges, ubo_ranges);

       struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);

@@ -1951,7 +1948,7 @@ iris_compile_vs(struct iris_screen *screen,
       program = brw_compile_vs(screen->brw, &params);
       error = params.base.error_str;
       if (program) {
-         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
+         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base, ubo_ranges);
          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
       }
    } else {
@@ -2174,7 +2171,10 @@ iris_compile_tcs(struct iris_screen *screen,
    if (screen->brw) {
       struct brw_tcs_prog_data *brw_prog_data =
          rzalloc(mem_ctx, struct brw_tcs_prog_data);
-      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
+
+      struct brw_ubo_range ubo_ranges[4] = {};
+      brw_nir_analyze_ubo_ranges(screen->brw, nir, ubo_ranges);
+      NIR_PASS(_, nir, brw_nir_lower_ubo_ranges, ubo_ranges);

       struct brw_compile_tcs_params params = {
          .base = {
@@ -2192,7 +2192,7 @@ iris_compile_tcs(struct iris_screen *screen,
       error = params.base.error_str;

       if (program) {
-         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
+         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base, ubo_ranges);
          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
       }
    } else {
@@ -2377,7 +2377,9 @@ iris_compile_tes(struct iris_screen *screen,
       struct brw_tes_prog_data *brw_prog_data =
          rzalloc(mem_ctx, struct brw_tes_prog_data);

-      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
+      struct brw_ubo_range ubo_ranges[4] = {};
+      brw_nir_analyze_ubo_ranges(screen->brw, nir, ubo_ranges);
+      NIR_PASS(_, nir, brw_nir_lower_ubo_ranges, ubo_ranges);

       struct intel_vue_map input_vue_map;
       brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
@@ -2403,7 +2405,7 @@ iris_compile_tes(struct iris_screen *screen,

       if (program) {
          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
-         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
+         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base, ubo_ranges);
       }
    } else {
 #ifdef INTEL_USE_ELK
@@ -2571,7 +2573,9 @@ iris_compile_gs(struct iris_screen *screen,
       struct brw_gs_prog_data *brw_prog_data =
          rzalloc(mem_ctx, struct brw_gs_prog_data);

-      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.base.ubo_ranges);
+      struct brw_ubo_range ubo_ranges[4] = {};
+      brw_nir_analyze_ubo_ranges(screen->brw, nir, ubo_ranges);
+      NIR_PASS(_, nir, brw_nir_lower_ubo_ranges, ubo_ranges);

       brw_compute_vue_map(devinfo,
                           &brw_prog_data->base.vue_map, nir->info.outputs_written,
@@ -2595,7 +2599,7 @@ iris_compile_gs(struct iris_screen *screen,
       error = params.base.error_str;
       if (program) {
          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
-         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base);
+         iris_apply_brw_prog_data(shader, &brw_prog_data->base.base, ubo_ranges);
       }
    } else {
 #ifdef INTEL_USE_ELK
@@ -2764,7 +2768,9 @@ iris_compile_fs(struct iris_screen *screen,

       brw_prog_data->base.use_alt_mode = nir->info.use_legacy_math_rules;

-      brw_nir_analyze_ubo_ranges(screen->brw, nir, brw_prog_data->base.ubo_ranges);
+      struct brw_ubo_range ubo_ranges[4] = {};
+      brw_nir_analyze_ubo_ranges(screen->brw, nir, ubo_ranges);
+      NIR_PASS(_, nir, brw_nir_lower_ubo_ranges, ubo_ranges);

       struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);

@@ -2788,7 +2794,7 @@ iris_compile_fs(struct iris_screen *screen,
       error = params.base.error_str;
       if (program) {
          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
-         iris_apply_brw_prog_data(shader, &brw_prog_data->base);
+         iris_apply_brw_prog_data(shader, &brw_prog_data->base, ubo_ranges);
       }
    } else {
 #ifdef INTEL_USE_ELK
@@ -3111,6 +3117,15 @@ iris_compile_cs(struct iris_screen *screen,
       struct brw_cs_prog_data *brw_prog_data =
          rzalloc(mem_ctx, struct brw_cs_prog_data);

+      bool subgroup_id_lowered = false;
+      NIR_PASS(subgroup_id_lowered, nir, brw_nir_lower_cs_subgroup_id, devinfo, 0);
+      if (subgroup_id_lowered) {
+         brw_prog_data->base.push_sizes[0] = 4;
+         brw_cs_fill_push_const_info(devinfo, brw_prog_data, 0);
+      } else {
+         brw_cs_fill_push_const_info(devinfo, brw_prog_data, -1);
+      }
+
       struct brw_compile_cs_params params = {
          .base = {
             .mem_ctx = mem_ctx,
@@ -3127,7 +3142,7 @@ iris_compile_cs(struct iris_screen *screen,
       error = params.base.error_str;
       if (program) {
          iris_debug_recompile_brw(screen, dbg, ish, &brw_key.base);
-         iris_apply_brw_prog_data(shader, &brw_prog_data->base);
+         iris_apply_brw_prog_data(shader, &brw_prog_data->base, NULL);
       }
    } else {
 #ifdef INTEL_USE_ELK
@@ -278,7 +278,7 @@ iris_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
    memcpy(prog_data, prog_data_templ, prog_data_size);

    if (screen->brw) {
-      iris_apply_brw_prog_data(shader, prog_data);
+      iris_apply_brw_prog_data(shader, prog_data, NULL);
    } else {
 #ifdef INTEL_USE_ELK
       assert(screen->elk);
@@ -445,9 +445,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)

       struct brw_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
       memset(prog_data, 0, sizeof(*prog_data));
-      prog_data->base.nr_params = nir->num_uniforms / 4;
-
-      brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges);
+      prog_data->base.push_sizes[0] = uniform_size;

       struct genisa_stats stats[3];
       struct brw_compile_fs_params params = {
@@ -463,7 +461,7 @@ iris_ensure_indirect_generation_shader(struct iris_batch *batch)
       };
       program = brw_compile_fs(screen->brw, &params);
       assert(program);
-      iris_apply_brw_prog_data(shader, &prog_data->base);
+      iris_apply_brw_prog_data(shader, &prog_data->base, NULL);
    } else {
 #ifdef INTEL_USE_ELK
       union elk_any_prog_key prog_key;
@@ -9410,21 +9410,34 @@ iris_upload_gpgpu_walker(struct iris_context *ice,
    if ((stage_dirty & IRIS_STAGE_DIRTY_CS) ||
       (GFX_VER == 12 && !batch->contains_draw) ||
       cs_data->local_size[0] == 0 /* Variable local group size */) {
-      uint32_t curbe_data_offset = 0;
-      assert(cs_data->push.cross_thread.dwords == 0 &&
-             cs_data->push.per_thread.dwords == 1 &&
-             cs_data->first_param_is_builtin_subgroup_id);
-      const unsigned push_const_size =
-         iris_cs_push_const_total_size(shader, dispatch.threads);
-      uint32_t *curbe_data_map =
-         stream_state(batch, ice->state.dynamic_uploader,
-                      &ice->state.last_res.cs_thread_ids,
-                      align(push_const_size, 64), 64,
-                      &curbe_data_offset);
-      assert(curbe_data_map);
-      memset(curbe_data_map, 0x5a, align(push_const_size, 64));
-      iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
-                                     curbe_data_map);
+      uint32_t curbe_data_offset, push_const_size;
+      uint32_t *curbe_data_map;
+      if (cs_data->push.cross_thread.dwords == 0 &&
+          cs_data->push.per_thread.dwords == 0) {
+         push_const_size = 64;
+         curbe_data_map =
+            stream_state(batch, ice->state.dynamic_uploader,
+                         &ice->state.last_res.cs_thread_ids,
+                         align(push_const_size, 64), 64,
+                         &curbe_data_offset);
+         assert(curbe_data_map);
+         memset(curbe_data_map, 0x5a, align(push_const_size, 64));
+      } else {
+         assert(cs_data->push.cross_thread.dwords == 0 &&
+                cs_data->push.per_thread.dwords == 1 &&
+                cs_data->first_param_is_builtin_subgroup_id);
+         push_const_size =
+            iris_cs_push_const_total_size(shader, dispatch.threads);
+         curbe_data_map =
+            stream_state(batch, ice->state.dynamic_uploader,
+                         &ice->state.last_res.cs_thread_ids,
+                         align(push_const_size, 64), 64,
+                         &curbe_data_offset);
+         assert(curbe_data_map);
+         memset(curbe_data_map, 0x5a, align(push_const_size, 64));
+         iris_fill_cs_push_const_buffer(screen, shader, dispatch.threads,
+                                        curbe_data_map);
+      }

       iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
          curbe.CURBETotalDataLength = align(push_const_size, 64);
@@ -27,8 +27,6 @@ blorp_compile_fs_brw(struct blorp_context *blorp, void *mem_ctx,
    const struct brw_compiler *compiler = blorp->compiler->brw;

    struct brw_wm_prog_data *wm_prog_data = rzalloc(mem_ctx, struct brw_wm_prog_data);
-   wm_prog_data->base.nr_params = 0;
-   wm_prog_data->base.param = NULL;

    struct brw_nir_compiler_opts opts = {
       .softfp64 = blorp->get_fp64_nir ? blorp->get_fp64_nir(blorp) : NULL,
@@ -125,6 +123,24 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
    return true;
 }

+static bool
+lower_load_uniform(nir_builder *b, nir_intrinsic_instr *intrin,
+                   UNUSED void *data)
+{
+   if (intrin->intrinsic != nir_intrinsic_load_uniform)
+      return false;
+
+   b->cursor = nir_instr_remove(&intrin->instr);
+   nir_def_rewrite_uses(&intrin->def,
+                        nir_load_push_data_intel(b,
+                                                 intrin->def.num_components,
+                                                 intrin->def.bit_size,
+                                                 intrin->src[0].ssa,
+                                                 .base = nir_intrinsic_base(intrin),
+                                                 .range = nir_intrinsic_range(intrin)));
+   return true;
+}
+
 static struct blorp_program
 blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,
                      struct nir_shader *nir)
@@ -140,19 +156,24 @@ blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,
    NIR_PASS(_, nir, nir_lower_io, nir_var_uniform, type_size_scalar_bytes,
             (nir_lower_io_options)0);

+   NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_uniform,
+            nir_metadata_control_flow, NULL);
+
    STATIC_ASSERT(offsetof(struct blorp_wm_inputs, subgroup_id) + 4 ==
                  sizeof(struct blorp_wm_inputs));
-   nir->num_uniforms = offsetof(struct blorp_wm_inputs, subgroup_id);
-   unsigned nr_params = nir->num_uniforms / 4;

    struct brw_cs_prog_data *cs_prog_data = rzalloc(mem_ctx, struct brw_cs_prog_data);
-   cs_prog_data->base.nr_params = nr_params;
-   cs_prog_data->base.param = rzalloc_array(NULL, uint32_t, nr_params);
+   cs_prog_data->base.push_sizes[0] = sizeof(struct blorp_wm_inputs);
+   brw_cs_fill_push_const_info(compiler->devinfo, cs_prog_data,
+                               offsetof(struct blorp_wm_inputs, subgroup_id) / 4);

    NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
             cs_prog_data);
+   NIR_PASS(_, nir, brw_nir_lower_cs_subgroup_id, compiler->devinfo,
+            offsetof(struct blorp_wm_inputs, subgroup_id));
    NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
             nir_metadata_control_flow, NULL);

    struct brw_cs_prog_key cs_key;
    memset(&cs_key, 0, sizeof(cs_key));
@@ -170,9 +191,6 @@ blorp_compile_cs_brw(struct blorp_context *blorp, void *mem_ctx,

    const unsigned *kernel = brw_compile_cs(compiler, &params);

-   ralloc_free(cs_prog_data->base.param);
-   cs_prog_data->base.param = NULL;
-
    return (struct blorp_program) {
       .kernel = kernel,
       .kernel_size = cs_prog_data->base.program_size,
@@ -25,26 +25,22 @@ fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
    block->size = block->regs * 32;
 }

-static void
-cs_fill_push_const_info(const struct intel_device_info *devinfo,
-                        struct brw_cs_prog_data *cs_prog_data)
+extern "C" void
+brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
+                            struct brw_cs_prog_data *cs_prog_data,
+                            int subgroup_id_index)
 {
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
-   int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
-
-   /* The thread ID should be stored in the last param dword */
-   assert(subgroup_id_index == -1 ||
-          subgroup_id_index == (int)prog_data->nr_params - 1);

    unsigned cross_thread_dwords, per_thread_dwords;
-   if (subgroup_id_index >= 0) {
+   if (devinfo->verx10 < 125 && subgroup_id_index >= 0) {
       /* Fill all but the last register with cross-thread payload */
       cross_thread_dwords = 8 * (subgroup_id_index / 8);
-      per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
+      per_thread_dwords = prog_data->push_sizes[0] / 4 - cross_thread_dwords;
       assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
    } else {
       /* Fill all data using cross-thread payload */
-      cross_thread_dwords = prog_data->nr_params;
+      cross_thread_dwords = prog_data->push_sizes[0] / 4;
       per_thread_dwords = 0u;
    }
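A quick worked example of the split above, with illustrative numbers rather than anything taken from this MR: on a pre-Gfx12.5 part with push_sizes[0] = 68 bytes (17 dwords) and subgroup_id_index = 16, cross_thread_dwords = 8 * (16 / 8) = 16 and per_thread_dwords = 17 - 16 = 1, so sixteen dwords are broadcast to every thread and the subgroup ID occupies the per-thread register. With subgroup_id_index = -1, or on Gfx12.5+ where the new devinfo check short-circuits, all 17 dwords land in the cross-thread block and per_thread_dwords is 0.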
@@ -55,7 +51,7 @@ cs_fill_push_const_info(const struct intel_device_info *devinfo,
           cs_prog_data->push.per_thread.size == 0);
    assert(cs_prog_data->push.cross_thread.dwords +
           cs_prog_data->push.per_thread.dwords ==
-          prog_data->nr_params);
+          prog_data->push_sizes[0] / 4);
 }

 static bool
@@ -120,41 +116,6 @@ brw_nir_uses_sampler(nir_shader *shader)
                                 NULL);
 }

-static inline uint32_t *
-brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
-                               unsigned nr_new_params)
-{
-   unsigned old_nr_params = prog_data->nr_params;
-   prog_data->nr_params += nr_new_params;
-   prog_data->param = reralloc(ralloc_parent(prog_data->param),
-                               prog_data->param, uint32_t,
-                               prog_data->nr_params);
-   return prog_data->param + old_nr_params;
-}
-
-static void
-brw_adjust_uniforms(brw_shader &s)
-{
-   if (s.devinfo->verx10 >= 125)
-      return;
-
-   assert(mesa_shader_stage_is_compute(s.stage));
-
-   if (brw_get_subgroup_id_param_index(s.devinfo, s.prog_data) == -1) {
-      /* Add uniforms for builtins after regular NIR uniforms. */
-      assert(s.uniforms == s.prog_data->nr_params);
-
-      /* Subgroup ID must be the last uniform on the list. This will make
-       * easier later to split between cross thread and per thread
-       * uniforms.
-       */
-      uint32_t *param = brw_stage_prog_data_add_params(s.prog_data, 1);
-      *param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
-   }
-
-   s.uniforms = s.prog_data->nr_params;
-}
-
 const unsigned *
 brw_compile_cs(const struct brw_compiler *compiler,
                struct brw_compile_cs_params *params)
@@ -233,7 +194,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
          .archiver = params->base.archiver,
       };
       v[simd] = std::make_unique<brw_shader>(&shader_params);
-      brw_adjust_uniforms(*v[simd]);

       const bool allow_spilling = simd == 0 ||
          (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
@@ -245,8 +205,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
       }

       if (run_cs(*v[simd], allow_spilling)) {
-         cs_fill_push_const_info(compiler->devinfo, prog_data);
-
          brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);

          if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
@@ -600,7 +600,6 @@ brw_emit_repclear_shader(brw_shader &s)
    brw_send_inst *write = NULL;

    assert(s.devinfo->ver < 20);
-   assert(s.uniforms == 0);
    assume(key->nr_color_regions > 0);

    brw_reg color_output = retype(brw_vec4_grf(127, 0), BRW_TYPE_UD);
@@ -1123,7 +1122,7 @@ gfx9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
    if (wm_prog_data->num_varying_inputs)
       return;

-   if (wm_prog_data->base.curb_read_length)
+   if (wm_prog_data->base.push_sizes[0] > 0)
       return;

    wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
@@ -1296,7 +1295,13 @@ brw_assign_urb_setup(brw_shader &s)

    struct brw_wm_prog_data *prog_data = brw_wm_prog_data(s.prog_data);

-   int urb_start = s.payload().num_regs + prog_data->base.curb_read_length;
+   uint32_t push_size = 0;
+   for (uint32_t i = 0; i < 4; i++)
+      push_size += prog_data->base.push_sizes[i];
+
+   const int urb_start =
+      s.payload().num_regs +
+      DIV_ROUND_UP(align(push_size, REG_SIZE * reg_unit(s.devinfo)), REG_SIZE);
    bool read_attribute_payload = false;

    /* Offset all the urb_setup[] index by the actual position of the
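To make the new urb_start arithmetic concrete (hypothetical sizes, not taken from the MR): with push_sizes = {64, 32, 0, 0} the loop gives push_size = 96 bytes; on a part where REG_SIZE * reg_unit() is 32, align(96, 32) = 96 and DIV_ROUND_UP(96, REG_SIZE) = 3, so the varyings start three registers after the payload, which is the same quantity curb_read_length used to encode in register units.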
@@ -564,12 +564,16 @@ enum brw_param_builtin {
    (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) & 0x3)

 struct brw_stage_prog_data {
-   struct brw_ubo_range ubo_ranges[4];
-
-   unsigned nr_params; /**< number of float params/constants */
-
    mesa_shader_stage stage;

+   /**
+    * Amount of push data delivered to the shader (in bytes)
+    *
+    * The HW can push up to 4 ranges from 4 different virtual addresses.
+    * Values should be aligned to 32B.
+    */
+   uint16_t push_sizes[4];
+
    /* If robust_ubo_ranges not 0, push_reg_mask_param specifies the param
    * index (in 32-bit units) where the 4 UBO range limits will be pushed
    * as 8-bit integers. The shader will zero byte i of UBO range j if:
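Two assignments from elsewhere in this MR illustrate how a driver fills the new array (only range 0 in both cases):

   /* blorp compute: push the whole input struct */
   cs_prog_data->base.push_sizes[0] = sizeof(struct blorp_wm_inputs);

   /* iris compute: only the 4-byte subgroup ID was lowered to push data */
   brw_prog_data->base.push_sizes[0] = 4;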
@@ -582,7 +586,6 @@ struct brw_stage_prog_data {
    uint8_t robust_ubo_ranges;
    unsigned push_reg_mask_param;

-   unsigned curb_read_length;
    unsigned total_scratch;
    unsigned total_shared;

@@ -613,14 +616,6 @@ struct brw_stage_prog_data {

    uint32_t source_hash;

-   /* 32-bit identifiers for all push/pull parameters. These can be anything
-    * the driver wishes them to be; the core of the back-end compiler simply
-    * re-arranges them. The one restriction is that the bottom 2^16 values
-    * are reserved for builtins defined in the brw_param_builtin enum defined
-    * above.
-    */
-   uint32_t *param;
-
    /* Whether shader uses atomic operations. */
    bool uses_atomic_load_store;
 };
@@ -1669,6 +1664,11 @@ unsigned
 brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
                              unsigned threads);

+void
+brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
+                            struct brw_cs_prog_data *cs_prog_data,
+                            int subgroup_id_index);
+
 void
 brw_write_shader_relocs(const struct brw_isa_info *isa,
                         void *program,
@@ -785,7 +785,7 @@ enum ENUM_PACKED brw_reg_file {
    ADDRESS,
    VGRF,
    ATTR,
-   UNIFORM, /* prog_data->params[reg] */
+   UNIFORM, /* pushed constant delivered register */
 };

 /* Align1 support for 3-src instructions. Bit 35 of the instruction
@@ -1992,8 +1992,7 @@ get_nir_def(nir_to_brw_state &ntb, const nir_def &def, bool all_sources_uniform)
       is_scalar = get_nir_src(ntb, instr->src[1], 0).is_scalar;
       break;

-   case nir_intrinsic_load_uniform:
-   case nir_intrinsic_load_push_constant:
+   case nir_intrinsic_load_push_data_intel:
       is_scalar = get_nir_src(ntb, instr->src[0], 0).is_scalar;
       break;

@@ -5393,8 +5392,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
       break;
    }

-   case nir_intrinsic_load_uniform:
-   case nir_intrinsic_load_push_constant: {
+   case nir_intrinsic_load_push_data_intel: {
      /* Offsets are in bytes but they should always aligned to
       * the type size
       */
@@ -5472,120 +5470,35 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
          nir_def_first_component_read(&instr->def);
       const unsigned last_component =
          nir_def_last_component_read(&instr->def);
-      const unsigned num_components = last_component - first_component + 1;

-      if (!nir_src_is_const(instr->src[1])) {
-         s.prog_data->has_ubo_pull = true;
-
-         if (instr->intrinsic == nir_intrinsic_load_ubo) {
-            /* load_ubo with non-constant offset. The offset might still be
-             * uniform on non-LSC platforms when loading fewer than 4
-             * components.
-             */
-            brw_reg base_offset = retype(get_nir_src(ntb, instr->src[1], 0),
-                                         BRW_TYPE_UD);
-            if (nir_intrinsic_has_base(instr)) {
-               struct brw_reg imm = brw_imm_int(base_offset.type,
-                                                nir_intrinsic_base(instr));
-               base_offset = bld.ADD(base_offset, imm);
-            }
-
-            const unsigned comps_per_load = brw_type_size_bytes(dest.type) == 8 ? 2 : 4;
-
-            for (unsigned i = first_component;
-                 i <= last_component;
-                 i += comps_per_load) {
-               const unsigned remaining = last_component + 1 - i;
-               xbld.VARYING_PULL_CONSTANT_LOAD(offset(dest, xbld, i),
-                                               surface, surface_handle,
-                                               base_offset,
-                                               i * brw_type_size_bytes(dest.type),
-                                               instr->def.bit_size / 8,
-                                               MIN2(remaining, comps_per_load));
-            }
-         } else {
-            /* load_ubo_uniform_block_intel with non-constant offset */
-            brw_from_nir_emit_memory_access(ntb, bld, xbld, instr);
+      if (instr->intrinsic == nir_intrinsic_load_ubo) {
+         /* load_ubo with non-constant offset. The offset might still be
+          * uniform on non-LSC platforms when loading fewer than 4 components.
+          */
+         brw_reg base_offset = retype(get_nir_src(ntb, instr->src[1], 0),
+                                      BRW_TYPE_UD);
+         if (nir_intrinsic_has_base(instr)) {
+            struct brw_reg imm = brw_imm_int(base_offset.type,
+                                             nir_intrinsic_base(instr));
+            base_offset = bld.ADD(base_offset, imm);
+         }
+
+         const unsigned comps_per_load = brw_type_size_bytes(dest.type) == 8 ? 2 : 4;
+
+         for (unsigned i = first_component;
+              i <= last_component;
+              i += comps_per_load) {
+            const unsigned remaining = last_component + 1 - i;
+            xbld.VARYING_PULL_CONSTANT_LOAD(offset(dest, xbld, i),
+                                            surface, surface_handle,
+                                            base_offset,
+                                            i * brw_type_size_bytes(dest.type),
+                                            instr->def.bit_size / 8,
+                                            MIN2(remaining, comps_per_load));
         }
      } else {
-         /* Even if we are loading doubles, a pull constant load will load
-          * a 32-bit vec4, so should only reserve vgrf space for that. If we
-          * need to load a full dvec4 we will have to emit 2 loads. This is
-          * similar to demote_pull_constants(), except that in that case we
-          * see individual accesses to each component of the vector and then
-          * we let CSE deal with duplicate loads. Here we see a vector access
-          * and we have to split it if necessary.
-          */
-         const unsigned type_size = brw_type_size_bytes(dest.type);
-         const unsigned load_offset =
-            nir_src_as_uint(instr->src[1]) + first_component * type_size +
-            (nir_intrinsic_has_base(instr) ? nir_intrinsic_base(instr) : 0);
-         const unsigned end_offset = load_offset + num_components * type_size;
-         const unsigned ubo_block =
-            brw_nir_ubo_surface_index_get_push_block(instr->src[0]);
-         const unsigned offset_256b = load_offset / 32;
-         const unsigned end_256b = DIV_ROUND_UP(end_offset, 32);
-
-         /* See if we've selected this as a push constant candidate */
-         brw_reg push_reg;
-         for (int i = 0; i < 4; i++) {
-            const struct brw_ubo_range *range = &s.prog_data->ubo_ranges[i];
-            if (range->block == ubo_block &&
-                offset_256b >= range->start &&
-                end_256b <= range->start + range->length) {
-
-               push_reg = brw_uniform_reg(UBO_START + i, dest.type);
-               push_reg.offset = load_offset - 32 * range->start;
-               break;
-            }
-         }
-
-         if (push_reg.file != BAD_FILE) {
-            for (unsigned i = first_component; i <= last_component; i++) {
-               xbld.MOV(offset(dest, xbld, i),
-                        byte_offset(push_reg,
-                                    (i - first_component) * type_size));
-            }
-            break;
-         }
-
-         s.prog_data->has_ubo_pull = true;
-
-         if (instr->intrinsic == nir_intrinsic_load_ubo_uniform_block_intel) {
-            brw_from_nir_emit_memory_access(ntb, bld, xbld, instr);
-            break;
-         }
-
-         const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
-         const brw_builder ubld = bld.exec_all().group(block_sz / 4, 0);
-
-         for (unsigned c = 0; c < num_components;) {
-            const unsigned base = load_offset + c * type_size;
-            /* Number of usable components in the next block-aligned load. */
-            const unsigned count = MIN2(num_components - c,
-                                        (block_sz - base % block_sz) / type_size);
-
-            const brw_reg packed_consts = ubld.vgrf(BRW_TYPE_UD);
-            brw_reg srcs[PULL_UNIFORM_CONSTANT_SRCS];
-            srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE] = surface;
-            srcs[PULL_UNIFORM_CONSTANT_SRC_SURFACE_HANDLE] = surface_handle;
-            srcs[PULL_UNIFORM_CONSTANT_SRC_OFFSET] = brw_imm_ud(base & ~(block_sz - 1));
-            srcs[PULL_UNIFORM_CONSTANT_SRC_SIZE] = brw_imm_ud(block_sz);
-
-            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
-                      srcs, PULL_UNIFORM_CONSTANT_SRCS);
-
-            const brw_reg consts =
-               retype(byte_offset(packed_consts, base & (block_sz - 1)),
-                      dest.type);
-
-            for (unsigned d = 0; d < count; d++) {
-               xbld.MOV(offset(dest, xbld, first_component + c + d),
-                        component(consts, d));
-            }
-
-            c += count;
-         }
+         /* load_ubo_uniform_block_intel with non-constant offset */
+         brw_from_nir_emit_memory_access(ntb, bld, xbld, instr);
      }
      break;
   }
@@ -3144,8 +3144,7 @@ nir_def *
 brw_nir_load_global_const(nir_builder *b, nir_intrinsic_instr *load,
                           nir_def *base_addr, unsigned off)
 {
-   assert(load->intrinsic == nir_intrinsic_load_push_constant ||
-          load->intrinsic == nir_intrinsic_load_uniform);
+   assert(load->intrinsic == nir_intrinsic_load_push_data_intel);

    unsigned bit_size = load->def.bit_size;
    assert(bit_size >= 8 && bit_size % 8 == 0);
@@ -179,6 +179,10 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
 bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
                                  const struct intel_device_info *devinfo,
                                  struct brw_cs_prog_data *prog_data);
+bool brw_nir_lower_cs_subgroup_id(nir_shader *nir,
+                                  const struct intel_device_info *devinfo,
+                                  unsigned subgroup_id_offset);
+
 bool brw_nir_lower_alpha_to_coverage(nir_shader *shader);
 bool brw_needs_vertex_attributes_bypass(const nir_shader *shader);
 void brw_nir_lower_fs_barycentrics(nir_shader *shader);
@@ -354,6 +358,9 @@ void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
                                 nir_shader *nir,
                                 struct brw_ubo_range out_ranges[4]);

+bool brw_nir_lower_ubo_ranges(nir_shader *nir,
+                              struct brw_ubo_range out_ranges[4]);
+
 void brw_nir_optimize(nir_shader *nir,
                       const struct intel_device_info *devinfo);
@@ -129,36 +129,37 @@ analyze_ubos_block(struct ubo_analysis_state *state, nir_block *block)
      if (intrin->intrinsic != nir_intrinsic_load_ubo)
         continue;

-      if (brw_nir_ubo_surface_index_is_pushable(intrin->src[0]) &&
-          nir_src_is_const(intrin->src[1])) {
-         const int block = brw_nir_ubo_surface_index_get_push_block(intrin->src[0]);
-         const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
-         const unsigned sizeof_GRF = REG_SIZE * reg_unit(state->devinfo);
-         const int offset = byte_offset / sizeof_GRF;
-
-         /* Avoid shifting by larger than the width of our bitfield, as this
-          * is undefined in C. Even if we require multiple bits to represent
-          * the entire value, it's OK to record a partial value - the backend
-          * is capable of falling back to pull loads for later components of
-          * vectors, as it has to shrink ranges for other reasons anyway.
-          */
-         if (offset >= 64)
-            continue;
-
-         /* The value might span multiple sizeof(GRF) chunks. */
-         const unsigned num_components =
-            nir_def_last_component_read(&intrin->def) + 1;
-         const int bytes = num_components * (intrin->def.bit_size / 8);
-         const int start = ROUND_DOWN_TO(byte_offset, sizeof_GRF);
-         const int end = align(byte_offset + bytes, sizeof_GRF);
-         const int chunks = (end - start) / sizeof_GRF;
-
-         /* TODO: should we count uses in loops as higher benefit? */
-         struct ubo_block_info *info = get_block_info(state, block);
-         info->offsets |= ((1ull << chunks) - 1) << offset;
-         info->uses[offset]++;
-      }
+      if (!brw_nir_ubo_surface_index_is_pushable(intrin->src[0]) ||
+          !nir_src_is_const(intrin->src[1]))
+         continue;
+
+      const int block = brw_nir_ubo_surface_index_get_push_block(intrin->src[0]);
+      const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
+      const unsigned sizeof_GRF = REG_SIZE * reg_unit(state->devinfo);
+      const int offset = byte_offset / sizeof_GRF;
+
+      /* Avoid shifting by larger than the width of our bitfield, as this
+       * is undefined in C. Even if we require multiple bits to represent
+       * the entire value, it's OK to record a partial value - the backend
+       * is capable of falling back to pull loads for later components of
+       * vectors, as it has to shrink ranges for other reasons anyway.
+       */
+      if (offset >= 64)
+         continue;
+
+      /* The value might span multiple sizeof(GRF) chunks. */
+      const unsigned num_components =
+         nir_def_last_component_read(&intrin->def) + 1;
+      const int bytes = num_components * (intrin->def.bit_size / 8);
+      const int start = ROUND_DOWN_TO(byte_offset, sizeof_GRF);
+      const int end = align(byte_offset + bytes, sizeof_GRF);
+      const int chunks = (end - start) / sizeof_GRF;
+
+      /* TODO: should we count uses in loops as higher benefit? */
+      struct ubo_block_info *info = get_block_info(state, block);
+      info->offsets |= ((1ull << chunks) - 1) << offset;
+      info->uses[offset]++;
   }
 }
@@ -316,3 +317,53 @@ brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,

    ralloc_free(ranges.mem_ctx);
 }
+
+static bool
+lower_load_ubo_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+   if (intrin->intrinsic != nir_intrinsic_load_ubo)
+      return false;
+
+   if (!brw_nir_ubo_surface_index_is_pushable(intrin->src[0]) ||
+       !nir_src_is_const(intrin->src[1]))
+      return false;
+
+   const int block = brw_nir_ubo_surface_index_get_push_block(intrin->src[0]);
+   const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
+   const unsigned num_components =
+      nir_def_last_component_read(&intrin->def) + 1;
+   const int bytes = num_components * (intrin->def.bit_size / 8);
+
+   const struct brw_ubo_range *range = data;
+   for (uint32_t i = 0; i < 4; i++) {
+      if (range[i].block != block)
+         continue;
+
+      if (byte_offset < range[i].start * 32 ||
+          (byte_offset + bytes) > (range[i].start + range[i].length) * 32)
+         continue;
+
+      b->cursor = nir_before_instr(&intrin->instr);
+      nir_def *data = nir_load_push_data_intel(
+         b,
+         nir_def_last_component_read(&intrin->def) + 1,
+         intrin->def.bit_size,
+         nir_imm_int(b, 0),
+         .base = byte_offset - range[i].start * 32,
+         .range = nir_intrinsic_range(intrin));
+      nir_def_replace(&intrin->def, data);
+
+      return true;
+   }
+
+   return false;
+}
+
+bool
+brw_nir_lower_ubo_ranges(nir_shader *nir,
+                         struct brw_ubo_range out_ranges[4])
+{
+   return nir_shader_intrinsics_pass(nir, lower_load_ubo_instr,
+                                     nir_metadata_control_flow,
+                                     out_ranges);
+}
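The intended usage, as wired up in the iris compile paths earlier in this diff, is to analyze first and then run the lowering with the same ranges (here `compiler` stands in for iris' `screen->brw`):

   struct brw_ubo_range ubo_ranges[4] = {};
   brw_nir_analyze_ubo_ranges(compiler, nir, ubo_ranges);
   NIR_PASS(_, nir, brw_nir_lower_ubo_ranges, ubo_ranges);

The ranges are afterwards handed to iris_apply_brw_prog_data() so state upload knows which UBO windows were promoted to push data.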
@@ -387,3 +387,36 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,

    return state.progress;
 }
+
+static bool
+lower_cs_subgroup_id_instr(nir_builder *b,
+                           nir_intrinsic_instr *intrin,
+                           void *data)
+{
+   if (intrin->intrinsic != nir_intrinsic_load_subgroup_id)
+      return false;
+
+   const unsigned *subgroup_id_offset_ptr = data;
+
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_def_replace(&intrin->def,
+                   nir_load_push_data_intel(
+                      b, 1, 32, nir_imm_int(b, 0),
+                      .base = *subgroup_id_offset_ptr,
+                      .range = 4));
+
+   return true;
+}
+
+bool
+brw_nir_lower_cs_subgroup_id(nir_shader *nir,
+                             const struct intel_device_info *devinfo,
+                             unsigned subgroup_id_offset)
+{
+   if (devinfo->verx10 >= 125)
+      return false;
+
+   return nir_shader_intrinsics_pass(nir, lower_cs_subgroup_id_instr,
+                                     nir_metadata_control_flow,
+                                     &subgroup_id_offset);
+}
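Both call sites added by this MR show the expected pattern; iris calls the pass with offset 0 before sizing push_sizes[0], while blorp passes the byte offset of the subgroup ID within its push data block:

   NIR_PASS(_, nir, brw_nir_lower_cs_subgroup_id, compiler->devinfo,
            offsetof(struct blorp_wm_inputs, subgroup_id));

On Gfx12.5+ the pass returns false without touching the shader, matching the devinfo check above.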
@@ -135,8 +135,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
          nir_instr_remove(instr);
          break;

-      case nir_intrinsic_load_uniform:
-      case nir_intrinsic_load_push_constant:
+      case nir_intrinsic_load_push_data_intel:
         /* We don't want to lower this in the launch trampoline.
         *
         * Also if the driver chooses to use an inline push address, we
@@ -427,7 +427,6 @@ brw_nir_create_raygen_trampoline(const struct brw_compiler *compiler,
    * passed in as push constants in the first register. We deal with the
    * raygen BSR address here; the global data we'll deal with later.
    */
-   b.shader->num_uniforms = 32;
   nir_def *raygen_param_bsr_addr =
      load_trampoline_param(&b, raygen_bsr_addr, 1, 64);
   nir_def *is_indirect =
@@ -425,7 +425,6 @@ brw_shader::brw_shader(const brw_shader_params *params)
    this->source_depth_to_render_target = false;
    this->first_non_payload_grf = 0;

-   this->uniforms = this->nir->num_uniforms / 4;
    this->last_scratch = 0;

    memset(&this->shader_stats, 0, sizeof(this->shader_stats));
@ -621,40 +620,22 @@ brw_shader::mark_last_urb_write_with_eot()
return true;
}

static unsigned
round_components_to_whole_registers(const intel_device_info *devinfo,
unsigned c)
{
return DIV_ROUND_UP(c, 8 * reg_unit(devinfo)) * reg_unit(devinfo);
}

void
brw_shader::assign_curb_setup()
{
unsigned uniform_push_length =
uint32_t ranges_start[4];
round_components_to_whole_registers(devinfo, prog_data->nr_params);
this->push_data_size = 0;
for (uint32_t i = 0; i < 4; i++) {
unsigned ubo_push_length = 0;
ranges_start[i] = this->push_data_size / REG_SIZE;
unsigned ubo_push_start[4];
this->push_data_size += align(prog_data->push_sizes[i], REG_SIZE);
for (int i = 0; i < 4; i++) {
ubo_push_start[i] = 8 * (ubo_push_length + uniform_push_length);
ubo_push_length += prog_data->ubo_ranges[i].length;

assert(ubo_push_start[i] % (8 * reg_unit(devinfo)) == 0);
assert(ubo_push_length % (1 * reg_unit(devinfo)) == 0);
}

prog_data->curb_read_length = uniform_push_length + ubo_push_length;
if (stage == MESA_SHADER_FRAGMENT &&
((struct brw_wm_prog_key *)key)->null_push_constant_tbimr_workaround)
prog_data->curb_read_length = MAX2(1, prog_data->curb_read_length);

uint64_t used = 0;
const bool pull_constants =
devinfo->verx10 >= 125 &&
(mesa_shader_stage_is_compute(stage) ||
mesa_shader_stage_is_mesh(stage)) &&
uniform_push_length;
this->push_data_size > 0;

if (pull_constants) {
const bool pull_constants_a64 =
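The new layout walk is just byte accumulation rounded up to whole registers. A small standalone sketch of that arithmetic; the example sizes and the 32-byte register granularity are assumptions for illustration, not taken from a real shader:

```c
#include <stdint.h>
#include <stdio.h>

#define REG_SIZE 32                        /* assumed 32-byte GRF granularity */
#define ALIGN(v, a) (((v) + (a) - 1) / (a) * (a))

int main(void)
{
   /* Hypothetical per-slot push sizes in bytes: slot 0 is the driver/API
    * push constants, slots 1-3 are promoted UBO ranges. */
   const uint32_t push_sizes[4] = {48, 96, 0, 20};

   uint32_t ranges_start[4];   /* start of each slot, in registers */
   uint32_t push_data_size = 0;

   for (uint32_t i = 0; i < 4; i++) {
      ranges_start[i] = push_data_size / REG_SIZE;
      push_data_size += ALIGN(push_sizes[i], REG_SIZE);
      printf("slot %u: starts at GRF %u, occupies %u bytes\n",
             i, ranges_start[i], ALIGN(push_sizes[i], REG_SIZE));
   }
   printf("total push data: %u bytes (%u GRFs)\n",
          push_data_size, push_data_size / REG_SIZE);
   return 0;
}
```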
@ -688,9 +669,11 @@ brw_shader::assign_curb_setup()
/* On Gfx12-HP we load constants at the start of the program using A32
 * stateless messages.
 */
for (unsigned i = 0; i < uniform_push_length;) {
const unsigned n_push_data_regs = reg_unit(devinfo) *
DIV_ROUND_UP(this->push_data_size, reg_unit(devinfo) * REG_SIZE);
for (unsigned i = 0; i < this->push_data_size / REG_SIZE;) {
/* Limit ourselves to LSC HW limit of 8 GRFs (256bytes D32V64). */
unsigned num_regs = MIN2(uniform_push_length - i, 8);
unsigned num_regs = MIN2(this->push_data_size / REG_SIZE - i, 8);
assert(num_regs > 0);
num_regs = 1 << util_logbase2(num_regs);
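The pull-constant prolog splits the push data into LSC-friendly loads: at most 8 GRFs per message, rounded down to a power of two. A self-contained sketch of that chunking; the 23-register total is an arbitrary example:

```c
#include <stdio.h>

/* floor(log2(x)) for x > 0, mirroring what util_logbase2() is used for here. */
static unsigned log2_floor(unsigned x)
{
   unsigned l = 0;
   while (x >>= 1)
      l++;
   return l;
}

int main(void)
{
   const unsigned total_regs = 23;   /* assumed push-data size in GRFs */

   for (unsigned i = 0; i < total_regs;) {
      unsigned num_regs = total_regs - i;
      if (num_regs > 8)
         num_regs = 8;                       /* LSC limit: 256B = 8 GRFs */
      num_regs = 1u << log2_floor(num_regs); /* power-of-two message size */
      printf("load %u GRFs at GRF offset %u\n", num_regs, i);
      i += num_regs;
   }
   return 0;
}
```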
@ -746,7 +729,7 @@ brw_shader::assign_curb_setup()
send->size_written =
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE;
assert((payload().num_regs + i + send->size_written / REG_SIZE) <=
(payload().num_regs + prog_data->curb_read_length));
(payload().num_regs + n_push_data_regs));
send->is_volatile = true;

send->src[SEND_SRC_DESC] =

@ -766,28 +749,13 @@ brw_shader::assign_curb_setup()
for (unsigned int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == UNIFORM) {
int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4;
int constant_nr;
if (inst->src[i].nr >= UBO_START) {
/* constant_nr is in 32-bit units, the rest are in bytes */
constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] +
inst->src[i].offset / 4;
} else if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
constant_nr = uniform_nr;
} else {
/* Section 5.11 of the OpenGL 4.1 spec says:
 * "Out-of-bounds reads return undefined values, which include
 * values from other variables of the active program or zero."
 * Just return the first push constant.
 */
constant_nr = 0;
}

assert(constant_nr / 8 < 64);
assert(uniform_nr / 8 < 64);
used |= BITFIELD64_BIT(constant_nr / 8);
used |= BITFIELD64_BIT(uniform_nr / 8);

struct brw_reg brw_reg = brw_vec1_grf(payload().num_regs +
constant_nr / 8,
uniform_nr / 8,
constant_nr % 8);
uniform_nr % 8);
brw_reg.abs = inst->src[i].abs;
brw_reg.negate = inst->src[i].negate;

@ -824,15 +792,16 @@ brw_shader::assign_curb_setup()
ubld.group(16, 0).ADD(horiz_offset(offset_base, 16), offset_base, brw_imm_uw(16));

u_foreach_bit(i, prog_data->robust_ubo_ranges) {
struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
const unsigned range_length =
DIV_ROUND_UP(prog_data->push_sizes[i], REG_SIZE);

unsigned range_start = ubo_push_start[i] / 8;
const unsigned range_start = ranges_start[i];
uint64_t want_zero = (used >> range_start) & BITFIELD64_MASK(ubo_range->length);
uint64_t want_zero = (used >> range_start) & BITFIELD64_MASK(range_length);
if (!want_zero)
continue;

const unsigned grf_start = payload().num_regs + range_start;
const unsigned grf_end = grf_start + ubo_range->length;
const unsigned grf_end = grf_start + range_length;
const unsigned max_grf_mask = max_grf_writes * 4;
unsigned grf = grf_start;
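Robust-buffer zeroing only touches GRFs the shader actually reads, which is tracked in a 64-bit `used` bitmask. A standalone sketch of the mask math; the example mask and range are made up:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define BITFIELD64_BIT(b)   (UINT64_C(1) << (b))
#define BITFIELD64_MASK(n)  ((n) >= 64 ? ~UINT64_C(0) : BITFIELD64_BIT(n) - 1)

int main(void)
{
   /* Pretend the shader reads push GRFs 0, 1, 5 and 6. */
   uint64_t used = BITFIELD64_BIT(0) | BITFIELD64_BIT(1) |
                   BITFIELD64_BIT(5) | BITFIELD64_BIT(6);

   /* Hypothetical robust UBO range: starts at GRF 4 and is 4 GRFs long. */
   const unsigned range_start = 4, range_length = 4;

   uint64_t want_zero = (used >> range_start) & BITFIELD64_MASK(range_length);
   printf("GRFs to handle within the range: 0x%" PRIx64 "\n", want_zero);
   /* Bits 1 and 2 are set: only GRFs 5 and 6 of the push data are read and
    * need the out-of-bounds zeroing; GRFs 4 and 7 can be skipped. */
   return 0;
}
```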
@ -899,7 +868,10 @@ brw_shader::assign_curb_setup()
}

/* This may be updated in assign_urb_setup or assign_vs_urb_setup. */
this->first_non_payload_grf = payload().num_regs + prog_data->curb_read_length;
this->first_non_payload_grf = payload().num_regs +
DIV_ROUND_UP(
align(this->push_data_size, REG_SIZE * reg_unit(devinfo)),
REG_SIZE);

this->debug_optimizer(this->nir, "assign_curb_setup", 90, 0);
}

@ -935,7 +907,9 @@ brw_shader::convert_attr_sources_to_hw_regs(brw_inst *inst)
if (inst->src[i].file == ATTR) {
assert(inst->src[i].nr == 0);
int grf = payload().num_regs +
prog_data->curb_read_length +
DIV_ROUND_UP(
align(this->push_data_size, REG_SIZE * reg_unit(devinfo)),
REG_SIZE) +
inst->src[i].offset / REG_SIZE;

/* As explained at brw_lower_vgrf_to_fixed_grf, From the Haswell PRM:

@ -969,24 +943,6 @@ brw_shader::convert_attr_sources_to_hw_regs(brw_inst *inst)
}
}

int
brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
const brw_stage_prog_data *prog_data)
{
if (prog_data->nr_params == 0)
return -1;

if (devinfo->verx10 >= 125)
return -1;

/* The local thread id is always the last parameter in the list */
uint32_t last_param = prog_data->param[prog_data->nr_params - 1];
if (last_param == BRW_PARAM_BUILTIN_SUBGROUP_ID)
return prog_data->nr_params - 1;

return -1;
}

uint32_t
brw_fb_write_msg_control(const brw_inst *inst,
const struct brw_wm_prog_data *prog_data)
@ -144,8 +144,11 @@ public:
brw_analysis<brw_def_analysis, brw_shader> def_analysis;
brw_analysis<brw_ip_ranges, brw_shader> ip_ranges_analysis;

/** Number of uniform variable components visited. */
unsigned uniforms;
/** Amount of push constant data delivered to the shader
 *
 * Aligned to native GRF registers.
 */
unsigned push_data_size;

/** Byte-offset for the next available spot in the scratch space buffer. */
unsigned last_scratch;
@ -290,9 +293,6 @@ uint32_t brw_fb_write_msg_control(const brw_inst *inst,

void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);

int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
const brw_stage_prog_data *prog_data);

void brw_from_nir(brw_shader *s);

void brw_shader_phase_update(brw_shader &s, enum brw_shader_phase phase);

@ -380,19 +380,9 @@ void
brw_cs_thread_payload::load_subgroup_id(const brw_builder &bld,
brw_reg &dest) const
{
auto devinfo = bld.shader->devinfo;
assert(bld.shader->devinfo->verx10 >= 125);
dest = retype(dest, BRW_TYPE_UD);
bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
if (subgroup_id_.file != BAD_FILE) {
assert(devinfo->verx10 >= 125);
bld.AND(dest, subgroup_id_, brw_imm_ud(INTEL_MASK(7, 0)));
} else {
assert(devinfo->verx10 < 125);
assert(mesa_shader_stage_is_compute(bld.shader->stage));
int index = brw_get_subgroup_id_param_index(devinfo,
bld.shader->prog_data);
bld.MOV(dest, brw_uniform_reg(index, BRW_TYPE_UD));
}
}

brw_task_mesh_thread_payload::brw_task_mesh_thread_payload(brw_shader &v)
@ -147,15 +147,10 @@ compile_shader(struct anv_device *device,
};
NIR_PASS(_, nir, nir_opt_load_store_vectorize, &options);

nir->num_uniforms = uniform_size;
prog_data.base.push_sizes[0] = uniform_size;

void *temp_ctx = ralloc_context(NULL);

prog_data.base.nr_params = nir->num_uniforms / 4;
prog_data.base.param = rzalloc_array(temp_ctx, uint32_t, prog_data.base.nr_params);

brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);

const unsigned *program;
if (stage == MESA_SHADER_FRAGMENT) {
struct genisa_stats stats[3];
@ -39,22 +39,23 @@ struct vk_pipeline_robustness_state;
(sizeof(((struct anv_push_constants *)0)->field))

#define anv_load_driver_uniform(b, components, field) \
nir_load_push_constant(b, components, \
nir_load_push_data_intel(b, components, \
anv_drv_const_size(field) * 8, \
nir_imm_int(b, 0), \
.base = anv_drv_const_offset(field), \
.range = components * anv_drv_const_size(field))
/* Use load_uniform for indexed values since load_push_constant requires that
/* Use ACCESS_NON_UNIFORM for indexed values since load_push_constant requires
 * the offset source is dynamically uniform in the subgroup which we cannot
 * that the offset source is dynamically uniform in the subgroup which we
 * guarantee.
 * cannot guarantee.
 */
#define anv_load_driver_uniform_indexed(b, components, field, idx) \
nir_load_uniform(b, components, \
nir_load_push_data_intel(b, components, \
anv_drv_const_size(field[0]) * 8, \
nir_imul_imm(b, idx, \
anv_drv_const_size(field[0])), \
.base = anv_drv_const_offset(field), \
.range = anv_drv_const_size(field))
.range = anv_drv_const_size(field), \
.access = ACCESS_NON_UNIFORM)

/* This map is represent a mapping where the key is the NIR
 * nir_intrinsic_resource_intel::block index. It allows mapping bindless UBOs
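As a usage illustration, a hypothetical lowering callback could fetch driver constants with the two macros as sketched below. This is only a sketch assuming Mesa's NIR builder headers; `cs.subgroup_id` is a field used elsewhere in this series, while `example_load_driver_values` and the `desc_offsets` array field are invented for the example.

```c
/* Sketch only: what a lowering callback might emit with these macros. */
static nir_def *
example_load_driver_values(nir_builder *b, nir_def *dyn_index)
{
   /* Constant offset: the plain macro keeps the load implicitly uniform. */
   nir_def *subgroup_id = anv_load_driver_uniform(b, 1, cs.subgroup_id);

   /* Dynamic index: the offset is not dynamically uniform, so the indexed
    * macro tags the load with ACCESS_NON_UNIFORM. "desc_offsets" is a
    * made-up array field standing in for a real driver-constant array. */
   nir_def *entry =
      anv_load_driver_uniform_indexed(b, 1, desc_offsets, dyn_index);

   return nir_iadd(b, subgroup_id, entry);
}
```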
@ -26,6 +26,94 @@
#include "compiler/brw/brw_nir.h"
#include "util/mesa-sha1.h"

struct lower_to_push_data_intel_state {
   const struct anv_pipeline_bind_map *bind_map;
   const struct anv_pipeline_push_map *push_map;
};

static bool
lower_to_push_data_intel(nir_builder *b,
                         nir_intrinsic_instr *intrin,
                         void *data)
{
   const struct lower_to_push_data_intel_state *state = data;
   /* With bindless shaders we load uniforms with SEND messages. All the push
    * constants are located after the RT_DISPATCH_GLOBALS. We just need to add
    * the offset to the address right after RT_DISPATCH_GLOBALS (see
    * brw_nir_lower_rt_intrinsics.c).
    */
   const unsigned base_offset =
      brw_shader_stage_is_bindless(b->shader->info.stage) ?
      0 : state->bind_map->push_ranges[0].start * 32;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_push_data_intel: {
      nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) - base_offset);
      return true;
   }

   case nir_intrinsic_load_push_constant: {
      b->cursor = nir_before_instr(&intrin->instr);
      nir_def *data = nir_load_push_data_intel(
         b,
         intrin->def.num_components,
         intrin->def.bit_size,
         intrin->src[0].ssa,
         .base = nir_intrinsic_base(intrin) - base_offset,
         .range = nir_intrinsic_range(intrin));
      nir_def_replace(&intrin->def, data);
      return true;
   }

   case nir_intrinsic_load_ubo: {
      if (!brw_nir_ubo_surface_index_is_pushable(intrin->src[0]) ||
          !nir_src_is_const(intrin->src[1]))
         return false;

      const int block = brw_nir_ubo_surface_index_get_push_block(intrin->src[0]);
      const unsigned byte_offset = nir_src_as_uint(intrin->src[1]);
      const unsigned num_components =
         nir_def_last_component_read(&intrin->def) + 1;
      const int bytes = num_components * (intrin->def.bit_size / 8);

      const struct anv_pipeline_binding *binding =
         &state->push_map->block_to_descriptor[block];

      uint32_t range_offset = 0;
      const struct anv_push_range *push_range = NULL;
      for (uint32_t i = 0; i < 4; i++) {
         if (state->bind_map->push_ranges[i].set == binding->set &&
             state->bind_map->push_ranges[i].index == binding->index &&
             byte_offset >= state->bind_map->push_ranges[i].start * 32 &&
             (byte_offset + bytes) <= (state->bind_map->push_ranges[i].start +
                                       state->bind_map->push_ranges[i].length) * 32) {
            push_range = &state->bind_map->push_ranges[i];
            break;
         } else {
            range_offset += state->bind_map->push_ranges[i].length * 32;
         }
      }

      if (push_range == NULL)
         return false;

      b->cursor = nir_before_instr(&intrin->instr);
      nir_def *data = nir_load_push_data_intel(
         b,
         nir_def_last_component_read(&intrin->def) + 1,
         intrin->def.bit_size,
         nir_imm_int(b, 0),
         .base = range_offset + byte_offset - push_range->start * 32,
         .range = nir_intrinsic_range(intrin));
      nir_def_replace(&intrin->def, data);
      return true;
   }

   default:
      return false;
   }
}

bool
anv_nir_compute_push_layout(nir_shader *nir,
const struct anv_physical_device *pdevice,
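The load_ubo rewrite boils down to finding the 32-byte push range that covers the constant offset and translating a (set, binding, byte offset) tuple into an offset inside the flattened push data. A standalone sketch of that search with made-up ranges:

```c
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for anv_push_range: start/length are in 32-byte units. */
struct push_range { uint32_t set, index, start, length; };

int main(void)
{
   /* Hypothetical layout: slot 0 is API push constants (set 255 used here as
    * a stand-in for ANV_DESCRIPTOR_SET_PUSH_CONSTANTS), slot 1 a UBO range. */
   const struct push_range ranges[4] = {
      {255, 0, 0, 2},
      {0 /* set */, 3 /* binding */, 4 /* starts 128B into the UBO */, 3},
   };
   const uint32_t set = 0, index = 3, byte_offset = 160, bytes = 16;

   uint32_t range_offset = 0;
   for (uint32_t i = 0; i < 4; i++) {
      if (ranges[i].set == set && ranges[i].index == index &&
          byte_offset >= ranges[i].start * 32 &&
          byte_offset + bytes <= (ranges[i].start + ranges[i].length) * 32) {
         /* Offset of this range in push data + offset within the range. */
         printf("push data offset = %u bytes\n",
                range_offset + byte_offset - ranges[i].start * 32);
         return 0;
      }
      range_offset += ranges[i].length * 32;
   }
   printf("not pushed, keep the load_ubo\n");
   return 0;
}
```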
@ -57,8 +145,8 @@ anv_nir_compute_push_layout(nir_shader *nir,
has_const_ubo = true;
break;

case nir_intrinsic_load_uniform:
case nir_intrinsic_load_push_constant:
case nir_intrinsic_load_push_constant: {
case nir_intrinsic_load_push_data_intel: {
unsigned base = nir_intrinsic_base(intrin);
unsigned range = nir_intrinsic_range(intrin);
push_start = MIN2(push_start, base);

@ -80,8 +168,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
}
}

const bool has_push_intrinsic = push_start <= push_end;

const bool push_ubo_ranges =
has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
!brw_shader_stage_requires_bindless_resources(nir->info.stage);

@ -143,18 +229,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
push_end = MAX2(push_end, tess_config_end);
}

if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
/* For compute shaders, we always have to have the subgroup ID. The
 * back-end compiler will "helpfully" add it for us in the last push
 * constant slot. Yes, there is an off-by-one error here but that's
 * because the back-end will add it so we want to claim the number of
 * push constants one dword less than the full amount including
 * gl_SubgroupId.
 */
assert(push_end <= anv_drv_const_offset(cs.subgroup_id));
push_end = anv_drv_const_offset(cs.subgroup_id);
}

/* Align push_start down to a 32B (for 3DSTATE_CONSTANT) and make it no
 * larger than push_end (no push constants is indicated by push_start =
 * UINT_MAX).
@ -186,9 +260,20 @@ anv_nir_compute_push_layout(nir_shader *nir,

/* For scalar, push data size needs to be aligned to a DWORD. */
const unsigned alignment = 4;
nir->num_uniforms = align(push_end - push_start, alignment);
const unsigned push_size = align(push_end - push_start, alignment);
prog_data->nr_params = nir->num_uniforms / 4;
prog_data->push_sizes[0] = push_size;
prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

/* Fill the compute push constant layout (cross/per thread constants) for
 * platforms pre Gfx12.5.
 */
if (nir->info.stage == MESA_SHADER_COMPUTE) {
const int subgroup_id_index =
push_end == (anv_drv_const_offset(cs.subgroup_id) +
anv_drv_const_size(cs.subgroup_id)) ?
(anv_drv_const_offset(cs.subgroup_id) - push_start) / 4 : -1;
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
brw_cs_fill_push_const_info(devinfo, cs_prog_data, subgroup_id_index);
}

struct anv_push_range push_constant_range = {
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
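The subgroup-id slot for pre-Gfx12.5 compute shaders is now resolved from the driver-constant layout rather than appended by the backend: it only exists if the pushed region ends exactly at the end of `cs.subgroup_id`. A standalone sketch of that selection with assumed offsets:

```c
#include <stdio.h>

int main(void)
{
   /* Assumed anv_push_constants layout values, purely for illustration. */
   const unsigned subgroup_id_offset = 96;  /* anv_drv_const_offset(cs.subgroup_id) */
   const unsigned subgroup_id_size   = 4;   /* anv_drv_const_size(cs.subgroup_id)   */
   const unsigned push_start = 32, push_end = 100;

   /* Index (in dwords from push_start) of gl_SubgroupID, or -1 if it is not
    * part of the pushed region and the shader never needs it. */
   const int subgroup_id_index =
      push_end == subgroup_id_offset + subgroup_id_size ?
      (int)((subgroup_id_offset - push_start) / 4) : -1;

   printf("subgroup_id_index = %d\n", subgroup_id_index);  /* (96-32)/4 = 16 */
   return 0;
}
```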
@ -196,39 +281,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
.length = align(push_end - push_start, devinfo->grf_size) / 32,
};

if (has_push_intrinsic) {
nir_foreach_function_impl(impl, nir) {
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;

nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_push_constant: {
/* With bindless shaders we load uniforms with SEND
 * messages. All the push constants are located after the
 * RT_DISPATCH_GLOBALS. We just need to add the offset to
 * the address right after RT_DISPATCH_GLOBALS (see
 * brw_nir_lower_rt_intrinsics.c).
 */
unsigned base_offset =
brw_shader_stage_is_bindless(nir->info.stage) ? 0 : push_start;
nir_intrinsic_set_base(intrin,
nir_intrinsic_base(intrin) -
base_offset);
break;
}

default:
break;
}
}
}
}
}

/* When platforms support Mesh and the fragment shader is not fully linked
 * to the previous shader, payload format can change if the preceding
 * shader is mesh or not, this is an issue in particular for PrimitiveID
@ -260,15 +312,17 @@ anv_nir_compute_push_layout(nir_shader *nir,

unsigned n_push_ranges = 0;
if (push_ubo_ranges) {
brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges);
struct brw_ubo_range ubo_ranges[4] = {};

brw_nir_analyze_ubo_ranges(compiler, nir, ubo_ranges);

const unsigned max_push_regs = 64;

unsigned total_push_regs = push_constant_range.length;
for (unsigned i = 0; i < 4; i++) {
if (total_push_regs + prog_data->ubo_ranges[i].length > max_push_regs)
if (total_push_regs + ubo_ranges[i].length > max_push_regs)
prog_data->ubo_ranges[i].length = max_push_regs - total_push_regs;
ubo_ranges[i].length = max_push_regs - total_push_regs;
total_push_regs += prog_data->ubo_ranges[i].length;
total_push_regs += ubo_ranges[i].length;
}
assert(total_push_regs <= max_push_regs);
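The clamp keeps the API push constants plus all promoted UBO ranges inside the 64-GRF push budget, trimming later ranges first. A self-contained sketch with invented lengths:

```c
#include <stdio.h>

int main(void)
{
   const unsigned max_push_regs = 64;

   /* Assumed sizes, in 32-byte registers: 8 GRFs of API push constants and
    * four analyzed UBO ranges wanting 30 + 20 + 16 + 4 GRFs. */
   unsigned push_constant_len = 8;
   unsigned ubo_lengths[4] = {30, 20, 16, 4};

   unsigned total = push_constant_len;
   for (unsigned i = 0; i < 4; i++) {
      if (total + ubo_lengths[i] > max_push_regs)
         ubo_lengths[i] = max_push_regs - total;   /* trim to what is left */
      total += ubo_lengths[i];
      printf("UBO range %u pushed as %u GRFs (running total %u)\n",
             i, ubo_lengths[i], total);
   }
   return 0;
}
```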
@ -286,7 +340,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
const unsigned max_push_buffers = needs_padding_per_primitive ? 3 : 4;

for (unsigned i = 0; i < 4; i++) {
struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
struct brw_ubo_range *ubo_range = &ubo_ranges[i];
if (ubo_range->length == 0)
continue;
@ -310,7 +364,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
/* We only bother to shader-zero pushed client UBOs */
if (binding->set < MAX_SETS &&
(robust_flags & BRW_ROBUSTNESS_UBO)) {
prog_data->robust_ubo_ranges |= (uint8_t) (1 << i);
prog_data->robust_ubo_ranges |= (uint8_t) (1 << (i + (push_size != 0)));
}
}
} else if (push_constant_range.length > 0) {
@ -340,8 +394,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
.start = 0,
.length = 1,
};
assert(prog_data->nr_params == 0);
prog_data->nr_params = 32 / 4;
prog_data->push_sizes[0] = 32;
}

if (needs_padding_per_primitive) {

@ -355,21 +408,36 @@ anv_nir_compute_push_layout(nir_shader *nir,

assert(n_push_ranges <= 4);

if (nir->info.stage == MESA_SHADER_TESS_CTRL && needs_dyn_tess_config) {
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);

const uint32_t tess_config_offset = anv_drv_const_offset(gfx.tess_config);
assert(tess_config_offset >= push_start);
tcs_prog_data->tess_config_param = (tess_config_offset - push_start) / 4;
}
if (nir->info.stage == MESA_SHADER_TESS_EVAL && push_info->separate_tessellation) {
struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(prog_data);

const uint32_t tess_config_offset = anv_drv_const_offset(gfx.tess_config);
assert(tess_config_offset >= push_start);
tes_prog_data->tess_config_param = (tess_config_offset - push_start) / 4;
}
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
bool progress = nir_shader_intrinsics_pass(
nir, lower_to_push_data_intel,
nir_metadata_control_flow,
&(struct lower_to_push_data_intel_state) {
.bind_map = map,
.push_map = push_map,
});

switch (nir->info.stage) {
case MESA_SHADER_TESS_CTRL:
if (needs_dyn_tess_config) {
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);

const uint32_t tess_config_offset = anv_drv_const_offset(gfx.tess_config);
assert(tess_config_offset >= push_start);
tcs_prog_data->tess_config_param = (tess_config_offset - push_start) / 4;
}
break;

case MESA_SHADER_TESS_EVAL:
if (push_info->separate_tessellation) {
struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(prog_data);

const uint32_t tess_config_offset = anv_drv_const_offset(gfx.tess_config);
assert(tess_config_offset >= push_start);
tes_prog_data->tess_config_param = (tess_config_offset - push_start) / 4;
}
break;

case MESA_SHADER_FRAGMENT: {
struct brw_wm_prog_data *wm_prog_data =
container_of(prog_data, struct brw_wm_prog_data, base);
@ -380,7 +448,6 @@ anv_nir_compute_push_layout(nir_shader *nir,
wm_prog_data->msaa_flags_param =
(fs_msaa_flags_offset - push_start) / 4;
}

if (needs_wa_18019110168) {
const uint32_t fs_per_prim_remap_offset =
anv_drv_const_offset(gfx.fs_per_prim_remap_offset);

@ -388,8 +455,16 @@ anv_nir_compute_push_layout(nir_shader *nir,
wm_prog_data->per_primitive_remap_param =
(fs_per_prim_remap_offset - push_start) / 4;
}
break;
}

default:
break;
}

for (uint32_t i = 0; i < 4; i++)
prog_data->push_sizes[i] = map->push_ranges[i].length * 32;

#if 0
fprintf(stderr, "stage=%s push ranges:\n", mesa_shader_stage_name(nir->info.stage));
for (unsigned i = 0; i < ARRAY_SIZE(map->push_ranges); i++)

@ -407,7 +482,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
_mesa_sha1_compute(map->push_ranges,
sizeof(map->push_ranges),
map->push_sha1);
return false;
return progress;
}

void
@ -416,10 +491,9 @@ anv_nir_validate_push_layout(const struct anv_physical_device *pdevice,
struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
unsigned prog_data_push_size = align(prog_data->nr_params, pdevice->info.grf_size / 4) / 8;
unsigned prog_data_push_size = 0;

for (unsigned i = 0; i < 4; i++)
prog_data_push_size += prog_data->ubo_ranges[i].length;
prog_data_push_size += DIV_ROUND_UP(prog_data->push_sizes[i], 32);

unsigned bind_map_push_size = 0;
for (unsigned i = 0; i < 4; i++) {
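The debug validation now compares the same quantity from both sides: registers implied by `prog_data->push_sizes[]` versus registers described by the bind-map ranges. A standalone sketch of the check with assumed values:

```c
#include <assert.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   /* Assumed sizes: what the compiler recorded (bytes per push slot)... */
   const unsigned push_sizes[4] = {48, 96, 0, 20};
   /* ...and what the driver's bind map says (lengths in 32B registers). */
   const unsigned bind_map_lengths[4] = {2, 3, 0, 1};

   unsigned prog_data_push_size = 0, bind_map_push_size = 0;
   for (unsigned i = 0; i < 4; i++) {
      prog_data_push_size += DIV_ROUND_UP(push_sizes[i], 32);
      bind_map_push_size += bind_map_lengths[i];
   }

   assert(prog_data_push_size == bind_map_push_size);
   printf("both sides agree on %u push registers\n", prog_data_push_size);
   return 0;
}
```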
@ -53,6 +53,22 @@ lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin)
return true;
}

static bool
lower_subgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
                  const struct anv_physical_device *pdevice)
{
   if (pdevice->info.verx10 >= 125)
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *subgroup_id =
      anv_load_driver_uniform(b, 1, cs.subgroup_id);
   nir_def_replace(&intrin->def, subgroup_id);

   return true;
}

static bool
lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin)
{

@ -72,6 +88,8 @@ lower_driver_values(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
return lower_load_constant(b, intrin);
case nir_intrinsic_load_base_workgroup_id:
return lower_base_workgroup_id(b, intrin);
case nir_intrinsic_load_subgroup_id:
return lower_subgroup_id(b, intrin, data);
case nir_intrinsic_load_ray_query_global_intel:
return lower_ray_query_globals(b, intrin);
default:
@ -94,7 +94,6 @@ anv_shader_internal_create(struct anv_device *device,
prog_data_size);
VK_MULTIALLOC_DECL(&ma, struct intel_shader_reloc, prog_data_relocs,
prog_data_in->num_relocs);
VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
VK_MULTIALLOC_DECL(&ma, void, code, kernel_size);

VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,

@ -151,7 +150,6 @@ anv_shader_internal_create(struct anv_device *device,
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
prog_data_in->num_relocs);
prog_data->relocs = prog_data_relocs;
prog_data->param = prog_data_param;
shader->prog_data = prog_data;
shader->prog_data_size = prog_data_size;

@ -210,7 +208,6 @@ anv_shader_internal_serialize(struct vk_pipeline_cache_object *object,
assert(shader->prog_data_size <= sizeof(prog_data));
memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
prog_data.base.relocs = NULL;
prog_data.base.param = NULL;
blob_write_bytes(blob, &prog_data, shader->prog_data_size);

blob_write_bytes(blob, shader->prog_data->relocs,

@ -111,7 +111,6 @@ anv_shader_serialize(struct vk_device *device,
union brw_any_prog_data prog_data;
memcpy(&prog_data, shader->prog_data, brw_prog_data_size(vk_shader->stage));
prog_data.base.relocs = NULL;
prog_data.base.param = NULL;

blob_write_bytes(blob, &prog_data, brw_prog_data_size(vk_shader->stage));
@ -584,9 +583,6 @@ anv_shader_create(struct anv_device *device,
const uint32_t cmd_data_dwords = anv_genX(device->info, shader_cmd_size)(
device, stage);

/* We never need this at runtime */
shader_data->prog_data.base.param = NULL;

VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_shader, shader, 1);
VK_MULTIALLOC_DECL(&ma, uint32_t, cmd_data, cmd_data_dwords);

@ -1473,8 +1473,6 @@ anv_shader_lower_nir(struct anv_device *device,
dynamic_descriptors_offsets,
&shader_data->bind_map, &shader_data->push_map, mem_ctx);

NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);

NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
anv_nir_ubo_addr_format(pdevice, shader_data->key.base.robust_flags));
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,

@ -1545,28 +1543,6 @@ anv_shader_lower_nir(struct anv_device *device,
NIR_PASS(_, nir, nir_opt_dce);
}

NIR_PASS(_, nir, anv_nir_update_resource_intel_block);

NIR_PASS(_, nir, anv_nir_compute_push_layout,
pdevice, shader_data->key.base.robust_flags,
&(struct anv_nir_push_layout_info) {
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
shader_data->key.tcs.separate_tess_vue_layout) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
shader_data->key.tes.separate_tess_vue_layout),
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_key_is_dynamic(&shader_data->key.wm),
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
shader_data->key.wm.mesh_input == INTEL_SOMETIMES,
},
&shader_data->key.base,
&shader_data->prog_data.base,
&shader_data->bind_map, &shader_data->push_map,
mem_ctx);

NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
shader_data->bind_map.layout_type);

if (mesa_shader_stage_uses_workgroup(nir->info.stage)) {
NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared, shared_type_info);
@ -1597,6 +1573,30 @@ anv_shader_lower_nir(struct anv_device *device,
&shader_data->prog_data.cs);
}

NIR_PASS(_, nir, anv_nir_lower_driver_values, pdevice);

NIR_PASS(_, nir, anv_nir_update_resource_intel_block);

NIR_PASS(_, nir, anv_nir_compute_push_layout,
pdevice, shader_data->key.base.robust_flags,
&(struct anv_nir_push_layout_info) {
.separate_tessellation = (nir->info.stage == MESA_SHADER_TESS_CTRL &&
shader_data->key.tcs.separate_tess_vue_layout) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL &&
shader_data->key.tes.separate_tess_vue_layout),
.fragment_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_key_is_dynamic(&shader_data->key.wm),
.mesh_dynamic = nir->info.stage == MESA_SHADER_FRAGMENT &&
shader_data->key.wm.mesh_input == INTEL_SOMETIMES,
},
&shader_data->key.base,
&shader_data->prog_data.base,
&shader_data->bind_map, &shader_data->push_map,
mem_ctx);

NIR_PASS(_, nir, anv_nir_lower_resource_intel, pdevice,
shader_data->bind_map.layout_type);

shader_data->push_desc_info.push_set_buffer =
anv_nir_loads_push_desc_buffer(
nir, set_layouts, set_layout_count, &shader_data->bind_map);
@ -448,24 +448,16 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
const struct anv_pipeline_bind_map *bind_map = &shader->bind_map;
struct anv_push_constants *push = &gfx->base.push_constants;

unsigned ubo_range_index = 0;
for (unsigned i = 0; i < 4; i++) {
const struct anv_push_range *range = &bind_map->push_ranges[i];
if (range->length == 0)
continue;
break;

/* Skip any push ranges that were not promoted from UBOs */
if (range->set >= MAX_SETS) {
if (range->set >= MAX_SETS)
/* The indexing in prog_data->robust_ubo_ranges is based off
 * prog_data->ubo_ranges which does not include the
 * prog_data->nr_params (Vulkan push constants).
 */
if (range->set != ANV_DESCRIPTOR_SET_PUSH_CONSTANTS)
ubo_range_index++;
continue;
}

assert(shader->prog_data->robust_ubo_ranges & (1 << ubo_range_index));
assert(shader->prog_data->robust_ubo_ranges & (1 << i));

unsigned bound_size =
get_push_range_bound_size(cmd_buffer, shader, range);

@ -482,14 +474,12 @@ cmd_buffer_flush_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer,
}

/* Update the pushed bound length constant if it changed */
if (range_mask != push->gfx.push_reg_mask[stage][ubo_range_index]) {
if (range_mask != push->gfx.push_reg_mask[stage][i]) {
push->gfx.push_reg_mask[stage][ubo_range_index] = range_mask;
push->gfx.push_reg_mask[stage][i] = range_mask;
cmd_buffer->state.push_constants_dirty |=
mesa_to_vk_shader_stage(stage);
gfx->base.push_constants_data_dirty = true;
}

ubo_range_index++;
}
}
}
@ -26,10 +26,10 @@

#include "genxml/gen_macros.h"

#define load_param(b, bit_size, struct_name, field_name) \
nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
nir_load_push_data_intel(b, 1, bit_size, nir_imm_int(b, 0), \
.base = offsetof(struct_name, field_name), \
.range = bit_size / 8)

static nir_def *
load_fragment_index(nir_builder *b)
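With the macro retargeted to load_push_data_intel, an internal shader still reads its parameters by struct offset. A minimal usage sketch, assuming Mesa's NIR builder headers; `example_params` and `example_load_params` are invented stand-ins for the real parameter block this file uses:

```c
/* Sketch only: each field becomes a push-data load at its byte offset. */
struct example_params {
   uint32_t fragment_index;
   float    depth_clear_value;
};

static nir_def *
example_load_params(nir_builder *b)
{
   nir_def *index = load_param(b, 32, struct example_params, fragment_index);
   nir_def *depth = load_param(b, 32, struct example_params, depth_clear_value);
   return nir_vec2(b, index, depth);
}
```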
@ -1064,9 +1064,7 @@ emit_ps_shader(struct anv_batch *batch,
ps.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(shader);
ps.BindingTableEntryCount = shader->bind_map.surface_count;
#if GFX_VER < 20
ps.PushConstantEnable =
wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
ps.PushConstantEnable = wm_prog_data->base.push_sizes[0] > 0;
#endif

ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;

@ -205,8 +205,7 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)

ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
ps.PushConstantEnable = prog_data->base.nr_params > 0 ||
prog_data->base.ubo_ranges[0].length;
ps.PushConstantEnable = prog_data->base.push_sizes[0] > 0;
#endif

ps.DispatchGRFStartRegisterForConstantSetupData0 =