mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 11:00:11 +01:00
i965: Handle mix-and-match TCS/TES with separate shader objects.
GL_ARB_separate_shader_objects allows the application to mix-and-match
TCS and TES programs separately. This means that the interface between
the two stages isn't known until the final SSO pipeline is in place.
This isn't a great match for our hardware: the TCS and TES have to agree
on the Patch URB entry layout. Since we store data as per-patch slots
followed by per-vertex slots, changing the number of per-patch slots can
significantly alter the layout. This can easily happen with SSO.
To handle this, we store the [Patch]OutputsWritten and [Patch]InputsRead
bitfields in the TCS/TES program keys, introducing program recompiles.
brw_upload_programs() decides the layout for both TCS and TES, and
passes it to brw_upload_tcs/tes(), which store it in the key.
When creating the NIR for a shader specialization, we override
nir->info.inputs_read (and friends) to the program key's values.
Since everything uses those, no further compiler changes are needed.
This also replaces the hack in brw_create_nir().
To avoid recompiles, brw_precompile_tes() looks to see if there's a
TCS in the linked shader. If so, it accounts for the TCS outputs,
just as brw_upload_programs() would. This eliminates all recompiles
in the non-SSO case. In the SSO case, there should only be recompiles
when using a TCS and TES that have different input/output interfaces.
Fixes Piglit's mix-and-match-tcs-tes test.
v2: Pull the brw_upload_programs code into a brw_upload_tess_programs()
helper function (requested by Jordan Justen).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
parent
01b1b44d31
commit
794eb9d727
8 changed files with 87 additions and 24 deletions
|
|
@ -200,6 +200,9 @@ struct brw_tcs_prog_key
|
|||
|
||||
unsigned input_vertices;
|
||||
|
||||
/** A bitfield of per-patch outputs written. */
|
||||
uint32_t patch_outputs_written;
|
||||
|
||||
/** A bitfield of per-vertex outputs written. */
|
||||
uint64_t outputs_written;
|
||||
|
||||
|
|
@ -211,6 +214,12 @@ struct brw_tes_prog_key
|
|||
{
|
||||
unsigned program_string_id;
|
||||
|
||||
/** A bitfield of per-patch inputs read. */
|
||||
uint32_t patch_inputs_read;
|
||||
|
||||
/** A bitfield of per-vertex inputs read. */
|
||||
uint64_t inputs_read;
|
||||
|
||||
struct brw_sampler_prog_key_data tex;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -618,17 +618,6 @@ brw_create_nir(struct brw_context *brw,
|
|||
/* First, lower the GLSL IR or Mesa IR to NIR */
|
||||
if (shader_prog) {
|
||||
nir = glsl_to_nir(shader_prog, stage, options);
|
||||
|
||||
if (nir->stage == MESA_SHADER_TESS_EVAL &&
|
||||
shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
|
||||
const struct gl_program *tcs =
|
||||
shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
|
||||
/* Work around the TCS having bonus outputs used as shared memory
|
||||
* segments, which makes OutputsWritten not match InputsRead
|
||||
*/
|
||||
nir->info.inputs_read = tcs->OutputsWritten;
|
||||
nir->info.patch_inputs_read = tcs->PatchOutputsWritten;
|
||||
}
|
||||
} else {
|
||||
nir = prog_to_nir(prog, options);
|
||||
OPT_V(nir_convert_to_ssa); /* turn registers into SSA */
|
||||
|
|
|
|||
|
|
@ -56,8 +56,10 @@ void
|
|||
brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
|
||||
struct gl_shader *shader, struct gl_program *prog);
|
||||
|
||||
void brw_upload_tcs_prog(struct brw_context *brw);
|
||||
void brw_upload_tes_prog(struct brw_context *brw);
|
||||
void brw_upload_tcs_prog(struct brw_context *brw,
|
||||
uint64_t per_vertex_slots, uint32_t per_patch_slots);
|
||||
void brw_upload_tes_prog(struct brw_context *brw,
|
||||
uint64_t per_vertex_slots, uint32_t per_patch_slots);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
|
|
|||
|
|
@ -1329,6 +1329,8 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||
|
||||
nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
|
||||
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
|
||||
nir->info.inputs_read = key->inputs_read;
|
||||
nir->info.patch_inputs_read = key->patch_inputs_read;
|
||||
nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
|
||||
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
|
||||
|
||||
|
|
|
|||
|
|
@ -673,21 +673,41 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map)
|
|||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_upload_tess_programs(struct brw_context *brw)
|
||||
{
|
||||
if (brw->tess_eval_program) {
|
||||
uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
|
||||
uint32_t per_patch_slots =
|
||||
brw->tess_eval_program->Base.PatchInputsRead;
|
||||
|
||||
/* The TCS may have additional outputs which aren't read by the
|
||||
* TES (possibly for cross-thread communication). These need to
|
||||
* be stored in the Patch URB Entry as well.
|
||||
*/
|
||||
if (brw->tess_ctrl_program) {
|
||||
per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
|
||||
per_patch_slots |=
|
||||
brw->tess_ctrl_program->Base.PatchOutputsWritten;
|
||||
}
|
||||
|
||||
brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
|
||||
brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
|
||||
} else {
|
||||
brw->tcs.prog_data = NULL;
|
||||
brw->tcs.base.prog_data = NULL;
|
||||
brw->tes.prog_data = NULL;
|
||||
brw->tes.base.prog_data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_upload_programs(struct brw_context *brw,
|
||||
enum brw_pipeline pipeline)
|
||||
{
|
||||
if (pipeline == BRW_RENDER_PIPELINE) {
|
||||
brw_upload_vs_prog(brw);
|
||||
if (brw->tess_eval_program) {
|
||||
brw_upload_tcs_prog(brw);
|
||||
brw_upload_tes_prog(brw);
|
||||
} else {
|
||||
brw->tcs.prog_data = NULL;
|
||||
brw->tcs.base.prog_data = NULL;
|
||||
brw->tes.prog_data = NULL;
|
||||
brw->tes.base.prog_data = NULL;
|
||||
}
|
||||
brw_upload_tess_programs(brw);
|
||||
|
||||
if (brw->gen < 6)
|
||||
brw_upload_ff_gs_prog(brw);
|
||||
|
|
|
|||
|
|
@ -67,6 +67,10 @@ brw_tcs_debug_recompile(struct brw_context *brw,
|
|||
|
||||
found |= key_debug(brw, "input vertices", old_key->input_vertices,
|
||||
key->input_vertices);
|
||||
found |= key_debug(brw, "outputs written", old_key->outputs_written,
|
||||
key->outputs_written);
|
||||
found |= key_debug(brw, "patch outputs written", old_key->patch_outputs_written,
|
||||
key->patch_outputs_written);
|
||||
found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
|
||||
key->tes_primitive_mode);
|
||||
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
|
||||
|
|
@ -224,7 +228,9 @@ brw_codegen_tcs_prog(struct brw_context *brw,
|
|||
|
||||
|
||||
void
|
||||
brw_upload_tcs_prog(struct brw_context *brw)
|
||||
brw_upload_tcs_prog(struct brw_context *brw,
|
||||
uint64_t per_vertex_slots,
|
||||
uint32_t per_patch_slots)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
|
||||
|
|
@ -248,6 +254,8 @@ brw_upload_tcs_prog(struct brw_context *brw)
|
|||
memset(&key, 0, sizeof(key));
|
||||
|
||||
key.input_vertices = ctx->TessCtrlProgram.patch_vertices;
|
||||
key.outputs_written = per_vertex_slots;
|
||||
key.patch_outputs_written = per_patch_slots;
|
||||
|
||||
/* We need to specialize our code generation for tessellation levels
|
||||
* based on the domain the DS is expecting to tessellate.
|
||||
|
|
@ -301,6 +309,9 @@ brw_tcs_precompile(struct gl_context *ctx,
|
|||
|
||||
key.tes_primitive_mode = GL_TRIANGLES;
|
||||
|
||||
key.outputs_written = prog->OutputsWritten;
|
||||
key.patch_outputs_written = prog->PatchOutputsWritten;
|
||||
|
||||
success = brw_codegen_tcs_prog(brw, shader_prog, btcp, &key);
|
||||
|
||||
brw->tcs.base.prog_offset = old_prog_offset;
|
||||
|
|
|
|||
|
|
@ -66,6 +66,10 @@ brw_tes_debug_recompile(struct brw_context *brw,
|
|||
}
|
||||
|
||||
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
|
||||
found |= key_debug(brw, "inputs read", old_key->inputs_read,
|
||||
key->inputs_read);
|
||||
found |= key_debug(brw, "patch inputs read", old_key->patch_inputs_read,
|
||||
key->patch_inputs_read);
|
||||
|
||||
if (!found) {
|
||||
perf_debug(" Something else\n");
|
||||
|
|
@ -226,7 +230,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
|
|||
|
||||
|
||||
void
|
||||
brw_upload_tes_prog(struct brw_context *brw)
|
||||
brw_upload_tes_prog(struct brw_context *brw,
|
||||
uint64_t per_vertex_slots,
|
||||
uint32_t per_patch_slots)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
|
||||
|
|
@ -247,6 +253,14 @@ brw_upload_tes_prog(struct brw_context *brw)
|
|||
|
||||
key.program_string_id = tep->id;
|
||||
|
||||
/* Ignore gl_TessLevelInner/Outer - we treat them as system values,
|
||||
* not inputs, and they're always present in the URB entry regardless
|
||||
* of whether or not we read them.
|
||||
*/
|
||||
key.inputs_read = per_vertex_slots &
|
||||
~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
|
||||
key.patch_inputs_read = per_patch_slots;
|
||||
|
||||
/* _NEW_TEXTURE */
|
||||
brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
|
||||
&key.tex);
|
||||
|
|
@ -280,6 +294,20 @@ brw_tes_precompile(struct gl_context *ctx,
|
|||
memset(&key, 0, sizeof(key));
|
||||
|
||||
key.program_string_id = btep->id;
|
||||
key.inputs_read = prog->InputsRead;
|
||||
key.patch_inputs_read = prog->PatchInputsRead;
|
||||
|
||||
if (shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
|
||||
struct gl_program *tcp =
|
||||
shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
|
||||
key.inputs_read |= tcp->OutputsWritten;
|
||||
key.patch_inputs_read |= tcp->PatchOutputsWritten;
|
||||
}
|
||||
|
||||
/* Ignore gl_TessLevelInner/Outer - they're system values. */
|
||||
key.inputs_read &= ~(VARYING_BIT_TESS_LEVEL_INNER |
|
||||
VARYING_BIT_TESS_LEVEL_OUTER);
|
||||
|
||||
brw_setup_tex_for_precompile(brw, &key.tex, prog);
|
||||
|
||||
success = brw_codegen_tes_prog(brw, shader_prog, btep, &key);
|
||||
|
|
|
|||
|
|
@ -476,6 +476,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
|
||||
nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
|
||||
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
|
||||
nir->info.outputs_written = key->outputs_written;
|
||||
nir->info.patch_outputs_written = key->patch_outputs_written;
|
||||
nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
|
||||
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue