From 9d9965c06f18f7d609933e511b98514570e85909 Mon Sep 17 00:00:00 2001 From: Plamena Manolova Date: Tue, 15 Mar 2016 20:39:49 +0200 Subject: [PATCH 01/26] mesa: Ignore glPointSize when GL_POINT_SIZE_ARRAY_OES is enabled When a user defines a point size array and enables it, the point size value set via glPointSize should be ignored. To achieve this, we can simply toggle ctx->VertexProgram.PointSizeEnabled. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=42187 Signed-off-by: Plamena Manolova Reviewed-by: Kenneth Graunke --- src/mesa/main/enable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 3985457f21a..b90a60ba03f 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -105,6 +105,8 @@ client_state(struct gl_context *ctx, GLenum cap, GLboolean state) case GL_POINT_SIZE_ARRAY_OES: var = &vao->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled; flag = VERT_BIT_POINT_SIZE; + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + ctx->VertexProgram.PointSizeEnabled = state; break; /* GL_NV_primitive_restart */ From b566317e7e6eb78a22cac759a4af2e9d78f74a32 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Mar 2016 11:13:25 -0700 Subject: [PATCH 02/26] meta: Use ARB_explicit_attrib_location in the rest of the meta shaders. This is cleaner than using glBindAttribLocation(). Not all drivers support the extension, but I don't think those drivers use GLSL in the first place. Apparently some Meta shaders already use GL_ARB_explicit_attrib_location, so I think it should be okay. Honestly, I'm not sure how the old code worked anyway - we bound the attribute location for "texcoords", while all the shaders capitalized or spelled it differently. v2: Convert another instance in brw_meta_fast_clear.c. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/common/meta.c | 17 ++++++----------- src/mesa/drivers/common/meta_blit.c | 15 +++++++++------ src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 4 ++-- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index ab78f4565da..b05dfc725f5 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -207,8 +207,6 @@ _mesa_meta_compile_and_link_program(struct gl_context *ctx, _mesa_DeleteShader(fs); _mesa_AttachShader(*program, vs); _mesa_DeleteShader(vs); - _mesa_BindAttribLocation(*program, 0, "position"); - _mesa_BindAttribLocation(*program, 1, "texcoords"); _mesa_meta_link_program_with_debug(ctx, *program); _mesa_UseProgram(*program); @@ -230,19 +228,15 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx, { char *vs_source, *fs_source; struct blit_shader *shader = choose_blit_shader(target, table); - const char *vs_input, *vs_output, *fs_input, *vs_preprocess, *fs_preprocess; + const char *fs_input, *vs_preprocess, *fs_preprocess; void *mem_ctx; if (ctx->Const.GLSLVersion < 130) { vs_preprocess = ""; - vs_input = "attribute"; - vs_output = "varying"; fs_preprocess = "#extension GL_EXT_texture_array : enable"; fs_input = "varying"; } else { vs_preprocess = "#version 130"; - vs_input = "in"; - vs_output = "out"; fs_preprocess = "#version 130"; fs_input = "in"; shader->func = "texture"; @@ -259,15 +253,16 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx, vs_source = ralloc_asprintf(mem_ctx, "%s\n" - "%s vec2 position;\n" - "%s vec4 textureCoords;\n" - "%s vec4 texCoords;\n" + "#extension GL_ARB_explicit_attrib_location: enable\n" + "layout(location = 0) in vec2 position;\n" + "layout(location = 1) in vec4 textureCoords;\n" + "out vec4 texCoords;\n" "void main()\n" "{\n" " texCoords = textureCoords;\n" " gl_Position = vec4(position, 0.0, 1.0);\n" "}\n", - vs_preprocess, vs_input, vs_input, 
vs_output); + vs_preprocess); fs_source = ralloc_asprintf(mem_ctx, "%s\n" diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index 5d80f7d8fe1..179dc0d8630 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -168,8 +168,9 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, static const char vs_source[] = "#version 130\n" - "in vec2 position;\n" - "in vec3 textureCoords;\n" + "#extension GL_ARB_explicit_attrib_location: enable\n" + "layout(location = 0) in vec2 position;\n" + "layout(location = 1) in vec3 textureCoords;\n" "out vec2 texCoords;\n" "flat out int layer;\n" "void main()\n" @@ -384,8 +385,9 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, vs_source = ralloc_asprintf(mem_ctx, "#version 130\n" - "in vec2 position;\n" - "in %s textureCoords;\n" + "#extension GL_ARB_explicit_attrib_location: enable\n" + "layout(location = 0) in vec2 position;\n" + "layout(location = 1) in %s textureCoords;\n" "out %s texCoords;\n" "void main()\n" "{\n" @@ -506,8 +508,9 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, vs_source = ralloc_asprintf(mem_ctx, "#version 130\n" - "in vec2 position;\n" - "in %s textureCoords;\n" + "#extension GL_ARB_explicit_attrib_location: enable\n" + "layout(location = 0) in vec2 position;\n" + "layout(location = 1) in %s textureCoords;\n" "out %s texCoords;\n" "void main()\n" "{\n" diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index b81b1438ba3..fe61c1654d2 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -110,7 +110,8 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) const char *vs_source = "#extension GL_AMD_vertex_shader_layer : enable\n" "#extension GL_ARB_draw_instanced : enable\n" - "attribute vec4 position;\n" + "#extension GL_ARB_explicit_attrib_location : enable\n" + "layout(location = 0) 
in vec4 position;\n" "uniform int layer;\n" "void main()\n" "{\n" @@ -144,7 +145,6 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) _mesa_DeleteShader(fs); _mesa_AttachShader(clear->shader_prog, vs); _mesa_DeleteShader(vs); - _mesa_BindAttribLocation(clear->shader_prog, 0, "position"); _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear"); _mesa_LinkProgram(clear->shader_prog); From a298fb15afa91d866c78f80fbd71bafeb9f36fda Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Mar 2016 18:50:32 -0700 Subject: [PATCH 03/26] vc4: Reuse list_for_each_entry_safe_rev(). This didn't exist when I wrote the code. --- src/gallium/drivers/vc4/vc4_opt_dead_code.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c index 9e79a2d1b2e..44d0779c611 100644 --- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c +++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c @@ -86,12 +86,8 @@ qir_opt_dead_code(struct vc4_compile *c) /* Whether we're eliminating texture setup currently. */ bool dce_tex = false; - struct list_head *node, *t; - for (node = c->instructions.prev, t = node->prev; - &c->instructions != node; - node = t, t = t->prev) { - struct qinst *inst = (struct qinst *)node; - + list_for_each_entry_safe_rev(struct qinst, inst, &c->instructions, + link) { if (inst->dst.file == QFILE_TEMP && !used[inst->dst.index] && !inst->sf && From b4f45f319cc5cb833b4581447c45656cdeed025a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Mar 2016 18:57:20 -0700 Subject: [PATCH 04/26] vc4: Add a safety check for setting flags. If a pack was on the src reg, should it be a float, int, or mul unpack? Just complain, instead. 
--- src/gallium/drivers/vc4/vc4_qir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 65f0067c61e..fd1192f340c 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -488,6 +488,9 @@ qir_SF(struct vc4_compile *c, struct qreg src) if (!list_empty(&c->instructions)) last_inst = (struct qinst *)c->instructions.prev; + /* We don't have any way to guess which kind of MOV is implied. */ + assert(!src.pack); + if (src.file != QFILE_TEMP || !c->defs[src.index] || last_inst != c->defs[src.index] || From 7c9fc439150188612c7fe595cbe0180fcea3e705 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Mar 2016 18:58:43 -0700 Subject: [PATCH 05/26] vc4: Don't make a temporary for setting flags. The register allocator doesn't really do anything about the temp, so it doesn't seem like it should matter. However, the scheduler would think that a new def is being created. This doesn't change anything yet, but it avoids a bunch of regressions in the next commit. --- src/gallium/drivers/vc4/vc4_qir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index fd1192f340c..125a9525e1b 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -495,7 +495,8 @@ qir_SF(struct vc4_compile *c, struct qreg src) !c->defs[src.index] || last_inst != c->defs[src.index] || qir_is_multi_instruction(last_inst)) { - src = qir_MOV(c, src); + struct qreg null = { QFILE_NULL, 0 }; + last_inst = qir_MOV_dest(c, null, src); last_inst = (struct qinst *)c->instructions.prev; } last_inst->sf = true; From 2b9f0dffe00bdc556436da02c099b8a50ecc4f49 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 15 Mar 2016 18:00:22 -0700 Subject: [PATCH 06/26] vc4: Move discard handling to the condition flag. Now that the field exists in the instruction, we can make discards less special. 
As a bonus, that means that we should be able to merge some more .sf instructions together when we get around to that. This causes some scheduling changes, as it allows tlb_color_reads to be delayed past the discard condition setup. Since the tlb_color_read ends up later, this may mean performance improvements, but I haven't tested. total instructions in shared programs: 78114 -> 78035 (-0.10%) instructions in affected programs: 1922 -> 1843 (-4.11%) total estimated cycles in shared programs: 234318 -> 234329 (0.00%) estimated cycles in affected programs: 8200 -> 8211 (0.13%) --- src/gallium/drivers/vc4/vc4_program.c | 19 +++++++++++++------ src/gallium/drivers/vc4/vc4_qir.c | 1 - src/gallium/drivers/vc4/vc4_qir.h | 16 ++++++++++------ src/gallium/drivers/vc4/vc4_qir_schedule.c | 5 ----- src/gallium/drivers/vc4/vc4_qpu_emit.c | 22 ++++++---------------- 5 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 81e8e9150d6..f5826d85174 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1184,8 +1184,11 @@ emit_frag_end(struct vc4_compile *c) color = qir_uniform_ui(c, 0); } - if (c->discard.file != QFILE_NULL) - qir_TLB_DISCARD_SETUP(c, c->discard); + uint32_t discard_cond = QPU_COND_ALWAYS; + if (c->discard.file != QFILE_NULL) { + qir_SF(c, c->discard); + discard_cond = QPU_COND_ZS; + } if (c->fs_key->stencil_enabled) { qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0)); @@ -1209,14 +1212,18 @@ emit_frag_end(struct vc4_compile *c) } else { z = qir_FRAG_Z(c); } - qir_TLB_Z_WRITE(c, z); + struct qinst *inst = qir_TLB_Z_WRITE(c, z); + inst->cond = discard_cond; } if (!c->msaa_per_sample_output) { - qir_TLB_COLOR_WRITE(c, color); + struct qinst *inst = qir_TLB_COLOR_WRITE(c, color); + inst->cond = discard_cond; } else { - for (int i = 0; i < VC4_MAX_SAMPLES; i++) - qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]); + for (int i 
= 0; i < VC4_MAX_SAMPLES; i++) { + struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]); + inst->cond = discard_cond; + } } } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 125a9525e1b..e73e3899410 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -69,7 +69,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_RSQ] = { "rsq", 1, 1, false, true }, [QOP_EXP2] = { "exp2", 1, 2, false, true }, [QOP_LOG2] = { "log2", 1, 2, false, true }, - [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4f39d72f552..3fbf5d749e7 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -101,7 +101,6 @@ enum qop { QOP_LOG2, QOP_VW_SETUP, QOP_VR_SETUP, - QOP_TLB_DISCARD_SETUP, QOP_TLB_STENCIL_SETUP, QOP_TLB_Z_WRITE, QOP_TLB_COLOR_WRITE, @@ -551,17 +550,23 @@ qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ } #define QIR_NODST_1(name) \ -static inline void \ +static inline struct qinst * \ qir_##name(struct vc4_compile *c, struct qreg a) \ { \ - qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \ + struct qinst *inst = qir_inst(QOP_##name, c->undef, \ + a, c->undef); \ + qir_emit(c, inst); \ + return inst; \ } #define QIR_NODST_2(name) \ -static inline void \ +static inline struct qinst * \ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ { \ - qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \ + struct qinst *inst = qir_inst(QOP_##name, c->undef, \ + a, b); \ + qir_emit(c, inst); \ + return inst; \ } #define QIR_PACK(name) \ @@ -623,7 +628,6 @@ QIR_ALU0(TLB_COLOR_READ) QIR_NODST_1(TLB_COLOR_WRITE) QIR_NODST_1(TLB_COLOR_WRITE_MS) QIR_NODST_1(TLB_Z_WRITE) 
-QIR_NODST_1(TLB_DISCARD_SETUP) QIR_NODST_1(TLB_STENCIL_SETUP) QIR_NODST_1(MS_MASK) diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index ee1e9aafbb9..186e81be750 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -236,11 +236,6 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n) add_write_dep(dir, &state->last_tlb, n); break; - case QOP_TLB_DISCARD_SETUP: - add_write_dep(dir, &state->last_sf, n); - add_write_dep(dir, &state->last_tlb, n); - break; - default: break; } diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 450b97fc014..b507e370683 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -171,7 +171,6 @@ void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); - bool discard = false; uint32_t inputs_remaining = c->num_inputs; uint32_t vpm_read_fifo_count = 0; uint32_t vpm_read_offset = 0; @@ -375,12 +374,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) */ break; - case QOP_TLB_DISCARD_SETUP: - discard = true; - queue(c, qpu_a_MOV(src[0], src[0]) | unpack); - *last_inst(c) |= QPU_SF; - break; - case QOP_TLB_STENCIL_SETUP: assert(!unpack); queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), @@ -390,9 +383,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_TLB_Z_WRITE: queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]) | unpack); - if (discard) { - set_last_cond_add(c, QPU_COND_ZS); - } + set_last_cond_add(c, qinst->cond); + handled_qinst_cond = true; break; case QOP_TLB_COLOR_READ: @@ -406,16 +398,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_TLB_COLOR_WRITE: queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack); - if (discard) { - set_last_cond_add(c, QPU_COND_ZS); - } + 
set_last_cond_add(c, qinst->cond); + handled_qinst_cond = true; break; case QOP_TLB_COLOR_WRITE_MS: queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0])); - if (discard) { - set_last_cond_add(c, QPU_COND_ZS); - } + set_last_cond_add(c, qinst->cond); + handled_qinst_cond = true; break; case QOP_VARY_ADD_C: From a7e9b31d5bf98bdaabbb8b5c2459eb2c3a0af579 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Mar 2016 16:21:36 -0700 Subject: [PATCH 07/26] meta: Use the _mesa_meta_compile_and_link_program helper more places. Less boilerplate. Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/common/meta.c | 34 +++---------------- .../drivers/dri/i965/brw_meta_fast_clear.c | 14 ++------ 2 files changed, 8 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index b05dfc725f5..bdcf316e455 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -1537,21 +1537,8 @@ meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) if (clear->ShaderProg != 0) return; - vs = _mesa_CreateShader(GL_VERTEX_SHADER); - _mesa_ShaderSource(vs, 1, &vs_source, NULL); - _mesa_CompileShader(vs); - - fs = _mesa_CreateShader(GL_FRAGMENT_SHADER); - _mesa_ShaderSource(fs, 1, &fs_source, NULL); - _mesa_CompileShader(fs); - - clear->ShaderProg = _mesa_CreateProgram(); - _mesa_AttachShader(clear->ShaderProg, fs); - _mesa_DeleteShader(fs); - _mesa_AttachShader(clear->ShaderProg, vs); - _mesa_DeleteShader(vs); - _mesa_ObjectLabel(GL_PROGRAM, clear->ShaderProg, -1, "meta clear"); - _mesa_LinkProgram(clear->ShaderProg); + _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, "meta clear", + &clear->ShaderProg); has_integer_textures = _mesa_is_gles3(ctx) || (_mesa_is_desktop_gl(ctx) && ctx->Const.GLSLVersion >= 130); @@ -1585,26 +1572,15 @@ meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) " out_color = color;\n" "}\n"); - vs = 
_mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, - vs_int_source); - fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, - fs_int_source); + _mesa_meta_compile_and_link_program(ctx, vs_int_source, fs_int_source, + "integer clear", + &clear->IntegerShaderProg); ralloc_free(shader_source_mem_ctx); - clear->IntegerShaderProg = _mesa_CreateProgram(); - _mesa_AttachShader(clear->IntegerShaderProg, fs); - _mesa_DeleteShader(fs); - _mesa_AttachShader(clear->IntegerShaderProg, vs); - _mesa_DeleteShader(vs); - /* Note that user-defined out attributes get automatically assigned * locations starting from 0, so we don't need to explicitly * BindFragDataLocation to 0. */ - - _mesa_ObjectLabel(GL_PROGRAM, clear->IntegerShaderProg, -1, - "integer clear"); - _mesa_meta_link_program_with_debug(ctx, clear->IntegerShaderProg); } } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index fe61c1654d2..e2882da403d 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -127,7 +127,6 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) " gl_FragColor = color;\n" "}\n"; - GLuint vs, fs; struct brw_fast_clear_state *clear = brw->fast_clear_state; struct gl_context *ctx = &brw->ctx; @@ -137,16 +136,9 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) return; } - vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, vs_source); - fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, fs_source); - - clear->shader_prog = _mesa_CreateProgram(); - _mesa_AttachShader(clear->shader_prog, fs); - _mesa_DeleteShader(fs); - _mesa_AttachShader(clear->shader_prog, vs); - _mesa_DeleteShader(vs); - _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear"); - _mesa_LinkProgram(clear->shader_prog); + _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, + "meta repclear", + 
&clear->shader_prog); clear->color_location = _mesa_GetUniformLocation(clear->shader_prog, "color"); From a461e0003fdca564fa1ec7528c2463c50a797e85 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Mar 2016 10:24:09 -0700 Subject: [PATCH 08/26] mesa: Make compile_shader() take a gl_shader, not a GLuint. In half the callers, we already have a pointer, and don't need to look it up again. This will also help with upcoming meta work. Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/mesa/main/shaderapi.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index cdf15b48a0d..43c4188cf2f 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -960,11 +960,8 @@ shader_source(struct gl_shader *sh, const GLchar *source) * Compile a shader. */ static void -compile_shader(struct gl_context *ctx, GLuint shaderObj) +compile_shader(struct gl_context *ctx, struct gl_shader *sh) { - struct gl_shader *sh; - - sh = _mesa_lookup_shader_err(ctx, shaderObj, "glCompileShader"); if (!sh) return; @@ -1270,7 +1267,8 @@ _mesa_CompileShader(GLuint shaderObj) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glCompileShader %u\n", shaderObj); - compile_shader(ctx, shaderObj); + compile_shader(ctx, + _mesa_lookup_shader_err(ctx, shaderObj, "glCompileShader")); } @@ -2154,18 +2152,17 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count, } if (shader) { - _mesa_ShaderSource(shader, count, strings, NULL); + struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); - compile_shader(ctx, shader); + _mesa_ShaderSource(shader, count, strings, NULL); + compile_shader(ctx, sh); program = create_shader_program(ctx); if (program) { struct gl_shader_program *shProg; - struct gl_shader *sh; GLint compiled = GL_FALSE; shProg = _mesa_lookup_shader_program(ctx, program); - sh = _mesa_lookup_shader(ctx, shader); shProg->SeparateShader = GL_TRUE; From 
7753657cf2cf3c1090186f9e33335a0417ce35fe Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Mar 2016 17:08:17 -0700 Subject: [PATCH 09/26] mesa: Make link_program() take a gl_shader_program, not a GLuint. In half the callers, we already have a pointer, and don't need to look it up again. This will also help with upcoming meta work. Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/mesa/main/shaderapi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 43c4188cf2f..c7b1047bea1 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -1021,11 +1021,8 @@ compile_shader(struct gl_context *ctx, struct gl_shader *sh) * Link a program's shaders. */ static void -link_program(struct gl_context *ctx, GLuint program) +link_program(struct gl_context *ctx, struct gl_shader_program *shProg) { - struct gl_shader_program *shProg; - - shProg = _mesa_lookup_shader_program_err(ctx, program, "glLinkProgram"); if (!shProg) return; @@ -1515,7 +1512,8 @@ _mesa_LinkProgram(GLuint programObj) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glLinkProgram %u\n", programObj); - link_program(ctx, programObj); + link_program(ctx, _mesa_lookup_shader_program_err(ctx, programObj, + "glLinkProgram")); } #if defined(HAVE_SHA1) @@ -2169,7 +2167,7 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count, get_shaderiv(ctx, shader, GL_COMPILE_STATUS, &compiled); if (compiled) { attach_shader(ctx, program, shader); - link_program(ctx, program); + link_program(ctx, shProg); detach_shader(ctx, program, shader); #if 0 From 0fe254168be26e71777dc2648e86976bdcd2e707 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Mar 2016 10:51:33 -0700 Subject: [PATCH 10/26] mesa: Expose compile_shader() and link_program() beyond the file. This will allow me to use them directly from Meta, bypassing the versions that work with GL integer handles. 
Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/mesa/main/shaderapi.c | 20 ++++++++++---------- src/mesa/main/shaderapi.h | 6 ++++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index c7b1047bea1..32fad56f651 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -959,8 +959,8 @@ shader_source(struct gl_shader *sh, const GLchar *source) /** * Compile a shader. */ -static void -compile_shader(struct gl_context *ctx, struct gl_shader *sh) +void +_mesa_compile_shader(struct gl_context *ctx, struct gl_shader *sh) { if (!sh) return; @@ -1020,8 +1020,8 @@ compile_shader(struct gl_context *ctx, struct gl_shader *sh) /** * Link a program's shaders. */ -static void -link_program(struct gl_context *ctx, struct gl_shader_program *shProg) +void +_mesa_link_program(struct gl_context *ctx, struct gl_shader_program *shProg) { if (!shProg) return; @@ -1264,8 +1264,8 @@ _mesa_CompileShader(GLuint shaderObj) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glCompileShader %u\n", shaderObj); - compile_shader(ctx, - _mesa_lookup_shader_err(ctx, shaderObj, "glCompileShader")); + _mesa_compile_shader(ctx, _mesa_lookup_shader_err(ctx, shaderObj, + "glCompileShader")); } @@ -1512,8 +1512,8 @@ _mesa_LinkProgram(GLuint programObj) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glLinkProgram %u\n", programObj); - link_program(ctx, _mesa_lookup_shader_program_err(ctx, programObj, - "glLinkProgram")); + _mesa_link_program(ctx, _mesa_lookup_shader_program_err(ctx, programObj, + "glLinkProgram")); } #if defined(HAVE_SHA1) @@ -2153,7 +2153,7 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count, struct gl_shader *sh = _mesa_lookup_shader(ctx, shader); _mesa_ShaderSource(shader, count, strings, NULL); - compile_shader(ctx, sh); + _mesa_compile_shader(ctx, sh); program = create_shader_program(ctx); if (program) { @@ -2167,7 +2167,7 
@@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count, get_shaderiv(ctx, shader, GL_COMPILE_STATUS, &compiled); if (compiled) { attach_shader(ctx, program, shader); - link_program(ctx, shProg); + _mesa_link_program(ctx, shProg); detach_shader(ctx, program, shader); #if 0 diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h index 8922c4d0640..d2d7f16ec7c 100644 --- a/src/mesa/main/shaderapi.h +++ b/src/mesa/main/shaderapi.h @@ -54,6 +54,12 @@ extern void _mesa_active_program(struct gl_context *ctx, struct gl_shader_program *shProg, const char *caller); +extern void +_mesa_compile_shader(struct gl_context *ctx, struct gl_shader *sh); + +extern void +_mesa_link_program(struct gl_context *ctx, struct gl_shader_program *sh_prog); + extern unsigned _mesa_count_active_attribs(struct gl_shader_program *shProg); From 9c1e01c4a883ac4a738f6f8c17c0236621101e28 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Mar 2016 10:51:55 -0700 Subject: [PATCH 11/26] meta: Don't use integer handles for shaders or programs. Previously, we gave our internal clear/blit shaders actual GL handles and stored them in the shader/program hash table. We used ordinary GL API entrypoints to work with them. We thought this shouldn't be a problem because GL doesn't allow applications to invent their own names for shaders or programs. GL allocates all names via glCreateShader and glCreateProgram. However, having them in the hash table is a bit risky: if a broken application guesses the name of our shaders or programs, it could alter them, potentially screwing up future meta operations. Also, test cases can observe the programs in the hash table. Running a single dEQP process that executes the following test list: dEQP-GLES3.functional.negative_api.buffer.clear dEQP-GLES3.functional.negative_api.shader.compile_shader dEQP-GLES3.functional.negative_api.shader.delete_shader would result in the last two tests breaking. 
The compile_shader test calls glCompileShader(9) straight away, and since it hasn't even created any shaders or programs, it expects to get a GL_INVALID_VALUE error because there's no such name. However, because the clear test ran first, it created Meta programs, so an object named "9" did exist. This patch reworks Meta to work with gl_shader and gl_shader_program pointers directly. These internal programs have bogus names, and are never stored in the hash tables, so they're invisible to applications. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94485 Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/common/meta.c | 155 +++++++----------- src/mesa/drivers/common/meta.h | 24 ++- src/mesa/drivers/common/meta_blit.c | 16 +- .../drivers/common/meta_generate_mipmap.c | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 2 +- .../drivers/dri/i965/brw_meta_fast_clear.c | 11 +- .../drivers/dri/i965/brw_meta_stencil_blit.c | 59 ++++--- 7 files changed, 126 insertions(+), 143 deletions(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index bdcf316e455..b673db44b0b 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -121,72 +121,51 @@ _mesa_meta_framebuffer_texture_image(struct gl_context *ctx, level, layer, false, __func__); } -GLuint +struct gl_shader * _mesa_meta_compile_shader_with_debug(struct gl_context *ctx, GLenum target, const GLcharARB *source) { - GLuint shader; - GLint ok, size; - GLchar *info; + const GLuint name = ~0; + struct gl_shader *sh; - shader = _mesa_CreateShader(target); - _mesa_ShaderSource(shader, 1, &source, NULL); - _mesa_CompileShader(shader); + sh = ctx->Driver.NewShader(ctx, name, target); + sh->Source = strdup(source); + sh->CompileStatus = false; + _mesa_compile_shader(ctx, sh); - _mesa_GetShaderiv(shader, GL_COMPILE_STATUS, &ok); - if (ok) - return shader; + if (!sh->CompileStatus) { + if (sh->InfoLog) { + _mesa_problem(ctx, + "meta program 
compile failed:\n%s\nsource:\n%s\n", + sh->InfoLog, source); + } - _mesa_GetShaderiv(shader, GL_INFO_LOG_LENGTH, &size); - if (size == 0) { - _mesa_DeleteShader(shader); - return 0; + _mesa_reference_shader(ctx, &sh, NULL); } - info = malloc(size); - if (!info) { - _mesa_DeleteShader(shader); - return 0; - } - - _mesa_GetShaderInfoLog(shader, size, NULL, info); - _mesa_problem(ctx, - "meta program compile failed:\n%s\n" - "source:\n%s\n", - info, source); - - free(info); - _mesa_DeleteShader(shader); - - return 0; + return sh; } -GLuint -_mesa_meta_link_program_with_debug(struct gl_context *ctx, GLuint program) +void +_mesa_meta_link_program_with_debug(struct gl_context *ctx, + struct gl_shader_program *sh_prog) { - GLint ok, size; - GLchar *info; + _mesa_link_program(ctx, sh_prog); - _mesa_LinkProgram(program); + if (!sh_prog->LinkStatus) { + _mesa_problem(ctx, "meta program link failed:\n%s", sh_prog->InfoLog); + } +} - _mesa_GetProgramiv(program, GL_LINK_STATUS, &ok); - if (ok) - return program; +void +_mesa_meta_use_program(struct gl_context *ctx, + struct gl_shader_program *sh_prog) +{ + /* Attach shader state to the binding point */ + _mesa_reference_pipeline_object(ctx, &ctx->_Shader, &ctx->Shader); - _mesa_GetProgramiv(program, GL_INFO_LOG_LENGTH, &size); - if (size == 0) - return 0; - - info = malloc(size); - if (!info) - return 0; - - _mesa_GetProgramInfoLog(program, size, NULL, info); - _mesa_problem(ctx, "meta program link failed:\n%s", info); - - free(info); - - return 0; + /* Update the program */ + _mesa_use_program(ctx, sh_prog); } void @@ -194,22 +173,25 @@ _mesa_meta_compile_and_link_program(struct gl_context *ctx, const char *vs_source, const char *fs_source, const char *name, - GLuint *program) + struct gl_shader_program **out_sh_prog) { - GLuint vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, - vs_source); - GLuint fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, - fs_source); + struct gl_shader_program 
*sh_prog; + const GLuint id = ~0; - *program = _mesa_CreateProgram(); - _mesa_ObjectLabel(GL_PROGRAM, *program, -1, name); - _mesa_AttachShader(*program, fs); - _mesa_DeleteShader(fs); - _mesa_AttachShader(*program, vs); - _mesa_DeleteShader(vs); - _mesa_meta_link_program_with_debug(ctx, *program); + sh_prog = _mesa_new_shader_program(id); + sh_prog->Label = strdup(name); + sh_prog->NumShaders = 2; + sh_prog->Shaders = malloc(2 * sizeof(struct gl_shader *)); + sh_prog->Shaders[0] = + _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, vs_source); + sh_prog->Shaders[1] = + _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, fs_source); - _mesa_UseProgram(*program); + _mesa_meta_link_program_with_debug(ctx, sh_prog); + + _mesa_meta_use_program(ctx, sh_prog); + + *out_sh_prog = sh_prog; } /** @@ -244,8 +226,8 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx, assert(shader != NULL); - if (shader->shader_prog != 0) { - _mesa_UseProgram(shader->shader_prog); + if (shader->shader_prog != NULL) { + _mesa_meta_use_program(ctx, shader->shader_prog); return; } @@ -1528,7 +1510,6 @@ meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear) "{\n" " gl_FragColor = color;\n" "}\n"; - GLuint vs, fs; bool has_integer_textures; _mesa_meta_setup_vertex_objects(ctx, &clear->VAO, &clear->buf_obj, true, @@ -1592,12 +1573,10 @@ meta_glsl_clear_cleanup(struct gl_context *ctx, struct clear_state *clear) _mesa_DeleteVertexArrays(1, &clear->VAO); clear->VAO = 0; _mesa_reference_buffer_object(ctx, &clear->buf_obj, NULL); - _mesa_DeleteProgram(clear->ShaderProg); - clear->ShaderProg = 0; + _mesa_reference_shader_program(ctx, &clear->ShaderProg, NULL); if (clear->IntegerShaderProg) { - _mesa_DeleteProgram(clear->IntegerShaderProg); - clear->IntegerShaderProg = 0; + _mesa_reference_shader_program(ctx, &clear->IntegerShaderProg, NULL); } } @@ -1711,10 +1690,10 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, bool glsl) if (fb->_IntegerColor) { 
assert(glsl); - _mesa_UseProgram(clear->IntegerShaderProg); + _mesa_meta_use_program(ctx, clear->IntegerShaderProg); _mesa_Uniform4iv(0, 1, ctx->Color.ClearColor.i); } else if (glsl) { - _mesa_UseProgram(clear->ShaderProg); + _mesa_meta_use_program(ctx, clear->ShaderProg); _mesa_Uniform4fv(0, 1, ctx->Color.ClearColor.f); } @@ -2675,25 +2654,17 @@ choose_blit_shader(GLenum target, struct blit_shader_table *table) } void -_mesa_meta_blit_shader_table_cleanup(struct blit_shader_table *table) +_mesa_meta_blit_shader_table_cleanup(struct gl_context *ctx, + struct blit_shader_table *table) { - _mesa_DeleteProgram(table->sampler_1d.shader_prog); - _mesa_DeleteProgram(table->sampler_2d.shader_prog); - _mesa_DeleteProgram(table->sampler_3d.shader_prog); - _mesa_DeleteProgram(table->sampler_rect.shader_prog); - _mesa_DeleteProgram(table->sampler_cubemap.shader_prog); - _mesa_DeleteProgram(table->sampler_1d_array.shader_prog); - _mesa_DeleteProgram(table->sampler_2d_array.shader_prog); - _mesa_DeleteProgram(table->sampler_cubemap_array.shader_prog); - - table->sampler_1d.shader_prog = 0; - table->sampler_2d.shader_prog = 0; - table->sampler_3d.shader_prog = 0; - table->sampler_rect.shader_prog = 0; - table->sampler_cubemap.shader_prog = 0; - table->sampler_1d_array.shader_prog = 0; - table->sampler_2d_array.shader_prog = 0; - table->sampler_cubemap_array.shader_prog = 0; + _mesa_reference_shader_program(ctx, &table->sampler_1d.shader_prog, NULL); + _mesa_reference_shader_program(ctx, &table->sampler_2d.shader_prog, NULL); + _mesa_reference_shader_program(ctx, &table->sampler_3d.shader_prog, NULL); + _mesa_reference_shader_program(ctx, &table->sampler_rect.shader_prog, NULL); + _mesa_reference_shader_program(ctx, &table->sampler_cubemap.shader_prog, NULL); + _mesa_reference_shader_program(ctx, &table->sampler_1d_array.shader_prog, NULL); + _mesa_reference_shader_program(ctx, &table->sampler_2d_array.shader_prog, NULL); + _mesa_reference_shader_program(ctx, 
&table->sampler_cubemap_array.shader_prog, NULL); } /** diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index c2efa50a33d..0a7321c9d88 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -218,7 +218,7 @@ struct blit_shader { const char *type; const char *func; const char *texcoords; - GLuint shader_prog; + struct gl_shader_program *shader_prog; }; /** @@ -302,7 +302,7 @@ struct blit_state struct gl_buffer_object *buf_obj; struct blit_shader_table shaders_with_depth; struct blit_shader_table shaders_without_depth; - GLuint msaa_shaders[BLIT_MSAA_SHADER_COUNT]; + struct gl_shader_program *msaa_shaders[BLIT_MSAA_SHADER_COUNT]; struct temp_texture depthTex; bool no_ctsi_fallback; }; @@ -324,8 +324,8 @@ struct clear_state { GLuint VAO; struct gl_buffer_object *buf_obj; - GLuint ShaderProg; - GLuint IntegerShaderProg; + struct gl_shader_program *ShaderProg; + struct gl_shader_program *IntegerShaderProg; }; @@ -577,20 +577,25 @@ _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, void _mesa_meta_drawbuffers_from_bitfield(GLbitfield bits); -GLuint +struct gl_shader * _mesa_meta_compile_shader_with_debug(struct gl_context *ctx, GLenum target, const GLcharARB *source); -GLuint -_mesa_meta_link_program_with_debug(struct gl_context *ctx, GLuint program); +void +_mesa_meta_link_program_with_debug(struct gl_context *ctx, + struct gl_shader_program *sh_prog); void _mesa_meta_compile_and_link_program(struct gl_context *ctx, const char *vs_source, const char *fs_source, const char *name, - GLuint *program); + struct gl_shader_program **sh_prog_ptr); + +extern void +_mesa_meta_use_program(struct gl_context *ctx, + struct gl_shader_program *sh_prog); GLboolean _mesa_meta_alloc_texture(struct temp_texture *tex, @@ -655,7 +660,8 @@ void _mesa_meta_glsl_blit_cleanup(struct gl_context *ctx, struct blit_state *blit); void -_mesa_meta_blit_shader_table_cleanup(struct blit_shader_table *table); 
+_mesa_meta_blit_shader_table_cleanup(struct gl_context *ctx, + struct blit_shader_table *table); void _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index 179dc0d8630..0066f7f9184 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -105,12 +105,12 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, } if (blit->msaa_shaders[shader_index]) { - _mesa_UseProgram(blit->msaa_shaders[shader_index]); + _mesa_meta_use_program(ctx, blit->msaa_shaders[shader_index]); /* Update the uniform values. */ loc_src_width = - _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_width"); + _mesa_program_resource_location(blit->msaa_shaders[shader_index], GL_UNIFORM, "src_width"); loc_src_height = - _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_height"); + _mesa_program_resource_location(blit->msaa_shaders[shader_index], GL_UNIFORM, "src_height"); _mesa_Uniform1f(loc_src_width, src_rb->Width); _mesa_Uniform1f(loc_src_height, src_rb->Height); return; @@ -237,9 +237,9 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name, &blit->msaa_shaders[shader_index]); loc_src_width = - _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_width"); + _mesa_program_resource_location(blit->msaa_shaders[shader_index], GL_UNIFORM, "src_width"); loc_src_height = - _mesa_GetUniformLocation(blit->msaa_shaders[shader_index], "src_height"); + _mesa_program_resource_location(blit->msaa_shaders[shader_index], GL_UNIFORM, "src_height"); _mesa_Uniform1f(loc_src_width, src_rb->Width); _mesa_Uniform1f(loc_src_height, src_rb->Height); @@ -347,7 +347,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx, } if (blit->msaa_shaders[shader_index]) { - _mesa_UseProgram(blit->msaa_shaders[shader_index]); + _mesa_meta_use_program(ctx, 
blit->msaa_shaders[shader_index]); return; } @@ -1037,8 +1037,8 @@ _mesa_meta_glsl_blit_cleanup(struct gl_context *ctx, struct blit_state *blit) _mesa_reference_buffer_object(ctx, &blit->buf_obj, NULL); } - _mesa_meta_blit_shader_table_cleanup(&blit->shaders_with_depth); - _mesa_meta_blit_shader_table_cleanup(&blit->shaders_without_depth); + _mesa_meta_blit_shader_table_cleanup(ctx, &blit->shaders_with_depth); + _mesa_meta_blit_shader_table_cleanup(ctx, &blit->shaders_without_depth); _mesa_DeleteTextures(1, &blit->depthTex.TexObj); blit->depthTex.TexObj = 0; diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index 892d8d34619..d4b75390ebf 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -134,7 +134,7 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, _mesa_reference_sampler_object(ctx, &mipmap->samp_obj, NULL); _mesa_reference_framebuffer(&mipmap->fb, NULL); - _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders); + _mesa_meta_blit_shader_table_cleanup(ctx, &mipmap->shaders); } static GLboolean diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a953745b114..b45ee5eb69d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -900,7 +900,7 @@ struct brw_context struct brw_cache cache; /** IDs for meta stencil blit shader programs. */ - unsigned meta_stencil_blit_programs[2]; + struct gl_shader_program *meta_stencil_blit_programs[2]; /* Whether a meta-operation is in progress. 
*/ bool meta_in_progress; diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index e2882da403d..1fb5dc83885 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -26,6 +26,7 @@ #include "main/context.h" #include "main/objectlabel.h" #include "main/shaderapi.h" +#include "main/shaderobj.h" #include "main/arrayobj.h" #include "main/bufferobj.h" #include "main/buffers.h" @@ -61,8 +62,8 @@ struct brw_fast_clear_state { struct gl_buffer_object *buf_obj; struct gl_vertex_array_object *array_obj; + struct gl_shader_program *shader_prog; GLuint vao; - GLuint shader_prog; GLint color_location; }; @@ -131,7 +132,7 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) struct gl_context *ctx = &brw->ctx; if (clear->shader_prog) { - _mesa_UseProgram(clear->shader_prog); + _mesa_meta_use_program(ctx, clear->shader_prog); _mesa_Uniform4fv(clear->color_location, 1, color); return; } @@ -141,9 +142,9 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color) &clear->shader_prog); clear->color_location = - _mesa_GetUniformLocation(clear->shader_prog, "color"); + _mesa_program_resource_location(clear->shader_prog, GL_UNIFORM, "color"); - _mesa_UseProgram(clear->shader_prog); + _mesa_meta_use_program(ctx, clear->shader_prog); _mesa_Uniform4fv(clear->color_location, 1, color); } @@ -160,7 +161,7 @@ brw_meta_fast_clear_free(struct brw_context *brw) _mesa_DeleteVertexArrays(1, &clear->vao); _mesa_reference_buffer_object(&brw->ctx, &clear->buf_obj, NULL); - _mesa_DeleteProgram(clear->shader_prog); + _mesa_reference_shader_program(&brw->ctx, &clear->shader_prog, NULL); free(clear); if (old_context) diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index 5b0c2e9bdd5..7e0424846a5 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ 
b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -193,6 +193,9 @@ static const char *fs_tmpl = " %s;\n" "}\n"; +#define get_uniform_loc(sh_prog, name) \ + _mesa_program_resource_location(sh_prog, GL_UNIFORM, name) + /** * Setup uniforms telling the coordinates of the destination rectangle in the * native w-tiled space. These are needed to ignore pixels that lie outside. @@ -201,12 +204,13 @@ static const char *fs_tmpl = * 16x2 y-tiled). */ static void -setup_bounding_rect(GLuint prog, const struct blit_dims *dims) +setup_bounding_rect(struct gl_shader_program *sh_prog, + const struct blit_dims *dims) { - _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_x0"), dims->dst_x0); - _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_x1"), dims->dst_x1); - _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_y0"), dims->dst_y0); - _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_y1"), dims->dst_y1); + _mesa_Uniform1i(get_uniform_loc(sh_prog, "dst_x0"), dims->dst_x0); + _mesa_Uniform1i(get_uniform_loc(sh_prog, "dst_x1"), dims->dst_x1); + _mesa_Uniform1i(get_uniform_loc(sh_prog, "dst_y0"), dims->dst_y0); + _mesa_Uniform1i(get_uniform_loc(sh_prog, "dst_y1"), dims->dst_y1); } /** @@ -215,14 +219,15 @@ setup_bounding_rect(GLuint prog, const struct blit_dims *dims) * between destination and source that may have differing offsets. 
*/ static void -setup_drawing_rect(GLuint prog, const struct blit_dims *dims) +setup_drawing_rect(struct gl_shader_program *sh_prog, + const struct blit_dims *dims) { - _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "draw_rect_w"), + _mesa_Uniform1f(get_uniform_loc(sh_prog, "draw_rect_w"), dims->dst_x1 - dims->dst_x0); - _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "draw_rect_h"), + _mesa_Uniform1f(get_uniform_loc(sh_prog, "draw_rect_h"), dims->dst_y1 - dims->dst_y0); - _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "dst_x_off"), dims->dst_x0); - _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "dst_y_off"), dims->dst_y0); + _mesa_Uniform1f(get_uniform_loc(sh_prog, "dst_x_off"), dims->dst_x0); + _mesa_Uniform1f(get_uniform_loc(sh_prog, "dst_y_off"), dims->dst_y0); } /** @@ -241,7 +246,7 @@ setup_drawing_rect(GLuint prog, const struct blit_dims *dims) * src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale */ static void -setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset, +setup_coord_coeff(GLuint multiplier, GLuint offset, int src_0, int src_1, int dst_0, int dst_1, bool mirror) { const float scale = ((float)(src_1 - src_0)) / (dst_1 - dst_0); @@ -265,22 +270,21 @@ setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset, * destination rectangle is adjusted for possible msaa and Y-tiling. 
*/ static void -setup_coord_transform(GLuint prog, const struct blit_dims *dims) +setup_coord_transform(struct gl_shader_program *sh_prog, + const struct blit_dims *dims) { - setup_coord_coeff(prog, - _mesa_GetUniformLocation(prog, "src_x_scale"), - _mesa_GetUniformLocation(prog, "src_x_off"), + setup_coord_coeff(get_uniform_loc(sh_prog, "src_x_scale"), + get_uniform_loc(sh_prog, "src_x_off"), dims->src_x0, dims->src_x1, dims->dst_x0, dims->dst_x1, dims->mirror_x); - setup_coord_coeff(prog, - _mesa_GetUniformLocation(prog, "src_y_scale"), - _mesa_GetUniformLocation(prog, "src_y_off"), + setup_coord_coeff(get_uniform_loc(sh_prog, "src_y_scale"), + get_uniform_loc(sh_prog, "src_y_off"), dims->src_y0, dims->src_y1, dims->dst_y0, dims->dst_y1, dims->mirror_y); } -static GLuint +static struct gl_shader_program * setup_program(struct brw_context *brw, bool msaa_tex) { struct gl_context *ctx = &brw->ctx; @@ -291,21 +295,22 @@ setup_program(struct brw_context *brw, bool msaa_tex) _mesa_meta_setup_vertex_objects(&brw->ctx, &blit->VAO, &blit->buf_obj, true, 2, 2, 0); - GLuint *prog_id = &brw->meta_stencil_blit_programs[msaa_tex]; + struct gl_shader_program **sh_prog_p = + &brw->meta_stencil_blit_programs[msaa_tex]; - if (*prog_id) { - _mesa_UseProgram(*prog_id); - return *prog_id; + if (*sh_prog_p) { + _mesa_meta_use_program(ctx, *sh_prog_p); + return *sh_prog_p; } fs_source = ralloc_asprintf(NULL, fs_tmpl, sampler->sampler, sampler->fetch); _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, "i965 stencil blit", - prog_id); + sh_prog_p); ralloc_free(fs_source); - return *prog_id; + return *sh_prog_p; } /** @@ -425,7 +430,7 @@ brw_meta_stencil_blit(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct blit_dims dims = *orig_dims; struct fb_tex_blit_state blit; - GLuint prog; + struct gl_shader_program *prog; struct gl_framebuffer *drawFb = NULL; struct gl_renderbuffer *rb = NULL; GLenum target; @@ -467,7 +472,7 @@ brw_meta_stencil_blit(struct 
brw_context *brw, setup_drawing_rect(prog, &dims); setup_coord_transform(prog, orig_dims); - _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_num_samples"), + _mesa_Uniform1i(get_uniform_loc(prog, "dst_num_samples"), dst_mt->num_samples); prepare_vertex_data(ctx, ctx->Meta->Blit.buf_obj); From ea45b6e96d16b04b6a6cbebb5a8f77ba6a46bcf9 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 3 Dec 2015 08:49:13 +0100 Subject: [PATCH 12/26] i965/eu: set correct execution size in brw_NOP v2: NOP should have an execsize of 1 (Matt) Reviewed-by: Matt Turner Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6f11f597492..e8d80ccc2bf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1229,8 +1229,9 @@ brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) void brw_NOP(struct brw_codegen *p) { brw_inst *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_1); + brw_set_dest(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(p, insn, brw_imm_ud(0x0)); } From 2d6af62a0f51f9b8dd8f142debe1c5a8e2a007ef Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 3 Dec 2015 10:59:23 +0100 Subject: [PATCH 13/26] i965/fs: Set exec size for gen7 pull const loads v2 (Topi): - No need to set the execsize for the indirect send message, the next patch will handle that. - Set the execution size explicitly instead of taking it from the width of the dst that we set before. 
Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index b58c938c53c..c883fe3f259 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1264,6 +1264,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4); brw_pop_insn_state(p); brw_set_dest(p, send, dst); From 31a86042522f4f836b503679be8a120e302fb68a Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 3 Dec 2015 11:10:12 +0100 Subject: [PATCH 14/26] i965/eu: set execution size for SEND message in brw_send_indirect_message Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e8d80ccc2bf..68524590b2d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2563,6 +2563,9 @@ brw_send_indirect_message(struct brw_codegen *p, brw_set_src1(p, send, addr); } + if (dst.width < BRW_EXECUTE_8) + brw_inst_set_exec_size(devinfo, send, dst.width); + brw_set_dest(p, send, dst); brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); From f6342b56456582340f622ec6e23627ee07ba711d Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 3 Dec 2015 11:11:14 +0100 Subject: [PATCH 15/26] i965: set correct execsize for MOVS with a width of 4 in brw_find_live_channel Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 68524590b2d..1f3b32e55d8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3334,11 +3334,14 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) /* Overwrite the destination without and with execution masking to * find out which of the channels is active. */ + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), brw_imm_ud(1)); inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), brw_imm_ud(0)); + brw_pop_insn_state(p); brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); } } From 2fafc6b98ccc5b0aecc54eadceef8760bbff4233 Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Thu, 3 Dec 2015 18:05:39 +0100 Subject: [PATCH 16/26] i965/gs/gen6: fix execsize for instructions with width of 4 in gen6_sol_program() v2: - Add assert (Topi). Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_ff_gs_emit.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c index 8589dabe4c5..fea2b93471c 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c @@ -406,9 +406,13 @@ gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, : 0x00020001)); /* (1, 0, 2) */ brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL); } + + assert(c->reg.destination_indices.width == BRW_EXECUTE_4); + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_ADD(p, c->reg.destination_indices, c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); - + brw_pop_insn_state(p); /* For each vertex, generate code to output each varying using the * appropriate binding table entry. 
*/ @@ -438,8 +442,13 @@ gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_4); + brw_MOV(p, stride(c->reg.header, 4, 4, 1), retype(vertex_slot, BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); brw_svb_write(p, final_write ? c->reg.temp : brw_null_reg(), /* dest */ From 30fc3fa24d90c1ceda33ba95832e17c67584e2bc Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Thu, 3 Dec 2015 18:27:39 +0100 Subject: [PATCH 17/26] i965/vec4/gen6: fix exec_size for instructions with width of 4 in generate_gs_svb_write() Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 549b707203f..871b49ad9ab 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -485,10 +485,13 @@ generate_gs_svb_write(struct brw_codegen *p, bool final_write = inst->sol_final_write; brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_4); /* Copy Vertex data into M0.x */ brw_MOV(p, stride(dst, 4, 4, 1), stride(retype(src0, BRW_REGISTER_TYPE_UD), 4, 4, 1)); + brw_pop_insn_state(p); + brw_push_insn_state(p); /* Send SVB Write */ brw_svb_write(p, final_write ? 
src1 : brw_null_reg(), /* dest == src1 */ From b91b9e4b005858bad07eec1f92438a22468ac1ae Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Fri, 4 Dec 2015 10:23:15 +0100 Subject: [PATCH 18/26] i965/vec4/gen6: fix exec_size for instructions with destination width of 4 Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 871b49ad9ab..7063426b48a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1434,6 +1434,7 @@ generate_code(struct brw_codegen *p, assert(inst->mlen <= BRW_MAX_MSG_LENGTH); unsigned pre_emit_nr_insn = p->nr_insn; + bool fix_exec_size = false; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry @@ -1458,6 +1459,8 @@ generate_code(struct brw_codegen *p, if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } + brw_set_default_exec_size(p, BRW_EXECUTE_4); + fix_exec_size = true; } switch (inst->opcode) { @@ -1946,6 +1949,9 @@ generate_code(struct brw_codegen *p, unreachable("Unsupported opcode"); } + if (fix_exec_size) + brw_set_default_exec_size(p, BRW_EXECUTE_8); + if (inst->opcode == VEC4_OPCODE_PACK_BYTES) { /* Handled dependency hints in the generator. 
*/ From 22a10dd0308c4993350e3e0609588a6f4e1cd402 Mon Sep 17 00:00:00 2001 From: Samuel Iglesias Gonsalvez Date: Tue, 15 Dec 2015 09:34:38 +0100 Subject: [PATCH 19/26] i965/vec4/gen6: fix exec_size for MOV with a width of 4 in generate_gs_ff_sync() Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 7063426b48a..621c3321c81 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -705,8 +705,10 @@ generate_gs_ff_sync(struct brw_codegen *p, brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0)); /* src1 is not an immediate when we use transform feedback */ - if (src1.file != BRW_IMMEDIATE_VALUE) + if (src1.file != BRW_IMMEDIATE_VALUE) { + brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); + } brw_pop_insn_state(p); } From 5be11d22368c4fd520983ab78a9ac8fc10d79929 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 3 Dec 2015 09:50:49 +0100 Subject: [PATCH 20/26] i965: Skip execution size adjustment for instructions of width 4 This code in brw_set_dest adjusts the execution size of any instruction with a dst.width < 8. However, we don't want to do this with instructions operating on doubles, since these will have a width of 4, but still need an execution size of 8 (for SIMD8). Unfortunately, we can't just check the size of the operands involved to detect if we are doing an operation on doubles, because we can have instructions that do operations on double operands interpreted as UD, operating on any of its 2 32-bit components. 
Previous commits have made it so we never emit instructions with a horizontal width of 4 that don't have the correct execution size set for gen6+, so we can skip it in this case, avoiding the conflicts with fp64 requirements. Expanding the same fix to other hardware generations requires many more changes but since we are not targeting fp64 support on them we don't really care for now. Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 1f3b32e55d8..0b99356b27d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -202,8 +202,20 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest) /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8) * or 16 (SIMD16), as that's normally correct. However, when dealing with * small registers, we automatically reduce it to match the register size. + * + * In platforms that support fp64 we can emit instructions with a width of + * 4 that need two SIMD8 registers and an exec_size of 8 or 16. In these + * cases we need to make sure that these instructions have their exec sizes + * set properly when they are emitted and we can't rely on this code to fix + * it.
*/ - if (dest.width < BRW_EXECUTE_8) + bool fix_exec_size; + if (devinfo->gen >= 6) + fix_exec_size = dest.width < BRW_EXECUTE_4; + else + fix_exec_size = dest.width < BRW_EXECUTE_8; + + if (fix_exec_size) brw_inst_set_exec_size(devinfo, inst, dest.width); } From 26f8262698d9f48ab5dbb85ef14cb7d5cefd9d53 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 16 Jan 2016 23:11:16 -0800 Subject: [PATCH 21/26] nir/print: Add space after shader_storage var mode Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir_print.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index bdfbd2600c0..231a4f53c65 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -312,7 +312,7 @@ print_var_decl(nir_variable *var, print_state *state) const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? "invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage", "system " }; + "uniform ", "shader_storage ", "system " }; fprintf(fp, "%s%s%s%s%s%s ", cent, samp, patch, inv, mode[var->data.mode], From 3c807607df4ab457037f2daa5ea9e5ce18392b7b Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Fri, 8 Jan 2016 17:16:29 -0800 Subject: [PATCH 22/26] nir: Add compute shader shared variable storage class Previously we were receiving shared variable accesses via a lowered intrinsic function from glsl. This change allows us to send in variables instead. For example, when converting from SPIR-V. 
Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir.c | 6 ++++++ src/compiler/nir/nir.h | 6 +++++- src/compiler/nir/nir_clone.c | 1 + src/compiler/nir/nir_lower_atomics.c | 3 ++- src/compiler/nir/nir_print.c | 7 ++++++- src/compiler/nir/nir_sweep.c | 1 + src/compiler/nir/nir_validate.c | 5 +++++ 7 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index cd78475bdb8..386cdafe161 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -39,6 +39,7 @@ nir_shader_create(void *mem_ctx, exec_list_make_empty(&shader->uniforms); exec_list_make_empty(&shader->inputs); exec_list_make_empty(&shader->outputs); + exec_list_make_empty(&shader->shared); shader->options = options; memset(&shader->info, 0, sizeof(shader->info)); @@ -136,6 +137,11 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var) exec_list_push_tail(&shader->uniforms, &var->node); break; + case nir_var_shared: + assert(shader->stage == MESA_SHADER_COMPUTE); + exec_list_push_tail(&shader->shared, &var->node); + break; + case nir_var_system_value: exec_list_push_tail(&shader->system_values, &var->node); break; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 34f31eb9859..7d2bd03c914 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -90,6 +90,7 @@ typedef enum { nir_var_shader_storage, nir_var_system_value, nir_var_param, + nir_var_shared, } nir_variable_mode; /** @@ -172,7 +173,7 @@ typedef struct nir_variable { * * \sa nir_variable_mode */ - nir_variable_mode mode:4; + nir_variable_mode mode:5; /** * Interpolation mode for shader inputs / outputs @@ -1660,6 +1661,9 @@ typedef struct nir_shader { /** list of outputs (nir_variable) */ struct exec_list outputs; + /** list of shared compute variables (nir_variable) */ + struct exec_list shared; + /** Set of driver-specific options for the shader. 
* * The memory for the options is expected to be kept in a single static diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index 198ca8b9b12..d17d384ed61 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -675,6 +675,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) clone_var_list(&state, &ns->uniforms, &s->uniforms); clone_var_list(&state, &ns->inputs, &s->inputs); clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->shared, &s->shared); clone_var_list(&state, &ns->globals, &s->globals); clone_var_list(&state, &ns->system_values, &s->system_values); diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index 1935a527c6f..eefcb55a0a6 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -63,7 +63,8 @@ lower_instr(nir_intrinsic_instr *instr, } if (instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage) + instr->variables[0]->var->data.mode != nir_var_shader_storage && + instr->variables[0]->var->data.mode != nir_var_shared) return; /* atomics passed as function arguments can't be lowered */ void *mem_ctx = ralloc_parent(instr); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 231a4f53c65..644a21463b1 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -312,7 +312,8 @@ print_var_decl(nir_variable *var, print_state *state) const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? 
"invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage ", "system " }; + "uniform ", "shader_storage ", "shared ", + "system "}; fprintf(fp, "%s%s%s%s%s%s ", cent, samp, patch, inv, mode[var->data.mode], @@ -1069,6 +1070,10 @@ nir_print_shader(nir_shader *shader, FILE *fp) print_var_decl(var, &state); } + nir_foreach_variable(var, &shader->shared) { + print_var_decl(var, &state); + } + nir_foreach_variable(var, &shader->globals) { print_var_decl(var, &state); } diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c index 0710bdba7c7..5c62154ec7f 100644 --- a/src/compiler/nir/nir_sweep.c +++ b/src/compiler/nir/nir_sweep.c @@ -159,6 +159,7 @@ nir_sweep(nir_shader *nir) steal_list(nir, nir_variable, &nir->uniforms); steal_list(nir, nir_variable, &nir->inputs); steal_list(nir, nir_variable, &nir->outputs); + steal_list(nir, nir_variable, &nir->shared); steal_list(nir, nir_variable, &nir->globals); steal_list(nir, nir_variable, &nir->system_values); steal_list(nir, nir_register, &nir->registers); diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 0c9d816a384..0c32d5fe07a 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -1047,6 +1047,11 @@ nir_validate_shader(nir_shader *shader) validate_var_decl(var, true, &state); } + exec_list_validate(&shader->shared); + nir_foreach_variable(var, &shader->shared) { + validate_var_decl(var, true, &state); + } + exec_list_validate(&shader->globals); nir_foreach_variable(var, &shader->globals) { validate_var_decl(var, true, &state); From 683c359c543f4152150fcc52c7a75b9f6a71d1e4 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:53:44 -0800 Subject: [PATCH 23/26] nir: Add atomic operations on variables This allows us to first generate atomic operations for shared variables using these opcodes, and then later we can lower those to the shared atomics intrinsics with 
nir_lower_io. Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir_intrinsics.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index 00725562874..3ba1563a59c 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -175,6 +175,33 @@ INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx, INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* + * variable atomic intrinsics + * + * All of these variable atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new value + * to memory, and return the original value read. + * + * All operations take 1 source except CompSwap that takes 2. These sources + * represent: + * + * 0: The data parameter to the atomic function (i.e. the value to add + * in var_atomic_add, etc). + * 1: For CompSwap only: the second data parameter. + * + * All operations take 1 variable deref. 
+ */ +INTRINSIC(var_atomic_add, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_imin, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_umin, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_imax, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_umax, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_and, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_or, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_xor, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_exchange, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 0) +INTRINSIC(var_atomic_comp_swap, 2, ARR(1, 1), true, 1, 1, 0, xx, xx, xx, 0) + /* * SSBO atomic intrinsics * From e3cbb9d37ce4e3efafce834deef3ce814c3777ce Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:44:31 -0800 Subject: [PATCH 24/26] nir: Add support for lowering load/stores of shared variables Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir.c | 1 + src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_clone.c | 1 + src/compiler/nir/nir_lower_io.c | 35 ++++++++++++++++++++++++++------- src/compiler/nir/nir_print.c | 1 + 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 386cdafe161..655dc884382 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -53,6 +53,7 @@ nir_shader_create(void *mem_ctx, shader->num_inputs = 0; shader->num_outputs = 0; shader->num_uniforms = 0; + shader->num_shared = 0; shader->stage = stage; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7d2bd03c914..00f107d4243 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1692,7 +1692,7 @@ typedef struct nir_shader { * the highest index a load_input_*, load_uniform_*, etc. 
intrinsic can * access plus one */ - unsigned num_inputs, num_uniforms, num_outputs; + unsigned num_inputs, num_uniforms, num_outputs, num_shared; /** The shader stage, such as MESA_SHADER_VERTEX. */ gl_shader_stage stage; diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index d17d384ed61..0b426e940b4 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -705,6 +705,7 @@ nir_shader_clone(void *mem_ctx, const nir_shader *s) ns->num_inputs = s->num_inputs; ns->num_uniforms = s->num_uniforms; ns->num_outputs = s->num_outputs; + ns->num_shared = s->num_shared; free_clone_state(&state); diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 11fb973a237..f844947549d 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -160,6 +160,29 @@ load_op(struct lower_io_state *state, case nir_var_uniform: op = nir_intrinsic_load_uniform; break; + case nir_var_shared: + op = nir_intrinsic_load_shared; + break; + default: + unreachable("Unknown variable mode"); + } + return op; +} + +static nir_intrinsic_op +store_op(struct lower_io_state *state, + nir_variable_mode mode, bool per_vertex) +{ + nir_intrinsic_op op; + switch (mode) { + case nir_var_shader_in: + case nir_var_shader_out: + op = per_vertex ? 
nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; + break; + case nir_var_shared: + op = nir_intrinsic_store_shared; + break; default: unreachable("Unknown variable mode"); } @@ -190,6 +213,7 @@ nir_lower_io_block(nir_block *block, void *void_state) if (mode != nir_var_shader_in && mode != nir_var_shader_out && + mode != nir_var_shared && mode != nir_var_uniform) continue; @@ -236,7 +260,7 @@ nir_lower_io_block(nir_block *block, void *void_state) } case nir_intrinsic_store_var: { - assert(mode == nir_var_shader_out); + assert(mode == nir_var_shader_out || mode == nir_var_shared); nir_ssa_def *offset; nir_ssa_def *vertex_index; @@ -248,12 +272,9 @@ nir_lower_io_block(nir_block *block, void *void_state) per_vertex ? &vertex_index : NULL, state->type_size); - nir_intrinsic_op store_op = - per_vertex ? nir_intrinsic_store_per_vertex_output : - nir_intrinsic_store_output; - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, - store_op); + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(state->mem_ctx, + store_op(state, mode, per_vertex)); store->num_components = intrin->num_components; nir_src_copy(&store->src[0], &intrin->src[0], store); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 644a21463b1..b8943b83f46 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1057,6 +1057,7 @@ nir_print_shader(nir_shader *shader, FILE *fp) fprintf(fp, "inputs: %u\n", shader->num_inputs); fprintf(fp, "outputs: %u\n", shader->num_outputs); fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + fprintf(fp, "shared: %u\n", shader->num_shared); nir_foreach_variable(var, &shader->uniforms) { print_var_decl(var, &state); From b1e7cdfdcf744e083bca5afdc7537e1c4a4042af Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:59:19 -0800 Subject: [PATCH 25/26] nir: Lower shared var atomics during nir_lower_io Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand --- 
src/compiler/nir/nir_lower_io.c | 87 ++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index f844947549d..408a221355d 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -189,6 +189,27 @@ store_op(struct lower_io_state *state, return op; } +static nir_intrinsic_op +atomic_op(nir_intrinsic_op opcode) +{ + switch (opcode) { +#define OP(O) case nir_intrinsic_var_##O: return nir_intrinsic_shared_##O; + OP(atomic_exchange) + OP(atomic_comp_swap) + OP(atomic_add) + OP(atomic_imin) + OP(atomic_umin) + OP(atomic_imax) + OP(atomic_umax) + OP(atomic_and) + OP(atomic_or) + OP(atomic_xor) +#undef OP + default: + unreachable("Invalid atomic"); + } +} + static bool nir_lower_io_block(nir_block *block, void *void_state) { @@ -202,9 +223,25 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_var && - intrin->intrinsic != nir_intrinsic_store_var) + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + case nir_intrinsic_store_var: + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: + /* We can lower the io for this nir intrinsic */ + break; + default: + /* We can't lower the io for this nir intrinsic, so skip it */ continue; + } nir_variable_mode mode = intrin->variables[0]->var->data.mode; @@ -293,6 +330,52 @@ nir_lower_io_block(nir_block *block, void *void_state) break; } + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case 
nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: { + assert(mode == nir_var_shared); + + nir_ssa_def *offset; + + offset = get_io_offset(b, intrin->variables[0], + NULL, state->type_size); + + nir_intrinsic_instr *atomic = + nir_intrinsic_instr_create(state->mem_ctx, + atomic_op(intrin->intrinsic)); + + atomic->src[0] = nir_src_for_ssa(offset); + + atomic->const_index[0] = + intrin->variables[0]->var->data.driver_location; + + for (unsigned i = 0; + i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; + i++) { + nir_src_copy(&atomic->src[i+1], &intrin->src[i], atomic); + } + + if (intrin->dest.is_ssa) { + nir_ssa_dest_init(&atomic->instr, &atomic->dest, + intrin->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&atomic->dest.ssa)); + } else { + nir_dest_copy(&atomic->dest, &intrin->dest, state->mem_ctx); + } + + nir_instr_insert_before(&intrin->instr, &atomic->instr); + nir_instr_remove(&intrin->instr); + break; + } + default: break; } From 7d021cb15e6d67ecef8b020fd36c4a680bcc9c39 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Mon, 18 Jan 2016 09:45:46 -0800 Subject: [PATCH 26/26] i965/nir: Lower nir compute shader shared variables Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_nir.c | 8 ++++++++ src/mesa/drivers/dri/i965/brw_nir.h | 1 + 3 files changed, 11 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 86d2bd92726..eaff9535c22 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5739,6 +5739,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, 
src_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, true); + brw_nir_lower_cs_shared(shader); + prog_data->base.total_shared += shader->num_shared; shader = brw_postprocess_nir(shader, compiler->devinfo, true); prog_data->local_size[0] = shader->info.cs.local_size[0]; diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index a5949d5d6eb..24350460466 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -377,6 +377,14 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) } } +void +brw_nir_lower_cs_shared(nir_shader *nir) +{ + nir_assign_var_locations(&nir->shared, &nir->num_shared, + type_size_scalar_bytes); + nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes); +} + #define OPT(pass, ...) ({ \ bool this_progress = false; \ NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 2d8341fd40e..440b4ceb669 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -95,6 +95,7 @@ void brw_nir_lower_fs_inputs(nir_shader *nir); void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar); void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue); void brw_nir_lower_fs_outputs(nir_shader *nir); +void brw_nir_lower_cs_shared(nir_shader *nir); nir_shader *brw_postprocess_nir(nir_shader *nir, const struct brw_device_info *devinfo,